/* ALU 0 & 1 */\r
/* im(W) * im(b) */\r
aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im)));\r
+ \r
/* re(W * b) = re(W) * re(b) - im(W) * im(b) */\r
aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi);\r
+\r
\r
/* re(out_a) = re(a) + re(W * b) */\r
out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr));\r
set_offset(m.output_b_re, 0-2);\r
set_offset(m.output_b_im, 0-2);\r
} else {\r
- set_offset(m.output_a_re, 1-2);\r
- set_offset(m.output_a_im, 1-2);\r
- set_offset(m.output_b_re, 0-2);\r
- set_offset(m.output_b_im, 0-2);\r
+ set_offset(m.output_a_re, 0-2);\r
+ set_offset(m.output_a_im, 0-2);\r
+ set_offset(m.output_b_re, 1-2);\r
+ set_offset(m.output_b_im, 1-2);\r
}\r
}\r
\r
struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd);\r
struct bf_out out = butterfly(in);\r
\r
- /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2\r
+ /* Now, do half a single stage. That means N_t / 4 cycles. Since we do 2\r
* cycles on every iteration, plus one before and after the loop,\r
- * we will loop N_t / 4 - 1 times. */\r
- init_loop(LC2, (PARAM_N_t / 4) - 1);\r
+ * we will loop N_t / 8 - 1 times. */\r
+ init_loop(LC2, (PARAM_N_t / 8) - 1);\r
do {\r
/* Write outputs of previous cycle */\r
write_output_regular(m, out, second_half);\r
void run() {\r
do { freeze(); } while (gpi(0) == 0);\r
struct mems m;\r
- \r
- m = init_mem_mapping(EVEN_STAGE);\r
- init_input_addresses_regular(m, EVEN_STAGE);\r
- /* do_half_regular_stage will init output addresses */\r
- next_cycle();\r
- do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);\r
- do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);\r
- next_cycle();\r
- init_input_addresses_regular(m, ODD_STAGE);\r
- m = init_mem_mapping(ODD_STAGE);\r
- next_cycle();\r
- do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);\r
- do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);\r
+\r
+ /* We need to do n_t regular stages. Since we do two stages each\r
+ * iteration (one EVEN_STAGE followed by one ODD_STAGE, each run as a\r
+ * FIRST_HALF and a SECOND_HALF), we'll do n_t / 2 iterations.\r
+ * NOTE(review): n_t / 2 is an integer division, so an odd PARAM_n_t\r
+ * would presumably run one stage fewer than n_t -- confirm that\r
+ * PARAM_n_t is always even. */\r
+ init_loop(LC1, (PARAM_n_t / 2));\r
+ do {\r
+ m = init_mem_mapping(EVEN_STAGE);\r
+ init_input_addresses_regular(m, EVEN_STAGE);\r
+ /* do_half_regular_stage will init output addresses */\r
+ next_cycle();\r
+ do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);\r
+ do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);\r
+ next_cycle();\r
+ init_input_addresses_regular(m, ODD_STAGE);\r
+ m = init_mem_mapping(ODD_STAGE);\r
+ next_cycle();\r
+ do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);\r
+ do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);\r
+ } while (loop_next(LC1));\r
}\r
\r
\r
/* Didn't the Montium use Q15 instead of Q14? */\r
-#define FIXED_POINT 14\r
+#define FIXED_POINT 15\r
#define WORD_SIZE 16\r
\r
#define WORDS_PER_LINE 4\r
input_a_im = alloc_mem(P1M0);\r
input_b_re = alloc_mem(P2M0);\r
input_b_im = alloc_mem(P3M0);\r
- output_a_re = alloc_mem(P0M1);\r
- output_a_im = alloc_mem(P1M1);\r
- output_b_re = alloc_mem(P2M1);\r
- output_b_im = alloc_mem(P3M1);\r
+ \r
+ twiddle_re = alloc_mem(P4M0);\r
+ twiddle_im = alloc_mem(P4M1);\r
\r
/* TODO: Init memory and twiddles */\r
- for (i=0;i<SIZE/2;i++)\r
+ for (i=0;i<PARAM_N_t/2;i++)\r
{\r
- set_mem(twiddle_re->id, i, to_fixed(cos(2*M_PI/SIZE*i)));\r
- set_mem(twiddle_im->id, i, to_fixed(sin(2*M_PI/SIZE*i)));\r
+ set_mem(twiddle_re->id, i, to_fixed(cos(i*2*M_PI/PARAM_N_t)));\r
+ set_mem(twiddle_im->id, i, to_fixed(sin(i*2*M_PI/PARAM_N_t)));\r
}\r
\r
- for (i=0;i<SIZE;i++)\r
+ for (i=0;i<PARAM_N_t;i++)\r
{\r
- int value = to_fixed(sin((float)i/SIZE*2*2*M_PI));\r
- if (i<SIZE/2)\r
+ int value = to_fixed(sin((float)i*2*M_PI/PARAM_N_t));\r
+\r
+ if (i<PARAM_N_t/2)\r
{\r
- set_mem(input_a_re->id, i, value);\r
- set_mem(input_a_im->id, i, 0);\r
+ if (i % 2 == 0) {\r
+ set_mem(input_a_re->id, i, value);\r
+ set_mem(input_a_im->id, i, 0);\r
+ } else {\r
+ set_mem(input_b_re->id, i, value);\r
+ set_mem(input_b_im->id, i, 0);\r
+ }\r
}\r
else\r
{\r
- set_mem(input_a_re->id, i - SIZE / 2, value);\r
- set_mem(input_a_im->id, i - SIZE / 2, 0);\r
+ if (i % 2 == 0) {\r
+ set_mem(input_b_re->id, i - PARAM_N_t/2, value);\r
+ set_mem(input_b_im->id, i - PARAM_N_t/2, 0);\r
+ } else {\r
+ set_mem(input_a_re->id, i - PARAM_N_t/2, value);\r
+ set_mem(input_a_im->id, i - PARAM_N_t/2, 0);\r
+ }\r
}\r
}\r
-}\r
-\r
-void post_run()\r
-{\r
+ \r
printf("re(W)\n");\r
- print_mem(twiddle_re, 0, SIZE, true);\r
+ print_mem(twiddle_re, 0, PARAM_N_t/2, true);\r
printf("im(W)\n");\r
- print_mem(twiddle_im, 0, SIZE, true);\r
+ print_mem(twiddle_im, 0, PARAM_N_t/2, true);\r
printf("re(in_a)\n");\r
- print_mem(input_a_re, 0, SIZE, true);\r
+ print_mem(input_a_re, 0, PARAM_N_t/2, true);\r
printf("re(in_b)\n");\r
- print_mem(input_b_re, 0, SIZE, true);\r
+ print_mem(input_b_re, 0, PARAM_N_t/2, true);\r
+}\r
+\r
+void post_run()\r
+{\r
+ if (PARAM_n_t % 2 == 0) {\r
+ /* When the number of stages is even, the\r
+ * outputs end up at the left (M0) memories again */\r
+ output_a_re = alloc_mem(P0M0);\r
+ output_a_im = alloc_mem(P1M0);\r
+ output_b_re = alloc_mem(P2M0);\r
+ output_b_im = alloc_mem(P3M0);\r
+ } else {\r
+ output_a_re = alloc_mem(P0M1);\r
+ output_a_im = alloc_mem(P1M1);\r
+ output_b_re = alloc_mem(P2M1);\r
+ output_b_im = alloc_mem(P3M1);\r
+ }\r
printf("re(out_a)\n");\r
- print_mem(output_a_re, 0, SIZE, true);\r
+ print_mem(output_a_re, 0, PARAM_N_t/2, true);\r
+ print_mem(output_b_re, 0, PARAM_N_t/2, true);\r
printf("im(out_a)\n");\r
- print_mem(output_a_im, 0, SIZE, true);\r
+ print_mem(output_a_im, 0, PARAM_N_t/2, true);\r
+ print_mem(output_b_im, 0, PARAM_N_t/2, true);\r
+\r
}\r