\r
#include "FFT.h"\r
\r
+int stage = 1;\r
/**\r
* Executes a single butterfly on ALU 0-3. The inputs are the words taken from\r
* in, which will be read on various inputs of ALU 0-3. Outputs will be\r
/* ALU 0 & 1 */\r
/* im(W) * im(b) */\r
aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im)));\r
+ \r
/* re(W * b) = re(W) * re(b) - im(W) * im(b) */\r
aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi);\r
+\r
\r
/* re(out_a) = re(a) + re(W * b) */\r
out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr));\r
add_offset(m.output_b_re, 2);\r
add_offset(m.output_b_im, 2);\r
\r
- if (second_half) {\r
+ if (!second_half) {\r
write_mem(m.output_a_re, res.a_re);\r
write_mem(m.output_a_im, res.a_im);\r
write_mem(m.output_b_re, res.b_re);\r
add_offset(m.input_a_im, 1);\r
add_offset(m.input_b_re, 1);\r
add_offset(m.input_b_im, 1);\r
- /* TODO: Update twiddle offsets */\r
+ \r
+ /* TODO: Verify the twiddle update below: offset step (PARAM_N_t >> stage) and mask ((PARAM_N_t / 2) - 1). */\r
+ add_offset(m.twiddle_re, (PARAM_N_t>>stage));\r
+ add_offset(m.twiddle_im, (PARAM_N_t>>stage));\r
+ use_mask(m.twiddle_re, (PARAM_N_t/2)-1);\r
+ use_mask(m.twiddle_im, (PARAM_N_t/2)-1);\r
+\r
return in;\r
}\r
\r
set_offset(m.output_b_re, 0-2);\r
set_offset(m.output_b_im, 0-2);\r
} else {\r
- set_offset(m.output_a_re, 1-2);\r
- set_offset(m.output_a_im, 1-2);\r
- set_offset(m.output_b_re, 0-2);\r
- set_offset(m.output_b_im, 0-2);\r
+ set_offset(m.output_a_re, 0-2);\r
+ set_offset(m.output_a_im, 0-2);\r
+ set_offset(m.output_b_re, 1-2);\r
+ set_offset(m.output_b_im, 1-2);\r
}\r
}\r
\r
struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd);\r
struct bf_out out = butterfly(in);\r
\r
- /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2\r
+ /* Now, do half a single stage. That means N_t / 4 cycles. Since we do 2\r
* cycles on every iteration, plus one before and after the loop,\r
- * we will loop N_t / 4 - 1 times. */\r
- init_loop(LC2, (N_t / 4) - 1);\r
- do {\r
+ * we will loop N_t / 8 - 1 times. We add an extra - 1 because this is a do while loop... */\r
+ init_loop(LC2, (PARAM_N_t / 8) - 1);\r
+ while (loop_next(LC2)) {\r
/* Write outputs of previous cycle */\r
write_output_regular(m, out, second_half);\r
\r
/* Even cycle */\r
in = read_input_regular(m, EVEN_CYCLE, second_half);\r
out = butterfly(in);\r
- } while (loop_next(LC2));\r
+ }\r
\r
/* Write outputs of previous cycle */\r
write_output_regular(m, out, second_half);\r
void run() { /* Blocks until gpi(0) reads non-zero, then runs the regular stages in EVEN_STAGE/ODD_STAGE pairs. */\r
 do { freeze(); } while (gpi(0) == 0); /* keep calling freeze() for as long as gpi(0) is 0 */\r
 struct mems m; /* assigned from init_mem_mapping() for each stage */\r
- \r
- m = init_mem_mapping(EVEN_STAGE);\r
- init_input_addresses_regular(m, EVEN_STAGE);\r
- /* do_half_regular_stage will init output addresses */\r
- next_cycle();\r
- do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);\r
- do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);\r
- next_cycle();\r
- init_input_addresses_regular(m, ODD_STAGE);\r
- m = init_mem_mapping(ODD_STAGE);\r
- next_cycle();\r
- do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);\r
- do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);\r
+\r
+ /* We need to do n_t regular stages. Since we do two stages each\r
+ * iteration, we'll do n_t / 2 iterations. */\r
+ init_loop(LC1, (PARAM_n_t / 2)); /* LC1 counts pairs of stages */\r
+ while (loop_next(LC1)) {\r
+ m = init_mem_mapping(EVEN_STAGE);\r
+ init_input_addresses_regular(m, EVEN_STAGE);\r
+ /* do_half_regular_stage will init output addresses */\r
+ next_cycle();\r
+ do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);\r
+ do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);\r
+ stage++; /* even stage done; advance the file-level stage counter */\r
+ next_cycle();\r
+ init_input_addresses_regular(m, ODD_STAGE); /* NOTE(review): called with the even-stage mapping, before m is reassigned on the next line; the even stage makes these two calls in the opposite order -- confirm this is intended */\r
+ m = init_mem_mapping(ODD_STAGE);\r
+ next_cycle();\r
+ do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);\r
+ do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);\r
+ stage++; /* odd stage done; advance the file-level stage counter */\r
+ }\r
}\r