X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=FFT.mc;h=5969315c8a174d1cadc4b8e49e60bee63f813f1c;hb=7a5f3f2a9974d0fa0f3c888944d9120b22f254eb;hp=330b7f5be67632df7d362477e7651dc8492ac31c;hpb=7b57bd2ad91ccd7b8faff015adf1cbe87ddff3ea;p=matthijs%2Fprojects%2Fmontium-fft.git diff --git a/FFT.mc b/FFT.mc index 330b7f5..5969315 100644 --- a/FFT.mc +++ b/FFT.mc @@ -6,6 +6,7 @@ #include "FFT.h" +int stage = 1; /** * Executes a single butterfly on ALU 0-3. The inputs are the words taken from * in, which will be read on various inputs of ALU 0-3. Outputs will be @@ -16,8 +17,10 @@ INLINE struct bf_out butterfly(struct bf_in in) { /* ALU 0 & 1 */ /* im(W) * im(b) */ aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im))); + /* re(W * b) = re(W) * re(b) - im(W) * im(b) */ aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi); + /* re(out_a) = re(a) + re(W * b) */ out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr)); @@ -49,7 +52,7 @@ INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_h add_offset(m.output_b_re, 2); add_offset(m.output_b_im, 2); - if (second_half) { + if (!second_half) { write_mem(m.output_a_re, res.a_re); write_mem(m.output_a_im, res.a_im); write_mem(m.output_b_re, res.b_re); @@ -96,7 +99,13 @@ INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage add_offset(m.input_a_im, 1); add_offset(m.input_b_re, 1); add_offset(m.input_b_im, 1); - /* TODO: Update twiddle offsets */ + + /* TODO: Is this true? */ + add_offset(m.twiddle_re, (PARAM_N_t>>stage)); + add_offset(m.twiddle_im, (PARAM_N_t>>stage)); + use_mask(m.twiddle_re, (PARAM_N_t/2)-1); + use_mask(m.twiddle_im, (PARAM_N_t/2)-1); + return in; } @@ -147,10 +156,10 @@ INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool se set_offset(m.output_b_re, 0-2); set_offset(m.output_b_im, 0-2); } else { - set_offset(m.output_a_re, 1-2); - set_offset(m.output_a_im, 1-2); - set_offset(m.output_b_re, 0-2); - set_offset(m.output_b_im, 0-2); + set_offset(m.output_a_re, 0-2); + set_offset(m.output_a_im, 0-2); + set_offset(m.output_b_re, 1-2); + set_offset(m.output_b_im, 1-2); } } @@ -176,10 +185,10 @@ INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_hal struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd); struct bf_out out = butterfly(in); - /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2 + /* Now, do half a single stage. That means N_t / 4 cycles. Since we do 2 * cycles on every iteration, plus one before and after the loop, - * we will loop N_t / 4 - 1 times. */ - init_loop(LC2, (PARAM_N_t / 4) - 1); + * we will loop N_t / 8 - 1 times. We add an extra - 1 because this is a do while loop... */ + init_loop(LC2, (PARAM_N_t / 8) - 1 - 1); do { /* Write outputs of previous cycle */ write_output_regular(m, out, second_half); @@ -243,17 +252,24 @@ INLINE struct mems init_mem_mapping(bool stage_odd){ void run() { do { freeze(); } while (gpi(0) == 0); struct mems m; - - m = init_mem_mapping(EVEN_STAGE); - init_input_addresses_regular(m, EVEN_STAGE); - /* do_half_regular_stage will init output addresses */ - next_cycle(); - do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF); - do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF); - next_cycle(); - init_input_addresses_regular(m, ODD_STAGE); - m = init_mem_mapping(ODD_STAGE); - next_cycle(); - do_half_regular_stage(m, ODD_STAGE, FIRST_HALF); - do_half_regular_stage(m, ODD_STAGE, SECOND_HALF); + + /* We need to do n_t regular stages. Since we do two stages each + * iteration, we'll do n_t / 2 iterations (and a -1 because we check after looping) */ + init_loop(LC1, (PARAM_n_t / 2) - 1); + do { + m = init_mem_mapping(EVEN_STAGE); + init_input_addresses_regular(m, EVEN_STAGE); + /* do_half_regular_stage will init output addresses */ + next_cycle(); + do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF); + do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF); + stage++; + next_cycle(); + init_input_addresses_regular(m, ODD_STAGE); + m = init_mem_mapping(ODD_STAGE); + next_cycle(); + do_half_regular_stage(m, ODD_STAGE, FIRST_HALF); + do_half_regular_stage(m, ODD_STAGE, SECOND_HALF); + stage++; + } while (loop_next(LC1)); }