From a62dbd706365d44bdcac9f8ac4c7c6dd05484642 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 27 Mar 2008 16:45:56 +0100 Subject: [PATCH] * Actually make the output addressing different in the second half, it was still the same as the first half. * Actually do half a stage in do_half_regular_stage() instead of a full stage. * Let run do n_t stages instead of just two. * Call init_libmontiumc() in our main(), so libmontiumc actually works. --- FFT.mc | 47 +++++++++++++++++------------- FFT_support.cpp | 77 +++++++++++++++++++++++++++++++++---------------- main.cpp | 3 +- 3 files changed, 81 insertions(+), 46 deletions(-) diff --git a/FFT.mc b/FFT.mc index 330b7f5..3ad0816 100644 --- a/FFT.mc +++ b/FFT.mc @@ -16,8 +16,10 @@ INLINE struct bf_out butterfly(struct bf_in in) { /* ALU 0 & 1 */ /* im(W) * im(b) */ aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im))); + /* re(W * b) = re(W) * re(b) - im(W) * im(b) */ aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi); + /* re(out_a) = re(a) + re(W * b) */ out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr)); @@ -147,10 +149,10 @@ INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool se set_offset(m.output_b_re, 0-2); set_offset(m.output_b_im, 0-2); } else { - set_offset(m.output_a_re, 1-2); - set_offset(m.output_a_im, 1-2); - set_offset(m.output_b_re, 0-2); - set_offset(m.output_b_im, 0-2); + set_offset(m.output_a_re, 0-2); + set_offset(m.output_a_im, 0-2); + set_offset(m.output_b_re, 1-2); + set_offset(m.output_b_im, 1-2); } } @@ -176,10 +178,10 @@ INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_hal struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd); struct bf_out out = butterfly(in); - /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2 + /* Now, do half a single stage. That means N_t / 4 cycles. Since we do 2 * cycles on every iteration, plus one before and after the loop, - * we will loop N_t / 4 - 1 times. 
*/ - init_loop(LC2, (PARAM_N_t / 4) - 1); + * we will loop N_t / 8 - 1 times. */ + init_loop(LC2, (PARAM_N_t / 8) - 1); do { /* Write outputs of previous cycle */ write_output_regular(m, out, second_half); @@ -243,17 +245,22 @@ INLINE struct mems init_mem_mapping(bool stage_odd){ void run() { do { freeze(); } while (gpi(0) == 0); struct mems m; - - m = init_mem_mapping(EVEN_STAGE); - init_input_addresses_regular(m, EVEN_STAGE); - /* do_half_regular_stage will init output addresses */ - next_cycle(); - do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF); - do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF); - next_cycle(); - init_input_addresses_regular(m, ODD_STAGE); - m = init_mem_mapping(ODD_STAGE); - next_cycle(); - do_half_regular_stage(m, ODD_STAGE, FIRST_HALF); - do_half_regular_stage(m, ODD_STAGE, SECOND_HALF); + + /* We need to do n_t regular stages. Since we do two stages each + * iteration, we'll do n_t / 2 iterations. */ + init_loop(LC1, (PARAM_n_t / 2)); + do { + m = init_mem_mapping(EVEN_STAGE); + init_input_addresses_regular(m, EVEN_STAGE); + /* do_half_regular_stage will init output addresses */ + next_cycle(); + do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF); + do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF); + next_cycle(); + init_input_addresses_regular(m, ODD_STAGE); + m = init_mem_mapping(ODD_STAGE); + next_cycle(); + do_half_regular_stage(m, ODD_STAGE, FIRST_HALF); + do_half_regular_stage(m, ODD_STAGE, SECOND_HALF); + } while (loop_next(LC1)); } diff --git a/FFT_support.cpp b/FFT_support.cpp index 6f21e5a..23d07f0 100644 --- a/FFT_support.cpp +++ b/FFT_support.cpp @@ -4,7 +4,7 @@ /* Didn't the Montium use Q15 instead of Q14? 
*/ -#define FIXED_POINT 14 +#define FIXED_POINT 15 #define WORD_SIZE 16 #define WORDS_PER_LINE 4 @@ -55,46 +55,73 @@ void pre_run() input_a_im = alloc_mem(P1M0); input_b_re = alloc_mem(P2M0); input_b_im = alloc_mem(P3M0); - output_a_re = alloc_mem(P0M1); - output_a_im = alloc_mem(P1M1); - output_b_re = alloc_mem(P2M1); - output_b_im = alloc_mem(P3M1); + + twiddle_re = alloc_mem(P4M0); + twiddle_im = alloc_mem(P4M1); /* TODO: Init memory and twiddles */ - for (i=0;iid, i, to_fixed(cos(2*M_PI/SIZE*i))); - set_mem(twiddle_im->id, i, to_fixed(sin(2*M_PI/SIZE*i))); + set_mem(twiddle_re->id, i, to_fixed(cos(i*2*M_PI/PARAM_N_t))); + set_mem(twiddle_im->id, i, to_fixed(sin(i*2*M_PI/PARAM_N_t))); } - for (i=0;iid, i, value); - set_mem(input_a_im->id, i, 0); + if (i % 2 == 0) { + set_mem(input_a_re->id, i, value); + set_mem(input_a_im->id, i, 0); + } else { + set_mem(input_b_re->id, i, value); + set_mem(input_b_im->id, i, 0); + } } else { - set_mem(input_a_re->id, i - SIZE / 2, value); - set_mem(input_a_im->id, i - SIZE / 2, 0); + if (i % 2 == 0) { + set_mem(input_b_re->id, i - PARAM_N_t/2, value); + set_mem(input_b_im->id, i - PARAM_N_t/2, 0); + } else { + set_mem(input_a_re->id, i - PARAM_N_t/2, value); + set_mem(input_a_im->id, i - PARAM_N_t/2, 0); + } } } -} - -void post_run() -{ + printf("re(W)\n"); - print_mem(twiddle_re, 0, SIZE, true); + print_mem(twiddle_re, 0, PARAM_N_t/2, true); printf("im(W)\n"); - print_mem(twiddle_im, 0, SIZE, true); + print_mem(twiddle_im, 0, PARAM_N_t/2, true); printf("re(in_a)\n"); - print_mem(input_a_re, 0, SIZE, true); + print_mem(input_a_re, 0, PARAM_N_t/2, true); printf("re(in_b)\n"); - print_mem(input_b_re, 0, SIZE, true); + print_mem(input_b_re, 0, PARAM_N_t/2, true); +} + +void post_run() +{ + if (PARAM_n_t % 2 == 0) { + /* When the number of stages is even, the + * outputs end up at the left memories again */ + output_a_re = alloc_mem(P0M0); + output_a_im = alloc_mem(P1M0); + output_b_re = alloc_mem(P2M0); + output_b_im = 
alloc_mem(P3M0); + } else { + output_a_re = alloc_mem(P0M1); + output_a_im = alloc_mem(P1M1); + output_b_re = alloc_mem(P2M1); + output_b_im = alloc_mem(P3M1); + } printf("re(out_a)\n"); - print_mem(output_a_re, 0, SIZE, true); + print_mem(output_a_re, 0, PARAM_N_t/2, true); + print_mem(output_b_re, 0, PARAM_N_t/2, true); printf("im(out_a)\n"); - print_mem(output_a_im, 0, SIZE, true); + print_mem(output_a_im, 0, PARAM_N_t/2, true); + print_mem(output_b_im, 0, PARAM_N_t/2, true); + } diff --git a/main.cpp b/main.cpp index 3c2d4a6..f8cec10 100644 --- a/main.cpp +++ b/main.cpp @@ -1,7 +1,8 @@ #include "FFT.h" int main(int argc, char* argv[]) { - + init_libmontiumc(); + pre_run(); set_gpi(0, 1); run(); -- 2.30.2