#ifndef __MONTIUMCC__
/* Host-only headers. The original header names were lost in extraction;
 * stdio.h, stdlib.h and math.h are assumptions. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#endif
#include "FFT.h"

word in_a_re, in_a_im, in_b_re, in_b_im, in_W_re, in_W_im; //out_a_re, out_a_im, out_b_re, out_b_im;
//mem input_a_re, input_a_im, input_b_re, input_b_im, output_a_re, output_a_im, output_b_re, output_b_im, twiddle_re, twiddle_im;

void init()
{
}

/*void update_gpi() {
	set_gpi(0, 1);
}*/

INLINE struct bf_out butterfly(struct bf_in in)
{
	struct bf_out out;

	/* ALU 0 & 1 */
	/* im(W) * im(b) */
	aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im)));
	/* re(W * b) = re(W) * re(b) - im(W) * im(b) */
	aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi);

	/* re(out_a) = re(a) + re(W * b) */
	out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr));
	/* re(out_b) = re(a) - re(W * b) */
	out.b_re = p0o1(ssub_bf(rb1(in.a_re), Wxbr));

	/* ALU 2 & 3 */
	/* im(W) * re(b) */
	aluexp Wixbr = west(fmul(rd1(in.W_im), rb1(in.b_re)));
	/* im(W * b) = re(W) * im(b) + im(W) * re(b) */
	aluexp Wxbi = sadd_acc(fmul(rc1(in.W_re), ra1(in.b_im)), Wixbr);

	/* im(out_a) = im(a) + im(W * b) */
	out.a_im = p2o0(sadd_bf(rb1(in.a_im), Wxbi));
	/* im(out_b) = im(a) - im(W * b) */
	out.b_im = p2o1(ssub_bf(rb1(in.a_im), Wxbi));

	return out;
}

INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_half)
{
	add_offset(m.output_a_re, 2);
	add_offset(m.output_a_im, 2);
	add_offset(m.output_b_re, 2);
	add_offset(m.output_b_im, 2);

	if (second_half) {
		write_mem(m.output_a_re, res.a_re);
		write_mem(m.output_a_im, res.a_im);
		write_mem(m.output_b_re, res.b_re);
		write_mem(m.output_b_im, res.b_im);
	} else {
		/* Write the a results to memory b and vice versa. */
		write_mem(m.output_a_re, res.b_re);
		write_mem(m.output_a_im, res.b_im);
		write_mem(m.output_b_re, res.a_re);
		write_mem(m.output_b_im, res.a_im);
	}
}

/**
 * Reads four inputs and two twiddle factors from memory.
 * Also increases the memory offsets by 1 after reading.
 * @param stage_odd Is this an odd stage? If so, read from the left
 *                  memories, else read from the right memories
 *                  (not implemented yet).
 * @param cycle_odd Is this an odd cycle within the stage? If so,
 *                  read input a from memory b and vice versa. If not,
 *                  simply read a from memory a and b from memory b.
 */
INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage_odd)
{
	struct bf_in in;

	/* TODO: Select left or right memories */
	if (cycle_odd) {
		in.a_re = read_mem(m.input_a_re);
		in.a_im = read_mem(m.input_a_im);
		in.b_re = read_mem(m.input_b_re);
		in.b_im = read_mem(m.input_b_im);
	} else {
		in.b_re = read_mem(m.input_a_re);
		in.b_im = read_mem(m.input_a_im);
		in.a_re = read_mem(m.input_b_re);
		in.a_im = read_mem(m.input_b_im);
	}

	in.W_re = read_mem(m.twiddle_re);
	in.W_im = read_mem(m.twiddle_im);

	add_offset(m.input_a_re, 1);
	add_offset(m.input_a_im, 1);
	add_offset(m.input_b_re, 1);
	add_offset(m.input_b_im, 1);
	/* TODO: Update twiddle offsets */

	return in;
}
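#ifndef __MONTIUMCC__
/*
 * Host-only reference for the butterfly above: a minimal sketch, not part
 * of the Montium mapping. It computes out_a = a + W*b and out_b = a - W*b,
 * with the complex multiply expanded exactly as the ALU expressions above
 * do it. The names ref_complex and ref_butterfly, and the use of double
 * instead of the fixed-point word type, are illustrative assumptions and
 * do not come from FFT.h.
 */
struct ref_complex { double re, im; };

static void ref_butterfly(struct ref_complex a, struct ref_complex b,
                          struct ref_complex W,
                          struct ref_complex *out_a, struct ref_complex *out_b)
{
	/* re(W * b) = re(W) * re(b) - im(W) * im(b)  (ALUs 0 & 1) */
	double Wxb_re = W.re * b.re - W.im * b.im;
	/* im(W * b) = re(W) * im(b) + im(W) * re(b)  (ALUs 2 & 3) */
	double Wxb_im = W.re * b.im + W.im * b.re;

	out_a->re = a.re + Wxb_re;
	out_a->im = a.im + Wxb_im;
	out_b->re = a.re - Wxb_re;
	out_b->im = a.im - Wxb_im;
}
#endif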
/**
 * Initializes the input addresses for the various memories.
 * @param stage_odd True if this is an odd stage.
 */
INLINE void init_input_addresses_regular(struct mems m, bool stage_odd)
{
	/* TODO: Select left or right memories */
	/* Assumption: the set_base call for input_a_re appears to have been
	 * dropped from the original listing; without it input_a_re would
	 * keep a stale base. */
	set_base(m.input_a_re, 0);
	set_base(m.input_a_im, 0);
	set_base(m.input_b_re, 0);
	set_base(m.input_b_im, 0);
	set_base(m.twiddle_re, 0);
	set_base(m.twiddle_im, 0);

	set_offset(m.input_a_re, 0);
	set_offset(m.input_a_im, 0);
	set_offset(m.input_b_re, 0);
	set_offset(m.input_b_im, 0);
	set_offset(m.twiddle_re, 0);
	set_offset(m.twiddle_im, 0);
}

/**
 * Initializes the output addresses for the various memories.
 * @param stage_odd   True if this is an odd stage.
 * @param second_half True if we are initializing halfway through a stage.
 */
INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool second_half)
{
	/*
	 * For the second half of the stage, the starting addresses are
	 * reversed. write_output_regular above will also swap the output
	 * memories.
	 * TODO: Better comments :-)
	 */
	set_base(m.output_a_re, 0);
	set_base(m.output_a_im, 0);
	set_base(m.output_b_re, 0);
	set_base(m.output_b_im, 0);

	/* We subtract two from every address, since write_output_regular
	 * adds two to the offset before writing the first (and every
	 * other) result. */
	if (second_half) {
		set_offset(m.output_a_re, 1-2);
		set_offset(m.output_a_im, 1-2);
		set_offset(m.output_b_re, 0-2);
		set_offset(m.output_b_im, 0-2);
	} else {
		/* Note: currently identical to the second_half branch; the
		 * actual a/b swap happens in write_output_regular. */
		set_offset(m.output_a_re, 1-2);
		set_offset(m.output_a_im, 1-2);
		set_offset(m.output_b_re, 0-2);
		set_offset(m.output_b_im, 0-2);
	}
}

INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_half)
{
	struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd);
	struct bf_out out = butterfly(in);

	/* Now, do a single stage. That means N_t / 2 cycles. Since we do 2
	 * cycles on every iteration, plus one before and one after the loop,
	 * we will loop N_t / 4 - 1 times. */
	init_loop(LC2, (N_t / 4) - 1);
	do {
		init_output_addresses_regular(m, stage_odd, second_half);
		write_output_regular(m, out, second_half);
		in = read_input_regular(m, ODD_CYCLE, second_half);
		out = butterfly(in);

		next_cycle();

		write_output_regular(m, out, second_half);
		in = read_input_regular(m, EVEN_CYCLE, second_half);
		out = butterfly(in);
	} while (loop_next(LC2));

	write_output_regular(m, out, second_half);
	in = read_input_regular(m, ODD_CYCLE, second_half);
	out = butterfly(in);

	next_cycle();

	write_output_regular(m, out, second_half);
}

INLINE struct mems init_mem_mapping(bool stage_odd)
{
	struct mems res;

	if (stage_odd) {
		res.input_a_re  = alloc_mem(P0M1);
		res.input_a_im  = alloc_mem(P1M1);
		res.input_b_re  = alloc_mem(P2M1);
		res.input_b_im  = alloc_mem(P3M1);
		res.output_a_re = alloc_mem(P0M0);
		res.output_a_im = alloc_mem(P1M0);
		res.output_b_re = alloc_mem(P2M0);
		res.output_b_im = alloc_mem(P3M0);
	} else {
		res.input_a_re  = alloc_mem(P0M0);
		res.input_a_im  = alloc_mem(P1M0);
		res.input_b_re  = alloc_mem(P2M0);
		res.input_b_im  = alloc_mem(P3M0);
		res.output_a_re = alloc_mem(P0M1);
		res.output_a_im = alloc_mem(P1M1);
		res.output_b_re = alloc_mem(P2M1);
		res.output_b_im = alloc_mem(P3M1);
	}

	res.twiddle_re = alloc_mem(P4M0);
	res.twiddle_im = alloc_mem(P4M1);

	return res;
}

void run()
{
#ifdef __MONTIUMCC__
	/* main.cpp will call init() before pre_run(), so we only need to
	 * call init() ourselves when compiling with MontiumCC. */
	init();
#endif

	do {
		freeze();
	} while (gpi(0) == 0);

	struct mems m;

	m = init_mem_mapping(EVEN_STAGE);
	init_input_addresses_regular(m, EVEN_STAGE);
	/* do_half_regular_stage will init the output addresses */
	next_cycle();

	do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);
	do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);

	next_cycle();
	init_input_addresses_regular(m, ODD_STAGE);
	m = init_mem_mapping(ODD_STAGE);
	next_cycle();

	do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);
	do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);
}
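
#if !defined(__MONTIUMCC__) && defined(FFT_HOST_DEMO)
/*
 * Tiny host-only demo of the reference butterfly sketched earlier in this
 * file. FFT_HOST_DEMO is an assumed guard (it is not defined anywhere in
 * this project) so that this main() never collides with the main.cpp
 * mentioned in run() above.
 */
#include <stdio.h>

int main(void)
{
	struct ref_complex a = {1.0, 0.0}, b = {0.0, 1.0};
	struct ref_complex W = {0.0, -1.0}; /* twiddle factor W_4^1 = -j */
	struct ref_complex out_a, out_b;

	ref_butterfly(a, b, W, &out_a, &out_b);
	/* W*b = (1, 0), so we expect out_a = (2, 0) and out_b = (0, 0). */
	printf("out_a = %f %+fj\n", out_a.re, out_a.im);
	printf("out_b = %f %+fj\n", out_b.re, out_b.im);
	return 0;
}
#endif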