#ifndef __MONTIUMCC__ #include #include #include #endif #include "FFT.h" /** * Executes a single butterfly on ALU 0-3. The inputs are the words taken from * in, which will be read on various inputs of ALU 0-3. Outputs will be * returned and will we be available on the output ports of ALU 0 and 2. */ INLINE struct bf_out butterfly(struct bf_in in) { struct bf_out out; /* ALU 0 & 1 */ /* im(W) * im(b) */ aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im))); /* re(W * b) = re(W) * re(b) - im(W) * im(b) */ aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi); /* re(out_a) = re(a) + re(W * b) */ out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr)); /* re(out_b) = re(a) - re(W * b) */ out.b_re = p0o1(ssub_bf(rb1(in.a_re), Wxbr)); /* ALU 2 & 3 */ /* im(W) * re(b) */ aluexp Wixbr = west(fmul(rd1(in.W_im), rb1(in.b_re))); /* im(W * b) = re(W) * im(b) + im(W) * re(b) */ aluexp Wxbi = sadd_acc(fmul(rc1(in.W_re), ra1(in.b_im)), Wixbr); /* im(out_a) = im(a) + im(W * b) */ out.a_im = p2o0(sadd_bf(rb1(in.a_im), Wxbi)); /* im(out_b) = im(a) - im(W * b) */ out.b_im = p2o1(ssub_bf(rb1(in.a_im), Wxbi)); return out; } /** * Writes the output of a butterfly given in res to the correct memory * locations. * @param second_half Are we in the second half of the stage? */ INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_half) { add_offset(m.output_a_re, 2); add_offset(m.output_a_im, 2); add_offset(m.output_b_re, 2); add_offset(m.output_b_im, 2); if (second_half) { write_mem(m.output_a_re, res.a_re); write_mem(m.output_a_im, res.a_im); write_mem(m.output_b_re, res.b_re); write_mem(m.output_b_im, res.b_im); } else { /* Write a results to memory b and v.v. */ write_mem(m.output_a_re, res.b_re); write_mem(m.output_a_im, res.b_im); write_mem(m.output_b_re, res.a_re); write_mem(m.output_b_im, res.a_im); } } /** * Reads four inputs and two twiddle factors from memory. * Also increases memory offsets by 1 after reading. * @param stage_odd Is this an odd stage? If so, read from the left * memories, else read from the right memories * (not implemented yet). * @param cycle_odd Is this an odd cycle within the stage? If so, * read input a from memory b and v.v. If not, * simply read a from memory a and b from memory b. */ INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage_odd) { struct bf_in in; /* Swap memory a and b during the odd cycles */ if (cycle_odd) { in.a_re = read_mem(m.input_a_re); in.a_im = read_mem(m.input_a_im); in.b_re = read_mem(m.input_b_re); in.b_im = read_mem(m.input_b_im); } else { in.b_re = read_mem(m.input_a_re); in.b_im = read_mem(m.input_a_im); in.a_re = read_mem(m.input_b_re); in.a_im = read_mem(m.input_b_im); } in.W_re = read_mem(m.twiddle_re); in.W_im = read_mem(m.twiddle_im); /* Read inputs sequentially */ add_offset(m.input_a_re, 1); add_offset(m.input_a_im, 1); add_offset(m.input_b_re, 1); add_offset(m.input_b_im, 1); /* TODO: Update twiddle offsets */ return in; } /** * Initializes the addresses for writing the outputs. * @param stage_odd True if this is an odd stage. * @param second_half True if we are initing halfway a stage. */ INLINE void init_input_addresses_regular(struct mems m, bool stage_odd) { /* We simply start reading at address 0 incrementally */ set_base(m.input_a_im, 0); set_base(m.input_b_re, 0); set_base(m.input_b_im, 0); set_base(m.twiddle_re, 0); set_base(m.twiddle_im, 0); set_offset(m.input_a_re, 0); set_offset(m.input_a_im, 0); set_offset(m.input_b_re, 0); set_offset(m.input_b_im, 0); set_offset(m.twiddle_re, 0); set_offset(m.twiddle_im, 0); } /** * Initializes the addresses for reading the inputs. This function must be * called twice per stage, since halfway the stage the addressing changes. */ INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool second_half) { /* * For the second half of the stage, the starting addresses are * reversed. write_output_regular above will also swap the output * memories. * TODO: Better comments :-) */ set_base(m.output_a_re, 0); set_base(m.output_a_im, 0); set_base(m.output_b_re, 0); set_base(m.output_b_im, 0); /* We subtract two from every address, since write_output_regular * adds two to the offset before writing the first (and every other) * result. */ if (second_half) { set_offset(m.output_a_re, 1-2); set_offset(m.output_a_im, 1-2); set_offset(m.output_b_re, 0-2); set_offset(m.output_b_im, 0-2); } else { set_offset(m.output_a_re, 1-2); set_offset(m.output_a_im, 1-2); set_offset(m.output_b_re, 0-2); set_offset(m.output_b_im, 0-2); } } INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_half){ /* * We are doing two cycles in each iteration, so we can alternate the * cycle_odd argument (which only works with constants, I don't expect * the optimizer to do this loop unrolling for us). Since we need to * write outputs before reading, but don't have any outputs to write * in the first cycle, we must put the first cycle outside of the * loop. Since the loop does two cycles at a time, this means there * must be two cycles outside of the loop, so we put one at the end as * well. Additionally, we also need to write the outputs of the last * cycle in an extra cycle at the end. We probably can't combine this * last cycle with the first cycle of the next stage, because they * need the same memories (input becomes output and v.v.). */ /* Initialize output addresses, this must be done twice per stage */ init_output_addresses_regular(m, stage_odd, second_half); /* First cycle (no previous output to write) */ struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd); struct bf_out out = butterfly(in); /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2 * cycles on every iteration, plus one before and after the loop, * we will loop N_t / 4 - 1 times. */ init_loop(LC2, (N_t / 4) - 1); do { /* Write outputs of previous cycle */ write_output_regular(m, out, second_half); /* Odd cycle */ in = read_input_regular(m, ODD_CYCLE, second_half); out = butterfly(in); next_cycle(); /* Write outputs of previous cycle */ write_output_regular(m, out, second_half); /* Even cycle */ in = read_input_regular(m, EVEN_CYCLE, second_half); out = butterfly(in); } while (loop_next(LC2)); /* Write outputs of previous cycle */ write_output_regular(m, out, second_half); /* Last cycle */ in = read_input_regular(m, ODD_CYCLE, second_half); out = butterfly(in); next_cycle(); /* Write outputs of last cycle */ write_output_regular(m, out, second_half); /* Force the next cycle, because the next stage must read from * the memory we just wrote to */ next_cycle(); } INLINE struct mems init_mem_mapping(bool stage_odd){ struct mems res; if (stage_odd) { res.input_a_re = alloc_mem(P0M1); res.input_a_im = alloc_mem(P1M1); res.input_b_re = alloc_mem(P2M1); res.input_b_im = alloc_mem(P3M1); res.output_a_re = alloc_mem(P0M0); res.output_a_im = alloc_mem(P1M0); res.output_b_re = alloc_mem(P2M0); res.output_b_im = alloc_mem(P3M0); } else { res.input_a_re = alloc_mem(P0M0); res.input_a_im = alloc_mem(P1M0); res.input_b_re = alloc_mem(P2M0); res.input_b_im = alloc_mem(P3M0); res.output_a_re = alloc_mem(P0M1); res.output_a_im = alloc_mem(P1M1); res.output_b_re = alloc_mem(P2M1); res.output_b_im = alloc_mem(P3M1); } res.twiddle_re = alloc_mem(P4M0); res.twiddle_im = alloc_mem(P4M1); return res; } void run() { do { freeze(); } while (gpi(0) == 0); struct mems m; m = init_mem_mapping(EVEN_STAGE); init_input_addresses_regular(m, EVEN_STAGE); /* do_half_regular_stage will init output addresses */ next_cycle(); do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF); do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF); next_cycle(); init_input_addresses_regular(m, ODD_STAGE); m = init_mem_mapping(ODD_STAGE); next_cycle(); do_half_regular_stage(m, ODD_STAGE, FIRST_HALF); do_half_regular_stage(m, ODD_STAGE, SECOND_HALF); }