#ifndef __MONTIUMCC__ #include #include #include #endif #include "FFT.h" /** * Executes a single butterfly on ALU 0-3. The inputs are the words taken from * in, which will be read on various inputs of ALU 0-3. Outputs will be * returned and will we be available on the output ports of ALU 0 and 2. */ INLINE struct bf_out butterfly(struct bf_in in) { struct bf_out out; /* ALU 0 & 1 */ /* im(W) * im(b) */ aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im))); /* re(W * b) = re(W) * re(b) - im(W) * im(b) */ aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi); /* re(out_a) = re(a) + re(W * b) */ out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr)); /* re(out_b) = re(a) - re(W * b) */ out.b_re = p0o1(ssub_bf(rb1(in.a_re), Wxbr)); /* ALU 2 & 3 */ /* im(W) * re(b) */ aluexp Wixbr = west(fmul(rd1(in.W_im), rb1(in.b_re))); /* im(W * b) = re(W) * im(b) + im(W) * re(b) */ aluexp Wxbi = sadd_acc(fmul(rc1(in.W_re), ra1(in.b_im)), Wixbr); /* im(out_a) = im(a) + im(W * b) */ out.a_im = p2o0(sadd_bf(rb1(in.a_im), Wxbi)); /* im(out_b) = im(a) - im(W * b) */ out.b_im = p2o1(ssub_bf(rb1(in.a_im), Wxbi)); return out; } /** * Writes the output of a butterfly given in res to the correct memory * locations. * @param second_half Are we in the second half of the stage? */ INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_half) { add_offset(m.output_a_re, 2); add_offset(m.output_a_im, 2); add_offset(m.output_b_re, 2); add_offset(m.output_b_im, 2); if (!second_half) { write_mem(m.output_a_re, res.a_re); write_mem(m.output_a_im, res.a_im); write_mem(m.output_b_re, res.b_re); write_mem(m.output_b_im, res.b_im); } else { /* Write a results to memory b and v.v. */ write_mem(m.output_a_re, res.b_re); write_mem(m.output_a_im, res.b_im); write_mem(m.output_b_re, res.a_re); write_mem(m.output_b_im, res.a_im); } } /** * Reads four inputs and two twiddle factors from memory. * Also increases memory offsets by 1 after reading. * @param stage_odd Is this an odd stage? 
If so, read from the left * memories, else read from the right memories * (not implemented yet). * @param cycle_odd Is this an odd cycle within the stage? If so, * read input a from memory b and v.v. If not, * simply read a from memory a and b from memory b. */ INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, int stage) { struct bf_in in; /* Swap memory a and b during the odd cycles */ if (cycle_odd) { in.a_re = read_mem(m.input_a_re); in.a_im = read_mem(m.input_a_im); in.b_re = read_mem(m.input_b_re); in.b_im = read_mem(m.input_b_im); } else { in.b_re = read_mem(m.input_a_re); in.b_im = read_mem(m.input_a_im); in.a_re = read_mem(m.input_b_re); in.a_im = read_mem(m.input_b_im); } in.W_re = read_mem(m.twiddle_re); in.W_im = read_mem(m.twiddle_im); /* Read inputs sequentially */ add_offset(m.input_a_re, 1); add_offset(m.input_a_im, 1); add_offset(m.input_b_re, 1); add_offset(m.input_b_im, 1); /* TODO: Is this true? */ add_offset(m.twiddle_re, (PARAM_N_t>>stage)); add_offset(m.twiddle_im, (PARAM_N_t>>stage)); use_mask(m.twiddle_re, (PARAM_N_t/2)-1); use_mask(m.twiddle_im, (PARAM_N_t/2)-1); return in; } /** * Initializes the addresses for reading the inputs and twiddel factors. * Should be called once at the start of each stage. */ INLINE void init_input_addresses_regular(struct mems m) { /* We simply start reading at address 0 incrementally */ set_base(m.input_a_im, 0); set_base(m.input_b_re, 0); set_base(m.input_b_im, 0); set_base(m.twiddle_re, 0); set_base(m.twiddle_im, 0); set_offset(m.input_a_re, 0); set_offset(m.input_a_im, 0); set_offset(m.input_b_re, 0); set_offset(m.input_b_im, 0); set_offset(m.twiddle_re, 0); set_offset(m.twiddle_im, 0); } /** * Initializes the addresses for reading the inputs. This function must be * called twice per stage, since halfway the stage the addressing changes. 
 */
INLINE void init_output_addresses_regular(struct mems m, bool second_half) {
	/*
	 * For the second half of the stage, the starting addresses are
	 * reversed. write_output_regular above will also swap the output
	 * memories.
	 * TODO: Better comments :-)
	 */
	set_base(m.output_a_re, 0);
	set_base(m.output_a_im, 0);
	set_base(m.output_b_re, 0);
	set_base(m.output_b_im, 0);

	/* We subtract two from every address, since write_output_regular
	 * adds two to the offset before writing the first (and every other)
	 * result. */
	if (second_half) {
		set_offset(m.output_a_re, 1-2);
		set_offset(m.output_a_im, 1-2);
		set_offset(m.output_b_re, 0-2);
		set_offset(m.output_b_im, 0-2);
	} else {
		set_offset(m.output_a_re, 0-2);
		set_offset(m.output_a_im, 0-2);
		set_offset(m.output_b_re, 1-2);
		set_offset(m.output_b_im, 1-2);
	}
}

/**
 * Runs half of one FFT stage: N_t / 4 butterfly cycles, reading via
 * read_input_regular and writing via write_output_regular. The software
 * pipeline below (read/compute in one cycle, write in the next) is encoded
 * directly in the statement order -- do not reorder.
 */
INLINE void do_half_regular_stage(struct mems m, int stage, bool second_half){
	/*
	 * We are doing two cycles in each iteration, so we can alternate the
	 * cycle_odd argument (which only works with constants, I don't expect
	 * the optimizer to do this loop unrolling for us). Since we need to
	 * write outputs before reading, but don't have any outputs to write
	 * in the first cycle, we must put the first cycle outside of the
	 * loop. Since the loop does two cycles at a time, this means there
	 * must be two cycles outside of the loop, so we put one at the end as
	 * well. Additionally, we also need to write the outputs of the last
	 * cycle in an extra cycle at the end. We probably can't combine this
	 * last cycle with the first cycle of the next stage, because they
	 * need the same memories (input becomes output and v.v.).
	 */

	/* Initialize output addresses, this must be done twice per stage */
	init_output_addresses_regular(m, second_half);

	/* First cycle (no previous output to write) */
	struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage);
	struct bf_out out = butterfly(in);

	/* Now, do half a single stage. That means N_t / 4 cycles. Since we do 2
	 * cycles on every iteration, plus one before and after the loop,
	 * we will loop N_t / 8 - 1 times. We add an extra - 1 because this is
	 * a do while loop... */
	init_loop(LC2, (PARAM_N_t / 8) - 1 - 1);
	do {
		/* Write outputs of previous cycle */
		write_output_regular(m, out, second_half);

		/* Odd cycle */
		in = read_input_regular(m, ODD_CYCLE, stage);
		out = butterfly(in);
		next_cycle();

		/* Write outputs of previous cycle */
		write_output_regular(m, out, second_half);

		/* Even cycle */
		in = read_input_regular(m, EVEN_CYCLE, stage);
		out = butterfly(in);
	} while (loop_next(LC2));

	/* Write outputs of previous cycle */
	write_output_regular(m, out, second_half);

	/* Last cycle */
	in = read_input_regular(m, ODD_CYCLE, stage);
	out = butterfly(in);
	next_cycle();

	/* Write outputs of last cycle */
	write_output_regular(m, out, second_half);

	/* Force the next cycle, because the next stage must read from
	 * the memory we just wrote to */
	next_cycle();
}

/**
 * Assign the input and output memories, based on the current stage. Also
 * assigns the twiddle memories, but those are fixed (P4M0/P4M1).
 * Input and output memories ping-pong between M0 and M1 every stage, so a
 * stage reads what the previous stage wrote.
 */
INLINE struct mems init_mem_mapping(int stage){
	struct mems res;

	/* Use left memories for input on odd (ie, first)
	 * stages and right memories on even stages.
	 * NOTE(review): which of M0/M1 is "left" is not visible here --
	 * confirm the comment matches the branch below (odd stages take
	 * the else branch, reading from the M0 memories). */
	if ((stage % 2) == 0) {
		res.input_a_re  = alloc_mem(P0M1);
		res.input_a_im  = alloc_mem(P1M1);
		res.input_b_re  = alloc_mem(P2M1);
		res.input_b_im  = alloc_mem(P3M1);
		res.output_a_re = alloc_mem(P0M0);
		res.output_a_im = alloc_mem(P1M0);
		res.output_b_re = alloc_mem(P2M0);
		res.output_b_im = alloc_mem(P3M0);
	} else {
		res.input_a_re  = alloc_mem(P0M0);
		res.input_a_im  = alloc_mem(P1M0);
		res.input_b_re  = alloc_mem(P2M0);
		res.input_b_im  = alloc_mem(P3M0);
		res.output_a_re = alloc_mem(P0M1);
		res.output_a_im = alloc_mem(P1M1);
		res.output_b_re = alloc_mem(P2M1);
		res.output_b_im = alloc_mem(P3M1);
	}

	res.twiddle_re = alloc_mem(P4M0);
	res.twiddle_im = alloc_mem(P4M1);

	return res;
}

/* Runs one full FFT stage: map memories, set input addresses, then do the
 * two halves (do_half_regular_stage initializes the output addresses). */
INLINE void do_regular_stage(int stage) {
	struct mems m = init_mem_mapping(stage);
	init_input_addresses_regular(m);
	/* do_half_regular_stage will init output addresses */
	next_cycle();
	do_half_regular_stage(m, stage, FIRST_HALF);
	do_half_regular_stage(m, stage, SECOND_HALF);
}

/* Entry point: wait for GPI(0) to go high, run the four FFT stages, then
 * pulse GPO(0) to signal completion and freeze. */
void run() {
	/* Stall until the start signal arrives on general-purpose input 0. */
	do {
		freeze();
	} while (gpi(0) == 0);

	do_regular_stage(1);
	do_regular_stage(2);
	do_regular_stage(3);
	do_regular_stage(4);

	/* Signal completion on general-purpose output 0 for one cycle. */
	set_gpo(0);
	next_cycle();
	freeze();
	clear_gpo(0);
	next_cycle();
	freeze();
}