--- /dev/null
+#ifndef __MONTIUMCC__\r
+#include <memory.h>\r
+#include <math.h>\r
+#include <stdio.h>\r
+#endif\r
+\r
+#include "FFT.h"\r
+\r
/* Working registers for the current butterfly: inputs a and b and the
 * twiddle factor W, each split into real/imaginary parts. */
word in_a_re, in_a_im, in_b_re, in_b_im, in_W_re, in_W_im;
//out_a_re, out_a_im, out_b_re, out_b_im;
//mem input_a_re, input_a_im, input_b_re, input_b_im, output_a_re, output_a_im, output_b_re, output_b_im, twiddle_re, twiddle_im;
+\r
/* One-time initialization hook; nothing to set up for this design.
 * main.cpp calls this before pre_run(); run() calls it again when
 * compiled with MontiumCC (see run() below). */
void init()
{

}
+\r
+\r
+\r
+/*void update_gpi() {\r
+ set_gpi(0, 1);\r
+}*/\r
+\r
/*
 * Performs one radix-2 butterfly on complex fixed-point values:
 *   out_a = a + W * b
 *   out_b = a - W * b
 * The complex multiply is split into two real multiply-accumulates,
 * mapped explicitly onto the Montium ALUs (ALU 0/1 compute the real
 * part, ALU 2/3 the imaginary part). The rd1/rc1/rb1/ra1, west,
 * p0o0/p0o1/p2o0/p2o1 calls bind operands and results to specific
 * register ports and processor outputs -- the statement order and
 * expression shape are part of the hardware mapping; do not reorder.
 */
INLINE struct bf_out butterfly(struct bf_in in) {
    struct bf_out out;
    /* ALU 0 & 1 */
    /* im(W) * im(b) */
    aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im)));
    /* re(W * b) = re(W) * re(b) - im(W) * im(b) */
    aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi);

    /* re(out_a) = re(a) + re(W * b) */
    out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr));
    /* re(out_b) = re(a) - re(W * b) */
    out.b_re = p0o1(ssub_bf(rb1(in.a_re), Wxbr));

    /* ALU 2 & 3 */
    /* im(W) * re(b) */
    aluexp Wixbr = west(fmul(rd1(in.W_im), rb1(in.b_re)));
    /* im(W * b) = re(W) * im(b) + im(W) * re(b) */
    aluexp Wxbi = sadd_acc(fmul(rc1(in.W_re), ra1(in.b_im)), Wixbr);

    /* im(out_a) = im(a) + im(W * b) */
    out.a_im = p2o0(sadd_bf(rb1(in.a_im), Wxbi));
    /* im(out_b) = im(a) - im(W * b) */
    out.b_im = p2o1(ssub_bf(rb1(in.a_im), Wxbi));

    return out;
}
+\r
/**
 * Writes one butterfly result (four words) to the output memories,
 * advancing each output offset by 2 before writing.
 * init_output_addresses_regular pre-biases the start offsets by -2 to
 * compensate for this pre-increment.
 * @param second_half During the first half of a stage (false) the a
 *        and b results are written to each other's memories; during
 *        the second half (true) they go straight through. Together
 *        with the start offsets this realizes the stage's output
 *        reordering.
 */
INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_half) {
    add_offset(m.output_a_re, 2);
    add_offset(m.output_a_im, 2);
    add_offset(m.output_b_re, 2);
    add_offset(m.output_b_im, 2);

    if (second_half) {
        write_mem(m.output_a_re, res.a_re);
        write_mem(m.output_a_im, res.a_im);
        write_mem(m.output_b_re, res.b_re);
        write_mem(m.output_b_im, res.b_im);
    } else {
        /* Write a results to memory b and v.v. */
        write_mem(m.output_a_re, res.b_re);
        write_mem(m.output_a_im, res.b_im);
        write_mem(m.output_b_re, res.a_re);
        write_mem(m.output_b_im, res.a_im);
    }
}
+\r
/**
 * Reads four input words and two twiddle factors from memory.
 * Also increases every input memory offset by 1 after reading.
 * @param stage_odd Is this an odd stage? If so, read from the left
 *                  memories, else from the right memories
 *                  (not implemented yet -- currently unused).
 * @param cycle_odd Selects which memory pair feeds input a and which
 *                  feeds input b.
 *                  NOTE(review): the original doc said odd cycles read
 *                  a from memory b, but the code performs the swap on
 *                  EVEN cycles (the else branch) -- confirm which of
 *                  the comment or the condition is wrong.
 */
INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage_odd) {
    struct bf_in in;
    /* TODO: Select left or right memories depending on stage_odd */
    if (cycle_odd) {
        in.a_re = read_mem(m.input_a_re);
        in.a_im = read_mem(m.input_a_im);
        in.b_re = read_mem(m.input_b_re);
        in.b_im = read_mem(m.input_b_im);
    } else {
        /* Swapped: a comes from the b memories and vice versa. */
        in.b_re = read_mem(m.input_a_re);
        in.b_im = read_mem(m.input_a_im);
        in.a_re = read_mem(m.input_b_re);
        in.a_im = read_mem(m.input_b_im);
    }
    in.W_re = read_mem(m.twiddle_re);
    in.W_im = read_mem(m.twiddle_im);


    add_offset(m.input_a_re, 1);
    add_offset(m.input_a_im, 1);
    add_offset(m.input_b_re, 1);
    add_offset(m.input_b_im, 1);
    /* TODO: Update twiddle offsets */
    return in;
}
+\r
+/**\r
+ * Initializes the addresses for the various memories.\r
+ * @param stage_odd True if this is an odd stage.\r
+ *@param second_half True if we are initing halfway a stage.\r
+ */ \r
+INLINE void init_input_addresses_regular(struct mems m, bool stage_odd) {\r
+ /* TODO: Select left or right memories */\r
+ set_base(m.input_a_im, 0);\r
+ set_base(m.input_b_re, 0);\r
+ set_base(m.input_b_im, 0);\r
+ set_base(m.twiddle_re, 0);\r
+ set_base(m.twiddle_im, 0);\r
+ \r
+ set_offset(m.input_a_re, 0);\r
+ set_offset(m.input_a_im, 0);\r
+ set_offset(m.input_b_re, 0);\r
+ set_offset(m.input_b_im, 0);\r
+ set_offset(m.twiddle_re, 0);\r
+ set_offset(m.twiddle_im, 0);\r
+}\r
+ \r
+ \r
+INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool second_half) {\r
+ /* \r
+ * For the second half of the stage, the starting addresses are \r
+ * reversed. write_output_regular above will also swap the output\r
+ * memories.\r
+ * TODO: Better comments :-)\r
+ */\r
+\r
+ set_base(m.output_a_re, 0);\r
+ set_base(m.output_a_im, 0);\r
+ set_base(m.output_b_re, 0);\r
+ set_base(m.output_b_im, 0);\r
+ \r
+ /* We subtract two from every address, since write_output_regular \r
+ * adds two to the offset before writing the first (and every other) \r
+ * result. */\r
+ if (second_half) {\r
+ set_offset(m.output_a_re, 1-2);\r
+ set_offset(m.output_a_im, 1-2);\r
+ set_offset(m.output_b_re, 0-2);\r
+ set_offset(m.output_b_im, 0-2);\r
+ } else {\r
+ set_offset(m.output_a_re, 1-2);\r
+ set_offset(m.output_a_im, 1-2);\r
+ set_offset(m.output_b_re, 0-2);\r
+ set_offset(m.output_b_im, 0-2);\r
+ }\r
+}\r
+\r
+INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_half){\r
+ struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd);\r
+ struct bf_out out = butterfly(in);\r
+ \r
+ /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2\r
+ * cycles on every iteration, plus one before and after the loop,\r
+ * we will loop N_t / 4 - 1 times. */\r
+ init_loop(LC2, (N_t / 4) - 1);\r
+ do {\r
+ init_output_addresses_regular(m, stage_odd, second_half);\r
+ write_output_regular(m, out, second_half);\r
+\r
+ in = read_input_regular(m, ODD_CYCLE, second_half);\r
+ out = butterfly(in);\r
+ next_cycle();\r
+ write_output_regular(m, out, second_half);\r
+\r
+ in = read_input_regular(m, EVEN_CYCLE, second_half);\r
+ out = butterfly(in);\r
+ } while (loop_next(LC2));\r
+ \r
+ write_output_regular(m, out, second_half);\r
+ in = read_input_regular(m, ODD_CYCLE, second_half);\r
+ out = butterfly(in);\r
+ \r
+ next_cycle();\r
+ write_output_regular(m, out, second_half);\r
+}\r
+\r
/**
 * Allocates the physical memories for one stage. The design
 * double-buffers between the M0 and M1 memories of processor parts
 * P0..P3: even stages read from the M0 side and write to the M1 side,
 * odd stages do the opposite. The twiddle factors always live in
 * P4M0 (real) and P4M1 (imaginary).
 * @param stage_odd Selects which memory side is input vs. output.
 * @return The memory handles for this stage.
 */
INLINE struct mems init_mem_mapping(bool stage_odd){
    struct mems res;
    if (stage_odd) {
        res.input_a_re = alloc_mem(P0M1);
        res.input_a_im = alloc_mem(P1M1);
        res.input_b_re = alloc_mem(P2M1);
        res.input_b_im = alloc_mem(P3M1);
        res.output_a_re = alloc_mem(P0M0);
        res.output_a_im = alloc_mem(P1M0);
        res.output_b_re = alloc_mem(P2M0);
        res.output_b_im = alloc_mem(P3M0);
    } else {
        res.input_a_re = alloc_mem(P0M0);
        res.input_a_im = alloc_mem(P1M0);
        res.input_b_re = alloc_mem(P2M0);
        res.input_b_im = alloc_mem(P3M0);
        res.output_a_re = alloc_mem(P0M1);
        res.output_a_im = alloc_mem(P1M1);
        res.output_b_re = alloc_mem(P2M1);
        res.output_b_im = alloc_mem(P3M1);
    }

    res.twiddle_re = alloc_mem(P4M0);
    res.twiddle_im = alloc_mem(P4M1);

    return res;
}
+void run() {\r
+#ifdef __MONTIUMCC__\r
+ /* main.cpp will call init before pre_run(), so only need to call init for MontiumCC */\r
+ init();\r
+#endif\r
+ do { freeze(); } while (gpi(0) == 0);\r
+ struct mems m;\r
+ \r
+ m = init_mem_mapping(EVEN_STAGE);\r
+ init_input_addresses_regular(m, EVEN_STAGE);\r
+ /* do_half_regular_stage will init output addresses */\r
+ next_cycle();\r
+ do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);\r
+ do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);\r
+ next_cycle();\r
+ init_input_addresses_regular(m, ODD_STAGE);\r
+ m = init_mem_mapping(ODD_STAGE);\r
+ next_cycle();\r
+ do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);\r
+ do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);\r
+}\r
--- /dev/null
+#include "FFT.h"\r
+#include <cstdio>\r
+#include <cmath>\r
+\r
+\r
+/* Didn't the Montium use Q15 instead of Q14? */\r
+#define FIXED_POINT 14\r
+#define WORD_SIZE 16\r
+\r
+#define WORDS_PER_LINE 4\r
+#define WORDS_PER_GROUP 1\r
+\r
+extern mem input_a_re, input_a_im, input_b_re, input_b_im, output_a_re, output_a_im, output_b_re, output_b_im, twiddle_re, twiddle_im; \r
+\r
+\r
+\r
/**
 * Converts a float to Q(FIXED_POINT) fixed point (here Q14) stored in
 * a signed WORD_SIZE-bit word, saturating out-of-range values to
 * [-2^(WORD_SIZE-1), 2^(WORD_SIZE-1) - 1] instead of letting them wrap.
 *
 * Fix: the original wrote `1 << WORD_SIZE - 1`, relying on `-` binding
 * tighter than `<<`. That happens to be correct, but it is flagged by
 * -Wparentheses and error-prone; the limits are now parenthesized and
 * named once instead of being spelled out four times.
 */
int to_fixed(float n)
{
    const int max = (1 << (WORD_SIZE - 1)) - 1; /* e.g. 32767 for 16-bit words */
    const int min = -(1 << (WORD_SIZE - 1));    /* e.g. -32768 */
    int res = (int)(n * (1 << FIXED_POINT));
    /* Crop (saturate) results */
    if (res > max)
        return max;
    if (res < min)
        return min;
    return res;
}
+\r
/**
 * Converts a Q(FIXED_POINT) fixed-point word back to a float by
 * dividing out the scale factor 2^FIXED_POINT.
 */
float from_fixed(int n)
{
    const float scale = (float)(1 << FIXED_POINT);
    return n / scale;
}
+\r
/**
 * Prints `size` words of memory `m` starting at `offset`, grouped
 * WORDS_PER_GROUP words per group and WORDS_PER_LINE words per line,
 * with a final newline if the last line was left incomplete.
 * @param fixed If true, decode each word as Q14 fixed point and print
 *              it as a float; otherwise print the raw 16-bit word in
 *              hex.
 * NOTE(review): "%0.4f" -- the `0` flag has no effect without a field
 * width; "%8.4f" was perhaps intended for column alignment. Confirm
 * before changing, since it alters the tool's output format.
 */
void print_mem(mem m, int offset, int size, bool fixed)
{
    int i;
    for(i = offset;i<offset+size;i++)
    {
        if (fixed)
            printf("%0.4f", from_fixed(get_mem(m->id, i)));
        else
            printf("%04hx", (short)get_mem(m->id, i));
        if ((i + 1) % WORDS_PER_LINE == 0)
            printf("\n");
        else if ((i + 1) % WORDS_PER_GROUP == 0)
            printf(" ");
    }
    /* Close the last line if it didn't end exactly on a line boundary. */
    if (i % WORDS_PER_LINE != 0)
        printf("\n");
}
+\r
+\r
+void pre_run()\r
+{\r
+ int i;\r
+\r
+ /* TODO: Init memory and twiddles */\r
+ for (i=0;i<SIZE/2;i++)\r
+ {\r
+ set_mem(twiddle_re->id, i, to_fixed(cos(2*M_PI/SIZE*i)));\r
+ set_mem(twiddle_im->id, i, to_fixed(sin(2*M_PI/SIZE*i)));\r
+ }\r
+ \r
+ for (i=0;i<SIZE;i++)\r
+ {\r
+ int value = to_fixed(sin((float)i/SIZE*2*2*M_PI));\r
+ if (i<SIZE/2)\r
+ {\r
+ set_mem(input_a_re->id, i, value);\r
+ set_mem(input_a_im->id, i, 0);\r
+ }\r
+ else\r
+ {\r
+ set_mem(input_a_re->id, i - SIZE / 2, value);\r
+ set_mem(input_a_im->id, i - SIZE / 2, 0);\r
+ }\r
+ }\r
+}\r
+\r
/**
 * Host-side teardown: dumps the twiddle, input and output memories as
 * Q14 fixed-point values for visual inspection.
 * NOTE(review): pre_run only writes SIZE/2 twiddle entries, but SIZE
 * entries are printed here, so the upper half shows whatever the
 * memory happens to contain -- confirm whether SIZE/2 was intended.
 * Also note only a subset of memories is dumped (no im(in_*), no
 * re/im(out_b)).
 */
void post_run()
{
    printf("re(W)\n");
    print_mem(twiddle_re, 0, SIZE, true);
    printf("im(W)\n");
    print_mem(twiddle_im, 0, SIZE, true);
    printf("re(in_a)\n");
    print_mem(input_a_re, 0, SIZE, true);
    printf("re(in_b)\n");
    print_mem(input_b_re, 0, SIZE, true);
    printf("re(out_a)\n");
    print_mem(output_a_re, 0, SIZE, true);
    printf("im(out_a)\n");
    print_mem(output_a_im, 0, SIZE, true);
}