1 #ifndef __MONTIUMCC__
\r
9 word in_a_re, in_a_im, in_b_re, in_b_im, in_W_re, in_W_im;
\r
10 //out_a_re, out_a_im, out_b_re, out_b_im;
\r
11 //mem input_a_re, input_a_im, input_b_re, input_b_im, output_a_re, output_a_im, output_b_re, output_b_im, twiddle_re, twiddle_im;
\r
20 /*void update_gpi() {
\r
/*
 * FFT radix-2 butterfly: out_a = a + W*b, out_b = a - W*b, on complex
 * values carried as separate _re/_im words.
 * NOTE(review): this file is a partial extraction — the declaration of
 * the result struct `out` and the `return` statement fall on lines not
 * present here; only the visible arithmetic is documented.
 * ra1/rb1/rc1/rd1 and p0o0/p0o1/p2o0/p2o1 presumably route operands to
 * specific ALU register inputs / processing-part outputs — confirm
 * against the MontiumCC intrinsics documentation.
 */
24 INLINE struct bf_out butterfly(struct bf_in in) {

/* Partial product im(W) * im(b); west() presumably selects the west
 * operand route into the accumulate step below — TODO confirm. */
28 aluexp Wixbi = west(fmul(rd1(in.W_im), rb1(in.b_im)));

29 /* re(W * b) = re(W) * re(b) - im(W) * im(b) */

30 aluexp Wxbr = ssub_acc(fmul(rc1(in.W_re), ra1(in.b_re)), Wixbi);

32 /* re(out_a) = re(a) + re(W * b) */

33 out.a_re = p0o0(sadd_bf(rb1(in.a_re), Wxbr));

34 /* re(out_b) = re(a) - re(W * b) */

35 out.b_re = p0o1(ssub_bf(rb1(in.a_re), Wxbr));

/* Partial product im(W) * re(b) for the imaginary half. */
39 aluexp Wixbr = west(fmul(rd1(in.W_im), rb1(in.b_re)));

40 /* im(W * b) = re(W) * im(b) + im(W) * re(b) */

41 aluexp Wxbi = sadd_acc(fmul(rc1(in.W_re), ra1(in.b_im)), Wixbr);

43 /* im(out_a) = im(a) + im(W * b) */

44 out.a_im = p2o0(sadd_bf(rb1(in.a_im), Wxbi));

45 /* im(out_b) = im(a) - im(W * b) */

46 out.b_im = p2o1(ssub_bf(rb1(in.a_im), Wxbi));
\r
/*
 * Writes one butterfly result (four words) to the four output memories,
 * first advancing every output offset by 2 (init_output_addresses_regular
 * pre-subtracts 2 so the first write lands on the intended address).
 * NOTE(review): both the straight write sequence and the a<->b swapped
 * sequence appear below; the if/else on `second_half` that selects
 * between them falls on lines missing from this extraction — confirm
 * against the full source before editing.
 */
51 INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_half) {

/* Advance all four output addresses by 2 before writing. */
52 add_offset(m.output_a_re, 2);

53 add_offset(m.output_a_im, 2);

54 add_offset(m.output_b_re, 2);

55 add_offset(m.output_b_im, 2);

/* Presumably the !second_half branch: a-results to the a-memories,
 * b-results to the b-memories. */
58 write_mem(m.output_a_re, res.a_re);

59 write_mem(m.output_a_im, res.a_im);

60 write_mem(m.output_b_re, res.b_re);

61 write_mem(m.output_b_im, res.b_im);

63 /* Write a results to memory b and v.v. */

64 write_mem(m.output_a_re, res.b_re);

65 write_mem(m.output_a_im, res.b_im);

66 write_mem(m.output_b_re, res.a_re);

67 write_mem(m.output_b_im, res.a_im);
\r
72 * Reads four inputs and two twiddle factors from memory.

73 * Also increases memory offsets by 1 after reading.

74 * @param stage_odd Is this an odd stage? If so, read from the left

75 * memories, else read from the right memories

76 * (not implemented yet).

77 * @param cycle_odd Is this an odd cycle within the stage? If so,

78 * read input a from memory b and v.v. If not,

79 * simply read a from memory a and b from memory b.

/*
 * NOTE(review): the declaration of `in`, the if/else on cycle_odd that
 * selects between the two read sequences below, and the `return in;`
 * are on lines missing from this extraction.
 */
81 INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage_odd) {

83 /* TODO: Select left or right memories */

/* Straight reads: a from the a-memories, b from the b-memories
 * (presumably the even-cycle branch — confirm). */
85 in.a_re = read_mem(m.input_a_re);

86 in.a_im = read_mem(m.input_a_im);

87 in.b_re = read_mem(m.input_b_re);

88 in.b_im = read_mem(m.input_b_im);

/* Swapped reads: a from the b-memories and v.v.
 * (presumably the odd-cycle branch — confirm). */
90 in.b_re = read_mem(m.input_a_re);

91 in.b_im = read_mem(m.input_a_im);

92 in.a_re = read_mem(m.input_b_re);

93 in.a_im = read_mem(m.input_b_im);

/* Twiddle factor for this butterfly. */
95 in.W_re = read_mem(m.twiddle_re);

96 in.W_im = read_mem(m.twiddle_im);

/* Step every input address by 1 for the next read. */
99 add_offset(m.input_a_re, 1);

100 add_offset(m.input_a_im, 1);

101 add_offset(m.input_b_re, 1);

102 add_offset(m.input_b_im, 1);

103 /* TODO: Update twiddle offsets */
\r
108 * Initializes the addresses for the various memories.

109 * @param stage_odd True if this is an odd stage.

110 *@param second_half True if we are initializing halfway through a stage.

/*
 * NOTE(review): the doc comment above documents a `second_half`
 * parameter that the signature below does not take — either the doc or
 * the signature is stale. Also, the matching
 * `set_base(m.input_a_re, 0);` line is absent from this extraction.
 */
112 INLINE void init_input_addresses_regular(struct mems m, bool stage_odd) {

113 /* TODO: Select left or right memories */

/* All input and twiddle memories start at base 0. */
114 set_base(m.input_a_im, 0);

115 set_base(m.input_b_re, 0);

116 set_base(m.input_b_im, 0);

117 set_base(m.twiddle_re, 0);

118 set_base(m.twiddle_im, 0);

/* All read offsets start at 0; read_input_regular advances them by 1. */
120 set_offset(m.input_a_re, 0);

121 set_offset(m.input_a_im, 0);

122 set_offset(m.input_b_re, 0);

123 set_offset(m.input_b_im, 0);

124 set_offset(m.twiddle_re, 0);

125 set_offset(m.twiddle_im, 0);
\r
/*
 * Initializes base and offset of the four output memories for one half
 * of a stage.
 * NOTE(review): the two set_offset sequences below are byte-identical,
 * yet the comment says second-half starting addresses are reversed; the
 * if/else on `second_half` selecting between them is on lines missing
 * from this extraction — verify against the full source whether the
 * second sequence should use different values (possible bug).
 */
129 INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool second_half) {

131 * For the second half of the stage, the starting addresses are

132 * reversed. write_output_regular above will also swap the output

134 * TODO: Better comments :-)

/* All output memories start at base 0. */
137 set_base(m.output_a_re, 0);

138 set_base(m.output_a_im, 0);

139 set_base(m.output_b_re, 0);

140 set_base(m.output_b_im, 0);

142 /* We subtract two from every address, since write_output_regular

143 * adds two to the offset before writing the first (and every other)

/* First-half starting offsets: a at 1, b at 0 (pre-compensated by -2). */
146 set_offset(m.output_a_re, 1-2);

147 set_offset(m.output_a_im, 1-2);

148 set_offset(m.output_b_re, 0-2);

149 set_offset(m.output_b_im, 0-2);

/* Second-half starting offsets — currently identical to the first half;
 * see NOTE(review) in the header comment. */
151 set_offset(m.output_a_re, 1-2);

152 set_offset(m.output_a_im, 1-2);

153 set_offset(m.output_b_re, 0-2);

154 set_offset(m.output_b_im, 0-2);
\r
/*
 * Runs half a regular FFT stage: software-pipelined read -> butterfly ->
 * write, two butterflies per loop iteration plus prologue/epilogue.
 * NOTE(review): the `do {` opening the LC2 loop (paired with the
 * `} while (loop_next(LC2));` below) and the closing brace of this
 * function are on lines missing from this extraction.
 * NOTE(review): likely bug — read_input_regular's signature is
 * (m, cycle_odd, stage_odd); the first call below correctly passes
 * `stage_odd` as the third argument, but every subsequent call passes
 * `second_half` instead. Verify and make the calls consistent.
 */
158 INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_half){

/* Prologue: first read + butterfly before the loop starts. */
159 struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd);

160 struct bf_out out = butterfly(in);

162 /* Now, do a single stage. That means N_t / 2 cycles. Since we do 2

163 * cycles on every iteration, plus one before and after the loop,

164 * we will loop N_t / 4 - 1 times. */

165 init_loop(LC2, (N_t / 4) - 1);

/* Output addresses are (re)initialized here, inside the stage body. */
167 init_output_addresses_regular(m, stage_odd, second_half);

168 write_output_regular(m, out, second_half);

/* Odd cycle: write previous result while reading/computing the next. */
170 in = read_input_regular(m, ODD_CYCLE, second_half);

171 out = butterfly(in);

173 write_output_regular(m, out, second_half);

/* Even cycle of the next iteration pair. */
175 in = read_input_regular(m, EVEN_CYCLE, second_half);

176 out = butterfly(in);

177 } while (loop_next(LC2));

/* Epilogue: drain the pipeline — one more odd-cycle butterfly. */
179 write_output_regular(m, out, second_half);

180 in = read_input_regular(m, ODD_CYCLE, second_half);

181 out = butterfly(in);

184 write_output_regular(m, out, second_half);
\r
/*
 * Allocates the memory mapping for one stage: inputs and outputs
 * ping-pong between the M0 and M1 memory banks of processing parts
 * P0..P3, so each stage reads what the previous stage wrote.
 * NOTE(review): the declaration of `res`, the if/else on `stage_odd`
 * that selects between the two assignment sets below, and the
 * `return res;` are on lines missing from this extraction.
 */
187 INLINE struct mems init_mem_mapping(bool stage_odd){

/* Presumably the even-stage mapping: inputs in M1, outputs in M0. */
190 res.input_a_re = alloc_mem(P0M1);

191 res.input_a_im = alloc_mem(P1M1);

192 res.input_b_re = alloc_mem(P2M1);

193 res.input_b_im = alloc_mem(P3M1);

194 res.output_a_re = alloc_mem(P0M0);

195 res.output_a_im = alloc_mem(P1M0);

196 res.output_b_re = alloc_mem(P2M0);

197 res.output_b_im = alloc_mem(P3M0);

/* Presumably the odd-stage mapping: banks swapped (inputs in M0). */
199 res.input_a_re = alloc_mem(P0M0);

200 res.input_a_im = alloc_mem(P1M0);

201 res.input_b_re = alloc_mem(P2M0);

202 res.input_b_im = alloc_mem(P3M0);

203 res.output_a_re = alloc_mem(P0M1);

204 res.output_a_im = alloc_mem(P1M1);

205 res.output_b_re = alloc_mem(P2M1);

206 res.output_b_im = alloc_mem(P3M1);

/* Twiddle factors live in both banks of P4 regardless of stage. */
209 res.twiddle_re = alloc_mem(P4M0);

210 res.twiddle_im = alloc_mem(P4M1);
\r
/*
 * Top-level stage sequence (even stage, then odd stage).
 * NOTE(review): the enclosing function header (and the declaration of
 * `m`) plus the closing lines are missing from this extraction.
 */
215 #ifdef __MONTIUMCC__

216 /* main.cpp will call init before pre_run(), so only need to call init for MontiumCC */

/* On hardware: stall until GPI pin 0 goes high before starting. */
219 do { freeze(); } while (gpi(0) == 0);

/* Even stage: map memories first, then set up input addresses. */
222 m = init_mem_mapping(EVEN_STAGE);

223 init_input_addresses_regular(m, EVEN_STAGE);

224 /* do_half_regular_stage will init output addresses */

226 do_half_regular_stage(m, EVEN_STAGE, FIRST_HALF);

227 do_half_regular_stage(m, EVEN_STAGE, SECOND_HALF);

/* NOTE(review): likely bug — the odd stage initializes input addresses
 * on the OLD mapping and only then replaces `m`; the even stage above
 * maps first and then initializes addresses. These two calls probably
 * need to be swapped. */
229 init_input_addresses_regular(m, ODD_STAGE);

230 m = init_mem_mapping(ODD_STAGE);

232 do_half_regular_stage(m, ODD_STAGE, FIRST_HALF);

233 do_half_regular_stage(m, ODD_STAGE, SECOND_HALF);
\r