From 6599667fbc3945f5a32569832a79c543aa233d44 Mon Sep 17 00:00:00 2001
From: unknown <s0042331@.dynamic.ewi.utwente.nl>
Date: Thu, 27 Mar 2008 13:28:15 +0100
Subject: [PATCH]  * Remove some unused code.  * Add more comments.  * Slightly
 change the behaviour of do_half_regular_stage(): it now initializes the
 output addresses once instead of on every cycle. It also waits for the
 last write cycle to complete before returning.

---
 FFT.h  |  3 ---
 FFT.mc | 83 ++++++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/FFT.h b/FFT.h
index cb4cdf1..07f3948 100644
--- a/FFT.h
+++ b/FFT.h
@@ -58,7 +58,6 @@
 		mem input_a_re, input_a_im, input_b_re, input_b_im, output_a_re, output_a_im, output_b_re, output_b_im, twiddle_re, twiddle_im;
 	};
 	
-	void init();
 	INLINE struct bf_out butterfly(struct bf_in in);
 	void run(void);	
 	
@@ -74,6 +73,4 @@
 #define EVEN_CYCLE 0
 #define ODD_CYCLE 1
 
-
-	
 #endif // !FFT_H_INCLUDED
diff --git a/FFT.mc b/FFT.mc
index e58d2fa..6df71ad 100644
--- a/FFT.mc
+++ b/FFT.mc
@@ -6,21 +6,11 @@
 
 #include "FFT.h"
 
-	word in_a_re, in_a_im, in_b_re, in_b_im, in_W_re, in_W_im;
-	//out_a_re, out_a_im, out_b_re, out_b_im;
-	//mem input_a_re, input_a_im, input_b_re, input_b_im, output_a_re, output_a_im, output_b_re, output_b_im, twiddle_re, twiddle_im; 
-
-void init()
-{
-
-}
-
-
-
-/*void update_gpi() {
-  set_gpi(0, 1);
-}*/
-
+/**
+ * Executes a single butterfly on ALU 0-3. The inputs are the words taken from
+ * in, which will be read on various inputs of ALU 0-3. Outputs will be
+ * returned and will be available on the output ports of ALU 0 and 2.
+ */
 INLINE struct bf_out butterfly(struct bf_in in) {
 	struct bf_out out;
 	/* ALU 0 & 1 */
@@ -48,6 +38,11 @@ INLINE struct bf_out butterfly(struct bf_in in) {
 		return out;
 }
 
+/**
+ * Writes the output of a butterfly given in res to the correct memory
+ * locations.
+ * @param  second_half   Are we in the second half of the stage?
+ */
 INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_half) {
 	add_offset(m.output_a_re, 2);
 	add_offset(m.output_a_im, 2);
@@ -80,7 +75,7 @@ INLINE void write_output_regular(struct mems m, struct bf_out res, bool second_h
  */
 INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage_odd) {
 	struct bf_in in;
-	/* TODO: Select left or right memories */
+	 /* Swap memory a and b during the odd cycles */
 	if (cycle_odd) {
 		in.a_re = read_mem(m.input_a_re);
 		in.a_im = read_mem(m.input_a_im);
@@ -95,7 +90,8 @@ INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage
 	in.W_re = read_mem(m.twiddle_re);
 	in.W_im = read_mem(m.twiddle_im);
 	
-
+	
+	/* Read inputs sequentially */
 	add_offset(m.input_a_re, 1);
 	add_offset(m.input_a_im, 1);
 	add_offset(m.input_b_re, 1);
@@ -105,12 +101,12 @@ INLINE struct bf_in read_input_regular(struct mems m, bool cycle_odd, bool stage
 }
 
 /**
- *  Initializes the addresses for the various memories.
+ * Initializes the addresses for reading the inputs.
  * @param stage_odd   True if this is an odd stage.
- *@param second_half True if we are initing halfway a stage.
+ * @param second_half True if we are initing halfway a stage.
  */ 
 INLINE void init_input_addresses_regular(struct mems m, bool stage_odd) {
-	/* TODO: Select left or right memories */
+	/* We simply start reading at address 0 incrementally */
 	set_base(m.input_a_im, 0);
 	set_base(m.input_b_re, 0);
 	set_base(m.input_b_im, 0);
@@ -124,8 +120,11 @@ INLINE void init_input_addresses_regular(struct mems m, bool stage_odd) {
 	set_offset(m.twiddle_re, 0);
 	set_offset(m.twiddle_im, 0);
 }
-	
-	
+
+/**
+ * Initializes the addresses for writing the outputs. This function must be
+ * called twice per stage, since the addressing changes halfway through it.
+ */
 INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool second_half) {
 	/* 
 	 * For the second half of the stage, the starting addresses are 
@@ -156,32 +155,62 @@ INLINE void init_output_addresses_regular(struct mems m, bool stage_odd, bool se
 }
 
 INLINE void do_half_regular_stage(struct mems m, bool stage_odd, bool second_half){
+	 /*
+	 * We are doing two cycles in each iteration, so we can alternate the
+	 * cycle_odd argument (which only works with constants, I don't expect
+	 * the optimizer to do this loop unrolling for us). Since we need to
+	 * write outputs before reading, but don't have any outputs to write
+	 * in the first cycle, we must put the first cycle outside of the
+	 * loop. Since the loop does two cycles at a time, this means there
+	 * must be two cycles outside of the loop, so we put one at the end as
+	 * well. Additionally, we also need to write the outputs of the last
+	 * cycle in an extra cycle at the end. We probably can't combine this
+	 * last cycle with the first cycle of the next stage, because they
+	 * need the same memories (input becomes output and v.v.).
+	 */
+
+	/* Initialize output addresses, this must be done twice per stage */
+	init_output_addresses_regular(m, stage_odd, second_half);
+
+	/* First cycle (no previous output to write) */
 	struct bf_in in = read_input_regular(m, EVEN_CYCLE, stage_odd);
 	struct bf_out out = butterfly(in);
-	
+
 	/* Now, do a single stage. That means N_t / 2 cycles. Since we do 2
 	 * cycles on every iteration, plus one before and after the loop,
 	 * we will loop N_t / 4 - 1 times. */
 	init_loop(LC2, (N_t / 4) - 1);
 	do {
-		init_output_addresses_regular(m, stage_odd, second_half);
+		/* Write outputs of previous cycle */
 		write_output_regular(m, out, second_half);
 
+		/* Odd cycle */
 		in = read_input_regular(m, ODD_CYCLE, second_half);
 		out = butterfly(in);
 		next_cycle();
+
+		/* Write outputs of previous cycle */
 		write_output_regular(m, out, second_half);
 
+		/* Even cycle */
 		in = read_input_regular(m, EVEN_CYCLE, second_half);
 		out = butterfly(in);
 	} while (loop_next(LC2));
 	
+	/* Write outputs of previous cycle */
 	write_output_regular(m, out, second_half);
+
+	/* Last cycle */
 	in = read_input_regular(m, ODD_CYCLE, second_half);
 	out = butterfly(in);
-	
 	next_cycle();
+
+	/* Write outputs of last cycle */
 	write_output_regular(m, out, second_half);
+	
+	/* Force the next cycle, because the next stage must read from
+	 * the memory we just wrote to */
+	next_cycle();
 }
 
 INLINE struct mems init_mem_mapping(bool stage_odd){
@@ -212,10 +241,6 @@ INLINE struct mems init_mem_mapping(bool stage_odd){
 	return res;
 }
 void run() {
-#ifdef __MONTIUMCC__
-	/* main.cpp will call init before pre_run(), so only need to call init for MontiumCC */
-	init();
-#endif
 	do { freeze(); } while (gpi(0) == 0);
 	struct mems m;
 	
-- 
2.30.2