+ /* Simply write output linearly */\r
+ add_offset(m.output_a_re, 1);\r
+ add_offset(m.output_a_im, 1);\r
+ add_offset(m.output_b_re, 1);\r
+ add_offset(m.output_b_im, 1);\r
+ }\r
+ if (out_s == BITREVERSED_OUT) {\r
+ /* \r
+ Use the memories (which are n_t - 1 bits wide) bitreversed.\r
+ Since we are generating the samples in sequence (0, 1, 2, 3,\r
+ ...) but are writing them to two different memories (0, 8,\r
+ 1, 9, ...), the last bit is already bitreversed, so in effect\r
+ we have fully bitreversed the results. Note that this holds\r
+ in the non-distributed case (Q = 1), but might also hold in\r
+ the distributed case (if the tile numbers are bitreversed\r
+ before concatenating memory).\r
+ */\r
+ use_bitreverse(m.output_a_re, PARAM_n_t - 1);\r
+ use_bitreverse(m.output_a_im, PARAM_n_t - 1);\r
+ use_bitreverse(m.output_b_re, PARAM_n_t - 1);\r
+ use_bitreverse(m.output_b_im, PARAM_n_t - 1);\r
+ }\r
+ \r
+ if (out_s == REGULAR_OUT && second_half) {\r
+ /* When in the regular stages, reverse memory a and b during\r
+ * the second half */\r