/* accelerator_optimized_fft.c */
#include "accelerator_optimized_fft.h"
#include "pound_defines.h"
#include "system.h"
#include "alt_types.h"
/*
 * accelerator_optimized_fft
 *
 * Computes a 16-bit fixed-point FFT over NUM_POINTS complex samples using
 * radix-2 butterflies across FFT_SIZE stages.
 *
 * The function runs in three phases:
 *   1. Input:   every 32-bit word of InData is split into a real sample
 *               (low 16 bits) and an imaginary sample (high 16 bits) and
 *               stored at index bitrev(i) of the working buffers.
 *   2. Compute: FFT_SIZE butterfly stages. Each stage reads from one pair
 *               of real/imaginary buffers and writes to the other pair;
 *               the pairs are swapped after every stage.
 *   3. Output:  the results are repacked (imaginary in the high 16 bits,
 *               real in the low 16 bits) and written to OutData.
 *
 * Parameters:
 *   InData  - input buffer, read as NUM_POINTS 32-bit words, each holding
 *             one packed real/imaginary sample pair.
 *   OutData - output buffer, written as NUM_POINTS 32-bit words using the
 *             same packing.
 *
 * Returns nothing; the result is delivered through OutData.
 *
 * NOTE(review): NUM_POINTS, FFT_SIZE, PRESCALE, bitrev(), BufferRAM1..4,
 * CosRAM and SinRAM are not defined in this file; presumably they come from
 * the included headers. FFT_SIZE is presumably log2(NUM_POINTS) - confirm.
 * NOTE(review): both buffers are declared alt_16 * but accessed through
 * alt_u32 * casts; callers presumably must supply 32-bit aligned buffers
 * - confirm.
 */
void accelerator_optimized_fft(alt_16 * __restrict__ InData, /* packed real/imaginary input samples (read as 32-bit words) */
alt_16 * __restrict__ OutData) /* packed real/imaginary output samples (written as 32-bit words) */
{
/* Twiddle table position for the current sub-stage, and its per-sub-stage
 * step. */
alt_u16 twiddle_index;
alt_u16 twiddle_incr;
/* Butterfly group size for the current stage (1 << stage_index) and half
 * of it, which is the distance between the two inputs of a butterfly. */
alt_u16 loop_element;
alt_u16 loop_element_div2;
/* Index of the second ("lower") input/output of the current butterfly. */
alt_u16 l;
alt_u16 sub_stage_index;
alt_u16 stage_index;
/* Index of the first ("upper") input/output of the current butterfly. */
alt_u16 butterfly_index;
/* These restricted pointers make up the bulk of the Avalon masters
 * in this system. Each buffer is reached through two pointers (the plain
 * one and the "Port2" one) so that both ports of the dual-port RAM can be
 * used at the same time. */
/* Real data ping-pong buffers */
alt_16 * __restrict__ BufferedRealCalcDataRead;
alt_16 * __restrict__ BufferedRealCalcDataReadPort2;
alt_16 * __restrict__ BufferedRealCalcDataWrite;
alt_16 * __restrict__ BufferedRealCalcDataWritePort2;
/* Imaginary data ping-pong buffers */
alt_16 * __restrict__ BufferedImagCalcDataRead;
alt_16 * __restrict__ BufferedImagCalcDataReadPort2;
alt_16 * __restrict__ BufferedImagCalcDataWrite;
alt_16 * __restrict__ BufferedImagCalcDataWritePort2;
/* Cosine and sine twiddle-factor tables */
alt_16 * __restrict__ CosineTable;
alt_16 * __restrict__ SineTable;
/* 32-bit views of the in and out buffers in main memory, so one access
 * transfers a whole real/imaginary pair */
alt_u32 * __restrict__ tempInputPtr = (alt_u32 *)InData;
alt_u32 * __restrict__ tempOutputPtr = (alt_u32 *)OutData;
/* Temporary registers for the input stage */
alt_u16 bit_rev_index;
alt_u32 tempInput;
/* Registers for the calculation stage: the current twiddle factor pair,
 * the twiddle-rotated lower input (tRealData/tImagData), and the four raw
 * butterfly inputs (temp1..temp4). */
alt_16 CosReal;
alt_16 SinReal;
alt_16 tRealData;
alt_16 tImagData;
alt_16 temp1, temp2, temp3, temp4;
/* Counters for the input and output stages */
alt_u16 inputCounter, outputCounter;
/* Assign the ping-pong buffers their default address locations: the read
 * side starts on BufferRAM1 (real) / BufferRAM3 (imaginary) and the write
 * side on BufferRAM2 (real) / BufferRAM4 (imaginary). */
BufferedRealCalcDataRead = BufferRAM1;
BufferedRealCalcDataReadPort2 = BufferRAM1;
BufferedRealCalcDataWrite = BufferRAM2;
BufferedRealCalcDataWritePort2 = BufferRAM2;
BufferedImagCalcDataRead = BufferRAM3;
BufferedImagCalcDataReadPort2 = BufferRAM3;
BufferedImagCalcDataWrite = BufferRAM4;
BufferedImagCalcDataWritePort2 = BufferRAM4;
/* Point the cosine and sine tables to the CosRAM and SinRAM on-chip memory
 * buffers. These memories are local to the accelerator and are not shared
 * with the Nios II processor. */
CosineTable = CosRAM;
SineTable = SinRAM;
/* Data input buffering (Stage 1) */
/* Calculate the bit-reversal index and read 32 bits of data from the input
 * buffer in SDRAM (one real and imaginary pair). Split the word in half and
 * write the halves into the real and imaginary buffers concurrently. The
 * data lands in the "read" buffers because the first computation stage
 * reads from them. */
for (inputCounter = 0; inputCounter < NUM_POINTS; inputCounter++) {
bit_rev_index = bitrev(inputCounter);
tempInput = tempInputPtr[inputCounter];
/* Low half-word is the real sample, high half-word the imaginary one. */
BufferedRealCalcDataRead[bit_rev_index] = (alt_16)(tempInput & 0x0000FFFF);
BufferedImagCalcDataRead[bit_rev_index] = (alt_16)((tempInput & 0xFFFF0000)>>16);
}
/* FFT Computation (Stage 2) */
/* Step through the FFT stages; the butterfly group size doubles each
 * stage (2, 4, 8, ...). */
for (stage_index = 1; stage_index <= FFT_SIZE; stage_index++) {
loop_element = 1<<stage_index;
loop_element_div2 = loop_element/2;
/* Initialize twiddle factor lookup indices. The table step halves every
 * stage: the first stage only ever uses entry 0, the last stage walks the
 * table with a step of 1. */
twiddle_index = 0;
twiddle_incr = 1 << (FFT_SIZE-stage_index);
/* Step through the butterfly positions within one group; each position
 * has its own twiddle factor. */
for(sub_stage_index = 0; sub_stage_index < loop_element_div2; sub_stage_index++) {
/* Look up the twiddle factors once per position */
CosReal = CosineTable[twiddle_index];
SinReal = SineTable[twiddle_index];
/* Process the butterflies of every group that share these twiddle
 * factors */
for(butterfly_index = sub_stage_index; butterfly_index < NUM_POINTS; butterfly_index += loop_element) {
l = butterfly_index + loop_element_div2;
/* Using temps (registers) allows these reads to happen concurrently,
 * since they are dual-port RAM accesses that do not overlap. Only the
 * read pointers are used here so that the write pointers at the bottom
 * can work in parallel. */
temp1 = BufferedRealCalcDataRead[l];
temp2 = BufferedImagCalcDataRead[l];
temp3 = BufferedRealCalcDataReadPort2[butterfly_index];
temp4 = BufferedImagCalcDataReadPort2[butterfly_index];
/* Multiply the lower input (temp1 + j*temp2) by the twiddle factor
 * (CosReal - j*SinReal) and shift the products right by PRESCALE to
 * accommodate 16-bit storage. */
/* CosReal, SinReal, temp1, and temp2 are all registers so no
 * waiting occurs here (this happens concurrently) */
tRealData = (( CosReal * temp1 ) + ( SinReal * temp2 ))>> PRESCALE;
tImagData = (( CosReal * temp2 ) - ( SinReal * temp1 ))>> PRESCALE;
/* Butterfly outputs: upper = input + rotated, lower = input - rotated.
 * tRealData, tImagData, temp3, temp4 are all registers so no
 * waiting occurs here (this happens concurrently). Only the write
 * pointers are used here so that the read pointers at the top can work
 * in parallel.
 * NOTE(review): the sums/differences are stored back into 16-bit
 * buffers with no per-stage scaling visible here - confirm that the
 * input range rules out overflow. */
BufferedRealCalcDataWrite[l] = temp3 - tRealData;
BufferedImagCalcDataWrite[l] = temp4 - tImagData;
BufferedRealCalcDataWritePort2[butterfly_index] = temp3 + tRealData;
BufferedImagCalcDataWritePort2[butterfly_index] = temp4 + tImagData;
}
twiddle_index += twiddle_incr;
}
/* Ping-Pong Buffering
 * At the end of each iteration of the stage loop the ping-pong buffers are
 * swapped, so that the next stage reads what this stage just wrote.
 *
 * The order of the assignments matters:
 *   1. BufferedRealCalcDataRead takes over the address that
 *      BufferedRealCalcDataWrite used during this stage.
 *   2. BufferedRealCalcDataWrite must now receive the old read address.
 *      BufferedRealCalcDataRead has already been overwritten, but
 *      BufferedRealCalcDataReadPort2 still holds that address (both read
 *      pointers refer to the same dual-port buffer), so it is taken from
 *      there.
 *   3. The two Port2 pointers are then made to follow their primary
 *      pointers.
 * The imaginary buffers are swapped in exactly the same way. */
BufferedRealCalcDataRead = BufferedRealCalcDataWrite;
BufferedRealCalcDataWrite = BufferedRealCalcDataReadPort2;
BufferedRealCalcDataReadPort2 = BufferedRealCalcDataRead;
BufferedRealCalcDataWritePort2 = BufferedRealCalcDataWrite;
BufferedImagCalcDataRead = BufferedImagCalcDataWrite;
BufferedImagCalcDataWrite = BufferedImagCalcDataReadPort2;
BufferedImagCalcDataReadPort2 = BufferedImagCalcDataRead;
BufferedImagCalcDataWritePort2 = BufferedImagCalcDataWrite;
}
/* Returning the interleaved results to SDRAM (Stage 3)
 * Because of the swap at the end of the last stage, the read pointers now
 * refer to the buffers holding the final results. Since the data is 16 bit
 * and interleaved, the real part (low half-word) and the imaginary part
 * (high half-word) are combined into one 32-bit word and sent off to SDRAM.
 * Each part is masked to 16 bits after widening so that the sign extension
 * of a negative value cannot spill into the other half. */
for(outputCounter = 0; outputCounter < NUM_POINTS; outputCounter++) {
tempOutputPtr[outputCounter] = (((alt_u32)(BufferedImagCalcDataRead[outputCounter]) & 0x0000FFFF)<<16) | ((alt_u32)BufferedRealCalcDataRead[outputCounter] & 0x0000FFFF);
}
}
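/*
 * Code-viewer residue (not part of the program): keyboard shortcut help.
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */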