📄 ofdm_help.c
字号:
//#include<stdio.h>#include<math.h>#include "spc.h"#include "ofdm_help.h"#define DBG_LVL 2//complex_ofdm *input,*output,*mul,*mul2,*w,*temp;//int insize;//,i,N;void handle2(complex_ofdm *input,complex_ofdm *output) { complex_ofdm input1[4]; input1[0]=input[0]; input1[1]=input1[0]; input1[2]=input[2]; input1[3].real=-1*(input[2].real); input1[3].imag=-1*(input[2].imag); asm ("movups %0,%%xmm0;movups %1,%%xmm1;"::"m" (*input1),"m" (input1[2])); asm("addps %xmm0, %xmm1");asm("movups %%xmm1,%0":"=m" (*output));}void handle4(complex_ofdm * input,complex_ofdm *output,complex_ofdm * mul) { mul[0].real=1; mul[0].imag=1; mul[1].real=1; mul[1].imag=-1; // Moving 1:0 in xmm0 3:2 in xmm2 with 0 at lower 64 bits // taking sum and diff of these two to get 2:0 and 3:1 in xmm0 and xmm1 // respectively Stage 1 ends // multiplied the highest 32 bits of xmm1 by -1(put in xmm3) to take car //e of multiplication by j of 3 // copied xmm0 to xmm2 and shuffled appropriately to get 1:0 in xmm0 and // 3:2 in xmm2 // Then again took sum and difference to get outputs asm (" movaps %0,%%xmm0;movaps %1,%%xmm2;movaps %%xmm0,%%xmm1;addps %%xmm2,%%xmm0;subps %%xmm2,%%xmm1"::"m" (input[0]),"m" (input[2])); asm("movaps %0,%%xmm3"::"m" (*mul)); asm("mulps %xmm3,%xmm1"); asm("movups %xmm0,%xmm2"); asm("shufps $0x44,%xmm1,%xmm0"); asm("shufps $0xBE,%xmm1,%xmm2"); asm("movups %xmm0,%xmm1"); asm("addps %xmm2,%xmm0"); asm("subps %xmm2,%xmm1");asm("movups %%xmm0,%0":"=m" (output[0]));asm("movups %%xmm1,%0":"=m" (output[2]));}void handle8(int offset,complex_ofdm *input,complex_ofdm *output,complex_ofdm *mul,complex_ofdm * mul2,int cas) { // Case 0 for FFT , Case 1 for iFFT if(cas==0) { mul[0].real=1; mul[0].imag=-1; } else { mul[0].real=-1; mul[0].imag=1; } mul[1]=mul[0]; asm("movaps %0, %%xmm0;movaps %1, %%xmm1;movaps %2, %%xmm2;movaps %3, %%xmm3"::"m" (input[offset]),"m" (input[2+offset]),"m" (input[4+offset]),"m" (input[6+offset])); asm("movaps %xmm0,%xmm4;movaps %xmm1,%xmm5"); asm("addps %xmm2,%xmm0;addps %xmm3,%xmm1;subps %xmm2,%xmm4;subps %xmm3,%xmm5"); // in xmm0 4:0 xmm1 6:2 xmm4 5:1 xmm5 7:3 Stage 1 ends // now multiply xmm5 by i //asm("movups %xmm5,%xmm2;shufps $0x11,%xmm5,%xmm2"); asm("shufps $0xB1,%xmm5,%xmm5"); asm("movaps %0,%%xmm3"::"m" (*mul)); asm("mulps %xmm3,%xmm5"); //Stage 2 additions and subtractions start asm("movaps %xmm0,%xmm2;addps %xmm1,%xmm0;subps %xmm1,%xmm2;movaps %xmm4,%xmm1;addps %xmm5,%xmm4;subps %xmm5,%xmm1"); // Stage 2 ends xmm0 4:0 xmm2 6:2 xmm1 7:3 xmm4 5:1 (hopefully!!!) asm("movaps %xmm2,%xmm5"); // Now try and get them in order 1:0,3:2,5:4,7:6 asm("movaps %xmm0,%xmm2;shufps $0x44,%xmm4,%xmm0"); asm("shufps $0xEE,%xmm4,%xmm2"); asm("movaps %xmm5,%xmm4;shufps $0x44,%xmm1,%xmm4;shufps $0xEE,%xmm1,%xmm5"); // Now have xmm0 1:0 xmm2 5:4 xmm4 3:2 xmm5 7:6 // Now have to multiply by the required factors mul[0].real=1; mul[0].imag=1; mul[1].real=cos(M_PI/4); mul[1].imag=mul[1].real; mul2[0].real=0; mul2[0].imag=0; if(cas) { mul2[1].real=-sin(M_PI/4); mul2[1].imag=-1*mul2[1].real; // these commented lines will be useful for the inverse case } else { mul2[1].real= sin(M_PI/4); mul2[1].imag=-1*mul2[1].real; } asm("movaps %0,%%xmm3"::"m" (*mul)); asm("movaps %0,%%xmm6"::"m" (*mul2)); asm("movaps %xmm2,%xmm1"); asm("shufps $0xB1,%xmm1,%xmm2"); asm("mulps %xmm3,%xmm1;mulps %xmm6,%xmm2"); asm("addps %xmm1,%xmm2"); //In xmm2 5:4 modified mul[0].real=0; mul[0].imag=0; mul[1].real=cos(3*M_PI/4); mul[1].imag=mul[1].real; if(cas) { mul2[0].real=-1; mul2[0].imag=1; mul2[1].real=-sin(6*M_PI/8); mul2[1].imag=-1*mul2[1].real; // These lines are useful for the inverse case } else { mul2[0].real=1; mul2[0].imag=-1; mul2[1].real=sin(6*M_PI/8); mul2[1].imag=-1*mul2[1].real; } asm("movaps %xmm5,%xmm1"); asm("shufps $0xB1,%xmm1,%xmm5"); asm("movaps %0,%%xmm3"::"m" (*mul)); asm("movaps %0,%%xmm6"::"m" (*mul2)); asm("mulps %xmm3,%xmm1;mulps %xmm6,%xmm5"); asm("addps %xmm1,%xmm5"); // Stage 3 additions and subtractions start xmm0 1:0 xmm4 3:2 xmm2 5:4 xmm5 7:6 asm("movaps %xmm0,%xmm1"); asm("addps %xmm2,%xmm0"); asm("subps %xmm2,%xmm1"); asm("movaps %xmm4,%xmm2"); asm("addps %xmm5,%xmm4; subps %xmm5,%xmm2"); //Stage 3 ends xmm0 1:0 xmm4 3:2 xmm1 5:4 xmm2 7:6asm("movaps %%xmm0,%0;movaps %%xmm4,%1;movaps %%xmm1,%2;movaps %%xmm2,%3":"=m" (output[0+offset]),"=m" (output[2+offset]),"=m" (output[4+offset]),"=m" (output[6+offset])); /* PR_DBG(0,"Output at offset %d is \n%f,%f\n,%f,%f\n,%f,%f\n,%f,%f\n,%f,%f\n,%f,%f\n,%f,%f\n,%f,%f\n",offset,output[offset].real,output[offset].imag,output[offset+1].real,output[offset+1].imag,output[offset+2].real,output[offset+2].imag,output[offset+3].real,output[offset+3].imag,output[offset+4].real,output[offset+4].imag,output[offset+5].real,output[offset+5].imag,output[offset+6].real,output[offset+6].imag,output[offset+7].real,output[offset+7].imag); */}void compmult(int index1,int index2,complex_ofdm* in,complex_ofdm * w,complex_ofdm * temp,int tempindex,complex_ofdm *fact1,complex_ofdm *fact2) { fact1[0].real=w[index1].real; fact1[0].imag=fact1[0].real; fact1[1].real=w[index2].real; fact1[1].imag= fact1[1].real; fact2[0].imag = w[index1].imag; fact2[0].real=-1*fact2[0].imag; fact2[1].imag = w[index2].imag; fact2[1].real=-1*fact2[1].imag; asm("movaps %0 , %%xmm0 ; movaps %%xmm0 , %%xmm1"::"m" (*in)); asm("shufps $0xB1 , %xmm0 , %xmm1"); asm("movaps %0,%%xmm2;movaps %1,%%xmm3"::"m" (*fact1),"m" (*fact2)); asm("mulps %xmm2,%xmm0;mulps %xmm3, %xmm1;addps %xmm1,%xmm0");asm("movaps %%xmm0,%0":"=m" (temp[tempindex]));}void handle( int size, int insize, int offset, complex_ofdm * input, complex_ofdm * output, complex_ofdm * temp, complex_ofdm *w, complex_ofdm *mul, complex_ofdm *mul2, int cas, complex_ofdm *fact1, complex_ofdm *fact2 ) { int tem,tem1; switch (size) { case 2 : handle2(input,output); break; case 4 : handle4(input,output,mul); break; case 8 : handle8(offset,input,output,mul,mul2,cas); break; default: //temp = (complex_ofdm*) malloc(size * sizeof(complex_ofdm)); // First we reorder the input in even , odd format for(tem=0;tem<size/2;tem++) { temp[tem]=input[offset+tem*2]; temp[tem+size/2]=input[offset+2*tem+1]; } //PR_DBG(0,"reordered inputs\n"); for(tem=0;tem<size;tem++) { input[offset+tem]=temp[tem]; //PR_DBG(0,"%f , %f\n",input[offset+tem].real,input[offset+tem].imag); } //free(temp); handle(size/2,insize,offset,input,output,temp,w,mul,mul2,cas,fact1,fact2); handle(size/2,insize,offset+size/2,input,output,temp,w,mul,mul2,cas,fact1,fact2); //if(size==32) // print(output,32); /*for(tem=0;tem<size;tem++) PR_DBG(0,"output[%d] %f %f\n",tem,output[tem].real,output[tem].imag); */ for(tem=offset+size/2;tem<offset+size;tem+=2) { tem1=tem-size/2-offset; compmult((tem1*insize)/size,((tem1+1)*insize)/size,&output[tem],w,temp,tem1,fact1,fact2); //PR_DBG(0,"\n%f %f \n %f %f\n",w[tem1*insize/size].real,w[tem1*insize/size].imag,w[(tem1+1)*insize/size].real,w[(tem1+1)*insize/size].imag); } for(tem=0;tem<size/2;tem++) { //PR_DBG(0,"%f %f\n",temp[tem].real,temp[tem].imag); output[tem+offset].real=output[tem+offset].real+temp[tem].real; output[tem+offset].imag=output[tem+offset].imag+temp[tem].imag; output[tem+offset+size/2].real=output[tem+offset].real-2*temp[tem].real; output[tem+offset+size/2].imag=output[tem+offset].imag-2*temp[tem].imag; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -