⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 estimation.c

📁 软件无线电的平台
💻 C
📖 第 1 页 / 共 2 页
字号:
/** * Channel Estimation Routines *  * Alessandro Nordio, 01/03 * Linus Gasser 02/10/25 * * This version is optimized for MMX. * */#include "system.h"#include "mmx.h"#include "estimation.h"#include "debugging.h"#define DBG_LVL 2/** * @short fft of 2048 16-bit ints */int channel_estimation_2048(mmx_t *x, mmx_t *y, mmx_t *seq,                            mmx_t *tw, mmx_t *tw2, unsigned short *rev) {  mmx_t temp[2048];                          // temp buffer  PR_DBG( 4, "ce of 2048\n" );  // does FFT of the input, (real input) and reverse bit ordering  fft_radix2_fixpoint_mmx(x, &temp[0], tw, rev,11,5,0);  // multiply temp and seq vectors : temp[i] = temp[i]*seq[i]  mult_cpx_vector(&temp[0],   &seq[0],&temp[0],  512,5);  // multiply temp and seq vectors : temp[i] = temp[i]*seq[i]  mult_cpx_vector(&temp[512], &seq[0],&temp[512],512,5);  /* do IFFT of temp and put the result in y (this routine assumes   * the second half of the vector x to be 0 (no need to zero it)   */  fft_radix2_fixpoint_mmx(&temp[0],y, tw2,rev,11,4,4);  return(0);}/** * @short fft of 512 16-bit ints */int channel_estimation_512(mmx_t *x,mmx_t *y, mmx_t *seq, mmx_t *tw,                           mmx_t *tw2, unsigned short *rev) {  mmx_t temp[512];   // temp buffer  PR_DBG( 4, "ce of 512\n" );  // does FFT of the input, (real input) and reverse bit ordering  fft_radix2_fixpoint_mmx(x, &temp[0], tw, rev,9,5,0);  // multiply temp and seq vectors : temp[i] = temp[i]*seq[i]  mult_cpx_vector(&temp[0],   &seq[0],&temp[0],  128,5);  // multiply temp and seq vectors : temp[i] = temp[i]*seq[i]  mult_cpx_vector(&temp[128], &seq[0],&temp[128],128,5);  /* do IFFT of temp and put the result in y (this routine assumes   * the second half of the vector x to be 0 (no need to zero it)    */  fft_radix2_fixpoint_mmx(&temp[0],y, tw2,rev,9,3,4);  return(0);}/** * @short fft of 768 16-bit ints */int channel_estimation_768(mmx_t *x,mmx_t *y, mmx_t *seq, mmx_t *tw,                           mmx_t *tw2, unsigned short *rev) {  mmx_t temp[768]; // temp buffer  PR_DBG( 4, "ce of 768\n" );  // does FFT of the input, (real input -> flag = 0)  fft_768_fixpoint_mmx(&x[0], &temp[0], tw,   &rev[0],4,0);  // multiply temp and seq vectors : temp[i] = temp[i]*seq[i]  mult_cpx_vector(&temp[0],   &seq[0],&temp[0],  192, 2);  // multiply temp and seq vectors : temp[i] = temp[i]*seq[i]  mult_cpx_vector(&temp[192], &seq[0],&temp[192],192, 2);  /** do IFFT of temp and put the result in y (this routine assumes   * the second half of the vector x to be 0 (no need to zero it)    */  fft_768_fixpoint_mmx(&temp[0], &y[0], tw2,  &rev[0],2,2);  return(0);}/** * @short This function computes a 768 point complex FFT in fixed point */int fft_768_fixpoint_mmx(mmx_t *x, mmx_t *y, mmx_t *twiddle,                         unsigned short *rev, unsigned short log2_amp,                         unsigned short flag) {  reverse_order768(&x[0],&y[0],&rev[0],flag);   // does reverse bit ordering  if(flag==0) {    // does 3 256-points FFTs    fft_radix2_fixpoint_mmx(&y[0],   &y[0],   &twiddle[0],&rev[0],8,log2_amp,3);    fft_radix2_fixpoint_mmx(&y[256], &y[256], &twiddle[0],&rev[0],8,log2_amp,3);    fft_radix2_fixpoint_mmx(&y[512], &y[512], &twiddle[0],&rev[0],8,log2_amp,3);    butterfly3(&y[0],&y[0],&twiddle[255],256,1);  } else if(flag==2) {    // does 3 256-points IFFTs    fft_radix2_fixpoint_mmx(&y[0],   &y[0],   &twiddle[0],&rev[0],8,log2_amp,5);    fft_radix2_fixpoint_mmx(&y[256], &y[256], &twiddle[0],&rev[0],8,log2_amp,5);    fft_radix2_fixpoint_mmx(&y[512], &y[512], &twiddle[0],&rev[0],8,log2_amp,5);    butterfly3(&y[0],&y[0],&twiddle[255],256,0);  }  return(0);}/* * flag = 0 input is real x0,x1,x2,x3,... * flag = 1 input is cpx and in mmx_t format Re0 Im0 re0 Im0, .... * flag = 2 as in 1 but the second half of the input vector is assumed to be 0 */int reverse_order768(mmx_t *x,mmx_t *y,unsigned short *rev,unsigned short flag) {  int i;  short index;  if(flag==0) {    pxor_r2r(mm6,mm6);            // clear mm6: in mm6 there is always 0    for(i=0;i<192;i++) {           // reverse 4 samples at a time => does the loop 192 times 192*4 = 768      movq_m2r(x[i],mm0);         // load x0, x1,x2 and x3 at the same time      movq_r2r(mm0,mm1);          // and make a copy of it      punpcklwd_r2r(mm6,mm0);     // now in mm0 there is | x0 0 x1 0 |      punpckhwd_r2r(mm6,mm1);     // now in mm1 there is | x2 0 x3 0 |      movq_r2r(mm0,mm2);          // copy mm0 in mm2      movq_r2r(mm1,mm3);          // copy mm1 in mm3      punpckldq_r2r(mm0,mm2);     // now in mm2 there is |x0 0 x0 0 |      punpckldq_r2r(mm1,mm3);     // now in mm3 there is |x2 0 x2 0 |      movq_r2r(mm0,mm4);          // copy mm0 in mm4      movq_r2r(mm1,mm5);          // copy mm1 in mm5      punpckhdq_r2r(mm0,mm4);     // now in mm4 there is |x1 0 x1 0 |      punpckhdq_r2r(mm1,mm5);     // now in mm5 there is |x3 0 x3 0 |      index = rev[0];             // get the output index      movq_r2m(mm2,y[index]);     // write |x0 0 x0 0 | to y[index]      index = rev[1];             // get the output index      movq_r2m(mm4,y[index]);     // write |x1 0 x1 0 | to y[index]      index = rev[2];             // get the output index      movq_r2m(mm3,y[index]);     // write |x2 0 x2 0 | to y[index]      index = rev[3];             // get the output index      movq_r2m(mm5,y[index]);     // write |x3 0 x3 0 | to y[index]      rev+=4;                     // increase reverse buffer pointer    }  }  // end flag=0  if(flag==1) {    //    WARNING: Here we assume the input format to be [Re0,Im0,Re0,Im0, Re1,Im1,Re1,Im1,.....    //    we do reverse bit ordering and we reorder 4 samples per loop    for(i=0;i<192;i++) {      movq_m2r(x[0],mm0);         // load 1st sample      movq_m2r(x[1],mm1);         // load 2nd sample      movq_m2r(x[2],mm2);         // load 3rd sample      movq_m2r(x[3],mm3);         // load 4th sample      index = rev[0];             // get the output index      movq_r2m(mm0,y[index]);     // write |x0 0 x0 0 | to y[index]      index = rev[1];             // get the output index      movq_r2m(mm1,y[index]);     // write |x1 0 x1 0 | to y[index]      index = rev[2];             // get the output index      movq_r2m(mm2,y[index]);     // write |x2 0 x2 0 | to y[index]      index = rev[3];             // get the output index      movq_r2m(mm3,y[index]);     // write |x3 0 x3 0 | to y[index]      rev+=4;                     // increase reverse buffer pointer      x+=4;    }  } // end flag 1  if(flag==2) {    /* WARNING: Here we assume only the first half of the vector x     * is significant: the second half is assumed to be 0. After reordering      * the significant samples and the zeros are interleaved:      * (the odd samples are 0) So we can modify a bit the reordering      * loop in order to avoid the first stage of the IFFT. In fact the      * first butterfly is (w0 = (1,0))<ul>     * <li>X0 = x0+w0*x1  = x0+x1 = x0</li>     * <li>X1 = x0-w0*x1  = x0-x1 = x0</li>     * </ul>     * since x1 = 0 (odd sample)     */    for(i=0;i<96;i++) {      movq_m2r(x[0],mm0);         // load 1st sample      movq_m2r(x[1],mm1);         // load 2nd sample      movq_m2r(x[2],mm2);         // load 3rd sample      movq_m2r(x[3],mm3);         // load 4th sample      index = rev[0];             // get the output index      movq_r2m(mm0,y[index]);     // write |x0 0 x0 0 | to y[index]      movq_r2m(mm0,y[index+1]);   // write |x0 0 x0 0 | to y[index+1]      index = rev[1];             // get the output index      movq_r2m(mm1,y[index]);     // write |x1 0 x1 0 | to y[index]      movq_r2m(mm1,y[index+1]);   // write |x1 0 x1 0 | to y[index+1]      index = rev[2];             // get the output index      movq_r2m(mm2,y[index]);     // write |x2 0 x2 0 | to y[index]      movq_r2m(mm2,y[index+1]);   // write |x2 0 x2 0 | to y[index+1]      index = rev[3];             // get the output index      movq_r2m(mm3,y[index]);     // write |x3 0 x3 0 | to y[index]      movq_r2m(mm3,y[index+1]);   // write |x3 0 x3 0 | to y[index+1]      rev+=4;                     // increase reverse buffer pointer      x+=4;    }  }  emms();  return(0);}/** * @short compute the reverse budder for a 768 point FFT */int reverse_buffer768(unsigned short *buf) {  int i;  unsigned short j;  for(i=0;i<256;i++) {    j = ReverseBits(i,8);    buf[3*i]   = j;    buf[3*i+1] = 256+j;    buf[3*i+2] = 512+j;  }  return(0);}/** * @short This function computes N butterflies of size 3 */int butterfly3( mmx_t *x, mmx_t *y, mmx_t *tw,                unsigned int N, unsigned short flag ) {  int i;  mmx_t *tw1,*tw2;  tw1 = &tw[0];  tw2 = &tw[3*N];  if(flag==0) {    for(i=0;i<N;i++) {      movq_m2r(x[i+N],mm1);         // load x1      movq_m2r(x[i+2*N],mm2);       // load x2      movq_r2r(mm1,mm3);            // make a copy of x1      movq_m2r(x[i]    ,mm0);       // load x0      movq_r2r(mm2,mm4);            // make a copy of x2      pmaddwd_m2r(tw1[i+N],mm3);    // compute w1b*x1      movq_r2r(mm1,mm5);            // make a copy of x1      pmaddwd_m2r(tw2[i+N],mm4);    // compute w2b*x2      movq_r2r(mm2,mm6);            // make a copy of x2      pmaddwd_m2r(tw1[i],mm1);      // compute w1a*x1      paddd_r2r(mm3,mm4);           // compute zb = w1b*x1+w2b*x2      pmaddwd_m2r(tw2[i],mm2);      // compute w2a*x2      psrad_i2r(14, mm4);           // shift zb by 14 (twiddle dinamic)      paddd_r2r(mm1,mm2);           // compute za = w1a*x1+w2a*x2      packssdw_r2r(mm4,mm4);        // pack zb in the format [re im re im]      psrad_i2r(14, mm2);           // shift za by 14 (twiddle dinamic)      paddw_r2r(mm0,mm4);           // compute X1 = zb+x0      pmaddwd_m2r(tw1[i+2*N],mm5);  // compute w1c*x1      psraw_i2r(1, mm4);            // shift X1 right by 1      pmaddwd_m2r(tw2[i+2*N],mm6);  // compute w2c*x2      movq_r2m(mm4,y[i+N]);         // store X1      packssdw_r2r(mm2,mm2);        // pack za in the format [re im re im]      paddd_r2r(mm5,mm6);           // compute zc = w1c*x1+w2c*x2      paddw_r2r(mm0,mm2);           // compute X0 = za+x0      psrad_i2r(14, mm6);           // shift zc by 14 (twiddle dinamic)      psraw_i2r(1, mm2);            // shift X0 right by 1      packssdw_r2r(mm6,mm6);        // pack zc in the format [re im re im]      movq_r2m(mm2,y[i]);           // store X0      paddw_r2r(mm0,mm6);           // compute X2 = zc+x0      psraw_i2r(1, mm6);            // shift X2 right by 1      movq_r2m(mm6,y[i+2*N]);       // store X2    }  }  if(flag==1) {    /* this loop computes half of the butterflies but only     * for the first and second thirds of the output vector     */    for(i=0;i<(N>>1);i++) {      movq_m2r(x[i+N],mm1);         // load x1      movq_m2r(x[i+2*N],mm2);       // load x2      movq_r2r(mm1,mm3);            // make a copy of x1      movq_m2r(x[i]    ,mm0);       // load x0      movq_r2r(mm2,mm4);            // make a copy of x2      pmaddwd_m2r(tw1[i],mm1);      // compute w1a*x1      pmaddwd_m2r(tw2[i],mm2);      // compute w2a*x2      pmaddwd_m2r(tw1[i+N],mm3);    // compute w1b*x1      paddd_r2r(mm1,mm2);           // compute za = w1a*x1+w2a*x2      pmaddwd_m2r(tw2[i+N],mm4);    // compute w2b*x2      paddd_r2r(mm3,mm4);           // compute zb = w1b*x1+w2b*x2      psrad_i2r(14, mm2);           // shift za by 14 (twiddle dinamic)      psrad_i2r(14, mm4);           // shift zb by 14 (twiddle dinamic)      packssdw_r2r(mm2,mm2);        // pack za in the format [re im re im]      packssdw_r2r(mm4,mm4);        // pack zb in the format [re im re im]      paddw_r2r(mm0,mm2);           // compute X0 = za+x0      paddw_r2r(mm0,mm4);           // compute X1 = zb+x0      psraw_i2r(1, mm2);            // shift X0 right by 1      psraw_i2r(1, mm4);            // shift X1 right by 1      movq_r2m(mm2,y[i]);           // store X0      movq_r2m(mm4,y[i+N]);         // store X1    }    // this loop computes the rest of the butterflies    for(i=(N>>1);i<N;i++) {      movq_m2r(x[i+N],mm1);         // load x1      movq_m2r(x[i+2*N],mm2);       // load x2      movq_m2r(x[i],mm0);           // load x0      pmaddwd_m2r(tw1[i],mm1);      // compute w1a*x1      pmaddwd_m2r(tw2[i],mm2);      // compute w2a*x2      paddd_r2r(mm1,mm2);           // compute za = w1a*x1+w2a*x2      psrad_i2r(14, mm2);           // shift za by 14 (twiddle dinamic)      packssdw_r2r(mm2,mm2);        // pack za in the format [re im re im]      paddw_r2r(mm0,mm2);           // compute X0 = za+x0      psraw_i2r(1, mm2);            // shift X0 right by 1      movq_r2m(mm2,y[i]);           // store X0    }  }  emms();  return(0);}/** * @short FFT base 2 in fixed point */int fft_radix2_fixpoint_mmx(mmx_t *x,                            mmx_t *y,                            mmx_t *twiddle,                            unsigned short *rev,                            unsigned short log2size,                            unsigned short log2_amp,                            short flag) {  int i,j,k;             // counters  int bs;                // block_size  int n_b;               // numer of blocks  short int index;       // reverse index

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -