tonal.c

来自「mpeg文件格式类型的文档」· C语言 代码 · 共 1,382 行 · 第 1/4 页

C
1,382
字号
	    q = sblimit_ml;
	    II_smr (ltmin[7+k], smr[7+k], spike[k], scale[7+k], sblimit_ml, i, q);
	
	} /* k-loop*/
    } /*n_ml_ch>0*/

    mem_free ((void **) &sample);
    mem_free ((void **) &spike); 
} /* II_psycho_One_ml */

/**********************************************************************/
/*
/*        This module implements the psychoacoustic model I for the
/* MPEG encoder layer I. It uses simplified tonal and noise masking
/* threshold analysis to generate SMR for the encoder bit allocation
/* routine.
/*
/**********************************************************************/

/****************************************************************/
/*
/*        Fast Fourier transform of the input samples.
/*
/****************************************************************/

void I_f_f_t (double *sample, mask *power)  /* this function calculates */
					    /* an FFT analysis for the  */
					    /* freq. domain             */
{
 int i,j,k,ll,l=0;
 int ip, le, le1;
 double t_r, t_i, u_r, u_i;
 static int M, MM1, init = 0, N, NV2, NM1;
 double *x_r, *x_i, *energy;
 static int *rev;
 static double *w_r, *w_i;

 x_r = (double *) mem_alloc(sizeof(DFFT2), "x_r");
 x_i = (double *) mem_alloc(sizeof(DFFT2), "x_i");
 energy = (double *) mem_alloc(sizeof(DFFT2), "energy");
 for(i=0;i<FFT_SIZE/2;i++) x_r[i] = x_i[i] = energy[i] = 0;
 if(!init){
    rev = (int *) mem_alloc(sizeof(IFFT2), "rev");
    w_r = (double *) mem_alloc(sizeof(D9), "w_r");
    w_i = (double *) mem_alloc(sizeof(D9), "w_i");
    M = 9;
    MM1 = 8;
    N = FFT_SIZE/2;
    NV2 = FFT_SIZE/2 >> 1;
    NM1 = FFT_SIZE/2 - 1;
    for(ll=0;ll<M;ll++){
       le = 1 << (M-ll);
       le1 = le >> 1;
       w_r[ll] = cos(PI/le1);
       w_i[ll] = -sin(PI/le1);
    }
    for(i=0;i<FFT_SIZE/2;rev[i] = l,i++) for(j=0,l=0;j<9;j++){
       k=(i>>j) & 1;
       l |= (k<<8-j);                
    }
    init = 1;
 }
 memcpy( (char *) x_r, (char *) sample, sizeof(double) * FFT_SIZE/2);
 for(ll=0;ll<MM1;ll++){
    le = 1 << (M-ll);
    le1 = le >> 1;
    u_r = 1;
    u_i = 0;
    for(j=0;j<le1;j++){
       for(i=j;i<N;i+=le){
          ip = i + le1;
          t_r = x_r[i] + x_r[ip];
          t_i = x_i[i] + x_i[ip];
          x_r[ip] = x_r[i] - x_r[ip];
          x_i[ip] = x_i[i] - x_i[ip];
          x_r[i] = t_r;
          x_i[i] = t_i;
          t_r = x_r[ip];
          x_r[ip] = x_r[ip] * u_r - x_i[ip] * u_i;
          x_i[ip] = x_i[ip] * u_r + t_r * u_i;
       }
       t_r = u_r;
       u_r = u_r * w_r[ll] - u_i * w_i[ll];
       u_i = u_i * w_r[ll] + t_r * w_i[ll];
    }
 }
 for(i=0;i<N;i+=2){
    ip = i + 1;
    t_r = x_r[i] + x_r[ip];
    t_i = x_i[i] + x_i[ip];
    x_r[ip] = x_r[i] - x_r[ip];
    x_i[ip] = x_i[i] - x_i[ip];
    x_r[i] = t_r;
    x_i[i] = t_i;
    energy[i] = x_r[i] * x_r[i] + x_i[i] * x_i[i];
 }
 for(i=0;i<FFT_SIZE/2;i++) if(i<rev[i]){
    t_r = energy[i];
    energy[i] = energy[rev[i]];
    energy[rev[i]] = t_r;
 }
 for(i=0;i<HAN_SIZE/2;i++){                     /* calculate power  */
    if(energy[i] < 1E-20) energy[i] = 1E-20;    /* density spectrum */
       /* power calculation corrected with a factor 4, both positive
	  and negative frequencies exist, 1992-11-06 shn */
       power[i].x = 10 * log10(energy[i]*4) + POWERNORM;
       power[i].next = STOP;
       power[i].type = FALSE;
 }
    mem_free ((void **) &x_r);
    mem_free ((void **) &x_i);
    mem_free ((void **) &energy);
}

/****************************************************************/
/*
/*         Window the incoming audio signal.
/*
/****************************************************************/

void I_hann_win (double *sample)    /* this function calculates a  */
				    /* Hann window for PCM (input) */
{				    /* samples for a 512-pt. FFT   */
 register int i;
 register double sqrt_8_over_3;
 static int init = 0;
 static double *window;

 if(!init){  /* calculate window function for the Fourier transform */
    window = (double *) mem_alloc (sizeof (DFFT2), "window");
    sqrt_8_over_3 = pow(8.0/3.0, 0.5);
    for(i=0;i<FFT_SIZE/2;i++){
      /* Hann window formula */
      window[i]=sqrt_8_over_3*0.5*(1-cos(2.0*PI*i/(FFT_SIZE/2-1)))/(FFT_SIZE/2);
    }
    init = 1;
 }
 for(i=0;i<FFT_SIZE/2;i++) sample[i] *= window[i];
}

/*******************************************************************/
/*
/*        This function finds the maximum spectral component in each
/* subband and return them to the encoder for time-domain threshold
/* determination.
/*
/*******************************************************************/

void I_pick_max (mask *power, double *spike)
{
 double max;
 int i,j;

 /* calculate the spectral component in each subband */
 for(i=0;i<HAN_SIZE/2;spike[i>>3] = max, i+=8)
    for(j=0, max = DBMIN;j<8;j++) max = (max>power[i+j].x) ? max : power[i+j].x;
}

/****************************************************************/
/*
/*        This function labels the tonal component in the power
/* spectrum.
/*
/****************************************************************/

void I_tonal_label (mask *power, int *tone) /* this function extracts   */
					    /* (tonal) sinusoidals from */
					    /* the spectrum             */
{
 int i,j, last = LAST, first, run;
 double max;
 int last_but_one=LAST;

 *tone = LAST;
 for(i=2;i<HAN_SIZE/2-6;i++){
    if(power[i].x>power[i-1].x && power[i].x>=power[i+1].x){
       power[i].type = TONE;
       power[i].next = LAST;
       if(last != LAST) power[last].next = i;
       else first = *tone = i;
       last = i;
    }
 }
 last = LAST;
 first = *tone;
 *tone = LAST;
 while(first != LAST){                /* conditions for the tonal     */
    if(first<2 || first>249) run = 0; /* otherwise k+/-j will be out of bounds*/
    else if(first<62) run = 2;        /* components in layer II, which */
    else if(first<126) run = 3;       /* are the boundaries for calc.   */
    else run = 6;                     /* the tonal components          */
    max = power[first].x - 7;
    for(j=2;j<=run;j++)  /* after calc. of tonal components, set to loc.*/
       if(max < power[first-j].x || max < power[first+j].x){   /* max   */
          power[first].type = FALSE;
          break;
       }
    if(power[first].type == TONE){    /* extract tonal components */
       int help=first;
       if(*tone == LAST) *tone = first;
       while((power[help].next!=LAST)&&(power[help].next-first)<=run)
          help=power[help].next;
       help=power[help].next;
       power[first].next=help;
       if((first-last)<=run){
          if(last_but_one != LAST) power[last_but_one].next=first;
       }
       if(first>1 && first<255){     /* calculate the sum of the */
          double tmp;                /* powers of the components */
          tmp = add_db(power[first-1].x, power[first+1].x);
          power[first].x = add_db(power[first].x, tmp);
       }
       for(j=1;j<=run;j++){
          power[first-j].x = power[first+j].x = DBMIN;
          power[first-j].next = power[first+j].next = STOP; /*dpwe: 2nd was .x*/
          power[first-j].type = power[first+j].type = FALSE;
       }
       last_but_one=last;
       last = first;
       first = power[first].next;
    }
    else {
       int ll;
       if(last == LAST) ; /* *tone = power[first].next; dpwe */
       else power[last].next = power[first].next;
       ll = first;
       first = power[first].next;
       power[ll].next = STOP;
    }
 }
}                        
                                
/****************************************************************/
/*
/*        This function finds the minimum masking threshold and
/* return the value to the encoder.
/*
/****************************************************************/

void I_minimum_mask(int sub_size, g_thres *ltg, double *ltmin)
{
 double min;
 int i,j;

 j=1;
 for(i=0;i<SBLIMIT;i++)
    if(j>=sub_size-1)                   /* check subband limit, and       */
       ltmin[i] = ltg[sub_size-1].hear; /* calculate the minimum masking  */
    else {                              /* level of LTMIN for each subband*/
       min = ltg[j].x;
       while(ltg[j].line>>3 == i && j < sub_size){
          if (min>ltg[j].x)  min = ltg[j].x;
          j++;
       }
       ltmin[i] = min;
    }
}

/*****************************************************************/
/*
/*        This procedure is called in musicin to pick out the
/* smaller of the scalefactor or threshold.
/*
/*****************************************************************/

void I_smr (double *ltmin, double *spike, double *scale)
{
 int i,j;
 double max;
                
 for(i=0;i<SBLIMIT;i++){                      /* determine the signal   */
    max = 20 * log10(scale[i] * 32768) - 10;  /* level for each subband */
    if(spike[i]>max) max = spike[i];          /* for the scalefactor    */
    max -= ltmin[i];
    ltmin[i] = max;
 }
}
        
/****************************************************************/
/*
/*        This procedure calls all the necessary functions to
/* complete the psychoacoustic analysis.
/*
/****************************************************************/

void
I_Psycho_One(
	double (*buffer)[1152],
	double (*scale)[32],
	double (*ltmin)[32],
	frame_params *fr_ps
) {
 int stereo = fr_ps->stereo;
 the_layer info = fr_ps->header;
 int k,i, tone=0, noise=0;
 static char init = 0;
 static int off[2] = {256,256};
 double *sample;
 DSBL *spike;
 static D640 *fft_buf;
 static mask_ptr power;
 static g_ptr ltg;
 static int crit_band;
 static int *cbound;
 static int sub_size;

 sample = (double *) mem_alloc(sizeof(DFFT2), "sample");
 spike = (DSBL *) mem_alloc(sizeof(D2SBL), "spike");
            /* call functions for critical boundaries, freq. */
 if(!init){ /* bands, bark values, and mapping              */
    fft_buf = (D640 *) mem_alloc(sizeof(D640) * 2, "fft_buf");
    power = (mask_ptr) mem_alloc(sizeof(mask) * HAN_SIZE/2, "power");
    crit_band = read_crit_band(info->lay,info->sampling_frequency);
    cbound = (int *) mem_alloc(sizeof(int) * crit_band, "cbound");
    read_cbound(info->lay,info->sampling_frequency,crit_band,cbound);
    read_freq_band(&sub_size,&ltg,info->lay,info->sampling_frequency);
    make_map(sub_size,power,ltg);
    for(i=0;i<640;i++) fft_buf[0][i] = fft_buf[1][i] = 0;
    init = 1;
 }
 for(k=0;k<stereo;k++){    /* check PCM input for a block of */
    for(i=0;i<384;i++)     /* 384 samples for a 512-pt. FFT  */
       fft_buf[k][(i+off[k])%640]= (double) buffer[k][i]/SCALE;
    for(i=0;i<FFT_SIZE/2;i++)
       sample[i] = fft_buf[k][(i+448+off[k])%640];
    off[k] += 384;
    off[k] %= 640;
                        /* call functions for windowing PCM samples,   */
    I_hann_win(sample); /* location of spectral components in each     */
    for(i=0;i<HAN_SIZE/2;i++) power[i].x = DBMIN;   /* subband with    */
    I_f_f_t(sample, power);              /* labeling, locate remaining */
    I_pick_max(power, &spike[k][0]);     /* non-tonal sinusoidals,     */
    I_tonal_label(power, &tone);         /* reduce noise & tonal com., */
    noise_label(crit_band,cbound,power, &noise, ltg);     /* find global & minimal      */
    subsampling (power, ltg, &tone, &noise);  /* threshold, and sgnl-   */
    threshold(sub_size,power, ltg, &tone, &noise,     /* to-mask ratio          */
      bitrate[info->lay-1][info->bitrate_index]/stereo);
    I_minimum_mask(sub_size,ltg, &ltmin[k][0]);
    I_smr(&ltmin[k][0], &spike[k][0], &scale[k][0]);        
 }
    mem_free ((void **) &sample);
    mem_free ((void **) &spike);
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?