📄 psymodel.c

📁 音频编码
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
	}	gfc->nsPsy.last_attacks[chn] = ns_attacks[2];	/*********************************************************************	 *    Calculate the energy and the tonality of each partition.	 *********************************************************************/	for (b = j = 0; b<gfc->npart_l; b++) {	    FLOAT ebb,m;	    m = ebb = fftenergy[j++];	    for (i = gfc->numlines_l[b] - 1; i > 0; --i) {		FLOAT el = fftenergy[j++];		ebb += el;		if (m < el)		    m = el;	    }	    eb[b] = ebb;	    max[b] = m;	    avg[b] = ebb * gfc->rnumlines_l[b];	}	if (gfc->nsPsy.pass1fp)	    nsPsy2dataRead(gfc->nsPsy.pass1fp, eb2, eb, chn, gfc->npart_l);	else {	    FLOAT m,a;	    a = avg[0] + avg[1];	    if (a != 0.0) {		m = max[0]; if (m < max[1]) m = max[1];		a = 20.0 * (m*2.0-a)		    / (a*(gfc->numlines_l[0] + gfc->numlines_l[1] - 1));		k = (int) a;		if (k > sizeof(tab)/sizeof(tab[0]) - 1)		    k = sizeof(tab)/sizeof(tab[0]) - 1;		a = eb[0] * tab[k];	    }	    eb2[0] = a;	    for (b = 1; b < gfc->npart_l-1; b++) {		a = avg[b-1] + avg[b] + avg[b+1];		if (a != 0.0) {		    m = max[b-1];		    if (m < max[b  ]) m = max[b];		    if (m < max[b+1]) m = max[b+1];		    a = 20.0 * (m*3.0-a)			/ (a*(gfc->numlines_l[b-1] + gfc->numlines_l[b] + gfc->numlines_l[b+1] - 1));		    k = (int) a;		    if (k > sizeof(tab)/sizeof(tab[0]) - 1)			k = sizeof(tab)/sizeof(tab[0]) - 1;		    a = eb[b] * tab[k];		}		eb2[b] = a;	    }	    a = avg[gfc->npart_l-2] + avg[gfc->npart_l-1];	    if (a != 0.0) {		m = max[gfc->npart_l-2];		if (m < max[gfc->npart_l-1])		    m = max[gfc->npart_l-1];		a = 20.0 * (m*2.0-a)		    / (a*(gfc->numlines_l[gfc->npart_l-2] + gfc->numlines_l[gfc->npart_l-1] - 1));		k = (int) a;		if (k > sizeof(tab)/sizeof(tab[0]) - 1)		    k = sizeof(tab)/sizeof(tab[0]) - 1;		a = eb[b] * tab[k];	    }	    eb2[b] = a;	}	/*********************************************************************	 *      convolve the partitioned energy and unpredictability	 *      with the spreading function, s3_l[b][k]	 ******************************************************************* */#undef GPSYCHO_BLOCK_TYPE_DECISION#ifdef GPSYCHO_BLOCK_TYPE_DECISION   {    FLOAT pe = 0;#endif	k = 0;	for ( b = 0;b < gfc->npart_l; b++ ) {	    FLOAT ecb;	    /* convolve the partitioned energy with the spreading function */	    int kk = gfc->s3ind[b][0];	    ecb = gfc->s3_ll[k++] * eb2[kk];	    while (++kk <= gfc->s3ind[b][1])		ecb = mask_add(ecb, gfc->s3_ll[k++] * eb2[kk], kk, kk-b, gfc);	    ecb *= 0.158489319246111; /* pow(10,-0.8) */	    /****   long block pre-echo control   ****/	    /* dont use long block pre-echo control if previous granule was 	     * a short block.  This is to avoid the situation:   	     * frame0:  quiet (very low masking)  	     * frame1:  surge  (triggers short blocks)	     * frame2:  regular frame.  looks like pre-echo when compared to 	     *          frame0, but all pre-echo was in frame1.	     */	    /* chn=0,1   L and R channels	       chn=2,3   S and M channels.	    */	    if (gfc->blocktype_old[chn & 1] == SHORT_TYPE)		thr[b] = ecb; /* Min(ecb, rpelev*gfc->nb_1[chn][b]); */	    else		thr[b] = NS_INTERP(Min(ecb,				       Min(rpelev*gfc->nb_1[chn][b],					   rpelev2*gfc->nb_2[chn][b])),				   ecb, pcfact);	    gfc->nb_2[chn][b] = gfc->nb_1[chn][b];	    gfc->nb_1[chn][b] = ecb;#ifdef GPSYCHO_BLOCK_TYPE_DECISION        /* this pe does not match GPSYCHO's pe, because of difference in          * convolution calculation, (mask_add etc.). Therefore the block         * switching does not happen exactly as in GPSYCHO.         */		pe -= gfc->numlines_l[b] * FAST_LOG(ecb / eb[b]);#endif    }#ifdef GPSYCHO_BLOCK_TYPE_DECISION    determine_block_type( gfp, fftenergy_s, uselongblock, chn, gr_out, &pe );   }#endif	/* compute masking thresholds for long blocks */	convert_partition2scalefac_l(gfc, eb, thr, chn);    } /* end loop over chn */    if (gfp->interChRatio != 0.0)	calc_interchannel_masking(gfp, gfp->interChRatio);    if (gfp->mode == JOINT_STEREO) {	FLOAT msfix;	msfix1(gfc);	msfix = gfp->msfix;	if (msfix != 0.0)	    ns_msfix(gfc, msfix, gfp->ATHlower*gfc->ATH->adjust);    }    /***************************************************************      * determine final block type     ***************************************************************/    block_type_set(gfp, uselongblock, blocktype_d, blocktype);    /*********************************************************************     * compute the value of PE to return ... no delay and advance     *********************************************************************/    for(chn=0;chn<numchn;chn++) {	FLOAT *ppe;	int type;	III_psy_ratio *mr;	if (chn > 1) {	    ppe = percep_MS_entropy - 2;	    type = NORM_TYPE;	    if (blocktype_d[0] == SHORT_TYPE || blocktype_d[1] == SHORT_TYPE)		type = SHORT_TYPE;	    mr = &masking_MS_ratio[gr_out][chn-2];	} else {	    ppe = percep_entropy;	    type = blocktype_d[chn];	    mr = &masking_ratio[gr_out][chn];	}	if (type == SHORT_TYPE)	    ppe[chn] = pecalc_s(mr, gfc->masking_lower);	else	    ppe[chn] = pecalc_l(mr, gfc->masking_lower);#if defined(HAVE_GTK)	if (gfp->analysis) gfc->pinfo->pe[gr_out][chn] = ppe[chn];#endif    }    return 0;}/*  *   The spreading function.  Values returned in units of energy */static FLOAT s3_func(FLOAT bark) {    FLOAT tempx,x,tempy,temp;    tempx = bark;    if (tempx>=0) tempx *= 3;    else tempx *=1.5;     if (tempx>=0.5 && tempx<=2.5)      {	temp = tempx - 0.5;	x = 8.0 * (temp*temp - 2.0 * temp);      }    else x = 0.0;    tempx += 0.474;    tempy = 15.811389 + 7.5*tempx - 17.5*sqrt(1.0+tempx*tempx);    if (tempy <= -60.0) return  0.0;    tempx = exp( (x + tempy)*LN_TO_LOG10 );     /* Normalization.  The spreading function should be normalized so that:         +inf           /           |  s3 [ bark ]  d(bark)   =  1           /         -inf    */    tempx /= .6609193;    return tempx;}static intinit_numline(    int *numlines, int *bo, int *bm,    FLOAT *bval, FLOAT *bval_width, FLOAT *mld,    FLOAT sfreq, int blksize, int *scalepos,    FLOAT deltafreq, int sbmax    ){    int partition[HBLKSIZE];    int i, j, k;    int sfb;    sfreq /= blksize;    j = 0;    /* compute numlines, the number of spectral lines in each partition band */    /* each partition band should be about DELBARK wide. */    for (i=0;i<CBANDS;i++) {	FLOAT bark1;	int j2;	bark1 = freq2bark(sfreq*j);	for (j2 = j; freq2bark(sfreq*j2) - bark1 < DELBARK && j2 <= blksize/2;	     j2++)	    ;	numlines[i] = j2 - j;	while (j<j2)	    partition[j++]=i;	if (j > blksize/2) break;    }    for ( sfb = 0; sfb < sbmax; sfb++ ) {	int i1,i2,start,end;	FLOAT arg;	start = scalepos[sfb];	end   = scalepos[sfb+1];	i1 = floor(.5 + deltafreq*(start-.5));	if (i1<0) i1=0;	i2 = floor(.5 + deltafreq*(end-.5));	if (i2>blksize/2) i2=blksize/2;	bm[sfb] = (partition[i1]+partition[i2])/2;	bo[sfb] = partition[i2];	/* setup stereo demasking thresholds */	/* formula reverse enginerred from plot in paper */	arg = freq2bark(sfreq*scalepos[sfb]*deltafreq);	arg = (Min(arg, 15.5)/15.5);	mld[sfb] = pow(10.0, 1.25*(1-cos(PI*arg))-2.5);    }    /* compute bark values of each critical band */    j = 0;    for (k = 0; k < i+1; k++) {	int w = numlines[k];	FLOAT  bark1,bark2;	bark1 = freq2bark (sfreq*(j    ));	bark2 = freq2bark (sfreq*(j+w-1));	bval[k] = .5*(bark1+bark2);	bark1 = freq2bark (sfreq*(j  -.5));	bark2 = freq2bark (sfreq*(j+w-.5));	bval_width[k] = bark2-bark1;	j += w;    }    return i+1;}static intinit_s3_values(    lame_internal_flags *gfc,    FLOAT **p,    int (*s3ind)[2],    int npart,    FLOAT *bval,    FLOAT *bval_width,    FLOAT *norm    ){    FLOAT s3[CBANDS][CBANDS];    /* The s3 array is not linear in the bark scale.     * bval[x] should be used to get the bark value.     */    int i, j, k;    int numberOfNoneZero = 0;    /* s[i][j], the value of the spreading function,     * centered at band j (masker), for band i (maskee)     *     * i.e.: sum over j to spread into signal barkval=i     * NOTE: i and j are used opposite as in the ISO docs     */    for (i = 0; i < npart; i++)	for (j = 0; j < npart; j++)	    s3[i][j] = s3_func(bval[i] - bval[j]) * bval_width[j] * norm[i];    for (i = 0; i < npart; i++) {	for (j = 0; j < npart; j++) {	    if (s3[i][j] != 0.0)		break;	}	s3ind[i][0] = j;	for (j = npart - 1; j > 0; j--) {	    if (s3[i][j] != 0.0)		break;	}	s3ind[i][1] = j;	numberOfNoneZero += (s3ind[i][1] - s3ind[i][0] + 1);    }    *p = malloc(sizeof(FLOAT)*numberOfNoneZero);    if (!*p)	return -1;    k = 0;    for (i = 0; i < npart; i++)	for (j = s3ind[i][0]; j <= s3ind[i][1]; j++)	    (*p)[k++] = s3[i][j];    return 0;}int psymodel_init(lame_global_flags *gfp){    lame_internal_flags *gfc=gfp->internal_flags;    int i,j,b,sb,k;    FLOAT bval[CBANDS];    FLOAT bval_width[CBANDS];    FLOAT norm[CBANDS];    FLOAT sfreq = gfp->out_samplerate;    gfc->ms_ener_ratio_old=.25;    gfc->blocktype_old[0] = gfc->blocktype_old[1] = NORM_TYPE; /* the vbr header is long blocks*/    for (i=0; i<4; ++i) {	for (j=0; j<CBANDS; ++j) {	    gfc->nb_1[i][j]=1e20;	    gfc->nb_2[i][j]=1e20;	    gfc->nb_s1[i][j] = gfc->nb_s2[i][j] = 1.0;	}	for ( sb = 0; sb < SBMAX_l; sb++ ) {	    gfc->en[i].l[sb] = 1e20;	    gfc->thm[i].l[sb] = 1e20;	}	for (j=0; j<3; ++j) {	    for ( sb = 0; sb < SBMAX_s; sb++ ) {		gfc->en[i].s[sb][j] = 1e20;		gfc->thm[i].s[sb][j] = 1e20;	    }	    gfc->nsPsy.last_attacks[i] = 0;	}	for(j=0;j<9;j++)	    gfc->nsPsy.last_en_subshort[i][j] = 10.;    }    j = gfc->PSY->cwlimit/(sfreq/BLKSIZE);    if (j > HBLKSIZE-4) /* j+3 < HBLKSIZE-1 */	j = HBLKSIZE-4;    if (j < CW_LOWER_INDEX)	j = CW_LOWER_INDEX;    gfc->cw_upper_index = j;    for (j = 0; j < HBLKSIZE; j++)	gfc->cw[j] = 0.4f;    /* init. for loudness approx. -jd 2001 mar 27*/    gfc->loudness_sq_save[0] = gfc->loudness_sq_save[1] = 0.0;    /*************************************************************************     * now compute the psychoacoustic model specific constants     ************************************************************************/    /* compute numlines, bo, bm, bval, bval_width, mld */    gfc->npart_l	= init_numline(gfc->numlines_l, gfc->bo_l, gfc->bm_l,		       bval, bval_width, gfc->mld_l,		       sfreq, BLKSIZE, 		       gfc->scalefac_band.l, BLKSIZE/(2.0*576), SBMAX_l);    assert(gfc->npart_l <= CBANDS);    /* compute the spreading function */    for(i=0;i<gfc->npart_l;i++) {	norm[i]=1.0;	gfc->rnumlines_l[i] = 1.0 / gfc->numlines_l[i];    }    i = init_s3_values(gfc, &gfc->s3_ll, gfc->s3ind,		       gfc->npart_l, bval, bval_width, norm);    if (i)	return i;    /* compute long block specific values, ATH and MINVAL */    j = 0;    for ( i = 0; i < gfc->npart_l; i++ ) {	double x;	/* ATH */	x = FLOAT_MAX;	for (k=0; k < gfc->numlines_l[i]; k++, j++) {	    FLOAT  freq = sfreq*j/(1000.0*BLKSIZE);	    FLOAT  level;	    /*	freq = Min(.1,freq);*/      /* ATH below 100 Hz constant, not further climbing */	    level  = ATHformula (freq*1000, gfp) - 20;   /* scale to FFT units; returned value is in dB */	    level  = pow ( 10., 0.1*level );   /* convert from dB -> energy */	    level *= gfc->numlines_l [i];	    if (x > level)		x = level;	}	gfc->ATH->cb[i] = x;	/* MINVAL.	   For low freq, the strength of the masking is limited by minval	   this is an ISO MPEG1 thing, dont know if it is really needed */	x = (-20+bval[i]*20.0/10.0);	if (bval[i]>10) x = 0;	gfc->minval[i]=pow(10.0,x/10);	gfc->PSY->prvTonRed[i] = gfc->minval[i];    }    /************************************************************************     * do the same things for short blocks     ************************************************************************/    gfc->npart_s	= init_numline(gfc->numlines_s, gfc->bo_s, gfc->bm_s,		       bval, bval_width, gfc->mld_s,		       sfreq, BLKSIZE_s,		       gfc->scalefac_band.s, BLKSIZE_s/(2.0*192), SBMAX_s);    assert(gfc->npart_s <= CBANDS);    /* SNR formula. short block is normalized by SNR. is it still right ? */    for(i=0;i<gfc->npart_s;i++) {	double snr=-8.25;	if (bval[i]>=13)	    snr = -4.5 * (bval[i]-13)/(24.0-13.0)		-8.25*(bval[i]-24)/(13.0-24.0);	norm[i]=pow(10.0,snr/10.0);    }    i = init_s3_values(gfc, &gfc->s3_ss, gfc->s3ind_s,		       gfc->npart_s, bval, bval_width, norm);    if (i)	return i;    init_mask_add_max_values();    init_fft(gfc);    /* setup temporal masking */    gfc->decay = exp(-1.0*LOG10/(temporalmask_sustain_sec*sfreq/192.0));    if (gfp->psymodel == PSY_NSPSYTUNE) {        FLOAT msfix;        msfix = NS_MSFIX;        if (gfp->exp_nspsytune & 2) msfix = 1.0;        if (gfp->msfix != 0.0) msfix = gfp->msfix;        gfp->msfix = msfix;        /* spread only from npart_l bands.  Normally, we use the spreading        * function to convolve from npart_l down to npart_l bands         */        for (b=0;b<gfc->npart_l;b++)            if (gfc->s3ind[b][1] > gfc->npart_l-1)                gfc->s3ind[b][1] = gfc->npart_l-1;    }    /*  prepare for ATH auto adjustment:     *  we want to decrease the ATH by 12 dB per second     */#define  frame_duration (576. * gfc->mode_gr / sfreq)    gfc->ATH->decay = pow(10., -12./10. * frame_duration);    gfc->ATH->adjust = 0.01; /* minimum, for leading low loudness */    gfc->ATH->adjust_limit = 1.0; /* on lead, allow adjust up to maximum */#undef  frame_duration    gfc->bo_s[SBMAX_s-1]--;    assert(gfc->bo_l[SBMAX_l-1] <= gfc->npart_l);    assert(gfc->bo_s[SBMAX_s-1] <= gfc->npart_s);    if (gfp->ATHtype != -1) { 	/* compute equal loudness weights (eql_w) */	FLOAT freq;	FLOAT freq_inc = gfp->out_samplerate / (BLKSIZE);	FLOAT eql_balance = 0.0;	freq = 0.0;	for( i = 0; i < BLKSIZE/2; ++i ) {	    /* convert ATH dB to relative power (not dB) */	    /*  to determine eql_w */	    freq += freq_inc;	    gfc->ATH->eql_w[i] = 1. / pow( 10, ATHformula( freq, gfp ) / 10 );	    eql_balance += gfc->ATH->eql_w[i];	}	eql_balance = 1.0 / eql_balance;	for( i = BLKSIZE/2; --i >= 0; ) { /* scale weights */	    gfc->ATH->eql_w[i] *= eql_balance;	}    }    return 0;}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -