📄 wb_vad.c

📁 通讯协议
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
    {
        test();
        if ((st->stat_count == 0))
        {
            alpha_up = ALPHA_UP2;          move16();
            alpha_down = ALPHA_DOWN2;      move16();
        } else
        {
            alpha_up = 0;                  move16();
            alpha_down = ALPHA3;           move16();
            bckr_add = 0;                  move16();
        }
    }

    /* Update noise estimate (bckr_est) */
    for (i = 0; i < COMPLEN; i++)
    {
        Word16 temp;

        temp = sub(st->old_level[i], st->bckr_est[i]);

        test();
        if (temp < 0)
        {                                  /* update downwards */
            st->bckr_est[i] = add(-2, add(st->bckr_est[i],
                    mult_r(alpha_down, temp))); move16();

            /* limit minimum value of the noise estimate to NOISE_MIN */
            test();
            if (sub(st->bckr_est[i], NOISE_MIN) < 0)
            {
                st->bckr_est[i] = NOISE_MIN;    move16();
            }
        } else
        {                                  /* update upwards */
            st->bckr_est[i] = add(bckr_add, add(st->bckr_est[i],
                    mult_r(alpha_up, temp)));   move16();

            /* limit maximum value of the noise estimate to NOISE_MAX */
            test();
            if (sub(st->bckr_est[i], NOISE_MAX) > 0)
            {
                st->bckr_est[i] = NOISE_MAX;    move16();
            }
        }
    }

    /* Update signal levels of the previous frame (old_level) */
    for (i = 0; i < COMPLEN; i++)
    {
        st->old_level[i] = level[i];       move16();
    }
}

/******************************************************************************
*
*     Function     : vad_decision
*     Purpose      : Calculates VAD_flag
*
*     Processing order:
*       1. Accumulate, over all COMPLEN bands, the square of the band level
*          divided by the background noise estimate (L_snr_sum).
*       2. Derive the decision threshold vad_thr from the log2 of the noise
*          level and the log2 of the speech level.
*       3. Shift st->vadreg right by one and set bit 0x4000 when L_snr_sum
*          exceeds the scaled threshold.
*       4. Call noise_estimate_update() and then hangover_addition(), whose
*          result is returned.
*
*     Side effects : st->speech_level may be raised, st->vadreg is updated,
*                    and st is handed to noise_estimate_update() and
*                    hangover_addition(), which may modify it further.
*
*     Note         : the only caller visible in this file, wb_vad(), passes
*                    the summed power of the current and the previous frame
*                    as pow_sum.
*
*/

static Word16 vad_decision(                /* return value : VAD_flag */
     VadVars * st,                         /* i/o : State structure                       */
     Word16 level[COMPLEN],                /* i   : sub-band levels of the input frame */
     Word32 pow_sum                        /* i   : power of the input frame           */
)
{
    Word16 i;
    Word32 L_snr_sum;
    Word32 L_temp;
    Word16 vad_thr, temp, noise_level;
    Word16 low_power_flag;
    Word16 hang_len, burst_len;
    Word16 ilog2_speech_level, ilog2_noise_level;
    Word16 temp2;

    /* Calculate squared sum of the input levels (level) divided by the background noise components
     * (bckr_est). */
    L_snr_sum = 0;                         move32();
    for (i = 0; i < COMPLEN; i++)
    {
        Word16 exp;

        /* Shift the noise estimate left by the count returned by norm_s(), divide half of the
         * band level by it, then shift the quotient by (exp - (UNIRSHFT - 1)) before it is
         * multiplied with itself and accumulated. */
        exp = norm_s(st->bckr_est[i]);
        temp = shl(st->bckr_est[i], exp);
        temp = div_s(shr(level[i], 1), temp);
        temp = shl(temp, sub(exp, UNIRSHFT - 1));
        L_snr_sum = L_mac(L_snr_sum, temp, temp);
    }

    /* Calculate average level of estimated background noise */
    L_temp = 0;                            move32();
    for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
    {
        L_temp = L_add(L_temp, st->bckr_est[i]);
    }

    /* noise_level is the upper 16 bits of the band sum shifted left by 12 */
    noise_level = extract_h(L_shl(L_temp, 12));
    /* If speech_level is below noise_level scaled by MIN_SPEECH_SNR (i.e. the SNR is lower than
     * the minimum), raise speech_level to that value */
    temp = shl(mult(noise_level, MIN_SPEECH_SNR), 3);

    test();
    if (sub(st->speech_level, temp) < 0)
    {
        st->speech_level = temp;           move16();
    }
    ilog2_noise_level = ilog2(noise_level);

    /* If SNR is very poor, speech_level is probably corrupted by noise level. This is corrected by
     * subtracting MIN_SPEECH_SNR*noise_level from speech level */
    ilog2_speech_level = ilog2(sub(st->speech_level, temp));

    /* Noise-dependent part of the threshold: NO_SLOPE * (log2 noise - NO_P1) + THR_HIGH */
    temp = add(mult(NO_SLOPE, sub(ilog2_noise_level, NO_P1)), THR_HIGH);

    /* Speech-dependent part of the threshold, limited to the range [SP_CH_MIN, SP_CH_MAX] */
    temp2 = add(SP_CH_MIN, mult(SP_SLOPE, sub(ilog2_speech_level, SP_P1)));
    test();
    if (sub(temp2, SP_CH_MIN) < 0)
    {
        temp2 = SP_CH_MIN;                 move16();
    }
    test();
    if (sub(temp2, SP_CH_MAX) > 0)
    {
        temp2 = SP_CH_MAX;                 move16();
    }
    vad_thr = add(temp, temp2);

    /* The combined threshold is never allowed below THR_MIN */
    test();
    if (sub(vad_thr, THR_MIN) < 0)
    {
        vad_thr = THR_MIN;                 move16();
    }
    /* Shift VAD decision register */
    st->vadreg = shr(st->vadreg, 1);       move16();

    /* Make intermediate VAD decision: the newest decision is stored in bit 0x4000 */
    test();
    if (L_sub(L_snr_sum, L_mult(vad_thr, 512 * COMPLEN)) > 0)
    {
        st->vadreg = (Word16) (st->vadreg | 0x4000);    logic16();move16();
    }
    /* check if the input power (pow_sum) is lower than a threshold */
    test();
    if (L_sub(pow_sum, VAD_POW_LOW) < 0)
    {
        low_power_flag = 1;                move16();
    } else
    {
        low_power_flag = 0;                move16();
    }
    /* Update background noise estimates (done after the new decision was stored in vadreg) */
    noise_estimate_update(st, level);

    /* Calculate values for hang_len and burst_len based on vad_thr */
    /* hang_len is limited from below to HANG_LOW; burst_len is used without limiting */
    hang_len = add(mult(HANG_SLOPE, sub(vad_thr, HANG_P1)), HANG_HIGH);
    test();
    if (sub(hang_len, HANG_LOW) < 0)
    {
        hang_len = HANG_LOW;               move16();
    };

    burst_len = add(mult(BURST_SLOPE, sub(vad_thr, BURST_P1)), BURST_HIGH);

    return (hangover_addition(st, low_power_flag, hang_len, burst_len));
}

/******************************************************************************
*
*     Estimate_Speech()
*     Purpose      : Estimate speech level
*
* The largest qualifying input level is tracked in st->sp_max. The speech
* frames must all fall within a window of SP_EST_COUNT frames; therefore
* noisy frames with only occasional VAD = "1" decisions do not affect the
* estimated speech_level.
*
*/
static void Estimate_Speech(
     VadVars * st,                         /* i/o : State structure    */
     Word16 in_level                       /* level of the input frame */
)
{
    Word16 alpha;                          /* smoothing factor for the update  */
    Word16 avg_level;                      /* half of the tracked maximum      */

    /* Restart the search when the required activity count can no longer be reached, i.e. when
     * SP_ACTIVITY_COUNT > SP_EST_COUNT - st->sp_est_cnt + st->sp_max_cnt */
    test();
    if (sub(sub(st->sp_est_cnt, st->sp_max_cnt), SP_EST_COUNT - SP_ACTIVITY_COUNT) > 0)
    {
        st->sp_est_cnt = 0;                move16();
        st->sp_max = 0;                    move16();
        st->sp_max_cnt = 0;                move16();
    }
    st->sp_est_cnt = add(st->sp_est_cnt, 1);    move16();

    /* A frame only takes part in the estimate when the newest intermediate VAD decision is set
     * or the level exceeds the current speech_level ... */
    logic16();test();test();test();
    if (((st->vadreg & 0x4000) == 0) && (sub(in_level, st->speech_level) <= 0))
    {
        return;
    }
    /* ... and the level is above MIN_SPEECH_LEVEL1 */
    if (sub(in_level, MIN_SPEECH_LEVEL1) <= 0)
    {
        return;
    }

    /* track the maximum level and count the qualifying frame */
    test();
    if (sub(in_level, st->sp_max) > 0)
    {
        st->sp_max = in_level;             move16();
    }
    st->sp_max_cnt = add(st->sp_max_cnt, 1);    move16();

    /* not enough qualifying frames collected yet */
    test();
    if (sub(st->sp_max_cnt, SP_ACTIVITY_COUNT) < 0)
    {
        return;
    }

    /* halve the maximum to get an "average" speech level */
    avg_level = shr(st->sp_max, 1);

    /* choose the update speed depending on the direction of the change */
    test();
    alpha = (sub(avg_level, st->speech_level) > 0) ? ALPHA_SP_UP : ALPHA_SP_DOWN;
    move16();

    /* levels at or below MIN_SPEECH_LEVEL2 do not change the estimate */
    test();
    if (sub(avg_level, MIN_SPEECH_LEVEL2) > 0)
    {
        st->speech_level = add(st->speech_level,
            mult_r(alpha, sub(avg_level, st->speech_level)));   move16();
    }

    /* start a new estimation window */
    st->sp_max = 0;                        move16();
    st->sp_max_cnt = 0;                    move16();
    st->sp_est_cnt = 0;                    move16();
}

/******************************************************************************
*                         PUBLIC PROGRAM CODE
******************************************************************************/

/******************************************************************************
*
*  Function:   wb_vad_init
*  Purpose:    Allocates state memory and initializes state memory
*
*  On success *state points to a freshly allocated VadVars that has been
*  passed through wb_vad_reset(). On failure a message is written to stderr,
*  -1 is returned and, unless state itself was NULL, *state is left NULL.
*
*/

Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok. */
     VadVars ** state                      /* i/o : State structure    */
)
{
    VadVars *vad_state;

    if (state == NULL)
    {
        fprintf(stderr, "vad_init: invalid parameter\n");
        return -1;
    }

    /* make sure the caller never sees a stale pointer when allocation fails */
    *state = NULL;

    vad_state = malloc(sizeof *vad_state);
    if (vad_state == NULL)
    {
        fprintf(stderr, "vad_init: can not malloc state structure\n");
        return -1;
    }

    /* the reset result is not checked: it only reports an error for a NULL pointer */
    wb_vad_reset(vad_state);

    *state = vad_state;
    return 0;
}

/******************************************************************************
*
*  Function:   wb_vad_reset
*  Purpose:    Initializes state memory
*
*  Every counter and flag register is cleared, the filter bank memories are
*  zeroed, the per-band noise/level estimates are set to NOISE_INIT and
*  speech_level is set to SPEECH_LEVEL_INIT.
*
*  Returns -1 (after printing a message to stderr) when state is NULL,
*  otherwise 0.
*
*/
Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok. */
     VadVars * state                       /* i/o : State structure    */
)
{
    Word16 i, j;

    if (state == (VadVars *) NULL)
    {
        fprintf(stderr, "vad_reset: invalid parameter\n");
        return -1;
    }
    state->tone_flag = 0;
    state->vadreg = 0;
    state->hang_count = 0;
    state->burst_count = 0;
    /* stat_count is read by the noise estimate update; the state memory comes from malloc(),
     * so it has to be given a defined value here (hang_count used to be cleared twice
     * instead). */
    state->stat_count = 0;

    /* initialize memory used by the filter bank */
    for (i = 0; i < F_5TH_CNT; i++)
    {
        for (j = 0; j < 2; j++)
        {
            state->a_data5[i][j] = 0;
        }
    }

    for (i = 0; i < F_3TH_CNT; i++)
    {
        state->a_data3[i] = 0;
    }

    /* initialize the rest of the memory */
    for (i = 0; i < COMPLEN; i++)
    {
        state->bckr_est[i] = NOISE_INIT;
        state->old_level[i] = NOISE_INIT;
        state->ave_level[i] = NOISE_INIT;
        state->sub_level[i] = 0;
    }

    /* speech level estimation starts with empty counters */
    state->sp_est_cnt = 0;
    state->sp_max = 0;
    state->sp_max_cnt = 0;
    state->speech_level = SPEECH_LEVEL_INIT;
    state->prev_pow_sum = 0;
    return 0;
}

/******************************************************************************
*
*  Function:   wb_vad_exit
*  Purpose:    The memory used for state memory is freed
*
*  Does nothing when state or *state is NULL. After the memory has been
*  released, *state is set to NULL.
*
*/
void wb_vad_exit(
     VadVars ** state                      /* i/o : State structure    */
)
{
    if (state != NULL && *state != NULL)
    {
        /* release the state and clear the caller's pointer */
        free(*state);
        *state = NULL;
    }
}

/******************************************************************************
*
*     Function     : wb_vad_tone_detection
*     Purpose      : Set tone flag if pitch gain is high. This is used to
*                    detect signaling tones and other signals with high
*                    pitch gain.
*
*     st->tone_flag is shifted right by one position on every call; bit
*     0x4000 is then set when p_gain is greater than TONE_THR.
*
*/
void wb_vad_tone_detection(
     VadVars * st,                         /* i/o : State struct            */
     Word16 p_gain                         /* pitch gain      */
)
{
    Word16 shifted_flags;

    /* make room for the newest flag */
    shifted_flags = shr(st->tone_flag, 1);
    st->tone_flag = shifted_flags;         move16();

    /* nothing more to do unless pitch_gain > TONE_THR */
    test();
    if (sub(p_gain, TONE_THR) <= 0)
    {
        return;
    }

    st->tone_flag = (Word16) (shifted_flags | 0x4000);          logic16();move16();
}

/******************************************************************************
*
*     Function     : wb_vad
*     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
*
*     Per call: computes the frame power, runs the filter bank, obtains the
*     VAD decision from vad_decision() and finally feeds the input level to
*     Estimate_Speech(). st->prev_pow_sum is replaced by the power of the
*     current frame.
*
*/
Word16 wb_vad(                             /* Return value : VAD Decision, 1 = speech, 0 = noise */
     VadVars * st,                         /* i/o : State structure                 */
     Word16 in_buf[]                       /* i   : samples of the input frame   */
)
{
    Word16 band_level[COMPLEN];
    Word16 n;
    Word16 decision, frame_level;
    Word32 L_acc, L_pow_two_frames;

    /* Power of the current frame: accumulate in_buf[n] * in_buf[n] over FRAME_LEN samples */
    L_acc = 0L;                            move32();
    for (n = 0; n < FRAME_LEN; n++)
    {
        L_acc = L_mac(L_acc, in_buf[n], in_buf[n]);
    }

    /* Combine with the power stored by the previous call, then store the current one */
    L_pow_two_frames = L_add(L_acc, st->prev_pow_sum);  move32();
    st->prev_pow_sum = L_acc;              move32();

    /* Very low input power: mask tone_flag with 0x1fff, which drops bits 0x4000 and 0x2000 */
    test();
    if (L_sub(L_pow_two_frames, POW_TONE_THR) < 0)
    {
        st->tone_flag = (Word16) (st->tone_flag & 0x1fff);      logic16();move16();
    }

    /* Signal level of each band, followed by the VAD decision itself */
    filter_bank(st, in_buf, band_level);
    decision = vad_decision(st, band_level, L_pow_two_frames);

    /* Input level: sum of the band levels, lowest band left out */
    L_acc = 0;                             move32();
    for (n = 1; n < COMPLEN; n++)
    {
        L_acc = L_add(L_acc, band_level[n]);
    }
    frame_level = extract_h(L_shl(L_acc, 12));

    /* Update the speech level estimate with the level of this frame */
    Estimate_Speech(st, frame_level);

    return (decision);
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -