/* enc_dtx.c */
/*
*===================================================================
* 3GPP AMR Wideband Floating-point Speech Codec
*===================================================================
*/
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include "typedef.h"
#include "enc_lpc.h"
#include "enc_util.h"
/*
 * Compile-time constants used by the DTX code in this file. Several of
 * them (the VAD thresholds, hangover/burst limits, filter-bank coefficients)
 * are not referenced by the functions visible in this part of the file.
 */
/* Used below as the last valid index of the DTX history buffers. */
/* NOTE(review): presumably DTX_HIST_SIZE - 1 (DTX_HIST_SIZE is defined elsewhere) - confirm */
#define DTX_HIST_SIZE_MIN_ONE 7
#define DTX_HANG_CONST 7 /* yields eight frames of SP HANGOVER */
#define DTX_ELAPSED_FRAMES_THRESH (24 + 7 -1)
/* Ratio used when deciding whether an outlier ISF vector is replaced */
#define MED_THRESH 2.25
/* Energy-variation threshold used by the dithering decision */
#define GAIN_THR 1.406
#define ORDER 16 /* order of linear prediction filter */
#define RANDOM_INITSEED 21845 /* own random init value */
#define MRDTX 10
/* Codebook sizes passed to the five split-VQ searches of the noise ISFs */
#define SIZE_BK_NOISE1 64
#define SIZE_BK_NOISE2 64
#define SIZE_BK_NOISE3 64
#define SIZE_BK_NOISE4 32
#define SIZE_BK_NOISE5 32
#define FRAME_LEN 256 /* Length (samples) of the input frame */
#define SCALE 128 /* (UNITY * UNITY) / 512 */
#define TONE_THR 0.65f /* Threshold for tone detection */
/* constants for speech level estimation */
#define SP_EST_COUNT 80
#define SP_ACTIVITY_COUNT 25
#define ALPHA_SP_UP (1.0f - 0.85f)
#define ALPHA_SP_DOWN (1.0f - 0.85f)
#define NOM_LEVEL 2050.0F /* about -26 dBov */
#define SPEECH_LEVEL_INIT NOM_LEVEL
#define MIN_SPEECH_LEVEL1 (NOM_LEVEL * 0.063F) /* NOM_LEVEL -24 dB */
#define MIN_SPEECH_LEVEL2 (NOM_LEVEL * 0.2F) /* NOM_LEVEL -14 dB */
#define MIN_SPEECH_SNR 0.125F /* 0 dB, lowest SNR estimation */
/* Constants for background spectrum update */
#define ALPHA_UP1 (1.0f - 0.95f) /* Normal update, upwards: */
#define ALPHA_DOWN1 (1.0f - 0.936f) /* Normal update, downwards */
#define ALPHA_UP2 (1.0f - 0.985f) /* Forced update, upwards */
#define ALPHA_DOWN2 (1.0f - 0.943f) /* Forced update, downwards */
#define ALPHA3 (1.0f - 0.95f) /* Update downwards */
#define ALPHA4 (1.0f - 0.9f) /* For stationary estimation */
#define ALPHA5 (1.0f - 0.5f) /* For stationary estimation */
/* Constants for VAD threshold */
#define THR_MIN (1.6F * SCALE) /* Minimum threshold */
#define THR_HIGH (6.0F * SCALE) /* Highest threshold */
#define THR_LOW (1.7F * SCALE) /* Lowest threshold */
#define NO_P1 31744.0F /* ilog2(1), Noise level for highest threshold */
#define NO_P2 19786.0F /* ilog2(0.1), Noise level for lowest threshold */
/* Slope of the straight line through (NO_P1, THR_HIGH) and (NO_P2, THR_LOW) */
#define NO_SLOPE ((Float32)(THR_LOW - THR_HIGH) / (Float32)(NO_P2 - NO_P1))
#define SP_CH_MIN (-0.75F * SCALE)
#define SP_CH_MAX (0.75F * SCALE)
#define SP_P1 22527.0F /* ilog2(NOM_LEVEL / 4) */
#define SP_P2 17832.0F /* ilog2(NOM_LEVEL * 4) */
/* Slope of the straight line through (SP_P1, SP_CH_MIN) and (SP_P2, SP_CH_MAX) */
#define SP_SLOPE ((Float32)(SP_CH_MAX - SP_CH_MIN) / (Float32)(SP_P2 - SP_P1))
/* Constants for hangover length */
#define HANG_HIGH 12 /* longest hangover */
#define HANG_LOW 2 /* shortest hangover */
#define HANG_P1 THR_LOW /* threshold for longest hangover */
#define HANG_P2 (4 * SCALE) /* threshold for shortest hangover */
/* Slope of the straight line through (HANG_P1, HANG_HIGH) and (HANG_P2, HANG_LOW) */
#define HANG_SLOPE ((Float32)(HANG_LOW - HANG_HIGH) / (Float32)(HANG_P2 - HANG_P1))
/* Constants for burst length */
#define BURST_HIGH 8 /* longest burst length */
#define BURST_LOW 3 /* shortest burst length */
#define BURST_P1 THR_HIGH /* threshold for longest burst */
#define BURST_P2 THR_LOW /* threshold for shortest burst */
/* Slope of the straight line through (BURST_P1, BURST_HIGH) and (BURST_P2, BURST_LOW) */
#define BURST_SLOPE ((Float32)(BURST_LOW - BURST_HIGH) / (Float32)(BURST_P2 - BURST_P1))
/* Parameters for background spectrum recovery function */
#define STAT_COUNT 20 /* threshold of stationary detection counter */
#define STAT_THR_LEVEL 184 /* Threshold level for stationarity detection */
#define STAT_THR 1000 /* Threshold for stationarity detection */
/* Limits for background noise estimate */
#define NOISE_MIN 40 /* minimum */
#define NOISE_MAX 20000 /* maximum */
#define NOISE_INIT 150 /* initial */
/* Thresholds for signal power (now calculated on 2 frames) */
#define VAD_POW_LOW 30000.0f /* If input power is lower than this, VAD is set to 0 */
#define POW_PITCH_TONE_THR 686080.0f /* If input power is lower, pitch detection is ignored */
/* Constants for the filter bank */
#define COEFF3 0.407806f /* coefficient for the 3rd order filter */
#define COEFF5_1 0.670013f /* 1st coefficient for the 5th order filter */
#define COEFF5_2 0.195007f /* 2nd coefficient for the 5th order filter */
/*
 * Constant tables defined in another translation unit; their sizes are
 * not visible from this file.
 */
/* indexed by codec mode when the frame log-energy is stored (E_DTX_buffer) */
extern const Float32 E_ROM_en_adjust[];
/* subtracted element-wise from the ISF vector before quantisation (E_DTX_isf_q) */
extern const Float32 E_ROM_mean_isf_noise[];
/* codebooks handed to E_LPC_isf_sub_vq for the five ISF sub-vectors (E_DTX_isf_q) */
extern const Float32 E_ROM_dico1_isf_noise[];
extern const Float32 E_ROM_dico2_isf_noise[];
extern const Float32 E_ROM_dico3_isf_noise[];
extern const Float32 E_ROM_dico4_isf_noise[];
extern const Float32 E_ROM_dico5_isf_noise[];
/* not referenced by the functions visible in this part of the file */
extern const Float32 E_ROM_isf[];
/*
 * E_DTX_isf_history_aver
 *
 * Parameters:
 *    isf_old     I/O: ISF history, DTX_HIST_SIZE vectors of M values each,
 *                     stored back to back. Modified temporarily and
 *                     restored before the function returns.
 *    indices     I:   indices[0] and indices[1] select history vectors that
 *                     are excluded from the sum (-1 = nothing to exclude);
 *                     indices[2] selects the vector that stands in for them
 *    isf_aver    O:   M values, each the SUM of one ISF coefficient over
 *                     the whole history
 *
 * Function:
 *    Accumulate the ISF history coefficient by coefficient, with the
 *    vectors named by indices[0] / indices[1] replaced by the vector named
 *    by indices[2]. No division by the history length is done here; the
 *    caller has to normalise the result.
 *
 * Returns:
 *    void
 */
static void E_DTX_isf_history_aver(Float32 isf_old[], Word16 indices[],
                                   Float32 isf_aver[])
{
   Float32 saved[2 * M];
   Float32 acc;
   Word32 slot, frame, coef;

   /* Step 1: back up each excluded vector, then overwrite it in place */
   for (slot = 0; slot < 2; slot++)
   {
      Float32 *outlier;
      Float32 *stand_in;
      Float32 *backup;

      if (indices[slot] == -1)
      {
         continue;
      }

      outlier = &isf_old[indices[slot] * M];
      stand_in = &isf_old[indices[2] * M];
      backup = &saved[slot * M];

      for (coef = 0; coef < M; coef++)
      {
         backup[coef] = outlier[coef];
         outlier[coef] = stand_in[coef];
      }
   }

   /* Step 2: sum every coefficient over all history frames */
   for (coef = 0; coef < M; coef++)
   {
      acc = 0;

      for (frame = 0; frame < DTX_HIST_SIZE; frame++)
      {
         acc += isf_old[frame * M + coef];
      }

      isf_aver[coef] = acc;
   }

   /* Step 3: put the backed-up vectors back into the history */
   for (slot = 0; slot < 2; slot++)
   {
      Float32 *outlier;
      Float32 *backup;

      if (indices[slot] == -1)
      {
         continue;
      }

      outlier = &isf_old[indices[slot] * M];
      backup = &saved[slot * M];

      for (coef = 0; coef < M; coef++)
      {
         outlier[coef] = backup[coef];
      }
   }
}
/*
 * E_DTX_dithering_control
 *
 * Parameters:
 *    st          I: state struct (mem_distance_sum and mem_log_en are read)
 *
 * Function:
 *    Decide whether the background noise varies enough to ask for
 *    dithering. Two measures are taken:
 *      - the sum of the first eight entries of mem_distance_sum
 *        (spectral variation), compared against a fixed limit;
 *      - the sum of absolute deviations of the stored log-energies from
 *        their mean (energy variation), compared against GAIN_THR.
 *
 * Returns:
 *    1 if either measure exceeds its threshold, 0 otherwise
 */
static Word16 E_DTX_dithering_control(E_DTX_State * st)
{
   Float32 isf_variation = 0.0F;
   Float32 en_mean = 0.0f;
   Float32 en_variation = 0.0f;
   Word32 n;

   /* spectral variation: accumulate the ISF distance sums */
   for (n = 0; n < 8; n++)
   {
      isf_variation += st->mem_distance_sum[n];
   }

   /* mean of the log-energy history */
   for (n = 0; n < DTX_HIST_SIZE; n++)
   {
      en_mean += st->mem_log_en[n] / (Float32)DTX_HIST_SIZE;
   }

   /* energy variation: total absolute deviation from that mean */
   for (n = 0; n < DTX_HIST_SIZE; n++)
   {
      en_variation += (Float32)fabs(st->mem_log_en[n] - en_mean);
   }

   return (Word16)((isf_variation > 5147609.0f) || (en_variation > GAIN_THR));
}
/*
 * E_DTX_buffer
 *
 * Parameters:
 *    st          I/O: state struct (mem_hist_ptr, mem_isf and mem_log_en
 *                     are updated)
 *    isf_new     I:   ISF vector of the current frame (M values)
 *    enr         I:   residual energy (for L_FRAME)
 *    codec_mode  I:   speech coder mode, used as index into E_ROM_en_adjust
 *
 * Function:
 *    Store the current frame in the circular DTX history: advance the
 *    write position (wrapping at DTX_HIST_SIZE), copy the ISF vector into
 *    that slot and store log2(enr / L_FRAME) plus the mode-dependent
 *    adjustment in the matching energy slot.
 *
 * Returns:
 *    void
 */
void E_DTX_buffer(E_DTX_State *st, Float32 isf_new[], Float32 enr,
                  Word16 codec_mode)
{
   Float64 frame_energy;
   Float32 log_en;

   /* step the circular-buffer write position, wrapping to slot 0 */
   if (++st->mem_hist_ptr == DTX_HIST_SIZE)
   {
      st->mem_hist_ptr = 0;
   }

   /* store the new ISF vector in the current slot */
   memcpy(st->mem_isf + st->mem_hist_ptr * M, isf_new, sizeof(Float32) * M);

   /* tiny offset keeps the logarithm argument positive for zero energy */
   enr += 1e-10F;
   frame_energy = enr / ((Float64)L_FRAME);

   /* base-2 logarithm, built from base-10 logarithms */
   log_en = (Float32)(log10(frame_energy) / log10(2.0F));

   /* apply the per-mode adjustment (original note: subtract ~ 3 dB) */
   st->mem_log_en[st->mem_hist_ptr] = log_en + E_ROM_en_adjust[codec_mode];
}
/*
 * E_DTX_frame_indices_find
 *
 * Parameters:
 *    st       I/O: state struct; mem_distance and mem_distance_sum are
 *                  updated, mem_isf and mem_hist_ptr are only read
 *    indices  O:   three positions in the ISF history buffer:
 *                  indices[0] = frame with the largest distance sum,
 *                  indices[1] = frame with the second largest distance sum,
 *                  indices[2] = frame with the smallest distance sum.
 *                  indices[0] / indices[1] are set to -1 when the
 *                  corresponding sum divided by MED_THRESH does not exceed
 *                  the smallest sum.
 *
 * Function:
 *    Update the stored ISF distances for the newest frame and find the
 *    frames with the minimum / maximum distance sums
 *
 * Returns:
 *    void
 */
static void E_DTX_frame_indices_find(E_DTX_State * st, Word16 indices[])
{
Float32 L_tmp, tmp, summin, summax, summax2nd;
Word32 i, j, k;
Word16 ptr;
/*
 * Remove the effect of the oldest frame from the column
 * sums mem_distance_sum[0..DTX_HIST_SIZE_MIN_ONE-1]. The last element
 * is not updated since it will be removed later.
 * The index j walks through mem_distance[] with a step that shrinks
 * by one on every iteration (first step DTX_HIST_SIZE_MIN_ONE - 1).
 */
k = DTX_HIST_SIZE_MIN_ONE;
j = -1;
for (i = 0; i < DTX_HIST_SIZE_MIN_ONE; i++)
{
j = j + k;
st->mem_distance_sum[i] = st->mem_distance_sum[i] - st->mem_distance[j];
k--;
}
/*
 * Shift the column sums by one position. The last element,
 * corresponding to the oldest frame, is dropped. The sum of
 * the distances between the latest isf and other isfs,
 * i.e. element 0, will be computed during this call.
 * Hence this element is initialized to zero.
 */
for (i = DTX_HIST_SIZE_MIN_ONE; i > 0; i--)
{
st->mem_distance_sum[i] = st->mem_distance_sum[i - 1];
}
st->mem_distance_sum[0] = 0.0F;
/*
 * Remove the oldest frame from the distance matrix.
 * Note that the distance matrix is replaced by a one-
 * dimensional array to save static memory.
 * Entries are moved towards higher indices in groups of growing
 * size (1, 2, 3, ...), working down from index 27.
 * NOTE(review): the literals 27 and 12 look tied to a history of 8
 * frames (28 stored distances) - confirm against DTX_HIST_SIZE.
 */
k = 0;
for (i = 27; i >= 12; i = i - k)
{
k++;
for (j = k; j > 0; j--)
{
st->mem_distance[i - j + 1] = st->mem_distance[i - j - k];
}
}
/*
 * Compute the first column of the distance matrix: squared Euclidean
 * distances from the newest ISF vector (slot mem_hist_ptr of mem_isf)
 * to each of the other stored vectors, newest to oldest.
 */
ptr = st->mem_hist_ptr;
for (i = 1; i < DTX_HIST_SIZE; i++)
{
/* Compute the distance between the latest isf and the other isfs. */
/* step back one slot in the circular buffer, wrapping to the last slot */
ptr--;
if (ptr < 0)
{
ptr = DTX_HIST_SIZE_MIN_ONE;
}
L_tmp = 0;
for (j = 0; j < M; j++)
{
tmp = st->mem_isf[st->mem_hist_ptr * M + j] - st->mem_isf[ptr * M + j];
L_tmp += tmp * tmp;
}
st->mem_distance[i - 1] = L_tmp;
/* Update also the column sums. */
st->mem_distance_sum[0] += st->mem_distance[i - 1];
st->mem_distance_sum[i] += st->mem_distance[i - 1];
}
/* Find the minimum and maximum distance sums (index 0 = newest frame) */
summax = st->mem_distance_sum[0];
summin = st->mem_distance_sum[0];
indices[0] = 0;
indices[2] = 0;
for (i = 1; i < DTX_HIST_SIZE; i++)
{
if (st->mem_distance_sum[i] > summax)
{
indices[0] = (Word16)i;
summax = st->mem_distance_sum[i];
}
if (st->mem_distance_sum[i] < summin)
{
indices[2] = (Word16)i;
summin = st->mem_distance_sum[i];
}
}
/* Find the second largest distance sum (largest one excluding indices[0]) */
summax2nd = -100000000.0;
indices[1] = -1;
for (i = 0; i < DTX_HIST_SIZE; i++)
{
if ((st->mem_distance_sum[i] > summax2nd) && (i != indices[0]))
{
indices[1] = (Word16)i;
summax2nd = st->mem_distance_sum[i];
}
}
/*
 * Convert the three "frames back from the newest" offsets into
 * positions in the circular history buffer.
 */
for (i = 0; i < 3; i++)
{
indices[i] = (Word16)(st->mem_hist_ptr - indices[i]);
if (indices[i] < 0)
{
indices[i] += DTX_HIST_SIZE;
}
}
/*
 * If maximum distance / MED_THRESH is not larger than the minimum
 * distance then the median ISF vector replacement is not performed
 */
L_tmp = (Float32)(summax / MED_THRESH);
if (L_tmp <= summin)
{
indices[0] = -1;
}
/*
 * If second largest distance / MED_THRESH is not larger than the
 * minimum distance then the median ISF vector replacement is
 * not performed
 */
L_tmp = (Float32)(summax2nd / MED_THRESH);
if (L_tmp <= summin)
{
indices[1] = -1;
}
return;
}
/*
 * E_DTX_isf_q
 *
 * Parameters:
 *    isf         I/O: ISF vector (ORDER values); on return it holds the
 *                     input with E_ROM_mean_isf_noise subtracted
 *    indice      O:   address of the pointer to the index array; the five
 *                     quantisation indices are written to (*indice)[0..4].
 *                     The pointer itself is not advanced here.
 *
 * Function:
 *    The mean-removed ISF vector is quantized using VQ with split-by-5:
 *    sub-vectors of 2, 3, 3, 4 and 4 coefficients, each searched in its
 *    own noise codebook.
 *
 * Returns:
 *    void
 */
static void E_DTX_isf_q(Float32 *isf, Word16 **indice)
{
   Word16 *idx = *indice;
   Float32 unused_out;   /* value returned through the last argument of
                            E_LPC_isf_sub_vq; not needed here */
   Word32 n;

   /* remove the long-term mean of the noise ISFs */
   for (n = 0; n < ORDER; n++)
   {
      isf[n] -= E_ROM_mean_isf_noise[n];
   }

   /* coefficients 0..1 */
   idx[0] = E_LPC_isf_sub_vq(isf, E_ROM_dico1_isf_noise, 2,
                             SIZE_BK_NOISE1, &unused_out);
   /* coefficients 2..4 */
   idx[1] = E_LPC_isf_sub_vq(isf + 2, E_ROM_dico2_isf_noise, 3,
                             SIZE_BK_NOISE2, &unused_out);
   /* coefficients 5..7 */
   idx[2] = E_LPC_isf_sub_vq(isf + 5, E_ROM_dico3_isf_noise, 3,
                             SIZE_BK_NOISE3, &unused_out);
   /* coefficients 8..11 */
   idx[3] = E_LPC_isf_sub_vq(isf + 8, E_ROM_dico4_isf_noise, 4,
                             SIZE_BK_NOISE4, &unused_out);
   /* coefficients 12..15 */
   idx[4] = E_LPC_isf_sub_vq(isf + 12, E_ROM_dico5_isf_noise, 4,
                             SIZE_BK_NOISE5, &unused_out);
}
/*
* E_DTX_exe
*
* Parameters:
* st I/O: state struct
* exc2 O: CN excitation
* pt_prms O: analysis parameters
*
* Function:
* Confort noise parameters are encoded for the SID frame
*
* Returns:
* void
*/
void E_DTX_exe(E_DTX_State *st, Float32 *exc2, Word16 **pt_prms)
{
Float32 isf[M];
Float32 log_en, level, gain, ener;
Word32 i,j;
Word16 isf_order[3];
Word16 CN_dith;
/* VOX mode computation of SID parameters */
log_en = 0.0F;
memset(isf, 0, M * sizeof(Float32));
/* average energy and isf */
for (i = 0; i < DTX_HIST_SIZE; i++)
{
log_en += st->mem_log_en[i] / (Float32)DTX_HIST_SIZE;
}
E_DTX_frame_indices_find(st, isf_order);
E_DTX_isf_history_aver(st->mem_isf, isf_order, isf);
for (j = 0; j < M; j++)
{
isf[j] = isf[j] / (Float32)DTX_HIST_SIZE; /* divide by 8 */
}
/* quantize logarithmic energy to 6 bits (-6 : 66 dB) */
st->mem_log_en_index = (Word16)((log_en + 2.0F) * 2.625F);
if(st->mem_log_en_index > 63)
{
st->mem_log_en_index = 63;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -