/* File: pesqmod.c */
/* (code-viewer page label: "Font size:") */
/*
 * Return the largest value found in x at indices start .. stop-1
 * (stop itself is excluded).  If that index range is empty, the initial
 * sentinel value -1E20f is returned unchanged.
 */
float maximum_of (float *x, long start, long stop) {
    float largest = -1E20f;
    long  idx = start;

    while (idx < stop) {
        const float candidate = x [idx];

        /* Keep the running maximum; ties leave the current value in place. */
        if (candidate > largest) {
            largest = candidate;
        }
        idx++;
    }

    return largest;
}
/*
 * Weighted sum over the bands of one frame.
 *
 * x is read as a flat array holding Nb consecutive values per frame; the
 * frame is selected by frames_after_start.  Every value in bands
 * 1 .. Nb-1 is multiplied by the matching entry of width_of_band_bark and
 * added to the total.  Note that the summation starts at band 1, so band 0
 * does not contribute.
 *
 * The total is accumulated in double precision and converted to float on
 * return.
 */
float integral_of (float *x, long frames_after_start) {
    double result = 0;
    int band;

    for (band = 1; band < Nb; band++) {
        result += x [frames_after_start * Nb + band] * width_of_band_bark [band];
    }

    /* A second, unreachable copy of this return statement was removed. */
    return (float) result;
}
/* Debug switch, defined as 0 here.  It is not referenced in the visible part
   of this file; presumably it gates frame-level debug output -- TODO confirm
   against the rest of the file before relying on it. */
#define DEBUG_FR 0
void pesq_psychoacoustic_model(SIGNAL_INFO * ref_info,
SIGNAL_INFO * deg_info,
ERROR_INFO * err_info,
float * ftmp)
{
long maxNsamples = max (ref_info-> Nsamples, deg_info-> Nsamples);
long Nf = Downsample * 8L;
long start_frame, stop_frame;
long samples_to_skip_at_start, samples_to_skip_at_end;
float sum_of_5_samples;
long n, i;
float power_ref, power_deg;
long frame;
float *fft_tmp;
float *hz_spectrum_ref, *hz_spectrum_deg;
float *pitch_pow_dens_ref, *pitch_pow_dens_deg;
float *loudness_dens_ref, *loudness_dens_deg;
float *avg_pitch_pow_dens_ref, *avg_pitch_pow_dens_deg;
float *deadzone;
float *disturbance_dens, *disturbance_dens_asym_add;
float total_audible_pow_ref, total_audible_pow_deg;
int *silent;
float oldScale, scale;
int *frame_was_skipped;
float *frame_disturbance;
float *frame_disturbance_asym_add;
float *total_power_ref;
int utt;
#ifdef CALIBRATE
int periodInSamples;
int numberOfPeriodsPerFrame;
float omega;
#endif
float peak;
#define MAX_NUMBER_OF_BAD_INTERVALS 1000
int *frame_is_bad;
int *smeared_frame_is_bad;
int start_frame_of_bad_interval [MAX_NUMBER_OF_BAD_INTERVALS];
int stop_frame_of_bad_interval [MAX_NUMBER_OF_BAD_INTERVALS];
int start_sample_of_bad_interval [MAX_NUMBER_OF_BAD_INTERVALS];
int stop_sample_of_bad_interval [MAX_NUMBER_OF_BAD_INTERVALS];
int number_of_samples_in_bad_interval [MAX_NUMBER_OF_BAD_INTERVALS];
int delay_in_samples_in_bad_interval [MAX_NUMBER_OF_BAD_INTERVALS];
int number_of_bad_intervals= 0;
int search_range_in_samples;
int bad_interval;
float *untweaked_deg = NULL;
float *tweaked_deg = NULL;
float *doubly_tweaked_deg = NULL;
int there_is_a_bad_frame = FALSE;
float *time_weight;
float d_indicator, a_indicator;
int nn;
float Whanning [Nfmax];
for (n = 0L; n < Nf; n++ ) {
Whanning [n] = (float)(0.5 * (1.0 - cos((TWOPI * n) / Nf)));
}
switch (Fs) {
case 8000:
Nb = 42;
Sl = (float) Sl_8k;
Sp = (float) Sp_8k;
nr_of_hz_bands_per_bark_band = nr_of_hz_bands_per_bark_band_8k;
centre_of_band_bark = centre_of_band_bark_8k;
centre_of_band_hz = centre_of_band_hz_8k;
width_of_band_bark = width_of_band_bark_8k;
width_of_band_hz = width_of_band_hz_8k;
pow_dens_correction_factor = pow_dens_correction_factor_8k;
abs_thresh_power = abs_thresh_power_8k;
break;
case 16000:
Nb = 49;
Sl = (float) Sl_16k;
Sp = (float) Sp_16k;
nr_of_hz_bands_per_bark_band = nr_of_hz_bands_per_bark_band_16k;
centre_of_band_bark = centre_of_band_bark_16k;
centre_of_band_hz = centre_of_band_hz_16k;
width_of_band_bark = width_of_band_bark_16k;
width_of_band_hz = width_of_band_hz_16k;
pow_dens_correction_factor = pow_dens_correction_factor_16k;
abs_thresh_power = abs_thresh_power_16k;
break;
default:
printf ("Invalid sample frequency!\n");
exit (1);
}
samples_to_skip_at_start = 0;
do {
sum_of_5_samples= (float) 0;
for (i = 0; i < 5; i++) {
sum_of_5_samples += (float) fabs (ref_info-> data [SEARCHBUFFER * Downsample + samples_to_skip_at_start + i]);
}
if (sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES) {
samples_to_skip_at_start++;
}
} while ((sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES)
&& (samples_to_skip_at_start < maxNsamples / 2));
samples_to_skip_at_end = 0;
do {
sum_of_5_samples= (float) 0;
for (i = 0; i < 5; i++) {
sum_of_5_samples += (float) fabs (ref_info-> data [maxNsamples - SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000) - 1 - samples_to_skip_at_end - i]);
}
if (sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES) {
samples_to_skip_at_end++;
}
} while ((sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES)
&& (samples_to_skip_at_end < maxNsamples / 2));
start_frame = samples_to_skip_at_start / (Nf /2);
stop_frame = (maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000) - samples_to_skip_at_end) / (Nf /2) - 1;
power_ref = (float) pow_of (ref_info-> data,
SEARCHBUFFER * Downsample,
maxNsamples - SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000),
maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000));
power_deg = (float) pow_of (deg_info-> data,
SEARCHBUFFER * Downsample,
maxNsamples - SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000),
maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000));
fft_tmp = (float *) safe_malloc ((Nf + 2) * sizeof (float));
hz_spectrum_ref = (float *) safe_malloc ((Nf / 2) * sizeof (float));
hz_spectrum_deg = (float *) safe_malloc ((Nf / 2) * sizeof (float));
frame_is_bad = (int *) safe_malloc ((stop_frame + 1) * sizeof (int));
smeared_frame_is_bad=(int *) safe_malloc ((stop_frame + 1) * sizeof (int));
silent = (int *) safe_malloc ((stop_frame + 1) * sizeof (int));
pitch_pow_dens_ref = (float *) safe_malloc ((stop_frame + 1) * Nb * sizeof (float));
pitch_pow_dens_deg = (float *) safe_malloc ((stop_frame + 1) * Nb * sizeof (float));
frame_was_skipped = (int *) safe_malloc ((stop_frame + 1) * sizeof (int));
frame_disturbance = (float *) safe_malloc ((stop_frame + 1) * sizeof (float));
frame_disturbance_asym_add = (float *) safe_malloc ((stop_frame + 1) * sizeof (float));
avg_pitch_pow_dens_ref = (float *) safe_malloc (Nb * sizeof (float));
avg_pitch_pow_dens_deg = (float *) safe_malloc (Nb * sizeof (float));
loudness_dens_ref = (float *) safe_malloc (Nb * sizeof (float));
loudness_dens_deg = (float *) safe_malloc (Nb * sizeof (float));;
deadzone = (float *) safe_malloc (Nb * sizeof (float));;
disturbance_dens = (float *) safe_malloc (Nb * sizeof (float));
disturbance_dens_asym_add = (float *) safe_malloc (Nb * sizeof (float));
time_weight = (float *) safe_malloc ((stop_frame + 1) * sizeof (float));
total_power_ref = (float *) safe_malloc ((stop_frame + 1) * sizeof (float));
#ifdef CALIBRATE
periodInSamples = Fs / 1000;
numberOfPeriodsPerFrame = Nf / periodInSamples;
omega = (float) (TWOPI / periodInSamples);
peak;
set_to_sine (ref_info, (float) 29.54, (float) omega);
#endif
for (frame = 0; frame <= stop_frame; frame++) {
int start_sample_ref = SEARCHBUFFER * Downsample + frame * Nf / 2;
int start_sample_deg;
int delay;
short_term_fft (Nf, ref_info, Whanning, start_sample_ref, hz_spectrum_ref, fft_tmp);
if (err_info-> Nutterances < 1) {
printf ("Processing error!\n");
exit (1);
}
utt = err_info-> Nutterances - 1;
while ((utt >= 0) && (err_info-> Utt_Start [utt] * Downsample > start_sample_ref)) {
utt--;
}
if (utt >= 0) {
delay = err_info-> Utt_Delay [utt];
} else {
delay = err_info-> Utt_Delay [0];
}
start_sample_deg = start_sample_ref + delay;
if ((start_sample_deg > 0) && (start_sample_deg + Nf < maxNsamples + DATAPADDING_MSECS * (Fs / 1000))) {
short_term_fft (Nf, deg_info, Whanning, start_sample_deg, hz_spectrum_deg, fft_tmp);
} else {
for (i = 0; i < Nf / 2; i++) {
hz_spectrum_deg [i] = 0;
}
}
freq_warping (Nf / 2, hz_spectrum_ref, Nb, pitch_pow_dens_ref, frame);
peak = maximum_of (pitch_pow_dens_ref, 0, Nb);
freq_warping (Nf / 2, hz_spectrum_deg, Nb, pitch_pow_dens_deg, frame);
total_audible_pow_ref = total_audible (frame, pitch_pow_dens_ref, 1E2);
total_audible_pow_deg = total_audible (frame, pitch_pow_dens_deg, 1E2);
silent [frame] = (total_audible_pow_ref < 1E7);
}
time_avg_audible_of (stop_frame + 1, silent, pitch_pow_dens_ref, avg_pitch_pow_dens_ref, (maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000)) / (Nf / 2) - 1);
time_avg_audible_of (stop_frame + 1, silent, pitch_pow_dens_deg, avg_pitch_pow_dens_deg, (maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000)) / (Nf / 2) - 1);
#ifndef CALIBRATE
freq_resp_compensation (stop_frame + 1, pitch_pow_dens_ref, avg_pitch_pow_dens_ref, avg_pitch_pow_dens_deg, 1000);
#endif
oldScale = 1;
for (frame = 0; frame <= stop_frame; frame++) {
int band;
total_audible_pow_ref = total_audible (frame, pitch_pow_dens_ref, 1);
total_audible_pow_deg = total_audible (frame, pitch_pow_dens_deg, 1);
total_power_ref [frame] = total_audible_pow_ref;
scale = (total_audible_pow_ref + (float) 5E3) / (total_audible_pow_deg + (float) 5E3);
if (frame > 0) {
scale = (float) 0.2 * oldScale + (float) 0.8*scale;
}
oldScale = scale;
#define MAX_SCALE 5.0
if (scale > (float) MAX_SCALE) scale = (float) MAX_SCALE;
#define MIN_SCALE 3E-4
if (scale < (float) MIN_SCALE) {
scale = (float) MIN_SCALE;
}
for (band = 0; band < Nb; band++) {
pitch_pow_dens_deg [frame * Nb + band] *= scale;
}
intensity_warping_of (loudness_dens_ref, frame, pitch_pow_dens_ref);
intensity_warping_of (loudness_dens_deg, frame, pitch_pow_dens_deg);
for (band = 0; band < Nb; band++) {
disturbance_dens [band] = loudness_dens_deg [band] - loudness_dens_ref [band];
}
for (band = 0; band < Nb; band++) {
deadzone [band] = min (loudness_dens_deg [band], loudness_dens_ref [band]);
deadzone [band] *= 0.25;
}
for (band = 0; band < Nb; band++) {
float d = disturbance_dens [band];
float m = deadzone [band];
if (d > m) {
disturbance_dens [band] -= m;
} else {
if (d < -m) {
disturbance_dens [band] += m;
} else {
disturbance_dens [band] = 0;
}
}
}
frame_disturbance [frame] = pseudo_Lp (Nb, disturbance_dens, D_POW_F);
#define THRESHOLD_BAD_FRAMES 30
if (frame_disturbance [frame] > THRESHOLD_BAD_FRAMES)
{
there_is_a_bad_frame = TRUE;
}
multiply_with_asymmetry_factor (disturbance_dens, frame, pitch_pow_dens_ref, pitch_pow_dens_deg);
frame_disturbance_asym_add [frame] = pseudo_Lp (Nb, disturbance_dens, A_POW_F);
}
for (frame = 0; frame <= stop_frame; frame++) {
frame_was_skipped [frame] = FALSE;
}
for (utt = 1; utt < err_info-> Nutterances; utt++) {
int frame1 = (int) floor (((err_info-> Utt_Start [utt] - SEARCHBUFFER ) * Downsample + err_info-> Utt_Delay [utt]) / (Nf / 2));
int j = (int) floor ((err_info-> Utt_End [utt-1] - SEARCHBUFFER) * Downsample + err_info-> Utt_Delay [utt-1]) / (Nf / 2);
int delay_jump = err_info-> Utt_Delay [utt] - err_info-> Utt_Delay [utt-1];
if (frame1 > j) {
frame1 = j;
}
if (frame1 < 0) {
frame1 = 0;
}
if (delay_jump < -(int) (Nf / 2)) {
int frame2 = (int) ((err_info-> Utt_Start [utt] - SEARCHBUFFER) * Downsample + max (0, fabs (delay_jump))) / (Nf / 2) + 1;
for (frame = frame1; frame <= frame2; frame++) {
if (frame < stop_frame) {
frame_was_skipped [frame] = TRUE;
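/*
 * Keyboard shortcuts (code-viewer page residue, not part of pesqmod.c):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */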