📄 wave.cpp
字号:
#include "stdafx.h"
#include <mmsystem.h>
#include <math.h>
#include "ActVoiceCtl.h"
#include "wave.h"
// 16-bit signed integer; element type of the capture and voice buffers.
typedef __int16 int16;

// Samples per analysis frame (30 ms at the 8 kHz rate requested in wave_start).
#define FRAME_LEN 240
// Samples the analysis position advances between consecutive frames.
#define FRAME_SHIFT 80
// Capacity, in frames, of the per-frame feature arrays.
#define FRAMES 300
// Samples held by each of the two capture buffers.
#define BUFFER_LEN 2400
// Capacity, in samples, of the circular voice buffer.
#define MAX_SAMPLES (FRAMES*FRAME_LEN)

// Object notified through onWaveDone() when capture finishes.
CActVoiceCtrl *control;

// Handle of the opened wave input device.
HWAVEIN hwi;

// Headers describing the two capture buffers.
WAVEHDR wh1;
WAVEHDR wh2;

int16 frame_buff_a[BUFFER_LEN]; // capture buffer A
int16 frame_buff_b[BUFFER_LEN]; // capture buffer B
int16 voice_buff[MAX_SAMPLES];  // circular store of recorded samples

// Indices into voice_buff.
int buff_begin; // first sample of the current word
int buff_end;   // next write position
int buff_ptr;   // first sample of the next frame to analyse

// Detector and session flags.
BOOL flag_word_start; // a word is confirmed to be in progress
BOOL flag_word_end;   // a complete word has been captured
BOOL flag_poss_start; // a word has possibly started
BOOL flag_user_stop;  // wave_stop() was called
BOOL flag_vad_over;   // completion has already been handled by the callback

// Detector limits and counters, all measured in frames.
int max_silence_time;
int min_word_length;
int silence_time;
int word_length;

// Short-time energy: latest value, thresholds, and per-frame history.
double amp;
double amp_high;
double amp_low;
double amp_buf[FRAMES];

// Zero-crossing count: latest value, thresholds, and per-frame history.
double zcr;
double zcr_high;
double zcr_low;
double zcr_buf[FRAMES];

// Minimum sample-to-sample difference for a sign change to count as a crossing.
double zcr_threshold;
// Puts the module back into its idle state before a new capture.
//
// pointer: object stored in `control`; the capture callback invokes its
//          onWaveDone() when recording finishes.
void wave_reset(CActVoiceCtrl *pointer)
{
    control = pointer;

    // No word detected yet and no stop requested.
    flag_word_start = flag_poss_start = flag_word_end = FALSE;
    flag_user_stop = flag_vad_over = FALSE;

    // Energy thresholds.
    amp_high = 10.0;        // at or above: speech has definitely started
    amp_low = 2.0;          // at or above: speech has possibly started

    // Zero-crossing thresholds.
    zcr_high = 10.0;        // at or above: speech has definitely started
    zcr_low = 5.0;          // at or above: speech has possibly started
    zcr_threshold = 0.02;

    // Duration limits, in frames.
    min_word_length = 20;   // shortest word accepted
    max_silence_time = 10;  // longest pause tolerated inside a word

    // Running counters.
    silence_time = 0;       // consecutive quiet frames
    word_length = 0;        // frames in the current word

    // Circular-buffer indices.
    buff_begin = buff_end = buff_ptr = 0;
}
// Pre-emphasis filter applied in place to one frame:
//     y[n] = x[n] - 0.9375 * x[n-1]
//
// buff: FRAME_LEN samples, overwritten with the filtered values.
//
// The previous input sample is kept in a function-local static, so it carries
// over from one call to the next and starts at 0 for the first call.
// NOTE(review): wave_append() passes overlapping frames (it advances by
// FRAME_SHIFT, not FRAME_LEN), so the carried sample is the last sample of the
// previous frame rather than the sample preceding this one - confirm intended.
void wave_filter(double *buff) // pre-emphasis filter
{
    static double prev_input = 0;
    double *const stop = buff + FRAME_LEN;

    for (double *p = buff; p != stop; ++p)
    {
        const double current = *p;
        *p = current - prev_input*0.9375;
        prev_input = current;
    }
}
// Computes the short-time energy of one frame as the sum of absolute sample
// values and stores it in the global `amp`.
//
// buff: FRAME_LEN samples (read only).
//
// While a word is being tracked (word_length > 0) the value is also recorded
// in amp_buf at slot word_length-1; nothing is recorded when word_length is 0.
// NOTE(review): wave_append() calls this before wave_vad() increments
// word_length, so the slot index trails the frame being measured by one -
// confirm this is intended.
void wave_energy(double *buff) // short-time energy of one frame
{
    amp = 0.0;
    for (int k = 0; k < FRAME_LEN; ++k)
    {
        amp += fabs(buff[k]);
    }

    if (word_length <= 0)
        return;
    amp_buf[word_length-1] = amp;
}
// Counts the zero crossings in one frame and stores the count in the global
// `zcr`. A crossing is a pair of adjacent samples with opposite signs whose
// difference exceeds zcr_threshold.
//
// buff: FRAME_LEN samples (read only).
//
// While a word is being tracked (word_length > 0) the count is also recorded
// in zcr_buf at slot word_length-1; nothing is recorded when word_length is 0.
// NOTE(review): wave_append() calls this before wave_vad() increments
// word_length, so the slot index trails the frame being measured by one -
// confirm this is intended.
void wave_zcr(double *buff) // zero-crossing count of one frame
{
    int crossings = 0;

    for (int k = 1; k < FRAME_LEN; ++k)
    {
        const double before = buff[k-1];
        const double after = buff[k];
        if (before*after<0 && fabs(before-after)>zcr_threshold)
            ++crossings;
    }

    zcr = crossings;
    if (word_length <= 0)
        return;
    zcr_buf[word_length-1] = zcr;
}
// Endpoint detection: advances the word detector by one frame using the
// current values of `amp` and `zcr`.
//
// Returns 1 when a complete word is available (flag_word_end is set), either
// because enough trailing silence followed a long-enough word or because the
// word reached FRAMES-3 frames. Returns 0 otherwise.
int wave_vad() // endpoint detection
{
    // What to do with the current frame once the thresholds have been checked.
    enum { VAD_CONFIRM, VAD_EXTEND, VAD_DISCARD };
    int step;
    const int above_low = (amp>=amp_low || zcr>=zcr_low);

    if (flag_word_start)
    { // inside a word: look for its end
        if (above_low)
        {
            step = VAD_CONFIRM;         // still speaking
        }
        else
        {
            ++silence_time;
            if (silence_time<max_silence_time)
            {
                step = VAD_EXTEND;      // pause in the middle of the word
            }
            else if (word_length<min_word_length)
            {
                step = VAD_DISCARD;     // word too short
            }
            else
            {
                flag_word_start = FALSE;
                flag_word_end = TRUE;
                word_length -= (silence_time/2); // drop half of the trailing silence
                return 1;               // valid word
            }
        }
    }
    else
    { // outside a word: look for its start
        if (amp>=amp_high || zcr>=zcr_high)
        {
            step = VAD_CONFIRM;
        }
        else if (above_low)
        {
            flag_poss_start = TRUE;
            step = VAD_EXTEND;
        }
        else
        {
            step = VAD_DISCARD;
        }
    }

    if (step == VAD_DISCARD)
    {
        flag_poss_start = FALSE;
        flag_word_start = FALSE;
        flag_word_end = FALSE;
        silence_time = 0;
        word_length = 0;
        return 0;
    }

    if (step == VAD_CONFIRM)
    {
        flag_word_start = TRUE;
        flag_poss_start = FALSE;
        silence_time = 0;
    }

    // Both VAD_CONFIRM and VAD_EXTEND count the frame as part of the word.
    word_length++;
    if (word_length < FRAMES-3)
        return 0;

    // Frame budget exhausted: close the word here.
    word_length = FRAMES-3;
    flag_word_start = FALSE;
    flag_poss_start = FALSE;
    flag_word_end = TRUE;
    return 1;
}
// Appends captured samples to the circular voice buffer and runs endpoint
// detection on every complete frame that is available.
//
// sample: samples to append; may be NULL only when len is not positive.
// len:    number of samples at `sample`.
//
// Does nothing once a word has been completed (flag_word_end). Samples that do
// not fit in the circular buffer are dropped. Analysis stops as soon as
// wave_vad() reports a complete word or fewer than FRAME_LEN unread samples
// remain.
void wave_append(int16 *sample, int len)
{
    int i, j;
    double buff[FRAME_LEN];

    if (flag_word_end) return;
    if (sample == NULL && len > 0) return;

    // Add the samples to the circular buffer.
    for (i=0; i<len; i++)
    {
        if ((buff_end+1)%MAX_SAMPLES == buff_ptr)
            break; // buffer full, speech too long
        voice_buff[buff_end++] = *sample++;
        buff_end %= MAX_SAMPLES;
    }

    for(;;)
    {
        // Count the unread samples before touching any of them. The previous
        // check ran only after a sample had been consumed, so an empty buffer
        // (buff_ptr == buff_end) was framed as stale data, and a buffer holding
        // exactly one frame was rejected.
        int available = buff_end - buff_ptr;
        if (available < 0)
            available += MAX_SAMPLES;
        if (available < FRAME_LEN)
            return; // buffer empty, or less than one frame left

        // Fetch one frame, scaled by 1/32768.
        for (i=0, j=buff_ptr; i<FRAME_LEN; i++)
        {
            buff[i] = voice_buff[j++] / 32768.0;
            j %= MAX_SAMPLES;
        }

        // Compute the frame parameters, then run endpoint detection. The
        // zero-crossing count is taken before pre-emphasis, the energy after.
        wave_zcr (buff);
        wave_filter(buff);
        wave_energy(buff);
        if (word_length==0)
            buff_begin = buff_ptr; // remember where the word would start
        if (wave_vad())
            return;
        buff_ptr += FRAME_SHIFT;
        buff_ptr %= MAX_SAMPLES;
    }
}
// waveIn callback installed by wave_start().
//
// hwi:      handle of the input device.
// uMsg:     message; only WIM_DATA is acted upon.
// lpsample: unused.
// lpwvhdr:  for WIM_DATA, the WAVEHDR of the buffer being returned.
// reserved: unused.
//
// For each returned buffer the recorded samples are passed to wave_append().
// When a word has been completed or wave_stop() was called, the device is
// stopped and closed and control->onWaveDone() is invoked once; otherwise the
// buffer is queued again.
//
// NOTE(review): the documented waveInProc prototype uses DWORD_PTR for the
// last three parameters; with DWORD the header pointer would be truncated in a
// 64-bit build. The signature is left as is because it may be declared in
// wave.h - confirm and update both together.
// NOTE(review): the waveInProc documentation restricts which system functions
// may be called from inside the callback, and the waveIn* calls below do not
// appear to be among them; consider signalling another thread to perform the
// teardown - confirm against the documentation.
void CALLBACK waveInProc( // callback function
HWAVEIN hwi,
UINT uMsg,
DWORD lpsample,
DWORD lpwvhdr,
DWORD reserved )
{
    WAVEHDR *pWhdr;
    int samples;

    switch (uMsg)
    {
    case WIM_DATA :
        pWhdr = (WAVEHDR*) lpwvhdr;
        // Ignore buffers that arrive after completion was reported, so the
        // detector state read by the wave_get_* functions stays untouched.
        if (pWhdr == NULL || flag_vad_over)
            break;

        // Use the amount actually recorded: a buffer returned early is only
        // partially filled and the remainder holds stale samples.
        samples = (int) (pWhdr->dwBytesRecorded / sizeof(int16));
        if (samples > BUFFER_LEN)
            samples = BUFFER_LEN;
        wave_append((int16 *) (pWhdr->lpData), samples);

        if (flag_word_end || flag_user_stop)
        {
            flag_vad_over = TRUE;
            waveInStop(hwi);
            waveInUnprepareHeader(hwi, &wh1, sizeof(WAVEHDR));
            waveInUnprepareHeader(hwi, &wh2, sizeof(WAVEHDR));
            waveInClose(hwi);
            control->onWaveDone();
        }
        else
            waveInAddBuffer(hwi, pWhdr, sizeof(WAVEHDR));
        break;

    case WIM_OPEN :
    case WIM_CLOSE:
    default:
        break;
    }
}
// Requests that recording stop. Only sets flag_user_stop; the capture callback
// (waveInProc) checks the flag when it handles its next WIM_DATA message and
// performs the actual shutdown there.
void wave_stop() // stop recording
{
flag_user_stop = TRUE;
}
void wave_start() //开始录音
{
MMRESULT tmp;
WAVEFORMATEX wfx;
if (0 == waveInGetNumDevs())
{
AfxMessageBox("没有检测到输入设备");
return;
}
wfx.wFormatTag = WAVE_FORMAT_PCM;
wfx.nChannels = 1;
wfx.nSamplesPerSec = 8000;
wfx.nAvgBytesPerSec = 16000;
wfx.nBlockAlign = 2;
wfx.wBitsPerSample = 16;
wfx.cbSize = 0;
tmp = waveInOpen(
NULL, // ptr can be NULL for query
0, // the device identifier
&wfx, // defines requested format
NULL, // no callback
NULL, // no instance data
WAVE_FORMAT_QUERY); // query only, do not open device
if (tmp!=MMSYSERR_NOERROR)
{
AfxMessageBox("输入设备不支持格式PCM 8KHz 16位采样");
return;
}
tmp = waveInOpen(&hwi, 0, &wfx, (DWORD)&waveInProc, NULL, CALLBACK_FUNCTION);
if (tmp!=MMSYSERR_NOERROR)
{
AfxMessageBox("输入设备不能打开");
return;
}
wh1.lpData = (char *) &frame_buff_a;
wh1.dwBufferLength = BUFFER_LEN*2; //length in bytes
wh1.dwBytesRecorded = NULL;
wh1.dwUser = NULL;
wh1.dwFlags = NULL;
wh1.dwLoops = NULL;
wh1.lpNext = NULL;
wh1.reserved = NULL;
wh2.lpData = (char *) &frame_buff_b;
wh2.dwBufferLength = BUFFER_LEN*2; //length in bytes
wh2.dwBytesRecorded = NULL;
wh2.dwUser = NULL;
wh2.dwFlags = NULL;
wh2.dwLoops = NULL;
wh2.lpNext = NULL;
wh2.reserved = NULL;
waveInPrepareHeader(hwi, &wh1, sizeof(WAVEHDR));
waveInPrepareHeader(hwi, &wh2, sizeof(WAVEHDR));
waveInAddBuffer(hwi, &wh1, sizeof(WAVEHDR));
waveInAddBuffer(hwi, &wh2, sizeof(WAVEHDR));
waveInStart(hwi);
}
// Returns the length of the captured word in samples
// (word_length * FRAME_SHIFT), or 0 when no word has been completed.
int wave_get_len()
{
    return flag_word_end ? word_length * FRAME_SHIFT : 0;
}
// Returns the length of the captured word in frames, or 0 when no word has
// been completed.
int wave_get_frames()
{
    return flag_word_end ? word_length : 0;
}
void wave_get_data(double *buff)
{
for (int i=0; i<word_length*FRAME_SHIFT; i++)
{
int ptr = (i + buff_begin) % MAX_SAMPLES;
*buff++ = voice_buff[ptr];
}
}
void wave_get_zcr(double *buff)
{
for (int i=0; i<word_length; i++)
*buff++ = zcr_buf[i];
}
void wave_get_energy(double *buff)
{
for (int i=0; i<word_length; i++)
*buff++ = amp_buf[i];
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -