⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kwspot.cpp

📁 语音识别配套的VQ及DHMM模型训练程序(C语言)
💻 CPP
字号:
//	DHMM_MFC.cpp:
//		Defines the entry point for the console application.
//
//	Created 2001/08, By DongMing, MDSR.
//
/////////////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "kwspot.h"
#include "DHMM_VQ_MFC.h"
#include "DHMM_Model_MFC.h"
#include "DHMM_Recog_MFC.h"
#include <direct.h>
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
//	Private functions
//	Forward declarations for the helpers used by _tmain() and by each other.
//	TheMain, Init_Pro_Config, Run and Done are defined further down in this
//	file (their definitions add `static`); FindProp is defined at the end.
//	NOTE(review): no definition of Init or KittyInit is visible in this file --
//	confirm whether they live elsewhere or are leftovers.
int TheMain(void);
void Init(void);
void KittyInit(void);
void Init_Pro_Config(void);
//	Looks up the value text for the key `Prop` in the program's config file.
CString FindProp(CString Prop);
void Run(void);
void Done(void);

/////////////////////////////////////////////////////////////////////////////
//	Global variables
PRO_CONFIG u_Pro_Config;

/////////////////////////////////////////////////////////////////////////////
// The one and only application object

CWinApp theApp;

using namespace std;

//	Process entry point.
//	Brings up MFC for this console process and then hands control to
//	TheMain(). Returns 1 when MFC cannot be initialised, otherwise whatever
//	TheMain() returns.
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
	// Guard clause: without a working MFC runtime nothing else may run.
	if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
	{
		cerr << _T("Fatal Error: MFC initialization failed") << endl;
		return 1;
	}

	// MFC is ready: the application's real work lives in TheMain().
	return TheMain();
}

//	Because of the MFC program framework, this function is the program's real main().
//
//	Sequence:
//	  1. Init_Pro_Config() fills the global u_Pro_Config.
//	  2. A header (date, time, person counts, column titles) goes to the result log.
//	  3. Outer loop (n_Loop_Num passes): sets the DT MFCC multiplier coefficient.
//	  4. Inner loop (n_Rotate_Test_Num passes): selects the training / recognition
//	     person ranges for this rotation, builds the code book and model file
//	     names, calls Run() and stores the reported accuracy.
//	  5. After each inner loop the average accuracy and the main model sizes are
//	     written to the result log.
//
//	Returns 0 unconditionally.
static int TheMain(void)
{
	char sz_Date_Time[64];
	int n_Loop_Index, n_Rotate_Test_Index;

	Init_Pro_Config();

	//	Keep the configured first training person. Every rotation is derived
	//	from this base value; deriving it from the value left behind by the
	//	previous rotation would add the offsets up (step*0, step*1, step*3, ...)
	//	and make folds drift and repeat.
	const int n_Base_Person_Start_Index = u_Pro_Config.n_DHMM_Model_Person_Start_Index;

	//	Write Result.txt: the record of the program's results.
	_strdate(sz_Date_Time);
	RESULT_LOG("Current Date: %s, ", sz_Date_Time);
	_strtime(sz_Date_Time);
	RESULT_LOG("Time: %s.\n", sz_Date_Time);
	RESULT_LOG("Total persons Num = %3d, Training set persons Num = %3d.\n", u_Pro_Config.n_Feature_Person_Num, u_Pro_Config.n_DHMM_Model_Train_Set_Person_Num);
	//	Column titles only: the format has no conversion specifiers, so no
	//	value arguments are passed.
	RESULT_LOG("\t\tCoefficient\t\tStart_Person\t\tAccurate_Rate\n");
	RESULT_LOG("================================================================================\n");

	//	Outer loop: step through the DT MFCC multiplier coefficient.
	for (n_Loop_Index = 0; n_Loop_Index < u_Pro_Config.n_Loop_Num; n_Loop_Index++)
	{
		u_Pro_Config.d_Feature_DT_MFCC_Coefficient = 3 * (u_Pro_Config.d_Feature_DT_MFCC_Coefficient_Start + 0.03F * n_Loop_Index);

		//	Write DHMM.log: the record of the program's run-time information.
		TRACE("\nProgram Started ...\n");
		PRO_LOG("\nProgram Started ...\n");
		_strdate(sz_Date_Time);
		PRO_LOG("\tCur Date: %s, ", sz_Date_Time);
		_strtime(sz_Date_Time);
		PRO_LOG("Time: %s.\n", sz_Date_Time);
		PRO_LOG("\tDT_MFCC_Coefficient = %10.4f.\n\n", u_Pro_Config.d_Feature_DT_MFCC_Coefficient / 3.0F);

		//	One accuracy value per rotation, averaged after the inner loop.
		double * pd = new double[u_Pro_Config.n_Rotate_Test_Num];
		ASSERT(pd != NULL);

		//	Inner loop: rotate the training set and the test set.
		for (n_Rotate_Test_Index = 0; n_Rotate_Test_Index < u_Pro_Config.n_Rotate_Test_Num; n_Rotate_Test_Index++)
		{
			PRO_LOG("Leave-one-out Loop: %4d/%4d.\n", n_Rotate_Test_Index, u_Pro_Config.n_Rotate_Test_Num);

			//	Person ranges wrap around modulo the total number of persons.
			//	The recognition set starts right after the training set ends.
			u_Pro_Config.n_DHMM_Model_Person_Start_Index = (n_Base_Person_Start_Index + (u_Pro_Config.n_Feature_Person_Num / u_Pro_Config.n_Rotate_Test_Num) * n_Rotate_Test_Index) % u_Pro_Config.n_Feature_Person_Num;
			u_Pro_Config.n_DHMM_Model_Person_End_Index = (u_Pro_Config.n_DHMM_Model_Train_Set_Person_Num + u_Pro_Config.n_DHMM_Model_Person_Start_Index - 1) % u_Pro_Config.n_Feature_Person_Num;
			u_Pro_Config.n_Recog_Person_Start_Index = (u_Pro_Config.n_DHMM_Model_Person_End_Index + 1) % u_Pro_Config.n_Feature_Person_Num;
			u_Pro_Config.n_Recog_Person_End_Index = (u_Pro_Config.n_Recog_Person_Start_Index + u_Pro_Config.n_Recog_Set_Person_Num - 1) % u_Pro_Config.n_Feature_Person_Num;

			//	The *_fmt strings are set in Init_Pro_Config() and contain a
			//	single %02d placeholder for the rotation index.
			sprintf(u_Pro_Config.sz_Toload_Code_Book_File_Name, u_Pro_Config.sz_Toload_Code_Book_File_fmt, n_Rotate_Test_Index);
			sprintf(u_Pro_Config.sz_Toload_DHMM_Model_File_Name, u_Pro_Config.sz_Toload_DHMM_Model_File_fmt, n_Rotate_Test_Index);

			Run();

			pd[n_Rotate_Test_Index] = u_Pro_Config.d_Recognition_Accurate_Rate;
		}

		//	Tally the recognition rate over all rotations.
		double d = 0.0F;
		for (n_Rotate_Test_Index = 0; n_Rotate_Test_Index < u_Pro_Config.n_Rotate_Test_Num; n_Rotate_Test_Index++)
			d += pd[n_Rotate_Test_Index];
		RESULT_LOG("\nAvg. Accuracy Rate = %8.4f.\n\n", d / u_Pro_Config.n_Rotate_Test_Num);
		RESULT_LOG("Code_Book_Size = %4d.\t", u_Pro_Config.n_VQ_Code_Book_Size);
		RESULT_LOG("\tDHMM_Model_Num = %4d.\t", u_Pro_Config.n_DHMM_Model_Num);
		RESULT_LOG("\tDHMM_Model_State_Num = %4d.\t\n", u_Pro_Config.n_DHMM_Model_State_Num);

		delete [] pd;
	}

	RESULT_LOG("\n");
	TRACE("Program Ended ...\n\n");
	PRO_LOG("Program Ended ...\n");

	Done();

	return 0;
}


//	Copies the first whitespace-delimited token of the config value stored
//	under sz_Prop_Name into sz_Dest, never writing more than n_Dest_Size bytes
//	(terminator included). sz_Dest is left untouched when the property is
//	missing or empty, so a previously stored default survives.
static void Load_String_Prop(const char * sz_Prop_Name, char * sz_Dest, size_t n_Dest_Size)
{
	CString pv = FindProp(sz_Prop_Name);
	if (pv.IsEmpty() || n_Dest_Size < 2)
		return;

	//	Build "%<width>s" so sscanf cannot run past the destination buffer.
	char sz_Format[32];
	sprintf(sz_Format, "%%%us", (unsigned int)(n_Dest_Size - 1));
	sscanf(pv, sz_Format, sz_Dest);
}

//	Fills the global u_Pro_Config: the structure is zeroed, every setting
//	gets its built-in default, and settings that have a key in the config
//	file (looked up through FindProp) are overridden by the file's value.
//	Also prints the current working directory, truncates the recognition
//	result file when WATCH_RECOG_RESULT is 1, and seeds rand().
static void Init_Pro_Config(void)
{
	memset(&u_Pro_Config, 0, sizeof(u_Pro_Config));

	u_Pro_Config.l_Prgram_Start_Time = time(NULL);
	u_Pro_Config.n_Loop_Num = 1;
	u_Pro_Config.n_Rotate_Test_Num = 1;

	strcpy(u_Pro_Config.sz_Program_Log_File_Name, "..\\..\\DATA\\DHMM.LOG");
	strcpy(u_Pro_Config.sz_Result_File_Name, "..\\..\\DATA\\Result.TXT");

	char buffer[_MAX_PATH];

	/* Get the current working directory: */
	if( _getcwd( buffer, _MAX_PATH ) == NULL )
		perror( "_getcwd error" );
	else
		printf( "%s\n", buffer );

	//	Person counts: 4/5 of all persons train the models, the rest are
	//	used for recognition.
	u_Pro_Config.n_Feature_Person_Num = 100;
	CString pv = FindProp("Feature_Person_Num");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.n_Feature_Person_Num);
	u_Pro_Config.n_DHMM_Model_Train_Set_Person_Num = (u_Pro_Config.n_Feature_Person_Num * 4 / 5);
	u_Pro_Config.n_DHMM_Model_Person_Start_Index = 25;
	u_Pro_Config.n_DHMM_Model_Person_End_Index = (u_Pro_Config.n_DHMM_Model_Train_Set_Person_Num + u_Pro_Config.n_DHMM_Model_Person_Start_Index - 1) % u_Pro_Config.n_Feature_Person_Num;
	u_Pro_Config.n_Recog_Person_Start_Index = 0;
	u_Pro_Config.n_Recog_Person_End_Index = 0;
	u_Pro_Config.n_Recog_Set_Person_Num = u_Pro_Config.n_Feature_Person_Num - u_Pro_Config.n_DHMM_Model_Train_Set_Person_Num;

	//	Feature file name patterns. The sz_ members are filled with strcpy
	//	directly after the memset above, i.e. they are char arrays, so sizeof
	//	yields their capacity. The recognition pattern defaults to the
	//	(possibly overridden) feature pattern.
	strcpy(u_Pro_Config.sz_Feature_Origin_File_Name_Format, "..\\..\\kitty_8k_0db\\%03d_LPCCFIX.DAT");
	Load_String_Prop("Feature_Origin_File_Name_Format", u_Pro_Config.sz_Feature_Origin_File_Name_Format, sizeof(u_Pro_Config.sz_Feature_Origin_File_Name_Format));
	strcpy(u_Pro_Config.sz_Recog_Origin_File_Name_Format, u_Pro_Config.sz_Feature_Origin_File_Name_Format);
	Load_String_Prop("Recog_Origin_File_Name_Format", u_Pro_Config.sz_Recog_Origin_File_Name_Format, sizeof(u_Pro_Config.sz_Recog_Origin_File_Name_Format));

	u_Pro_Config.n_Feature_Dim = 16;
	u_Pro_Config.n_Feature_DT_MFCC_Start_Index = u_Pro_Config.n_Feature_Dim / 2;
	u_Pro_Config.n_Feature_DT_MFCC_End_Index = u_Pro_Config.n_Feature_Dim - 1;

	u_Pro_Config.n_Feature_Sentence_Num = 13;
	pv = FindProp("Feature_Sentence_Num");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.n_Feature_Sentence_Num);

	//	The model count defaults to the sentence count.
	u_Pro_Config.n_DHMM_Model_Num = u_Pro_Config.n_Feature_Sentence_Num;
	pv = FindProp("DHMM_Model_Num");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.n_DHMM_Model_Num);

	u_Pro_Config.n_Sentence_Start_Index = 0;
	pv = FindProp("Sentence_Start_Index");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.n_Sentence_Start_Index);

	u_Pro_Config.n_VQ_Code_Book_Size = 256;
	pv = FindProp("VQ_Code_Book_Size");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.n_VQ_Code_Book_Size);

	u_Pro_Config.n_DHMM_Model_State_Num = 6;
	pv = FindProp("DHMM_Model_State_Num");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.n_DHMM_Model_State_Num);

	u_Pro_Config.d_Feature_DT_MFCC_Coefficient_Start = 0.333;

	u_Pro_Config.USE_SILENCE_FEA = 1;

	u_Pro_Config.WATCH_RECOG_RESULT = 0;
	pv = FindProp("WATCH_RECOG_RESULT");
	if (!pv.IsEmpty())
		sscanf(pv, "%d", &u_Pro_Config.WATCH_RECOG_RESULT);

	//	VQ stage: train the code book only when the config file asks for it;
	//	any other value (or no value) means "load an existing code book".
	pv = FindProp("VQ_Config");
	if (pv.Find("VQ_CONFIG_LOAD_ONLY_CODE_BOOK",0) != -1)
		u_Pro_Config.l_VQ_Config = VQ_CONFIG_LOAD_ONLY_CODE_BOOK;
	else if (pv.Find("VQ_CONFIG_TRAIN_ONLY_CODE_BOOK",0) != -1)
		u_Pro_Config.l_VQ_Config = VQ_CONFIG_TRAIN_ONLY_CODE_BOOK | VQ_CONFIG_LOAD_TRAIN_SET_ONLY | VQ_CONFIG_TRAIN_PROCEDURE_GL;
	else
		u_Pro_Config.l_VQ_Config = VQ_CONFIG_LOAD_ONLY_CODE_BOOK;

	//	Model stage: train only on explicit request, otherwise load.
	pv = FindProp("DHMM_Model_Config");
	if (pv.Find("MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL",0) != -1)
		u_Pro_Config.l_DHMM_Model_Config = MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL | MODEL_CONFIG_TRAIN_PROCEDURE_GL;
	else
		u_Pro_Config.l_DHMM_Model_Config = MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL ;

	u_Pro_Config.l_DHMM_Recog_Config = RECOG_CONFIG_METHOD_VITERBI_ONLY | RECOG_CONFIG_RECOG_PROCEDURE_HQ;
	u_Pro_Config.n_Recog_Sentence_Num = u_Pro_Config.n_Feature_Sentence_Num;
	u_Pro_Config.n_Recog_Person_Num = u_Pro_Config.n_Feature_Person_Num;

	//	printf-style patterns; TheMain() substitutes the rotation index.
	strcpy(u_Pro_Config.sz_Toload_Code_Book_File_fmt, "..\\..\\DATA\\DWDN_CB_%02d.DAT");
	strcpy(u_Pro_Config.sz_Toload_DHMM_Model_File_fmt, "..\\..\\DATA\\DWDN_Model_%02d.DAT");

	strcpy(u_Pro_Config.sz_Recog_LLS_File, "..\\..\\data\\Recog_Ret\\Kitty_Recog_LLS_Ret.txt");
	Load_String_Prop("Recog_LLS_File", u_Pro_Config.sz_Recog_LLS_File, sizeof(u_Pro_Config.sz_Recog_LLS_File));
	if (u_Pro_Config.WATCH_RECOG_RESULT == 1)
	{
		//	Start the result file afresh with a single leading tab. The path
		//	is used directly; copying it into a fixed 100-byte buffer could
		//	overflow on long paths.
		FILE * fp = fopen(u_Pro_Config.sz_Recog_LLS_File, "wt");
		ASSERT(fp != NULL);
		if (fp != NULL)
		{
			fprintf(fp, "\t");
			fclose(fp);
		}
		else
		{
			//	ASSERT vanishes in release builds; report instead of
			//	dereferencing a NULL stream.
			perror(u_Pro_Config.sz_Recog_LLS_File);
		}
	}

	srand((unsigned int)time(NULL));

}

//	A single Run() carries out one complete pass of code book training,
//	model training and recognition; depending on the program configuration
//	some of these stages may be skipped.
//	The stage results are only checked with ASSERT.
static void Run(void)
{
	char sz_Stamp[64];

	//	Restart the timing reference for this pass.
	u_Pro_Config.l_Prgram_Start_Time = time(NULL);

	//	Write DHMM.log: date, time and the key settings of this pass.
	_strdate(sz_Stamp);
	PRO_LOG("\tCur Date: %s, ", sz_Stamp);
	_strtime(sz_Stamp);
	PRO_LOG("Time: %s.\n", sz_Stamp);
	PRO_LOG("\tDT_MFCC_Coefficient = %10.4f.\n", u_Pro_Config.d_Feature_DT_MFCC_Coefficient / 3.0F);
	PRO_LOG("\tTraining set person num = %3d.\n", u_Pro_Config.n_DHMM_Model_Train_Set_Person_Num);
	PRO_LOG("\tTraining set start person No. = %3d.\n", u_Pro_Config.n_DHMM_Model_Person_Start_Index);

	//	Stage 1 -- code book training, with files as input and output.
	//	(Original author's note: this is the part that actually runs. -- Wang Dong)
	PRO_LOG("DHMM_VQ...\n");
	PRO_LOG("\tVQ Method Config = 0x%X.\n", u_Pro_Config.l_VQ_Config);
	const int n_VQ_Result = DHMM_VQ();
	ASSERT(n_VQ_Result == 0);
	DHMM_VQ_To_519_CodeBook();
	ELAPSED_TIME_OUTPUT();
	PRO_LOG("\n");

	//	Stage 2 -- model training, with files as input and output.
	PRO_LOG("DHMM_Model...\n");
	PRO_LOG("\tModel Method Config = 0x%X.\n", u_Pro_Config.l_DHMM_Model_Config);
	const int n_Model_Result = DHMM_Model();
	ASSERT(n_Model_Result == 0);
	DHMM_Model_To_519_Model();
	ELAPSED_TIME_OUTPUT();
	PRO_LOG("\n");

	//	Stage 3 -- recognition test, with files as input; the outcome is
	//	stored back into the program configuration u_Pro_Config.
	PRO_LOG("DHMM_Recog...\n");
	PRO_LOG("\tRecognition Method Config = 0x%X.\n", u_Pro_Config.l_DHMM_Recog_Config);
	PRO_LOG("\tRecognition set person num = %3d.\n", u_Pro_Config.n_Recog_Set_Person_Num);
	PRO_LOG("\tRecognition set start person No. = %3d.\n", u_Pro_Config.n_Recog_Person_Start_Index);
	const int n_Recog_Result = DHMM_Recog_Kitty();
	PRO_LOG("Origin File Format = %s\n", u_Pro_Config.sz_Feature_Origin_File_Name_Format);
	PRO_LOG("Recog File Format = %s\n", u_Pro_Config.sz_Recog_Origin_File_Name_Format);
	ASSERT(n_Recog_Result == 0);
	ELAPSED_TIME_OUTPUT();
	PRO_LOG("\n");
}

//	Shutdown hook called at the end of TheMain(); currently does nothing.
static void Done(void)
{
	//	Intentionally empty.
}

//	Looks up a setting in the config file "..\..\DATA\config_kitty.txt".
//
//	The file is scanned line by line; the first line that contains the text
//	`Prop` wins. Everything up to and including that occurrence of `Prop` is
//	removed and the remainder is returned with leading and trailing
//	whitespace trimmed.
//
//	Prop     key text to search for (case-sensitive substring match).
//	Returns  the trimmed value text, or an empty string when the file cannot
//	         be opened or no line contains `Prop`.
//
//	NOTE(review): the file is reopened and rescanned on every call.
CString FindProp(CString Prop)
{
	CStdioFile fin;
	CFileException e;
	CString Pro_Config_File_Name =  "..\\..\\DATA\\config_kitty.txt";
	if( !fin.Open( Pro_Config_File_Name, CFile::modeRead, &e ) )
	{
	#ifdef _DEBUG
		afxDump << "File could not be opened " << e.m_cause << "\n";
	#endif
		//	No stream to read from: report "not found" so that callers keep
		//	their built-in defaults.
		return CString();
	}

	CString line;

	//	ReadString hands back one line per call without its newline and
	//	returns FALSE once the end of the file is reached.
	while (fin.ReadString(line))
	{
		int i = line.Find(Prop);
		if (i != -1)
		{
			//	Strip the key itself. GetLength() is the key's character
			//	count (GetAllocLength() is the buffer capacity and may be
			//	larger); adding i also drops anything that precedes the key.
			line.Delete(0, i + Prop.GetLength());
			fin.Close();
			line.TrimLeft();
			line.TrimRight();
			return line;
		}
	}

	fin.Close();
	return CString();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -