⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dhmm_wp.cpp

📁 语音识别配套的VQ及DHMM模型训练程序(C语言)
💻 CPP
字号:
//	DHMM_WP.cpp:
//		Implementation of the DHMM_WP Module.
//		This is an adaptation of the earlier DHMM code by WangPeng.
//
//	Created 2001/08, By WangPeng, MDSR.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "DHMM_WP.h"

#define KMEAN_LOOP_TIME		40

//////////////////////////////////////////////////////////////////////
//	Private functions
// Forward declaration of the file-local k-means routine that is defined
// later in this file. The definition is `static`, so the prototype must be
// `static` too: C++ does not allow a function that was first declared with
// external linkage to be redeclared with internal linkage.
//
//   v               - training vectors, stored back to back
//   CodeBook        - code words, stored back to back; updated in place
//   nTotalFrameNum  - number of training vectors in v
//   nCodeBookSize   - number of code words in CodeBook
//   n_feature_order - number of doubles per vector / code word
static void kmean(
	double * v,
	double * CodeBook,
	int nTotalFrameNum,
	int nCodeBookSize,
	int n_feature_order
);

//////////////////////////////////////////////////////////////////////
//	API functions
// Train a VQ codebook with the k-means routine of this module.
//
// The 2-D inputs are flattened into contiguous 1-D buffers (kmean() works on
// flat arrays), the codebook is seeded, kmean() refines it in place, and the
// result is copied row by row into d2dda_codebook.
//
// Parameters:
//   d2dda_train_vector     - training vectors; row i is read as
//                            n_code_word_dim doubles.
//                            (presumably DYNA_2DIM_DOUBLE_ARRAY is double** -
//                            confirm against its declaration)
//   n_train_vector_num     - number of training vectors (must be > 0).
//   n_code_word_dim        - number of doubles per vector / code word
//                            (must be > 0).
//   d2dda_initial_codebook - if NULL, the initial code words are copies of
//                            training vectors picked at pairwise different
//                            random indices; otherwise its n_codebook_size
//                            rows are used as the initial codebook.
//   d2dda_codebook         - output; row i receives trained code word i.
//   n_codebook_size        - number of code words (must be > 0, and must not
//                            exceed n_train_vector_num when the codebook is
//                            initialised randomly).
//
// Returns 0 on success, -1 when the sizes are invalid or an allocation
// reports failure.
int DHMM_VQ_Train_Code_Book_WP(
	DYNA_2DIM_DOUBLE_ARRAY d2dda_train_vector,
	int n_train_vector_num,
	int n_code_word_dim,
	DYNA_2DIM_DOUBLE_ARRAY d2dda_initial_codebook,	// NULL: initialise the codebook
													// randomly; otherwise use this
													// array as the initial codebook
	DYNA_2DIM_DOUBLE_ARRAY d2dda_codebook,
	int n_codebook_size
)
{
	double* pd_v = NULL;			// training vectors as one flat array
	double* pd_codebook = NULL;		// codebook as one flat array
	int* pn_codeword_frame_index = NULL;	// random init only: training-vector index chosen for each code word
	int i, j, n_temp;
	int n_codeword_equal_flag;		// random init only: 1 when the drawn index was already used

	PRO_LOG("\tVQ = WP, K-mean loop time = %4d.\n", KMEAN_LOOP_TIME);

	if(n_train_vector_num <= 0 || n_code_word_dim <= 0 || n_codebook_size <= 0)
	{
		DEBUG_PRINTF("Invalid sizes for VQ codebook training!\n");
		ASSERT(0);
		return -1;
	}

	// Random initialisation picks pairwise different indices, which is
	// impossible (and used to loop forever) with more code words than vectors.
	if(d2dda_initial_codebook == NULL && n_codebook_size > n_train_vector_num)
	{
		DEBUG_PRINTF("Codebook size exceeds the number of train vectors!\n");
		ASSERT(0);
		return -1;
	}

	// NOTE(review): the NULL checks below only help on toolchains whose
	// operator new returns NULL on failure; a conforming new throws instead.
	if((pd_v = new double[(size_t)n_train_vector_num * n_code_word_dim]) == NULL)
	{
		DEBUG_PRINTF("Allocation of memory for train vectors failed!\n");
		ASSERT(0);
		return -1;
	}

	if((pd_codebook = new double[(size_t)n_codebook_size * n_code_word_dim]) == NULL)
	{
		DEBUG_PRINTF("Allocation of memory for codebook failed!\n");
		ASSERT(0);
		delete[] pd_v;
		return -1;
	}

	// kmean() takes the training set as a flat array, so copy the 2-D array
	// row by row into the 1-D buffer.
	for(i = 0; i < n_train_vector_num; i++)
		memcpy(pd_v + (size_t)i * n_code_word_dim, d2dda_train_vector[i], n_code_word_dim * sizeof(double));

	if(d2dda_initial_codebook != NULL)
	{
		// An initial codebook was supplied: flatten it the same way.
		for(i = 0; i < n_codebook_size; i++)
			memcpy(pd_codebook + (size_t)i * n_code_word_dim, d2dda_initial_codebook[i], n_code_word_dim * sizeof(double));
	}
	else
	{
		// No initial codebook: seed every code word with a training vector
		// taken at a random index that has not been used yet.
		if((pn_codeword_frame_index = new int[n_codebook_size]) == NULL)
		{
			DEBUG_PRINTF("Allocation of memory for code word indices failed!\n");
			ASSERT(0);
			delete[] pd_v;
			delete[] pd_codebook;
			return -1;
		}
//		srand( (unsigned)time( NULL ) );
		for(i = 0; i < n_codebook_size; i++)
		{
			do
			{
				n_temp = (int)((double)n_train_vector_num * ((double)rand() / (double)RAND_MAX));
				// rand() == RAND_MAX maps to n_train_vector_num, one past
				// the last valid index; clamp it back into range.
				if(n_temp >= n_train_vector_num)
					n_temp = n_train_vector_num - 1;

				n_codeword_equal_flag = 0;
				for(j = 0; j < i; j++)
				{
					if(pn_codeword_frame_index[j] == n_temp)
					{
						n_codeword_equal_flag = 1;
						break;
					}
				}
			} while(n_codeword_equal_flag == 1);

			pn_codeword_frame_index[i] = n_temp;
			memcpy(pd_codebook + (size_t)i * n_code_word_dim, pd_v + (size_t)n_temp * n_code_word_dim, n_code_word_dim * sizeof(double));
		}
	}

	kmean(
		pd_v,
		pd_codebook,
		n_train_vector_num,
		n_codebook_size,
		n_code_word_dim
	);

	// Copy the trained flat codebook back into the caller's 2-D array.
	for(i = 0; i < n_codebook_size; i++)
	{
		memcpy(d2dda_codebook[i], pd_codebook + (size_t)i * n_code_word_dim, n_code_word_dim * sizeof(double));
	}

	// All three buffers come from new[], so they must go through delete[].
	// pn_codeword_frame_index stays NULL when an initial codebook was given;
	// delete[] on NULL is a no-op.
	delete[] pd_v;
	delete[] pd_codebook;
	delete[] pn_codeword_frame_index;

	return 0;
}

// Upper bound on the random draws made when looking for a training vector
// that is not already a code word; keeps kmean() from spinning when (almost)
// every training vector is already in the codebook.
enum { KMEAN_RESEED_MAX_TRIES = 100 };

// Returns 1 when the n_feature_order-dimensional vector `frame` is exactly
// equal to at least one of the nCodeBookSize code words stored back to back
// in CodeBook, 0 otherwise.
static int kmean_frame_in_codebook(
	const double * frame,
	const double * CodeBook,
	int nCodeBookSize,
	int n_feature_order
)
{
	int j, k;

	for(j = 0; j < nCodeBookSize; j++){
		for(k = 0; k < n_feature_order; k++){
			if( CodeBook[(size_t)j * n_feature_order + k] != frame[k] )
				break;
		}
		if( k == n_feature_order )
			return 1;
	}
	return 0;
}

// K-means refinement of a VQ codebook, run for exactly KMEAN_LOOP_TIME rounds.
//
// Each round:
//   1. assigns every training vector to its nearest code word (squared
//      Euclidean distance) and accumulates the quantisation distortion;
//   2. replaces every code word by the mean of the vectors assigned to it;
//   3. re-seeds a code word with a random training vector when no vector was
//      assigned to it, or when exactly one vector was assigned and the
//      distortion accumulated for that code word is 0;
//   4. reports the squared change of the codebook against the previous round.
//
// Parameters:
//   v               - training vectors, nTotalFrameNum * n_feature_order doubles
//   CodeBook        - initial code words, nCodeBookSize * n_feature_order
//                     doubles; overwritten with the trained code words
//   nTotalFrameNum  - number of training vectors
//   nCodeBookSize   - number of code words
//   n_feature_order - dimension of each vector / code word
//
// With a NULL pointer or a non-positive size the function asserts and returns
// without touching CodeBook.
static void kmean(
	double * v,
	double * CodeBook,
	int nTotalFrameNum,
	int nCodeBookSize,
	int n_feature_order
)
{
	int i, j, k, nTrainCount;
	int nTry;
	double fTemp;
	double fMinDistance;				// smallest squared distance from the current vector to any code word
	double fDistance;					// squared distance from the current vector to the current code word
	double fTotalDistortion;			// sum of fMinDistance over all training vectors in this round
	double fAvgDistortion = 0.0;		// fTotalDistortion / nTotalFrameNum
	double fCodeBookDistance = 0.0;		// squared change of the codebook between two consecutive rounds
	int nWordClassFrameNum;				// number of vectors assigned to the current code word
	int nRandFrameIndex = 0;			// randomly drawn training-vector index used to re-seed a code word
	double * fSumDistance;				// per code word: accumulated fMinDistance of the vectors assigned to it
	double * fMeanFrame;				// per round and code word: sum, then mean, of the assigned vectors
	double * OldCodeBook;				// copy of the codebook from before the current round
	int * nNearestWordIndex;			// per training vector: index of its nearest code word

	if( v == NULL || CodeBook == NULL || nTotalFrameNum <= 0 || nCodeBookSize <= 0 || n_feature_order <= 0 ){
		DEBUG_PRINTF("kmean: invalid arguments, code book left unchanged.\n");
		ASSERT(0);
		return;
	}

	nNearestWordIndex = new int[nTotalFrameNum];
	OldCodeBook = new double[(size_t)nCodeBookSize * n_feature_order];
	fSumDistance = new double[nCodeBookSize];
	fMeanFrame = new double[n_feature_order];

	// NOTE(review): fSumDistance is cleared only here, so it keeps growing
	// over all rounds; the "distortion is 0" re-seed test below therefore
	// looks at the total since round 0, not at the current round alone.
	// Confirm whether a per-round reset was intended before changing it.
	memset(fSumDistance, 0, nCodeBookSize * sizeof(double));

	for(nTrainCount = 0; nTrainCount < KMEAN_LOOP_TIME; nTrainCount++){

		// keep the previous codebook to measure how much this round changes it
		memcpy(OldCodeBook, CodeBook, (size_t)nCodeBookSize * n_feature_order * sizeof(double));

		printf("Training round: %d\n", nTrainCount);

		// assignment step: nearest code word and distortion per training vector
		fTotalDistortion = 0.0;
		for(i = 0; i < nTotalFrameNum; i++){
			const double * pFrame = v + (size_t)i * n_feature_order;

			fMinDistance = 0.0;
			for(j = 0; j < nCodeBookSize; j++){
				const double * pWord = CodeBook + (size_t)j * n_feature_order;

				fDistance = 0.0;
				for(k = 0; k < n_feature_order; k++){
					fTemp = pFrame[k] - pWord[k];
					fDistance += fTemp * fTemp;
				}
				// the first code word always becomes the initial candidate
				if( j == 0 || fDistance < fMinDistance ){
					fMinDistance = fDistance;
					nNearestWordIndex[i] = j;
				}
			}
			fTotalDistortion += fMinDistance;
			fSumDistance[nNearestWordIndex[i]] += fMinDistance;
		}
		fAvgDistortion = fTotalDistortion / (double)nTotalFrameNum;
		DEBUG_PRINTF("Last round's average distortion: %-10f\n", fAvgDistortion);

		// update step
//		srand( (unsigned)time( NULL ) );
		for(i = 0; i < nCodeBookSize; i++){
			double * pWord = CodeBook + (size_t)i * n_feature_order;

			nWordClassFrameNum = 0;
			memset(fMeanFrame, 0, n_feature_order * sizeof(double));
			for(j = 0; j < nTotalFrameNum; j++){
				if( nNearestWordIndex[j] == i ){
					for(k = 0; k < n_feature_order; k++)
						fMeanFrame[k] += v[(size_t)j * n_feature_order + k];
					nWordClassFrameNum++;
				}
			}

			// move the code word to the mean of the vectors assigned to it
			if( nWordClassFrameNum > 0 ){
				for(k = 0; k < n_feature_order; k++){
					fMeanFrame[k] /= nWordClassFrameNum;
					pWord[k] = fMeanFrame[k];
				}
			}

			// Re-seed a code word that attracted no vector, or exactly one
			// vector with zero accumulated distortion, with a random training
			// vector that is not already present in the codebook.
			if( nWordClassFrameNum == 0 || (nWordClassFrameNum == 1 && fSumDistance[i] == 0) ){
				for(nTry = 0; nTry < KMEAN_RESEED_MAX_TRIES; nTry++){
					nRandFrameIndex = (int)((double)nTotalFrameNum * ((double)rand() / (double)RAND_MAX));
					// rand() == RAND_MAX maps to nTotalFrameNum, one past
					// the last valid index; clamp it back into range.
					if( nRandFrameIndex >= nTotalFrameNum )
						nRandFrameIndex = nTotalFrameNum - 1;

					if( !kmean_frame_in_codebook(v + (size_t)nRandFrameIndex * n_feature_order, CodeBook, nCodeBookSize, n_feature_order) )
						break;
				}
				// If every try hit an existing code word, the last draw is
				// used anyway so that the loop always terminates.
				memcpy(pWord, v + (size_t)nRandFrameIndex * n_feature_order, n_feature_order * sizeof(double));
			}
		}

		// squared change of the codebook between this round and the previous one
		fCodeBookDistance = 0.0;
		for(i = 0; i < nCodeBookSize; i++){
			for(j = 0; j < n_feature_order; j++){
				fTemp = CodeBook[(size_t)i * n_feature_order + j] - OldCodeBook[(size_t)i * n_feature_order + j];
				fCodeBookDistance += fTemp * fTemp;
			}
		}
		DEBUG_PRINTF("change of code book: %-10f\n", fCodeBookDistance);

	}	// end of: for(nTrainCount ...)

	PRO_LOG("\tLast round's average distortion: %-10f\n", fAvgDistortion);
	PRO_LOG("\tchange of code book: %-10f\n", fCodeBookDistance);

	// A disabled variant of the loop above used to follow here as
	// commented-out code: it iterated until the codebook change dropped to
	// 1.0e-3 or below instead of running a fixed number of rounds.

	// release memory (everything above was allocated with new[])
	delete[] fMeanFrame;
	delete[] fSumDistance;
	delete[] nNearestWordIndex;
	delete[] OldCodeBook;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -