// File: dhmm_wp.cpp
// (Web code-viewer residue: a "font size" control label followed the file name here.)
// DHMM_WP.cpp:
// Implementation of the DHMM_WP Module.
// That is the transform of previous DHMM Code by WangPeng.
//
// Created 2001/08, By WangPeng, MDSR.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "DHMM_WP.h"
#define KMEAN_LOOP_TIME 40
//////////////////////////////////////////////////////////////////////
// Private functions
// Forward declaration of the K-means routine defined later in this file.
// It is declared static here so that this declaration agrees with the
// static definition; declaring a function with external linkage first and
// static afterwards is ill-formed in standard C++.
static void kmean(
    double * v,              // flattened training vectors
    double * CodeBook,       // flattened codebook, updated in place
    int nTotalFrameNum,      // number of training vectors
    int nCodeBookSize,       // number of code words
    int n_feature_order      // dimension of each vector / code word
);
//////////////////////////////////////////////////////////////////////
// API functions
// Trains a vector-quantization codebook with K-means.
//
// The two-dimensional training set (and the optional initial codebook) is
// copied into flat one-dimensional buffers, refined by kmean(), and the
// resulting code words are copied back into d2dda_codebook.
//
// Parameters:
//   d2dda_train_vector     - training vectors; row i is read as
//                            n_code_word_dim doubles.
//   n_train_vector_num     - number of training vectors (rows).
//   n_code_word_dim        - dimension of each training vector / code word.
//   d2dda_initial_codebook - if NULL, the initial codebook is made of
//                            n_codebook_size distinct, randomly chosen
//                            training vectors; otherwise its first
//                            n_codebook_size rows are the initial codebook.
//   d2dda_codebook         - output; its first n_codebook_size rows receive
//                            the trained code words.
//   n_codebook_size        - number of code words.
//
// Returns 0 on success. Returns -1 when a buffer cannot be allocated, or
// when random initialization is requested with more code words than
// training vectors (distinct indices could never be found).
int DHMM_VQ_Train_Code_Book_WP(
    DYNA_2DIM_DOUBLE_ARRAY d2dda_train_vector,
    int n_train_vector_num,
    int n_code_word_dim,
    DYNA_2DIM_DOUBLE_ARRAY d2dda_initial_codebook,
    DYNA_2DIM_DOUBLE_ARRAY d2dda_codebook,
    int n_codebook_size
)
{
    double* pd_v = NULL;                 // training vectors as a flat array
    double* pd_codebook = NULL;          // codebook as a flat array
    int* pn_codeword_frame_index = NULL; // random init only: training-set index
                                         // chosen for each code word
    int i, j, n_temp;
    int n_codeword_equal_flag;           // random init only: 1 when the drawn index
                                         // duplicates an earlier one

    PRO_LOG("\tVQ = WP, K-mean loop time = %4d.\n", KMEAN_LOOP_TIME);

    if((pd_v = new double[n_train_vector_num * n_code_word_dim]) == NULL)
    {
        DEBUG_PRINTF("Allocation of memory for train vectors failed!\n");
        ASSERT(0);
        return -1;
    }
    if((pd_codebook = new double[n_codebook_size * n_code_word_dim]) == NULL)
    {
        DEBUG_PRINTF("Allocation of memory for codebook failed!\n");
        ASSERT(0);
        delete[] pd_v;
        return -1;
    }

    // kmean() takes the training set as a flat array, so copy the
    // two-dimensional array row by row.
    for(i = 0; i < n_train_vector_num; i++)
        memcpy(pd_v + i * n_code_word_dim, d2dda_train_vector[i], n_code_word_dim * sizeof(double));

    if(d2dda_initial_codebook != NULL)
    {
        // An initial codebook was supplied: flatten it.
        for(i = 0; i < n_codebook_size; i++)
            memcpy(pd_codebook + i * n_code_word_dim, d2dda_initial_codebook[i], n_code_word_dim * sizeof(double));
    }
    else
    {
        // No initial codebook: pick distinct training vectors at random.
        // With more code words than training vectors the search for an
        // unused index below would never terminate.
        if(n_codebook_size > n_train_vector_num)
        {
            DEBUG_PRINTF("Codebook size exceeds the number of train vectors!\n");
            ASSERT(0);
            delete[] pd_v;
            delete[] pd_codebook;
            return -1;
        }
        if((pn_codeword_frame_index = new int[n_codebook_size]) == NULL)
        {
            DEBUG_PRINTF("Allocation of memory for codeword indices failed!\n");
            ASSERT(0);
            delete[] pd_v;
            delete[] pd_codebook;
            return -1;
        }
        // srand( (unsigned)time( NULL ) );
        for(i = 0; i < n_codebook_size; i++)
        {
            n_temp = 0;
            n_codeword_equal_flag = 1;
            while(n_codeword_equal_flag == 1)
            {
                n_temp = (int)((double)n_train_vector_num * ((double)rand() / (double)RAND_MAX));
                // rand() may return RAND_MAX, which would yield an index one
                // past the last training vector.
                if(n_temp >= n_train_vector_num)
                    n_temp = n_train_vector_num - 1;

                // Redraw when the index was already used by an earlier code word.
                n_codeword_equal_flag = 0;
                for(j = i - 1; j >= 0; j--)
                {
                    if(n_temp == pn_codeword_frame_index[j])
                        n_codeword_equal_flag = 1;
                }
            }
            pn_codeword_frame_index[i] = n_temp;
            memcpy(pd_codebook + i * n_code_word_dim, pd_v + n_temp * n_code_word_dim, n_code_word_dim * sizeof(double));
        }
    }

    kmean(
        pd_v,
        pd_codebook,
        n_train_vector_num,
        n_codebook_size,
        n_code_word_dim
    );

    // Copy the trained flat codebook back into the caller's 2-D array.
    for(i = 0; i < n_codebook_size; i++)
    {
        memcpy(d2dda_codebook[i], pd_codebook + i * n_code_word_dim, n_code_word_dim * sizeof(double));
    }

    // Arrays allocated with new[] must be released with delete[];
    // pn_codeword_frame_index stays NULL when an initial codebook was given.
    delete[] pd_v;
    delete[] pd_codebook;
    delete[] pn_codeword_frame_index;
    return 0;
}
// Refines a codebook in place with KMEAN_LOOP_TIME rounds of K-means.
//
// Each round:
//   1. assigns every training vector to its nearest code word (squared
//      Euclidean distance) and accumulates the quantization distortion;
//   2. replaces every code word by the mean of the vectors assigned to it;
//   3. overwrites a code word with a randomly chosen training vector when
//      no vector was assigned to it, or when exactly one vector was
//      assigned and the accumulated distance for that code word is zero;
//   4. reports the squared change of the codebook against the previous round.
//
// Parameters:
//   v               - training vectors, flat: vector i occupies
//                     v[i*n_feature_order .. i*n_feature_order + n_feature_order - 1].
//   CodeBook        - initial codebook in the same flat layout; overwritten
//                     with the trained code words. Must not be NULL.
//   nTotalFrameNum  - number of training vectors.
//   nCodeBookSize   - number of code words.
//   n_feature_order - dimension of each vector / code word.
//
// Does nothing when CodeBook or v is NULL or when any size is not positive.
static void kmean(
    double * v,
    double * CodeBook,
    int nTotalFrameNum,
    int nCodeBookSize,
    int n_feature_order
)
{
    int i, j, k, nTrainCount;
    double fTemp;
    double fMinDistance = 0.0;      // smallest distance from the current vector to any code word
    double fDistance;               // distance from the current vector to the current code word
    double * fSumDistance;          // per code word: accumulated distance of the vectors assigned to it
    double * fMeanFrame;            // running sum / mean of the vectors assigned to one code word
    int nWordClassFrameNum;         // number of vectors assigned to the current code word
    double * OldCodeBook;           // codebook of the previous round
    double fCodeBookDistance = 0.0; // squared change of the codebook between two rounds
    double fTotalDistortion;        // sum of every vector's quantization distortion
    double fAvgDistortion = 0.0;    // average quantization distortion
    int * nNearestWordIndex;        // per vector: index of its nearest code word
    int nFlagFeatureEqual;          // 1 while the randomly drawn vector is rejected
    int nRandFrameIndex;            // index of the randomly drawn replacement vector

    ASSERT(CodeBook != NULL);
    // Guard against inputs that would otherwise lead to out-of-range reads
    // or to a replacement loop that never ends.
    if(CodeBook == NULL || v == NULL ||
       nTotalFrameNum <= 0 || nCodeBookSize <= 0 || n_feature_order <= 0)
        return;

    nNearestWordIndex = new int[nTotalFrameNum];
    OldCodeBook = new double[nCodeBookSize * n_feature_order];
    fSumDistance = new double[nCodeBookSize];
    fMeanFrame = new double[n_feature_order];

    // NOTE(review): fSumDistance is cleared only here, so it accumulates over
    // all rounds rather than per round; the "== 0" test below therefore looks
    // at the running total. Left unchanged to keep training results identical.
    memset(fSumDistance, 0, nCodeBookSize * sizeof(double));

    for(nTrainCount = 0; nTrainCount < KMEAN_LOOP_TIME; nTrainCount++)
    {
        // Remember the codebook of the previous round.
        memcpy(OldCodeBook, CodeBook, nCodeBookSize * n_feature_order * sizeof(double));
        printf("Training round: %d\n", nTrainCount);

        // Step 1: nearest code word and distortion for every training vector.
        fAvgDistortion = fTotalDistortion = 0.0;
        for(i = 0; i < nTotalFrameNum; i++)
        {
            const double * pFrame = v + i * n_feature_order;
            for(j = 0; j < nCodeBookSize; j++)
            {
                const double * pWord = CodeBook + j * n_feature_order;
                fDistance = 0.0;
                for(k = 0; k < n_feature_order; k++)
                {
                    fTemp = pFrame[k] - pWord[k];
                    fDistance += fTemp * fTemp;
                }
                // The first code word always seeds the minimum.
                if(j == 0 || fDistance < fMinDistance)
                {
                    fMinDistance = fDistance;
                    nNearestWordIndex[i] = j;
                }
            }
            fTotalDistortion += fMinDistance;
            fSumDistance[nNearestWordIndex[i]] += fMinDistance;
        }
        fAvgDistortion = fTotalDistortion / (double)nTotalFrameNum;
        DEBUG_PRINTF("Last round's average distortion: %-10f\n", fAvgDistortion);

        // Steps 2 and 3: update the code words.
        // srand( (unsigned)time( NULL ) );
        for(i = 0; i < nCodeBookSize; i++)
        {
            double * pWord = CodeBook + i * n_feature_order;

            nWordClassFrameNum = 0;
            memset(fMeanFrame, 0, n_feature_order * sizeof(double));
            for(j = 0; j < nTotalFrameNum; j++)
            {
                if(nNearestWordIndex[j] == i)
                {
                    for(k = 0; k < n_feature_order; k++)
                        fMeanFrame[k] += v[j * n_feature_order + k];
                    nWordClassFrameNum++;
                }
            }

            // Use the mean vector to update the code word.
            if(nWordClassFrameNum > 0)
            {
                for(k = 0; k < n_feature_order; k++)
                {
                    fMeanFrame[k] /= nWordClassFrameNum;
                    pWord[k] = fMeanFrame[k];
                }
            }

            // Replace the code word by a random training vector when its
            // class is empty, or holds a single vector with zero accumulated
            // distance.
            if((nWordClassFrameNum == 1 && fSumDistance[i] == 0) || nWordClassFrameNum == 0)
            {
                nFlagFeatureEqual = 1;
                nRandFrameIndex = 0;
                while(nFlagFeatureEqual == 1)
                {
                    nRandFrameIndex = (int)((double)nTotalFrameNum * ((double)rand() / (double)RAND_MAX));
                    // rand() may return RAND_MAX, which would yield an index
                    // one past the last training vector.
                    if(nRandFrameIndex >= nTotalFrameNum)
                        nRandFrameIndex = nTotalFrameNum - 1;

                    // NOTE(review): the draw is accepted as soon as one
                    // component differs from one code word, i.e. it is only
                    // rejected when every code word equals the drawn vector.
                    // That is weaker than "not in the code book"; left
                    // unchanged to keep training results identical.
                    for(j = 0; j < nCodeBookSize; j++)
                    {
                        for(k = 0; k < n_feature_order; k++)
                        {
                            if(CodeBook[j * n_feature_order + k] - v[nRandFrameIndex * n_feature_order + k] != 0)
                            {
                                nFlagFeatureEqual = 0;
                                break;
                            }
                        }
                        if(nFlagFeatureEqual == 0)
                            break;
                    }
                }
                memcpy(pWord, v + nRandFrameIndex * n_feature_order, n_feature_order * sizeof(double));
            }
        }

        // Step 4: squared change of the codebook against the previous round.
        fCodeBookDistance = 0.0;
        for(i = 0; i < nCodeBookSize; i++)
        {
            for(j = 0; j < n_feature_order; j++)
            {
                fTemp = CodeBook[i * n_feature_order + j] - OldCodeBook[i * n_feature_order + j];
                fCodeBookDistance += fTemp * fTemp;
            }
        }
        DEBUG_PRINTF("change of code book: %-10f\n", fCodeBookDistance);
    }

    PRO_LOG("\tLast round's average distortion: %-10f\n", fAvgDistortion);
    PRO_LOG("\tchange of code book: %-10f\n", fCodeBookDistance);

    // A disabled variant that iterated until the codebook change dropped to
    // 1.0e-3 instead of running a fixed number of rounds used to be kept
    // here as commented-out code; it has been removed.

    // Release memory; arrays allocated with new[] need delete[].
    delete[] fMeanFrame;
    delete[] fSumDistance;
    delete[] nNearestWordIndex;
    delete[] OldCodeBook;
}
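// ---------------------------------------------------------------------
// Web code-viewer residue (not part of the original source file):
// Keyboard shortcuts
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
// ---------------------------------------------------------------------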