📄 dhmm_lhs.cpp
字号:
// DHMM_LHS.cpp:
// Implementation of the DHMM_LHS Module.
// This is an adaptation (transform) of the earlier DHMM code written by LiHuSheng.
//
// Created 2001/08, By DongMing, MDSR.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "DHMM_LHS.h"
#include "DHMM_GL.h"
#include <math.h>
//#define KMEAN_PRECISION (1.0E-3)
//#define Epsilon_B (1.0E-6)
//#define EPSILON_CALC (1.0E-6)
//#define MAX_CALC_NUM 300
//////////////////////////////////////////////////////////////////////
// Private Function Head
// Forward declaration of the file-private VQ codebook trainer.
// Declared `static` so that its linkage agrees with the `static` definition
// further down in this file (a non-static declaration followed by a static
// definition is a linkage conflict in standard C++).
// Parameters prefixed DMNU_ are not used by the transformed code; parameters
// prefixed DMAD_ were added during the transformation.
static void DHMM_VQ(
	int DMNU_Train_num,							// number of speakers in the training set
	double **DMNU_Codebook,						// VQ codebook
	int Codeword_num,							// number of codewords
	double K_means_precision,					// K-means precision
	int *DMNU_Train_Speaker,					// speaker labels of the training set
	int Dimension,								// feature dimension
	int DMNU_sex,								// sex flag
	int DMNU_num,								// group number of the test set (num = 0 ~ Test_time)
	int DMAD_n_Code_Word_Num,					// number of training vectors
	DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Word,	// training vectors
	DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Book	// codebook to be trained (output)
);
// Forward declaration of the file-private DHMM trainer.
// Declared `static` so that its linkage agrees with the `static` definition
// further down in this file (a non-static declaration followed by a static
// definition is a linkage conflict in standard C++).
// Parameters prefixed DMNU_ are not used by the transformed code; parameters
// prefixed DMAD_ were added during the transformation.
static void DHMM_train(
	int Status_num,					// number of states
	int Train_num,					// number of speakers in the training set
	int *DMNU_Train_Speaker,		// speaker labels of the training set
	double **DMNU_Codebook,			// VQ codebook
	double *Pi,						// initial probabilities
	double **A,						// A matrix
	double **B,						// B matrix
	int Output_num,					// VQ codebook size (number of codewords)
	int DMNU_Cepstrum_order,		// feature dimension
	int DMNU_digit,					// word entry label
	FILE *DMNU_Fp1,					// model file pointer
	FILE *DMNU_Fp2,					// ???
	int DMNU_sex,					// sex is not referenced in this function
	int DMNU_Mode,					// values of the Mode parameter that appear in the program include:
									//   WHOLE_WORD
									//   WORD_HEAD1
									//   WORD_HEAD2
									//   WORD_TAIL
	WORD_SAMPLE * DMAD_pu_Word_Sample,
	DHMM_MODEL * DMAD_pu_DHMM_Model
);
// Forward declaration; the definition is not in this part of the file.
// NOTE(review): the Alpha/Belta names suggest forward/backward variables of
// an HMM re-estimation step -- confirm against the definition.
double DHMM_Calculate(
double ***Alpha,
double ***Belta,
double *Pi,
double **A,
double **B,
int **Out, // observation (output) sequences of the utterances used for training
int Sample_num, // number of speakers in the training set
int Output_num, // VQ codebook size (number of codewords, i.e. number of observation values)
int *T, // frame counts of the utterances used for training
int n, // number of states
double **P // ???
);
// Forward declaration; the definition is not in this part of the file.
// NOTE(review): the parameter names match those of DHMM_Calculate (Pi, A, B,
// Out, T, n); presumably they carry the same meaning for a single
// observation sequence -- confirm against the definition.
double DHMM_Priority(double *Pi, double **A, double **B, int *Out, int T, int n);
// Forward declaration; the definition is not in this part of the file.
// Note the argument order: B comes before A.
double Viterbi(
int *VQ_Out, // one recognition sample after VQ
int Frame_num, // number of frames
double **B, // B matrix
double **A, // A matrix
int Status_num, // number of states
int *Status_sequence // state sequence
);
//////////////////////////////////////////////////////////////////////
// API functions
// API wrapper: trains a VQ codebook by forwarding to DHMM_VQ.
//   d2dda_Code_Word         : training vectors (n_Code_Word_Num rows of n_Code_Word_Dim values)
//   d2dda_Initial_Code_Book : accepted but not used by this implementation
//   d2dda_Code_Book         : codebook passed to DHMM_VQ to be filled in
//   n_Code_Book_Size        : number of codewords requested
// The precision passed to DHMM_VQ is KMEAN_PRECISION.
// Always returns 0.
int DHMM_VQ_Train_Code_Book_LHS(DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Word, int n_Code_Word_Num, int n_Code_Word_Dim,
								DYNA_2DIM_DOUBLE_ARRAY d2dda_Initial_Code_Book, DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Book, int n_Code_Book_Size)
{
	DHMM_VQ(
		0,						// DMNU_Train_num      (not used)
		NULL,					// DMNU_Codebook       (not used)
		n_Code_Book_Size,		// Codeword_num
		KMEAN_PRECISION,		// K_means_precision
		NULL,					// DMNU_Train_Speaker  (not used)
		n_Code_Word_Dim,		// Dimension
		0,						// DMNU_sex            (not used)
		0,						// DMNU_num            (not used)
		n_Code_Word_Num,		// DMAD_n_Code_Word_Num
		d2dda_Code_Word,		// DMAD_d2dda_Code_Word
		d2dda_Code_Book			// DMAD_d2dda_Code_Book
	);

	return 0;
}
// API wrapper: trains a DHMM model by forwarding to DHMM_train.
//   pu_Word_Sample    : training samples, passed through unchanged
//   n_Word_Sample_Num : number of training samples (passed as Train_num)
//   pu_DHMM_Model     : model; its n_State_Num, pdPi, d2dda_A, d2dda_B and
//                       n_Code_Book_Size members are handed to DHMM_train
// Always returns 0.
int DHMM_Model_Train_DHMM_Model_LHS(WORD_SAMPLE * pu_Word_Sample, int n_Word_Sample_Num,
									DHMM_MODEL * pu_DHMM_Model)
{
	DHMM_train(
		pu_DHMM_Model->n_State_Num,			// Status_num
		n_Word_Sample_Num,					// Train_num
		NULL,								// DMNU_Train_Speaker   (not used)
		NULL,								// DMNU_Codebook        (not used)
		pu_DHMM_Model->pdPi,				// Pi
		pu_DHMM_Model->d2dda_A,				// A
		pu_DHMM_Model->d2dda_B,				// B
		pu_DHMM_Model->n_Code_Book_Size,	// Output_num
		0,									// DMNU_Cepstrum_order  (not used)
		0,									// DMNU_digit           (not used)
		NULL,								// DMNU_Fp1             (not used)
		NULL,								// DMNU_Fp2             (not used)
		0,									// DMNU_sex             (not used)
		0,									// DMNU_Mode            (not used)
		pu_Word_Sample,						// DMAD_pu_Word_Sample
		pu_DHMM_Model						// DMAD_pu_DHMM_Model
	);

	return 0;
}
// API wrapper: runs Viterbi on one VQ-ed sample against one model.
//   pu_DHMM_Model      : model supplying d2dda_B, d2dda_A and n_State_Num
//   pu_Word_Sample     : sample supplying pn_VQed_Feature_Sequence and
//                        n_Feature_Sequence_Len
//   pd_Max_Likelihood  : receives the value returned by Viterbi
//   pn_Status_Sequence : passed to Viterbi as its state-sequence buffer
// Always returns 0.
int DHMM_Recog_Viterbi_LHS(DHMM_MODEL * pu_DHMM_Model,
						   WORD_SAMPLE * pu_Word_Sample,
						   double * pd_Max_Likelihood, int * pn_Status_Sequence)
{
	const double d_Score = Viterbi(
		pu_Word_Sample->pn_VQed_Feature_Sequence,	// VQ_Out
		pu_Word_Sample->n_Feature_Sequence_Len,		// Frame_num
		pu_DHMM_Model->d2dda_B,						// B (note: B precedes A)
		pu_DHMM_Model->d2dda_A,						// A
		pu_DHMM_Model->n_State_Num,					// Status_num
		pn_Status_Sequence							// Status_sequence
	);

	*pd_Max_Likelihood = d_Score;
	return 0;
}
//////////////////////////////////////////////////////////////////////
// Original Code
//////////////////////////////////////////////////////////////////////
// DMNU = DongMing does not use this (parameter) after the transformation.
// DMAD = DongMing added this (parameter) during the transformation.
// Trains a VQ codebook by binary splitting with k-means refinement.
//
// Algorithm:
//   1. Row 0 of the codebook is set to the mean of all training vectors.
//   2. For m = 2, 4, 8, ... while m <= Codeword_num:
//        - each of the m/2 current codewords is split into two, scaled by
//          1.1 (row 2*i) and 0.9 (row 2*i+1);
//        - k-means is iterated on the m codewords until the relative drop in
//          total squared-error distortion is no greater than
//          K_means_precision (or the distortion reaches 0).
//
// Parameters:
//   Codeword_num          number of codewords. Only rows 0 .. m-1 for the
//                         largest power of two m <= Codeword_num are written;
//                         if Codeword_num is not a power of two the remaining
//                         rows are left untouched.
//   K_means_precision     relative-improvement threshold that stops k-means.
//   Dimension             number of components per vector.
//   DMAD_n_Code_Word_Num  number of training vectors.
//   DMAD_d2dda_Code_Word  training vectors, read as [vector][component].
//   DMAD_d2dda_Code_Book  codebook to fill, written as [codeword][component].
//   DMNU_*                leftovers of the earlier file-based version
//                         (training-file reading and codebook-file writing);
//                         they are ignored.
//
// A codeword that attracts no training vector in an iteration keeps its
// previous value. If any size argument is non-positive, or a scratch buffer
// cannot be allocated, the function returns without training.
static void DHMM_VQ(
	int DMNU_Train_num,							// number of speakers in the training set (unused)
	double **DMNU_Codebook,						// VQ codebook (unused)
	int Codeword_num,							// number of codewords
	double K_means_precision,					// K-means precision
	int *DMNU_Train_Speaker,					// speaker labels of the training set (unused)
	int Dimension,								// feature dimension
	int DMNU_sex,								// sex flag (unused)
	int DMNU_num,								// group number of the test set (unused)
	int DMAD_n_Code_Word_Num,
	DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Word,
	DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Book
)
{
	int i, j, p, m;
	int Nearer;
	int Total_num;			// total number of training vectors
	int *Counter = NULL;	// length Codeword_num: vectors assigned to each codeword
	double **Codeword;		// size Codeword_num * Dimension (caller's codebook)
	double **Sum = NULL;	// size Codeword_num * Dimension: per-codeword component sums
	double **s;				// size Total_num * Dimension (caller's training vectors)
	double Min_Dist;
	double Dist;
	double New_D;
	double Old_D;
	bool Alloc_ok = true;

	Total_num = DMAD_n_Code_Word_Num;
	Codeword = DMAD_d2dda_Code_Book;
	s = DMAD_d2dda_Code_Word;

	// Nothing sensible can be trained from empty input; bail out before any
	// allocation or division by Total_num.
	if(Codeword_num <= 0 || Dimension <= 0 || Total_num <= 0)
	{
		DEBUG_PRINTF("DHMM_VQ: invalid size argument !\n");
		return;
	}

	// Allocate scratch buffers. The NULL checks matter only on toolchains
	// whose operator new returns NULL instead of throwing.
	if((Sum = new double *[Codeword_num]) == NULL)
	{
		DEBUG_PRINTF("Not enough memory for Sum !\n");
		ASSERT(0);
		Alloc_ok = false;
	}
	else
	{
		// Pre-clear so that the cleanup code can free a partially built table.
		for(i=0; i<Codeword_num; i++) Sum[i] = NULL;
		for(i=0; i<Codeword_num && Alloc_ok; i++)
		{
			if((Sum[i] = new double [Dimension]) == NULL)
			{
				DEBUG_PRINTF("Not enough memory for Sum[%d] !\n", i);
				ASSERT(0);
				Alloc_ok = false;
			}
		}
	}
	if(Alloc_ok && (Counter = new int [Codeword_num]) == NULL)
	{
		DEBUG_PRINTF("Not enough memory for Counter !\n");
		ASSERT(0);
		Alloc_ok = false;
	}

	if(Alloc_ok)
	{
		// Initial single codeword: the mean of all training vectors.
		for(p=0; p<Dimension; p++) Codeword[0][p] = 0;
		for(i=0; i<Total_num; i++)
			for(p=0; p<Dimension; p++)
				Codeword[0][p] += s[i][p];
		for(p=0; p<Dimension; p++) Codeword[0][p] /= Total_num;

		for(m=2; m<=Codeword_num; m*=2)
		{
			DEBUG_PRINTF("\tM = %d.\n", m);

			// Split every codeword in two. Walking i downwards, and writing
			// row 2*i+1 before row 2*i, ensures a row is read before it is
			// overwritten (for i == 0, row 2*i is the source row itself).
			for(i=m/2-1; i>=0; i--)
				for(j=0; j<Dimension; j++)
				{
					Codeword[i*2+1][j] = Codeword[i][j] * 0.9;
					Codeword[i*2][j] = Codeword[i][j] * 1.1;
				}

			// Seed values that guarantee at least one k-means pass.
			Old_D = 1.0e20;
			New_D = 1.0e15;

			// Stop when the relative improvement is small enough. The
			// New_D > 0 guard stops immediately on zero distortion instead of
			// dividing by zero.
			while(New_D > 0.0 && ((Old_D - New_D) / New_D) > K_means_precision)
			{
				for(j=0; j<m; j++)
				{
					for(p=0; p<Dimension; p++) Sum[j][p] = 0;
					Counter[j] = 0;
				}

				Old_D = New_D;
				New_D = 0;

				for(i=0; i<Total_num; i++)
				{
					// Find the codeword nearest to s[i] (squared Euclidean distance).
					Dist = 0;
					for(p=0; p<Dimension; p++)
						Dist += (Codeword[0][p] - s[i][p]) * (Codeword[0][p] - s[i][p]);
					Min_Dist = Dist;
					Nearer = 0;
					for(j=1; j<m; j++)
					{
						Dist = 0;
						for(p=0; p<Dimension; p++)
							Dist += (Codeword[j][p] - s[i][p]) * (Codeword[j][p] - s[i][p]);
						if(Dist < Min_Dist)
						{
							Min_Dist = Dist;
							Nearer = j;
						}
					}

					// Accumulate s[i] into its cluster and into the total distortion.
					for(p=0; p<Dimension; p++) Sum[Nearer][p] += s[i][p];
					Counter[Nearer] ++;
					New_D += Min_Dist;
				}

				// Move each non-empty cluster's codeword to the cluster mean.
				for(j=0; j<m; j++)
				{
					if(Counter[j] > 0)
						for(p=0; p<Dimension; p++)
							Codeword[j][p] = Sum[j][p] / Counter[j];
				}
			}
		}
	}

	// Release scratch memory. Everything was allocated with new[], so it must
	// be released with delete[]. Codeword and s belong to the caller.
	if(Sum != NULL)
	{
		for(i=0; i<Codeword_num; i++) delete [] Sum[i];
		delete [] Sum;
	}
	delete [] Counter;
}
static void DHMM_train(
int Status_num, // 状态数
int Train_num, // 训练集人数
int *DMNU_Train_Speaker, // 训练集说话人标号
double **DMNU_Codebook, // VQ码本
double *Pi, // 初始概率
double **A, // A矩阵
double **B, // B矩阵
int Output_num, // VQ码本大小(码字个数)
int DMNU_Cepstrum_order, // 特征维数
int DMNU_digit, // 词条标号
FILE *DMNU_Fp1, // 模型文件指针
FILE *DMNU_Fp2, // ???
int DMNU_sex, // sex在该函数中没有引用
int DMNU_Mode, // 程序中出现的Mode参数的取值包括:
// WHOLE_WORD
// WORD_HEAD1
// WORD_HEAD2
// WORD_TAIL
WORD_SAMPLE * DMAD_pu_Word_Sample,
DHMM_MODEL * DMAD_pu_DHMM_Model
)
{
int i, j, k,m;
// int Acoustic_Mode;
// int Seg[3];
int **Output; // for each word item model being trained, the output sequence (observation sequence) of each utterance of this model. (每个元素保存该帧的码字标号,每一行是一个utterance,且各行不等长)
// 1st dimension: index of the training utterance;
// 2nd dimension: index of frame
int *Frame_num; // 各训练句子的帧数
double ** P; // Status_num*Output_num矩阵(干什么用的???)
double **Old_A;
double **Old_B;
double ***Alpha;
double ***Belta;
// double **Feature; // feature data of one utterance
double w, u;
// char File_Name[40];
// time_t t;
// srand((unsigned) time(&t));
// 分配内存
if((Old_A = new double *[Status_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Old_A !\n");
ASSERT(0);
}
for(i=0; i<Status_num; i++)
{
if((Old_A[i] = new double [Status_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Old_A[%d] !\n", i);
ASSERT(0);
}
}
if((Old_B = new double *[Status_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Old_B !\n");
ASSERT(0);
}
for(i=0; i<Status_num; i++)
{
if((Old_B[i] = new double [Output_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Old_B[%d] !\n", i);
ASSERT(0);
}
}
if((P = new double*[Status_num]) == NULL)
{
DEBUG_PRINTF("Not eoungh memory for P[]\n");
ASSERT(0);
}
for(i=0;i<Status_num;i++)
if((P[i] = new double[Output_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for P[%d]\n",i);
ASSERT(0);
}
// Initialize Alpha & Belta
if((Alpha = new double **[Train_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Alpha !\n");
ASSERT(0);
}
if((Belta = new double **[Train_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Belta !\n");
ASSERT(0);
}
if((Output = new int *[Train_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Output !\n");
ASSERT(0);
}
if((Frame_num = new int [Train_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Frame_num !\n");
ASSERT(0);
}
// 初始化初始概率Pi:状态0概率为1,其余状态概率为0
Pi[0] = 1;
for(i=1; i<Status_num; i++) Pi[i] = 0;
for(i=0; i<Status_num; i++)
{
// 初始化A矩阵
for(j=0; j<Status_num; j++) A[i][j] = 0;
if(i < Status_num-1)
{
A[i][i] = 0.5;
A[i][i+1] = 0.5;
}
else
{
A[i][i] = 1.0;
}
// 归一化A矩阵
w = 0;
for(j=0; j<Status_num; j++) w = w + A[i][j];
for(j=0; j<Status_num; j++) A[i][j] = A[i][j] / w;
w = 0;
// 初始化B矩阵
for(j=0; j<Output_num; j++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -