// dhmm_gl.cpp
// DHMM_GL.cpp:
// Implementation of the DHMM_GL Module.
// That is the transform of previous DHMM Code by GuLiang.
//
// Created 2001/08, By DongMing, MDSR.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "DHMM_GL.h"
#include <math.h>
#define REESTIMATE_ALL (0x0001)
#define ENDSTATE_ABSORB (0x0002)
#define REESTIMATE_B (0x0004)
//////////////////////////////////////////////////////////////////////
// Private Function Head
// Forward declarations of the file-local implementation functions.
// FIX: the definitions below are declared 'static' (internal linkage); in
// C++ a function first declared without 'static' cannot later be defined
// 'static' (ill-formed), so the declarations must carry 'static' as well.
static void DHMM_VQ(int *DMNU_Sound_attrib, int DMNU_Speaker_num, char *DMNU_Featurefile_name, char *DMNU_Codefile_name,
                    int Codeword_num, double K_means_precision, int DMNU_mode,
                    int DMAD_n_Code_Word_Num, int DMAD_n_Code_Word_Dim,
                    DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Book, DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Word);
static void DHMM_train(int *DMNU_Sound_attrib, int Speaker_num, int Train_num, char *DMNU_Featurefile_name,
                       char *DMNU_Codefile_name, char *DMNU_Patternfile_name, int Status_num, int mode,
                       WORD_SAMPLE * DMAD_pu_Word_Sample,
                       DHMM_MODEL * DMAD_pu_DHMM_Model);
static double DHMM_Calculate(double ***Alpha, double ***Belta, double *Pi, double **A,
                             double **B, int **Out, int Sample_num, int Output_num, int *T,
                             int n, int mode, int End_state_mode);
// NOTE(review): DHMM_Priority's definition is not visible in this chunk;
// left without 'static' -- confirm against its definition.
double DHMM_Priority(double *Pi, double **A, double **B, int *Out, int T, int n, int mode);
//////////////////////////////////////////////////////////////////////
// API functions
// API wrapper: trains a VQ code book of n_Code_Book_Size entries from the
// n_Code_Word_Num x n_Code_Word_Dim training set d2dda_Code_Word, writing
// the result into d2dda_Code_Book via the GL binary-split trainer below.
// d2dda_Initial_Code_Book is accepted only for interface compatibility;
// the GL trainer builds its own initial code word and ignores it.
// Always returns 0.
int DHMM_VQ_Train_Code_Book_GL(DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Word, int n_Code_Word_Num, int n_Code_Word_Dim,
DYNA_2DIM_DOUBLE_ARRAY d2dda_Initial_Code_Book, DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Book, int n_Code_Book_Size)
{
    PRO_LOG("\tVQ = GL, K-mean precision = %15.4E.\n", KMEAN_PRECISION);
    // The first four arguments (speaker/file bookkeeping of the original
    // code) are unused by the transformed trainer, hence NULL/0.
    DHMM_VQ(NULL, 0, NULL, NULL,
            n_Code_Book_Size, KMEAN_PRECISION, 0,
            n_Code_Word_Num, n_Code_Word_Dim,
            d2dda_Code_Book, d2dda_Code_Word);
    return 0;
}
// API wrapper: trains the DHMM model pu_DHMM_Model on n_Word_Sample_Num
// VQ-coded word samples using the GL Baum-Welch implementation below.
// Always returns 0.
int DHMM_Model_Train_DHMM_Model_GL(WORD_SAMPLE * pu_Word_Sample, int n_Word_Sample_Num,
DHMM_MODEL * pu_DHMM_Model)
{
    PRO_LOG("\tModel = GL, max loop = %4d, precision = %15.4E.\n", MAX_CALC_NUM, EPSILON_CALC);
    // ~ENDSTATE_ABSORB passes a mode word with the ENDSTATE_ABSORB bit
    // cleared (and every other bit set) -- presumably DHMM_Calculate tests
    // only (mode & ENDSTATE_ABSORB), i.e. "no absorbing end state"; confirm
    // against DHMM_Calculate's End_state_mode handling.
    DHMM_train(NULL, n_Word_Sample_Num, n_Word_Sample_Num, NULL,
               NULL, NULL, pu_DHMM_Model->n_State_Num, ~ENDSTATE_ABSORB,
               pu_Word_Sample, pu_DHMM_Model);
    return 0;
}
// API wrapper: scores one VQ-coded word sample against one DHMM model.
// The (log-)likelihood computed by DHMM_Priority is stored through
// pd_Max_Likelihood. Always returns 0.
int DHMM_Recog_Forward_Backward_GL(DHMM_MODEL * pu_DHMM_Model,
WORD_SAMPLE * pu_Word_Sample,
double * pd_Max_Likelihood)
{
    // Same ~ENDSTATE_ABSORB convention as training: a mode word with the
    // ENDSTATE_ABSORB bit cleared -- verify against DHMM_Priority.
    double d_Score = DHMM_Priority(pu_DHMM_Model->pdPi, pu_DHMM_Model->d2dda_A, pu_DHMM_Model->d2dda_B,
                                   pu_Word_Sample->pn_VQed_Feature_Sequence, pu_Word_Sample->n_Feature_Sequence_Len,
                                   pu_DHMM_Model->n_State_Num, ~ENDSTATE_ABSORB);
    *pd_Max_Likelihood = d_Score;
    return 0;
}
//////////////////////////////////////////////////////////////////////
// Original Code
//////////////////////////////////////////////////////////////////////
// DMNU = DongMing doesn't use this when transforming.
// DMAD = DongMing adds this when transforming.
// binary split VQ code-book training (clustering) algorithm
// Binary-split (LBG-style) VQ code-book training / clustering.
//
// Used parameters (the DMNU_* parameters are kept only for signature
// compatibility with the original code and are ignored):
//   Codeword_num          - target code book size; the book is grown by
//                           doubling, so this is assumed to be a power of
//                           two -- TODO confirm all callers
//   K_means_precision     - relative-distortion threshold that stops the
//                           inner k-means iteration
//   DMAD_n_Code_Word_Num  - number of training vectors
//   DMAD_n_Code_Word_Dim  - dimension of each training vector
//   DMAD_d2dda_Code_Book  - [out] trained code book (Codeword_num x dim)
//   DMAD_d2dda_Code_Word  - [in]  training vector set (num x dim)
static void DHMM_VQ(int *DMNU_Sound_attrib, int DMNU_Speaker_num, char *DMNU_Featurefile_name, char *DMNU_Codefile_name,
                    int Codeword_num, double K_means_precision, int DMNU_mode,
                    int DMAD_n_Code_Word_Num, int DMAD_n_Code_Word_Dim,
                    DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Book, DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Word)
{
    int i, j, p, m;
    int Dimension;      // feature vector dimension
    int Nearer;         // index of the nearest code word for the current vector
    int *Counter;       // per code word: number of training vectors clustered to it
    int Total_num;      // total number of training vectors (frames)
    double **Codeword;  // the VQ code book being trained (caller's storage)
    double **Sum;       // per code word: component-wise sum of its cluster
    double **s;         // the training vector set
    double Min_Dist;
    double Dist;        // squared Euclidean distance, vector <-> code word
    double New_D;       // total quantization distortion, current iteration
    double Old_D;       // total quantization distortion, previous iteration

    // Alias the caller-provided storage.
    Dimension = DMAD_n_Code_Word_Dim;
    Total_num = DMAD_n_Code_Word_Num;
    Codeword = DMAD_d2dda_Code_Book;
    // NOTE: the NULL checks below only fire on pre-standard compilers whose
    // operator new returned NULL instead of throwing; kept for compatibility.
    if((Sum = new double *[Codeword_num]) == NULL)
    {
        DEBUG_PRINTF("Not enough memory for Sum !\n");
        ASSERT(0);
    }
    for(i=0; i<Codeword_num; i++)
    {
        if((Sum[i] = new double [Dimension]) == NULL)
        {
            DEBUG_PRINTF("Not enough memory for Sum[%d] !\n", i);
            ASSERT(0);
        }
    }
    if((Counter = new int [Codeword_num]) == NULL)
    {
        DEBUG_PRINTF("Not enough memory for Counter !\n");
        ASSERT(0);
    }
    s = DMAD_d2dda_Code_Word;

    // k-means clustering with binary splitting.
    // Initialize the first code word to the mean of all training vectors.
    for(p=0; p<Dimension; p++) Codeword[0][p] = 0;
    for(i=0; i<Total_num; i++)
        for(p=0; p<Dimension; p++)
            Codeword[0][p] += s[i][p];
    for(p=0; p<Dimension; p++) Codeword[0][p] /= Total_num;

    for(m=2; m<=Codeword_num; m*=2)     // m: code book size after splitting
    {
        DEBUG_PRINTF("\tM = %d.\n", m);
        // Split each of the current m/2 code words into two perturbed
        // copies (splitting parameter epsilon = 0.1). Iterating downward
        // lets the split be done in place.
        for(i=m/2-1; i>=0; i--)
            for(j=0; j<Dimension; j++)
            {
                Codeword[i*2+1][j] = Codeword[i][j] * 0.9;
                Codeword[i*2][j] = Codeword[i][j] * 1.1;
            }
        // Sentinel distortions guarantee at least one k-means pass.
        Old_D = 1.0e20;
        New_D = 1.0e15;
        while(((Old_D - New_D) / New_D) > K_means_precision)
        {
            // Reset cluster accumulators.
            for(j=0; j<m; j++)
            {
                for(p=0; p<Dimension; p++) Sum[j][p] = 0;
                Counter[j] = 0;
            }
            Old_D = New_D;
            New_D = 0;
            // Assign every training vector to its nearest code word.
            for(i=0; i<Total_num; i++)
            {
                Dist = 0;
                for(p=0; p<Dimension; p++)
                    Dist += (Codeword[0][p] - s[i][p]) * (Codeword[0][p] - s[i][p]);
                Min_Dist = Dist;            // distance to code word 0
                Nearer = 0;
                for(j=1; j<m; j++)          // distances to code words 1..m-1
                {
                    Dist = 0;
                    for(p=0; p<Dimension; p++)
                        Dist += (Codeword[j][p] - s[i][p]) * (Codeword[j][p] - s[i][p]);
                    if(Dist < Min_Dist)
                    {
                        Min_Dist = Dist;
                        Nearer = j;         // remember the nearest code word
                    }
                }
                // Accumulate the vector into its winning cluster.
                for(p=0; p<Dimension; p++) Sum[Nearer][p] += s[i][p];
                Counter[Nearer] ++;
                New_D += Min_Dist;
            }
            // Re-estimate each code word as its cluster centroid; empty
            // clusters keep their previous code word.
            for(j=0; j<m; j++)
            {
                if(Counter[j] > 0)
                    for(p=0; p<Dimension; p++)
                        Codeword[j][p] = Sum[j][p] / Counter[j];
            }
        }
    }
    // FIX: arrays allocated with new[] must be released with delete[]
    // (plain 'delete' on a new[] pointer is undefined behavior).
    for(i=0; i<Codeword_num; i++)
    {
        delete [] Sum[i];
    }
    delete [] Sum;
    delete [] Counter;
}
// Trains one word model (Pi, A, B inside DMAD_pu_DHMM_Model) on the given
// VQ-coded word samples by iterated re-estimation via DHMM_Calculate.
//
// Speaker_num  number of training utterances to load (callers pass the
//              sample count here; DMNU_* parameters are unused legacy)
// Train_num    number of training utterances passed to re-estimation
// Status_num   number of HMM states
// mode         end-state mode word, forwarded to DHMM_Calculate
static void DHMM_train(int *DMNU_Sound_attrib, int Speaker_num, int Train_num, char *DMNU_Featurefile_name,
                       char *DMNU_Codefile_name, char *DMNU_Patternfile_name, int Status_num, int mode,
                       WORD_SAMPLE * DMAD_pu_Word_Sample,
                       DHMM_MODEL * DMAD_pu_DHMM_Model)
{
    int i, j, k, /* n, */ m;
    int Output_num;   // VQ code book size (number of distinct observations)
    int **Output;     // [utterance][frame] -> VQ code word index (observation sequences)
    int *Frame_num;   // number of frames in each training utterance
    double *Pi;       // initial state probabilities
    double **A;       // transition matrix, Status_num x Status_num
    double **B;       // emission matrix, Status_num x Output_num
    double ***Alpha;  // forward probabilities, [utterance][frame][state]
    double ***Belta;  // backward probabilities, [utterance][frame][state]
    double w, u;

    // (Pi, A, B) storage is owned by the caller's DHMM_MODEL.
    Output_num = DMAD_pu_DHMM_Model->n_Code_Book_Size;
    Pi = DMAD_pu_DHMM_Model->pdPi;
    A = DMAD_pu_DHMM_Model->d2dda_A;
    B = DMAD_pu_DHMM_Model->d2dda_B;
    // NOTE: allocating Alpha/Belta for every utterance is more memory than
    // strictly necessary (original author's observation, kept as-is).
    // The NULL checks only fire on pre-standard compilers whose operator
    // new returned NULL instead of throwing; kept for compatibility.
    if((Alpha = new double **[Train_num]) == NULL)
    {
        DEBUG_PRINTF("Not enough memory for Alpha !\n");
        ASSERT(0);
    }
    if((Belta = new double **[Train_num]) == NULL)
    {
        DEBUG_PRINTF("Not enough memory for Belta !\n");
        ASSERT(0);
    }
    if((Output = new int *[Train_num]) == NULL)
    {
        DEBUG_PRINTF("Not enough memory for Output !\n");
        ASSERT(0);
    }
    if((Frame_num = new int [Train_num]) == NULL)
    {
        DEBUG_PRINTF("Not enough memory for Frame_num !\n");
        ASSERT(0);
    }
    // Initial state distribution: state 0 with probability 1.
    for(i=1; i<Status_num; i++) Pi[i] = 0;
    Pi[0] = 1;
    // Initialize A (left-to-right: stay or advance, absorbing last state)
    // and B (randomly perturbed uniform rows).
    for(i=0; i<Status_num; i++)
    {
        for(j=0; j<Status_num; j++) A[i][j] = 0;
        if(i < Status_num-1)
        {
            A[i][i] = 0.5;
            A[i][i+1] = 0.5;
        }
        else
        {
            A[i][i] = 1.0;
        }
        w = 0;
        for(j=0; j<Status_num; j++) w = w + A[i][j];
        for(j=0; j<Status_num; j++) A[i][j] = A[i][j] / w; // normalize row of A
        w = 0;
        for(j=0; j<Output_num; j++) // randomized initialization of B
        {
            B[i][j] = 1.0 / Output_num * (0.9 + ((rand()%1000) / 5000.0)); // RAND_MAX is defined as 0x7fff in "stdlib.h"
            w = w + B[i][j];
        }
        for(j=0; j<Output_num; j++) B[i][j] = B[i][j] / w; // normalize row of B
    }
    // Load the training data for this word item.
    k = 0; // k: training utterance counter
    for(m=0; m<Speaker_num; m++)
    {
        Frame_num[k] = DMAD_pu_Word_Sample[k].n_Feature_Sequence_Len;
        if((Alpha[k] = new double *[Frame_num[k]]) == NULL)
        {
            DEBUG_PRINTF("Not enough memory for Alpha[%d] !\n", k);
            ASSERT(0);
        }
        for(i=0; i<Frame_num[k]; i++)
            if((Alpha[k][i] = new double [Status_num]) == NULL)
            {
                DEBUG_PRINTF("Not enough memory for Alpha[%d][%d] !\n", k, i);
                ASSERT(0);
            }
        if((Belta[k] = new double *[Frame_num[k]]) == NULL)
        {
            DEBUG_PRINTF("Not enough memory for Belta[%d] !\n", k);
            ASSERT(0);
        }
        for(i=0; i<Frame_num[k]; i++)
            if((Belta[k][i] = new double [Status_num]) == NULL)
            {
                DEBUG_PRINTF("Not enough memory for Belta[%d][%d] !\n", k, i);
                ASSERT(0);
            }
        if((Output[k] = new int [Frame_num[k]]) == NULL)
        {
            // FIX: message said "Frame_num[%d]" although Output[k] is
            // what failed to allocate here.
            DEBUG_PRINTF("Not enough memory for Output[%d] !\n", k);
            ASSERT(0);
        }
        // Copy the VQ-coded observation sequence of this utterance.
        for(i=0; i<Frame_num[k]; i++)
            Output[k][i] = DMAD_pu_Word_Sample[k].pn_VQed_Feature_Sequence[i];
        k ++;
    }
    // Re-estimate until the likelihood improvement falls below
    // EPSILON_CALC or MAX_CALC_NUM iterations have run.
    w = 1;
    m = 0;
    while(m < MAX_CALC_NUM)
    {
        u = DHMM_Calculate(Alpha, Belta, Pi, A, B, Output, Train_num, Output_num,
                           Frame_num, Status_num, REESTIMATE_ALL, mode);
        m ++;
        if((u > w) && (fabs((u-w)/w) < EPSILON_CALC)) break;
        else w = u;
    }
    PRO_LOG("\tLoop = %4d.\n", m);
    // Release memory.
    // FIX: arrays allocated with new[] must be released with delete[]
    // (plain 'delete' on a new[] pointer is undefined behavior), and
    // Output[k]/Output were previously never freed -- a leak per call.
    for(k=0; k<Train_num; k++)
    {
        for(i=0; i<Frame_num[k]; i++)
            delete [] Alpha[k][i];
        delete [] Alpha[k];
        for(i=0; i<Frame_num[k]; i++)
            delete [] Belta[k][i];
        delete [] Belta[k];
        delete [] Output[k];
    }
    delete [] Alpha;
    delete [] Belta;
    delete [] Output;
    delete [] Frame_num;
}
static double DHMM_Calculate(double ***Alpha, double ***Belta, double *Pi, double **A,
double **B, int **Out, int Sample_num, int Output_num, int *T,
int n, int mode, int End_state_mode)
// 功能:训练某一词条的模型
// input:
// Pi 初始时刻各状态的概率
// Alpha 前向概率
// Belta 后向概率
// A 初始A矩阵
// B 初始B矩阵
// Out 用于训练的各句子的观察序列(输出序列)
// Sample_num 训练集人数
// Output_num VQ码本大小(码字数,即观察值的个数)
// T 用于训练的各句子的帧数
// n 状态数
// mode 训练模式(REESTIMATE_A, REESTIMATE_B, REESTIMATE_ALL)
// End_state_mode 结尾帧状态模式(ENDSTATE_ABSORB = 0x200)
// output:
// A
// B
// caller(r_dhmm.cpp): u = DHMM_Calculate(Alpha, Belta, Pi, A, B, Output, Train_num, Output_num,
// Frame_num, Status_num, REESTIMATE_ALL, mode);
{
int i, j, k, m;
double w;
double **New_A;
double **New_B;
double **Ct; // to prevent underflow of Alpha and Belta, Ct is the scaling coefficient
double Residue;
if((New_A = new double *[n]) == NULL)
{
DEBUG_PRINTF("Not enough memory for New_A !\n");
ASSERT(0);
}
for(i=0; i<n; i++)
{
if((New_A[i] = new double [n]) == NULL)
{
DEBUG_PRINTF("Not enough memory for New_A[%d] !\n", i);
ASSERT(0);
}
}
if((New_B = new double *[n]) == NULL)
{
DEBUG_PRINTF("Not enough memory for New_B !\n");
ASSERT(0);
}
for(i=0; i<n; i++)
{
if((New_B[i] = new double [Output_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for New_B[%d] !\n", i);
ASSERT(0);
}
}
if((Ct = new double *[Sample_num]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Ct !\n");
ASSERT(0);
}
for(i=0; i<Sample_num; i++)
if((Ct[i] = new double [T[i]]) == NULL)
{
DEBUG_PRINTF("Not enough memory for Ct[%d] !\n", i);
ASSERT(0);
}
// Calculate Alpha & Belta.
for(m=0; m<Sample_num; m++)
{
if (T[m] > 0)
{
w = 0;
for(i=0; i<n; i++)
{
Alpha[m][0][i] = Pi[i] * B[i][ Out[m][0] ];
w = w + Alpha[m][0][i];
}
Ct[m][0] = 1.0 / w; // scaling coefficient
for(i=0; i<n; i++)
Alpha[m][0][i] = Alpha[m][0][i] * Ct[m][0]; // scaling
// 计算Alpha
for(k=1; k<T[m]; k++) // 第m个训练utterance的各帧
{
Ct[m][k] = 0;
for(i=0; i<n; i++)
{
w = 0;
for(j=0; j<n; j++)
w = w + Alpha[m][k-1][j] * A[j][i];
w = w * B[i][ Out[m][k] ];
Alpha[m][k][i] = w;
Ct[m][k] = Ct[m][k] + w;
}
Ct[m][k] = 1.0 / Ct[m][k];
for(i=0; i<n; i++)
Alpha[m][k][i] = Alpha[m][k][i] * Ct[m][k];
// NOTE(review): the remainder of this file (the tail of DHMM_Calculate and
// the DHMM_Priority definition) was lost during extraction -- the text that
// followed here was website UI residue, not source code. Recover the missing
// tail from the original dhmm_gl.cpp before building.