📄 gauss.cpp
字号:
#include "stdafx.h"
#include "Gauss.h"
//*F///////////////////////////////////////////////////////////////////////////////////////
// Name: ComputeUniModeGauss
// Purpose: The function computes the Gaussian pdf for a sample vector
// Context:
// Parameters: obsVeq - pointer to the sample vector
// mu - pointer to the mean vector of the Gaussian pdf
// var - pointer to the variance vector of the Gaussian pdf
// VecSize - the size of sample vector
//
// float* mu; /*mean vectors corresponding to each mixture*/
// float* inv_var; /* square root of inversed variances corresp. to each mixture*/
// float* log_var_val; /* sum of 0.5 (LN2PI + ln(variance[i]) ) for i=1,n */
// Returns: the pdf of the sample vector given the specified Gaussian
//
// Notes:
//F*/
float ComputeUniModeGauss(float* vect, float* mu,
float* inv_var, float log_var_val, int vect_size)
{
int n;
double tmp;
double prob;
prob = -log_var_val;
for (n = 0; n < vect_size; n++)
{
tmp = (vect[n] - mu[n]) * inv_var[n];//是否要乘0.5
prob = prob - tmp * tmp;
}
prob *= 0.5f;
return (float)prob;
}
/*F///////////////////////////////////////////////////////////////////////////////////////
// Name: ComputeGaussMixture
// Purpose: The function computes the mixture Gaussian pdf of a sample vector.
// Context:
// Parameters: obsVeq - pointer to the sample vector
// mu - two-dimensional pointer to the mean vector of the Gaussian pdf;
// the first dimension is indexed over the number of mixtures and
// the second dimension is indexed along the size of the mean vector
// var - two-dimensional pointer to the variance vector of the Gaussian pdf;
// the first dimension is indexed over the number of mixtures and
// the second dimension is indexed along the size of the variance vector
// VecSize - the size of sample vector
// weight - pointer to the wights of the Gaussian mixture
// NumMix - the number of Gaussian mixtures
//
// float* inv_var; /* square root of inversed variances corresp. to each mixture*/
// float* log_var_val; /* sum of 0.5 (LN2PI + ln(variance[i]) ) for i=1,n */
// 不要乘0.5,可在ComputeUniModeGauss中乘0.5
// int num_mix; /*number of mixtures in this state*/
// float* weight; /*array of mixture weights. Summ of all weights in state is 1. */
//
// Returns: the pdf of the sample vector given the specified Gaussian mixture.
//
// Notes:
//F*/
/* Calculate probability of observation at state in logarithmic scale*/
float ComputeGaussMixture( float* vect, float* mu,
float* inv_var, float* log_var_val,
int vect_size, float* weight, int num_mix )
{
double prob, l_prob;
prob = 0.0f;
if (num_mix == 1)
{
return ComputeUniModeGauss( vect, mu, inv_var, log_var_val[0], vect_size);
}
else
{
int m;
for (m = 0; m < num_mix; m++)
{
if ( weight[m] > 0.0)
{
l_prob = ComputeUniModeGauss(vect, mu + m*vect_size,
inv_var + m * vect_size,
log_var_val[m],
vect_size);
prob = prob + weight[m]*exp((double)l_prob);
}
}
prob = log(prob);
}
return (float)prob;
}
//概率密度函数 p(vect|mu,vars)
float GetPostProb(float* vect,float* mu,float* vars,int vec_size){
float* inv_var = new float[vec_size];
float log_var_val = 0;
for(int i=0;i<vec_size;++i){
float var = sqrt(vars[i]);
if(var <= 1e-30) var = 1e-10; //如果等于0,给一个很小的值
inv_var[i] = 1./ var;
log_var_val += log(var);
}
log_var_val = (log(2*PI)*vec_size+log_var_val);
float l_prob = ComputeUniModeGauss(vect, mu, inv_var,log_var_val,vec_size);
delete[] inv_var;
return exp(l_prob);
}
///////////////////////////////////////////////////////////////////////////////
//log(p(cluster_j|vect)) 的变量部分,省去常量计算,就是最小错误率的Bayes决策准则
// = log(p(cluster_j)) - 1/2*(vect-mu)^2/vars - 1/2*log|vars|
// log(p(cluster_j)) 也不考虑
float GetLogProb(float* vect,float* mu,float* vars,int vec_size){
float l_prob = 0.0;
float log_var_val = 0;//行列式
for(int i=0;i<vec_size;++i){
if(vars[i]<1e-20) continue;
float d = vect[i]-mu[i];
l_prob += d*d / vars[i];
log_var_val += log(vars[i]);
}
l_prob = (- l_prob - log_var_val)/2;
return l_prob;
}
///////////////////////////////////////////////////////////////////////////////////
//作者:施智平
//**** 不直接计算Gauss公式,在logrighm scale下计算,
//**** 注意有时需要再用 exp() 函数返回指数运算域,如原公式中有加法的地方
//实现EM算法,估计模型参数,最后更新聚类结果.使用对数计算
//输入 向量数组samples
//输入 先前KMEAS聚类结果 num_cluster,cluster_index,num_samples_in_clusters
//输出 means,delta,更新cluster_index 作为EM聚类结果
//假定各维独立,不考虑协方差
///////////////////////////////////////////////////////////////////////////////////
//没有调正确,聚类不可用
/*void GaussEM(float **samples, int num_samples, int vec_size, \
int num_cluster, int *cluster_index,int* num_samples_in_clusters,\
float *means,float* delta,int maxIter)
{
int j,k,x;//j:cluster; x:sample; k: dimension
//E step
double** p_x_j = new double*[num_samples];//P(sample_x|Cluster_j)
for(x=0;x<num_samples;++x){
p_x_j[x] = new double[num_cluster];
}
double** p_j_x = new double*[num_cluster];//P(Cluster_j|sample_x)
for(j=0;j<num_cluster;++j){
p_j_x[j] = new double[num_samples];
}
double* p_j = new double[num_cluster];
for(j=0;j<num_cluster;++j){
p_j[j] = (double)num_samples_in_clusters[j]/num_samples;
}
int EMiteration;
double pro_log_likelihood = 0.0;
double cur_log_likelihood = 10000;
float log_var_val = vec_size*log(2.0*PI);
for(EMiteration=0;EMiteration<maxIter;++EMiteration){
if(fabs(pro_log_likelihood - cur_log_likelihood) < 1e-10) break;//两次循环,参数变动不大,退出
pro_log_likelihood = cur_log_likelihood;
cur_log_likelihood = 0.0;
for(j=0;j<num_cluster;++j){
for(k=0;k<vec_size;++k){
// *******if delta == 0********
if(fabs(delta[j*vec_size+k])<=1e-30)
delta[j*vec_size+k] = 1e-30;
delta[j*vec_size+k] = 1./sqrt(delta[j*vec_size+k]);
}
float* mu = means+j*vec_size;
float* inv_var = delta+j*vec_size;
for(x=0;x<num_samples;++x){
p_x_j[x][j] = ComputeUniModeGauss(samples[x],mu,inv_var,log_var_val,vec_size);//- log_var_val - log_det - e;
}
}
for(x=0;x<num_samples;++x){
double p_x = 0.0;
for(j=0;j<num_cluster;++j){
//p_j[j]=1.0/num_cluster;//************样本局限,p_j 设为相同*************
p_x += p_j[j]*exp(p_x_j[x][j]);
}
p_x = log(p_x);
cur_log_likelihood += p_x;
for(j=0;j<num_cluster;++j){
p_j_x[j][x] = p_j[j]*exp(p_x_j[x][j])/ exp(p_x);
// ASSERT(p_j_x[j][x]>=0.0);
}
}
//M step
for(j=0;j<num_cluster;++j){
double sum_pjx = 0.0;
double* sum_pjxx = new double[vec_size];
memset(sum_pjxx,0,vec_size*sizeof(double));
for(x=0;x<num_samples;++x){
sum_pjx += p_j_x[j][x];
for(k=0;k<vec_size;++k){
sum_pjxx[k] += p_j_x[j][x]*samples[x][k];
}
}
ASSERT(sum_pjx>0.0);
p_j[j] = sum_pjx/num_samples;//各类的概率
//update means vector
for(k=0;k<vec_size;++k){
means[j*vec_size+k] = sum_pjxx[k]/sum_pjx;
// ASSERT(means[j*vec_size+k]>=0.0&&means[j*vec_size+k]<100.0);
}
//update delta vector
for(k=0;k<vec_size;++k){
double sum = 0.0;
for(x=0;x<num_samples;++x){
double d = samples[x][k]-means[j*vec_size+k];
sum += p_j_x[j][x]*d*d;
}
delta[j*vec_size+k] = sum / sum_pjx;
// if(delta[j*vec_size+k]==0) delta[j*vec_size+k] = 1e-18;
}
delete[] sum_pjxx;
}
}//for(EMiteration
delete[] p_j;
//update cluster_index according as p_j_x
for(x=0;x<num_samples;++x){
int maxCluster = 0;
double maxProb = p_j_x[0][x];
for(j=1;j<num_cluster;++j){
if(maxProb<p_j_x[j][x]){
maxCluster = j;
maxProb = p_j_x[j][x];
}
}
cluster_index[x] = maxCluster;
}
for(x=0;x<num_samples;++x){
delete[] p_x_j[x];
}
delete[] p_x_j;//P(sample_x|Cluster_j)
for(j=0;j<num_cluster;++j){
delete []p_j_x[j];
}
delete[]p_j_x;//P(Cluster_j|sample_x)
}
//*/
//这个已有合理的结果测试,大概是正确的
void GaussEM(float **samples, int num_samples, int vec_size, \
int num_cluster, int *cluster_index,int* num_samples_in_clusters,\
float *means,float* delta,int maxIter)
{
int j,k,x;//j:cluster; x:sample; k: dimension
//E step
double** p_x_j = new double*[num_samples];//P(sample_x|Cluster_j)
for(x=0;x<num_samples;++x){
p_x_j[x] = new double[num_cluster];
}
double** p_j_x = new double*[num_cluster];//P(Cluster_j|sample_x)
for(j=0;j<num_cluster;++j){
p_j_x[j] = new double[num_samples];
}
double* p_j = new double[num_cluster];
for(j=0;j<num_cluster;++j){
p_j[j] = (double)num_samples_in_clusters[j]/num_samples;
}
int EMiteration;
double pro_log_likelihood = 10000;
double cur_log_likelihood = 0.0;
float log_var_val = vec_size*log(2.0*PI);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -