📄 gauss.cpp

📁 用C＋＋实现的高斯混合模型的算法类
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页

#include "stdafx.h"
#include "Gauss.h"
//*F///////////////////////////////////////////////////////////////////////////////////////
//    Name: ComputeUniModeGauss
//    Purpose: The function computes the Gaussian pdf for a sample vector 
//    Context:
//    Parameters:  obsVeq - pointer to the sample vector
//                 mu - pointer to the mean vector of the Gaussian pdf
//                 var - pointer to the variance vector of the Gaussian pdf
//                 VecSize - the size of sample vector
//                 
//                 float* mu;        /*mean vectors corresponding to each mixture*/
//                 float* inv_var;   /* square root of inversed variances corresp. to each mixture*/
//                 float* log_var_val; /* sum of 0.5 (LN2PI + ln(variance[i]) ) for i=1,n */

//    Returns: the pdf of the sample vector given the specified Gaussian 
//
//    Notes: 
//F*/
float ComputeUniModeGauss(float* vect, float* mu, 
                              float* inv_var, float log_var_val, int vect_size)           
{
    int n; 
    double tmp;
    double prob;

    prob = -log_var_val;

    for (n = 0; n < vect_size; n++)
    {
        tmp = (vect[n] - mu[n]) * inv_var[n];//是否要乘0.5
        prob = prob - tmp * tmp;
   }
   prob *= 0.5f;
  
   return (float)prob;
}                        

/*F///////////////////////////////////////////////////////////////////////////////////////
//    Name: ComputeGaussMixture
//    Purpose: The function computes the mixture Gaussian pdf of a sample vector. 
//    Context:
//    Parameters:  obsVeq - pointer to the sample vector
//                 mu  - two-dimensional pointer to the mean vector of the Gaussian pdf;
//                       the first dimension is indexed over the number of mixtures and 
//                       the second dimension is indexed along the size of the mean vector
//                 var - two-dimensional pointer to the variance vector of the Gaussian pdf;
//                       the first dimension is indexed over the number of mixtures and 
//                       the second dimension is indexed along the size of the variance vector
//                 VecSize - the size of sample vector
//                 weight - pointer to the wights of the Gaussian mixture
//                 NumMix - the number of Gaussian mixtures
//
//                 float* inv_var;   /* square root of inversed variances corresp. to each mixture*/
//                 float* log_var_val; /* sum of 0.5 (LN2PI + ln(variance[i]) ) for i=1,n */ 
//											不要乘0.5，可在ComputeUniModeGauss中乘0.5
//                 int num_mix;      /*number of mixtures in this state*/
//                 float* weight;    /*array of mixture weights. Summ of all weights in state is 1. */
//                 
//    Returns: the pdf of the sample vector given the specified Gaussian mixture.  
//
//    Notes: 
//F*/
/* Calculate probability of observation at state in logarithmic scale*/
float ComputeGaussMixture( float* vect, float* mu, 
                                float* inv_var, float* log_var_val, 
                                int vect_size, float* weight, int num_mix )
{       
    double prob, l_prob;
    
    prob = 0.0f; 

    if (num_mix == 1)
    {
        return ComputeUniModeGauss( vect, mu, inv_var, log_var_val[0], vect_size);    
    }
    else
    {
        int m;
        for (m = 0; m < num_mix; m++)
        {
            if ( weight[m] > 0.0)
            { 
                l_prob = ComputeUniModeGauss(vect, mu + m*vect_size, 
                                                        inv_var + m * vect_size,
                                                        log_var_val[m], 
                                                        vect_size); 

                prob = prob + weight[m]*exp((double)l_prob);
            }
        } 
        prob = log(prob);    
    }                        
    return (float)prob;   
}
//概率密度函数 p(vect|mu,vars)                            
float GetPostProb(float* vect,float* mu,float* vars,int vec_size){
	float* inv_var = new float[vec_size];
	float log_var_val = 0;
	for(int i=0;i<vec_size;++i){
		float var = sqrt(vars[i]);
		if(var <= 1e-30) var = 1e-10; //如果等于0，给一个很小的值
		inv_var[i] = 1./ var;
		log_var_val += log(var);
	}
	log_var_val = (log(2*PI)*vec_size+log_var_val);
	float l_prob = ComputeUniModeGauss(vect, mu, inv_var,log_var_val,vec_size); 
	delete[] inv_var;
	return exp(l_prob);

}
///////////////////////////////////////////////////////////////////////////////
//log(p(cluster_j|vect)) 的变量部分，省去常量计算,就是最小错误率的Bayes决策准则
// = log(p(cluster_j)) - 1/2*(vect-mu)^2/vars - 1/2*log|vars|
// log(p(cluster_j)) 也不考虑
float GetLogProb(float* vect,float* mu,float* vars,int vec_size){
	float l_prob = 0.0; 
	float log_var_val = 0;//行列式
	for(int i=0;i<vec_size;++i){
		if(vars[i]<1e-20) continue;
		float d = vect[i]-mu[i];
		l_prob += d*d / vars[i];
		log_var_val += log(vars[i]);
	}
	l_prob = (- l_prob - log_var_val)/2;
	return l_prob;
	
}
///////////////////////////////////////////////////////////////////////////////////
//作者：施智平
//****   不直接计算Gauss公式，在logrighm scale下计算，
//****   注意有时需要再用 exp() 函数返回指数运算域，如原公式中有加法的地方
//实现EM算法，估计模型参数，最后更新聚类结果.使用对数计算
//输入 向量数组samples
//输入 先前KMEAS聚类结果 num_cluster,cluster_index,num_samples_in_clusters
//输出 means，delta，更新cluster_index 作为EM聚类结果
//假定各维独立，不考虑协方差
///////////////////////////////////////////////////////////////////////////////////
//没有调正确，聚类不可用
/*void GaussEM(float **samples, int num_samples, int vec_size, \
		int num_cluster, int *cluster_index,int* num_samples_in_clusters,\
		float *means,float* delta,int maxIter)
{
	int j,k,x;//j:cluster;   x:sample;     k: dimension
	//E step
	double** p_x_j = new double*[num_samples];//P(sample_x|Cluster_j)
	for(x=0;x<num_samples;++x){
		p_x_j[x] = new double[num_cluster];
	}

	double** p_j_x = new double*[num_cluster];//P(Cluster_j|sample_x)
	for(j=0;j<num_cluster;++j){
		p_j_x[j] = new double[num_samples];
	}
	double* p_j = new double[num_cluster];
	for(j=0;j<num_cluster;++j){
		p_j[j] = (double)num_samples_in_clusters[j]/num_samples;
	}
	
	int EMiteration;
	double pro_log_likelihood = 0.0;
	double cur_log_likelihood = 10000;
	float log_var_val = vec_size*log(2.0*PI); 
	for(EMiteration=0;EMiteration<maxIter;++EMiteration){
		if(fabs(pro_log_likelihood - cur_log_likelihood) < 1e-10) break;//两次循环，参数变动不大，退出
		pro_log_likelihood = cur_log_likelihood;
		cur_log_likelihood = 0.0;
		for(j=0;j<num_cluster;++j){
			for(k=0;k<vec_size;++k){
				// *******if delta == 0********
				if(fabs(delta[j*vec_size+k])<=1e-30) 
					delta[j*vec_size+k] = 1e-30;
				delta[j*vec_size+k] = 1./sqrt(delta[j*vec_size+k]);
			}
			float* mu = means+j*vec_size;
			float* inv_var = delta+j*vec_size;
			for(x=0;x<num_samples;++x){
				p_x_j[x][j] = ComputeUniModeGauss(samples[x],mu,inv_var,log_var_val,vec_size);//- log_var_val - log_det - e;  
			}
		}
		
		for(x=0;x<num_samples;++x){
			double p_x = 0.0;
			for(j=0;j<num_cluster;++j){
				//p_j[j]=1.0/num_cluster;//************样本局限，p_j 设为相同*************
				
				p_x += p_j[j]*exp(p_x_j[x][j]);
			}
			p_x = log(p_x);
			cur_log_likelihood += p_x;
			for(j=0;j<num_cluster;++j){
				p_j_x[j][x] = p_j[j]*exp(p_x_j[x][j])/ exp(p_x);
//				ASSERT(p_j_x[j][x]>=0.0);
			}
		}
		
		//M step
		for(j=0;j<num_cluster;++j){
			double sum_pjx = 0.0;
			double* sum_pjxx = new double[vec_size];
			memset(sum_pjxx,0,vec_size*sizeof(double));
			for(x=0;x<num_samples;++x){
				sum_pjx += p_j_x[j][x];
				for(k=0;k<vec_size;++k){
					sum_pjxx[k] += p_j_x[j][x]*samples[x][k];
				}
			}
			ASSERT(sum_pjx>0.0);
			p_j[j] = sum_pjx/num_samples;//各类的概率
			//update means vector
			for(k=0;k<vec_size;++k){
				means[j*vec_size+k] = sum_pjxx[k]/sum_pjx;
//				ASSERT(means[j*vec_size+k]>=0.0&&means[j*vec_size+k]<100.0);
			}

			//update delta vector
			for(k=0;k<vec_size;++k){
				double sum = 0.0;
				for(x=0;x<num_samples;++x){
					double d = samples[x][k]-means[j*vec_size+k];
					sum += p_j_x[j][x]*d*d;
				}
				delta[j*vec_size+k] = sum / sum_pjx;
//				if(delta[j*vec_size+k]==0) delta[j*vec_size+k] = 1e-18;
			}
			delete[] sum_pjxx;
		}
	}//for(EMiteration
	delete[] p_j;

	//update cluster_index according as p_j_x
	for(x=0;x<num_samples;++x){
		int maxCluster = 0;
		double maxProb = p_j_x[0][x];
		for(j=1;j<num_cluster;++j){
			if(maxProb<p_j_x[j][x]){
				maxCluster = j;
				maxProb = p_j_x[j][x];
			}
		}
		cluster_index[x] = maxCluster;
	}

	for(x=0;x<num_samples;++x){
		delete[] p_x_j[x];
	}
	delete[] p_x_j;//P(sample_x|Cluster_j)
	
	for(j=0;j<num_cluster;++j){
		delete []p_j_x[j];
	}
	delete[]p_j_x;//P(Cluster_j|sample_x)
	
}
//*/
//这个已有合理的结果测试，大概是正确的
void GaussEM(float **samples, int num_samples, int vec_size, \
		int num_cluster, int *cluster_index,int* num_samples_in_clusters,\
		float *means,float* delta,int maxIter)
{
	int j,k,x;//j:cluster;   x:sample;     k: dimension
	//E step
	double** p_x_j = new double*[num_samples];//P(sample_x|Cluster_j)
	for(x=0;x<num_samples;++x){
		p_x_j[x] = new double[num_cluster];
	}

	double** p_j_x = new double*[num_cluster];//P(Cluster_j|sample_x)
	for(j=0;j<num_cluster;++j){
		p_j_x[j] = new double[num_samples];
	}
	double* p_j = new double[num_cluster];
	for(j=0;j<num_cluster;++j){
		p_j[j] = (double)num_samples_in_clusters[j]/num_samples;
	}
	
	int EMiteration;
	double pro_log_likelihood = 10000;
	double cur_log_likelihood = 0.0;
	float log_var_val = vec_size*log(2.0*PI);
12 下一页
💿 文件大小 5 K
👤 上传用户 whyzhao
📂 所属分类数学计算
🏷️ 相关标签

#高斯混合模型 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -