📄 guassian.cpp
//#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <malloc.h>
#include <string.h> /* memset() used in GaussEM() */
#include <iostream>
#include <windows.h>
#include "gaussian.h"
//#include "KMEANS.h"
using namespace std;
//#define n (pow(2,31)-1)
#define PI 3.14159265358979323846
//using namespace std;
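/*
 * Initialize(): builds a diagonal-covariance Gaussian mixture model from the
 * training file.  K-means (m_kmeans) first partitions the samples; each
 * cluster then seeds one mixture component (equal weight, cluster mean and
 * per-dimension variance), and GaussEM() refines the parameters with EM.
 */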
void Gaussian::Initialize(char *fname)
//int pattern,int dimension,double **sample,int mix_num)
{
m_kmeans.LoadPatterns(fname);
m_kmeans.InitClusters();
m_kmeans.RunKMeans();
NumPatterns=m_kmeans.NumPatterns;
SizeVector=m_kmeans.SizeVector;
NumMixture=m_kmeans.NumClusters;
samplelist=matrix(NumPatterns,SizeVector);
mean = vect(NumMixture*SizeVector);
devia= vect(NumMixture*SizeVector);
a= vect(NumMixture);
clu_idx=(int *) malloc (NumPatterns*sizeof(int));
num_samples_clu=(int *) malloc (NumMixture*sizeof(int));
int i,j;
for (i=0;i<NumPatterns;i++)
for (j=0;j<SizeVector;j++)
{
samplelist[i][j]=m_kmeans.Pattern[i*SizeVector+j];
}
for(i=0;i<NumMixture;i++)
{
a[i]=1.0/(double)NumMixture;
}
for(i=0;i<NumMixture;i++)
{
Cal_mean_delta(m_kmeans.Pattern,&m_kmeans.Cluster[i],&mean[i*SizeVector],&devia[i*SizeVector]);
}
for (i=0;i<NumMixture;i++)
{
for (j=0;j<m_kmeans.Cluster[i].NumMembers;j++)
{
clu_idx[m_kmeans.Cluster[i].Member[j]]=i;
}
}
for (i=0;i<NumMixture;i++)
{
num_samples_clu[i]=m_kmeans.Cluster[i].NumMembers;
}
GaussEM(samplelist, NumPatterns, SizeVector, NumMixture, clu_idx ,num_samples_clu ,mean ,devia ,a ,1000);
return;
}
void Gaussian::Cal_mean_delta(double *data,aCluster *Clus,double *Uni_mean,double *Uni_devia)
/* Compute the per-dimension mean and (biased) variance of one cluster's members. */
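/* For a cluster with N members:  mean_j = (1/N)*sum_i x[i][j],
   var_j  = (1/N)*sum_i (x[i][j]-mean_j)^2  (biased / maximum-likelihood estimate). */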
{
int i, j;
/* Mean of each dimension over the cluster members */
for (j = 0; j < SizeVector; j++)
{
Uni_mean[j] = 0.0;
for (i = 0; i < Clus->NumMembers ; i++)
{
Uni_mean[j] += data[Clus->Member[i]*SizeVector+j];
}
Uni_mean[j] /= (double)Clus->NumMembers;
}
/* Variance of each dimension over the cluster members */
for (i = 0; i < SizeVector; i++)
{
Uni_devia[i]=0;
for (j = 0; j < Clus->NumMembers; j++)
{
Uni_devia[i]+=(data[Clus->Member[j]*SizeVector+i]-Uni_mean[i])*(data[Clus->Member[j]*SizeVector+i]-Uni_mean[i]);
}
Uni_devia[i]/=(double)Clus->NumMembers;
}
return;
}
void Gaussian::erhand(char err_msg[])
/* Error handler */
{
fprintf(stderr,"Run-time error:\n");
fprintf(stderr,"%s\n", err_msg);
fprintf(stderr,"Exiting to system.\n");
exit(1);
}
double ** Gaussian::matrix(int n,int m)
/* Allocate an n-by-m matrix of doubles (indexed [0..n-1][0..m-1]). */
{
int i;
double **mat;
/* Allocate pointers to rows. */
mat = (double **) malloc((unsigned) (n)*sizeof(double*));
if (!mat) erhand("Allocation failure 1 in matrix().");
for (i = 0; i < n; i++)
{
mat[i] = (double *) malloc((unsigned) (m)*sizeof(double));
if (!mat[i]) erhand("Allocation failure 2 in matrix().");
}
/* Return pointer to array of pointers to rows. */
return mat;
}
/** Allocation of vector storage ***********************************/
double * Gaussian::vect(int n)
/* Allocate a vector of n doubles (indexed [0..n-1]). */
{
double *v;
v = (double *) malloc ((unsigned) n*sizeof(double));
if (!v) erhand("Allocation failure in vect().");
return v;
}
double * vecth(int n)
/* Non-member variant of vect(): allocates n doubles without an allocation check. */
{
double *v;
v = (double *) malloc ((unsigned) n*sizeof(double));
return v;
}
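/*
 * GaussEM(): expectation-maximization for a diagonal-covariance GMM.
 * Each iteration:
 *   E-step: for every sample x and component j evaluate the log density
 *     log p(x|j) = -D/2*log(2*pi) - 1/2*sum_k log(var_jk)
 *                  - 1/2*sum_k (x_k - mu_jk)^2 / var_jk
 *   and the responsibility p(j|x) = w_j*p(x|j) / sum_m w_m*p(x|m).
 *   M-step: re-estimate the weights, means and variances from the
 *   responsibilities (see the comments inside the loop).
 * The loop stops after maxIter passes or once the total log-likelihood
 * changes by less than 1e-10 between iterations.
 */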
void Gaussian::GaussEM(double **samples, int num_samples, int vec_size, \
int num_cluster, int *cluster_index,int* num_samples_in_clusters,\
double *means,double* delta,double* weight,int maxIter)
{
int j,k,x;//j:cluster; x:sample; k: dimension
//buffers for the E step: per-sample densities, responsibilities and priors
double** p_x_j = new double*[num_samples];//P(sample_x|Cluster_j)
for(x=0;x<num_samples;++x){
p_x_j[x] = new double[num_cluster];
}
double** p_j_x = new double*[num_cluster];//P(Cluster_j|sample_x)
for(j=0;j<num_cluster;++j){
p_j_x[j] = new double[num_samples];
}
double* p_j = new double[num_cluster];
for(j=0;j<num_cluster;++j){
p_j[j] = (double)num_samples_in_clusters[j]/num_samples;
}
int EMiteration;
double pro_log_likelihood = 10000;
double cur_log_likelihood = 0.0;
double log_var_val = 0.5*vec_size*log(2.0*PI);//normalization constant D/2*log(2*pi), shared by all components
for(EMiteration=0;EMiteration<maxIter;++EMiteration){
if(fabs(pro_log_likelihood - cur_log_likelihood) < 1e-10) break;//log-likelihood barely changed between iterations: converged, exit
pro_log_likelihood = cur_log_likelihood;
cur_log_likelihood = 0.0;
for(j=0;j<num_cluster;++j){
double log_det = 0.0;
for(k=0;k<vec_size;++k){
if(delta[j*vec_size+k]<0.0)return;//negative variance: give up (should not happen)
// skip dimensions whose variance has collapsed to zero
if(fabs(delta[j*vec_size+k])<=1e-30)
continue;
log_det += log(sqrt(delta[j*vec_size+k]));//accumulates 1/2*log(det) of the diagonal covariance
// ASSERT(log_det>0.0);
}
for(x=0;x<num_samples;++x){
double e = 0.0;
for(k=0;k<vec_size;++k){
// skip zero-variance dimensions, matching the log-determinant loop above
if(fabs(delta[j*vec_size+k])<1e-30)
continue;
double d = samples[x][k]-means[j*vec_size+k];
e += d*d/(delta[j*vec_size+k]);//delta holds variances (already squared)
}
//double denom = pow2pi*sqrt(det);
p_x_j[x][j] = - log_var_val - log_det - 0.5*e;//log p(x|j), kept in the log domain; the 1/2 matches the other terms
//p_x_j[x][j] /= denom;
// ASSERT(p_x_j[x][j]>=0.0);
}
}
for(x=0;x<num_samples;++x){
double p_x = 0.0;
for(j=0;j<num_cluster;++j){
//p_j[j]=1.0/num_cluster;//with few samples the priors could be forced equal
p_x += p_j[j]*exp(p_x_j[x][j]);
}
p_x = log(p_x);
cur_log_likelihood += p_x;
for(j=0;j<num_cluster;++j){
p_j_x[j][x] = p_j[j]*exp(p_x_j[x][j]) / exp(p_x);//responsibility p(j|x) = w_j*p(x|j)/p(x)
// ASSERT(p_j_x[j][x]>=0.0);
}
}
//M step
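// Standard EM re-estimation for component j:
//   w_j    = (1/N) * sum_x p(j|x)
//   mu_jk  = sum_x p(j|x)*x_k           / sum_x p(j|x)
//   var_jk = sum_x p(j|x)*(x_k-mu_jk)^2 / sum_x p(j|x)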
for(j=0;j<num_cluster;++j){
double sum_pjx = 0.0;
double* sum_pjxx = new double[vec_size];
memset(sum_pjxx,0,vec_size*sizeof(double));
for(x=0;x<num_samples;++x){
sum_pjx += p_j_x[j][x];
for(k=0;k<vec_size;++k){
sum_pjxx[k] += p_j_x[j][x]*samples[x][k];
}
}
// ASSERT(sum_pjx>0.0);
p_j[j] = sum_pjx/num_samples;//prior (weight) of cluster j
//update means vector
for(k=0;k<vec_size;++k){
means[j*vec_size+k] = sum_pjxx[k]/sum_pjx;
// ASSERT(means[j*vec_size+k]>=0.0&&means[j*vec_size+k]<100.0);
}
weight[j]=p_j[j];
//update delta vector
for(k=0;k<vec_size;++k){
double sum = 0.0;
for(x=0;x<num_samples;++x){
double d = samples[x][k]-means[j*vec_size+k];
sum += p_j_x[j][x]*d*d;
}
delta[j*vec_size+k] = sum / sum_pjx;
// if(delta[j*vec_size+k]==0) delta[j*vec_size+k] = 1e-18;
}
delete[] sum_pjxx;
}
}//for(EMiteration
delete[] p_j;
//update cluster_index according to p_j_x (hard assignment to the most responsible component)
for(x=0;x<num_samples;++x){
int maxCluster = 0;
double maxProb = p_j_x[0][x];
for(j=1;j<num_cluster;++j){
if(maxProb<p_j_x[j][x]){
maxCluster = j;
maxProb = p_j_x[j][x];
}
}
cluster_index[x] = maxCluster;
}
for(x=0;x<num_samples;++x){
delete[] p_x_j[x];
}
delete[] p_x_j;//P(sample_x|Cluster_j)
for(j=0;j<num_cluster;++j){
delete []p_j_x[j];
}
delete[]p_j_x;//P(Cluster_j|sample_x)
}
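/*
 * ComputeUniModeGauss(): log density of a single diagonal Gaussian,
 *   log N(x;mu,var) = -log_var_val - 1/2*sum_n ((x_n - mu_n)/sigma_n)^2,
 * where inv_var[n] = 1/sigma_n and log_var_val is the precomputed
 * normalization constant D/2*log(2*pi) + 1/2*sum_n log(var_n).
 */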
double Gaussian::ComputeUniModeGauss(double* vect, double* mu,
double* inv_var, double log_var_val, int vect_size)
{
int n;
double tmp;
double prob;
prob = 0;
for (n = 0; n < vect_size; n++)
{
tmp = (vect[n] - mu[n]) * inv_var[n];//inv_var holds 1/sigma, so tmp*tmp = (x-mu)^2/var
prob = prob - tmp * tmp;
}
prob *= 0.5;//the 1/2 of the Gaussian exponent
prob -= log_var_val;//include the normalization constant
return (double)prob;
}
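/*
 * ComputeGaussMixture(): log-likelihood of the full mixture,
 *   log p(x) = log( sum_m weight[m] * exp(log N(x; mu_m, var_m)) ).
 * With a single component the component log density is returned directly.
 */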
double Gaussian::ComputeGaussMixture(double* vect, double* mu,
double* inv_var, double* log_var_val,
int vect_size, double* weight, int num_mix )
{
double prob, l_prob;
prob = 0.0f;
if (num_mix == 1)
{
return ComputeUniModeGauss( vect, mu, inv_var, log_var_val[0], vect_size);
}
else
{
int m;
for (m = 0; m < num_mix; m++)
{
if ( weight[m] > 0.0)
{
l_prob = ComputeUniModeGauss(vect, mu + m*vect_size,
inv_var + m * vect_size,
log_var_val[m],
vect_size);
prob = prob + weight[m]*exp((double)l_prob);
}
}
prob = log(prob);
}
return (double)prob;
}
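/*
 * Classifier(): scores one feature vector against the trained mixture.
 * It derives 1/sigma and the per-component normalization constants from the
 * EM estimates (mean, devia, a) and stores the mixture log-likelihood in
 * the member `result`.
 */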
void Gaussian::Classifier(double *vector)
{
int i,j;
double* inv_var;
double* log_var_val;
double temp;
inv_var=vect(NumMixture*SizeVector);
log_var_val=vect(NumMixture);
for (i=0;i<NumMixture;i++)
{
for (j=0;j<SizeVector;j++)
{
inv_var[i*SizeVector+j]=1.0/sqrt(devia[i*SizeVector+j]);
}
}
for (i=0;i<NumMixture;i++)
{
log_var_val[i]=0;
for (j=0;j<SizeVector;j++)
{
// log_var_val[i]+=0.5*(SizeVector*log(2*PI)+log(fabs((vector[j]-mean[i*SizeVector+j])*(vector[j]-mean[i*SizeVector+j])/(double)SizeVector)))/log(2.718);
log_var_val[i]+=0.5*(log(2*PI)+log(fabs(devia[i*SizeVector+j])));//1/2*(log(2*pi)+log(var)), natural log
}
}
result=ComputeGaussMixture(vector, mean, inv_var, log_var_val, SizeVector, a, NumMixture);
}
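/*
 * Demo driver: trains the mixture on test1.dat, then reads 10 values from
 * test.dat and prints their mixture log-likelihood.  The file paths are the
 * author's local ones and will need adjusting.
 */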
int main()
{
Gaussian Gauss;
double *vector;
Gauss.Initialize("D:\\Coding\\Homecare\\GMM\\test1.dat");
printf("Loading training samples finished!\n");
FILE *InFilePtr;
if((InFilePtr = fopen("D:\\Coding\\Homecare\\GMM\\test.dat", "r")) == NULL)return 1;
vector=vecth(10);
for (int i=0;i<10;i++)
{
fscanf(InFilePtr, "%lf", &vector[i]);
}
fclose(InFilePtr);
Gauss.Classifier(vector);
printf("%lf\n",Gauss.result);
free(vector);
return 0;
}