📄 gauss.cpp
字号:
for(EMiteration=0;EMiteration<maxIter;++EMiteration){
if(fabs(pro_log_likelihood - cur_log_likelihood) < 1e-10) break;//两次循环,参数变动不大,退出
pro_log_likelihood = cur_log_likelihood;
cur_log_likelihood = 0.0;
for(j=0;j<num_cluster;++j){
double log_det = 0.0;
for(k=0;k<vec_size;++k){
ASSERT(delta[j*vec_size+k]>=0.0);
// *******if delta == 0********
if(fabs(delta[j*vec_size+k])<=1e-30)
continue;
log_det += log(sqrt(delta[j*vec_size+k]));//行列式的对数
// ASSERT(log_det>0.0);
}
for(x=0;x<num_samples;++x){
double e = 0.0;
for(k=0;k<vec_size;++k){
// ******* if means and delta == 0 ********
if(fabs(delta[j*vec_size+k])<1e-30)
continue;
double d = samples[x][k]-means[j*vec_size+k];
e += d*d/(delta[j*vec_size+k]);//delta 是方差,已经是平方
}
//double denom = pow2pi*sqrt(det);
p_x_j[x][j] = - log_var_val - log_det - e;//p_x_j 是log域的
//p_x_j[x][j] /= denom;
// ASSERT(p_x_j[x][j]>=0.0);
}
}
for(x=0;x<num_samples;++x){
double p_x = 0.0;
for(j=0;j<num_cluster;++j){
//p_j[j]=1.0/num_cluster;//************样本局限,p_j 设为相同*************
p_x += p_j[j]*exp(p_x_j[x][j]);
}
p_x = log(p_x);
cur_log_likelihood += p_x;
for(j=0;j<num_cluster;++j){
p_j_x[j][x] = p_j[j]*exp(p_x_j[x][j]) / exp(p_x);
// ASSERT(p_j_x[j][x]>=0.0);
}
}
//M step
for(j=0;j<num_cluster;++j){
double sum_pjx = 0.0;
double* sum_pjxx = new double[vec_size];
memset(sum_pjxx,0,vec_size*sizeof(double));
for(x=0;x<num_samples;++x){
sum_pjx += p_j_x[j][x];
for(k=0;k<vec_size;++k){
sum_pjxx[k] += p_j_x[j][x]*samples[x][k];
}
}
// ASSERT(sum_pjx>0.0);
p_j[j] = sum_pjx/num_samples;//各类的概率
//update means vector
for(k=0;k<vec_size;++k){
means[j*vec_size+k] = sum_pjxx[k]/sum_pjx;
// ASSERT(means[j*vec_size+k]>=0.0&&means[j*vec_size+k]<100.0);
}
//update delta vector
for(k=0;k<vec_size;++k){
double sum = 0.0;
for(x=0;x<num_samples;++x){
double d = samples[x][k]-means[j*vec_size+k];
sum += p_j_x[j][x]*d*d;
}
delta[j*vec_size+k] = sum / sum_pjx;
// if(delta[j*vec_size+k]==0) delta[j*vec_size+k] = 1e-18;
}
delete[] sum_pjxx;
}
}//for(EMiteration
delete[] p_j;
//update cluster_index according as p_j_x
for(x=0;x<num_samples;++x){
int maxCluster = 0;
double maxProb = p_j_x[0][x];
for(j=1;j<num_cluster;++j){
if(maxProb<p_j_x[j][x]){
maxCluster = j;
maxProb = p_j_x[j][x];
}
}
cluster_index[x] = maxCluster;
}
for(x=0;x<num_samples;++x){
delete[] p_x_j[x];
}
delete[] p_x_j;//P(sample_x|Cluster_j)
for(j=0;j<num_cluster;++j){
delete []p_j_x[j];
}
delete[]p_j_x;//P(Cluster_j|sample_x)
}
/*使用例子
CvStatus icvMixSegmProb(CvImgObsInfo* obs_info, int num_img, CvEHMM* hmm )
{
int k, i, j, m;
CvEHMMState* state = hmm->ehmm[0].state_info;
for (k = 0; k < num_img; k++)
{
int counter = 0;
CvImgObsInfo* info = obs_info + k;
for (i = 0; i < info->obs_y; i++)
{
for (j = 0; j < info->obs_x; j++, counter++)
{
int e_state = info->in_state[counter];
float max_prob;
max_prob = icvComputeUniModeGauss( info->obs[counter], state[e_state].mu[0],
state[e_state].inv_var[0],
state[e_state].log_var[0],
info->obs_size );
info->mix[counter] = 0;
for (m = 1; m < state[e_state].num_mix; m++)
{
float prob=icvComputeUniModeGauss(info->obs[counter], state[e_state].mu[m],
state[e_state].inv_var[m],
state[e_state].log_var[m],
info->obs_size);
if (prob > max_prob)
{
max_prob = prob;
// assign mixture with greatest probability.
info->mix[counter] = m;
}
}
}
}
}
return CV_NO_ERR;
}
*/
///////////////////////////////////////////////////////////////////////////////////
//作者:施智平
//直接计算Gauss公式
//实现EM算法,估计模型参数,最后更新聚类结果
//输入 向量数组samples
//输入 先前KMEAS聚类结果 num_cluster,cluster_index,num_samples_in_clusters
//输出 means,delta,更新cluster_index 作为EM聚类结果
//假定各维独立,不考虑协方差
//为防止下溢出,太小的值跳过不处理,合理吗?
///////////////////////////////////////////////////////////////////////////////////
void EM(const float **samples, int num_samples, int vec_size, \
int num_cluster, int *cluster_index,int* num_samples_in_clusters,\
float *means,float* delta,int maxIter)
{
int j,k,x;//j:cluster; x:sample; k: dimension
//E step
double** p_x_j = new double*[num_samples];//P(sample_x|Cluster_j)
for(x=0;x<num_samples;++x){
p_x_j[x] = new double[num_cluster];
}
double** p_j_x = new double*[num_cluster];//P(Cluster_j|sample_x)
for(j=0;j<num_cluster;++j){
p_j_x[j] = new double[num_samples];
}
double* p_j = new double[num_cluster];
for(j=0;j<num_cluster;++j){
p_j[j] = (double)num_samples_in_clusters[j]/num_samples;
}
int EMiteration;
double pow2pi = pow( 2*PI, (double)vec_size/2.0);
for(EMiteration=0;EMiteration<maxIter;++EMiteration){
for(j=0;j<num_cluster;++j){
long double det = 1.0;
for(k=0;k<vec_size;++k){
ASSERT(delta[j*vec_size+k]>=0.0);
// *******if delta == 0********
if(fabs(delta[j*vec_size+k])<=1e-20)
continue;
det *= sqrt(delta[j*vec_size+k]);//行列式
ASSERT(det>0.0);
}
for(x=0;x<num_samples;++x){
double e = 0.0;
for(k=0;k<vec_size;++k){
// ******* if means and delta == 0 ********
if(fabs(delta[j*vec_size+k])<1e-20)
continue;
double d = samples[x][k]-means[j*vec_size+k];
e += d*d/(delta[j*vec_size+k]);//delta 是方差,已经是平方
}
//double denom = pow2pi*sqrt(det);
p_x_j[x][j] = exp(e/-2)/(pow2pi*det);
//p_x_j[x][j] /= denom;
ASSERT(p_x_j[x][j]>=0.0);
}
}
for(x=0;x<num_samples;++x){
double p_x = 0.0;
for(j=0;j<num_cluster;++j){
//p_j[j]=1.0/num_cluster;//************样本局限,p_j 设为相同*************
p_x += p_j[j]*p_x_j[x][j];
}
for(j=0;j<num_cluster;++j){
p_j_x[j][x] = p_j[j]*p_x_j[x][j]/ p_x;
ASSERT(p_j_x[j][x]>=0.0);
}
}
//M step
for(j=0;j<num_cluster;++j){
double sum_pjx = 0.0;
double* sum_pjxx = new double[vec_size];
memset(sum_pjxx,0,vec_size*sizeof(double));
for(x=0;x<num_samples;++x){
sum_pjx += p_j_x[j][x];
for(k=0;k<vec_size;++k){
sum_pjxx[k] += p_j_x[j][x]*samples[x][k];
}
}
ASSERT(sum_pjx>0.0);
p_j[j] = sum_pjx/num_samples;
//update means vector
for(k=0;k<vec_size;++k){
means[j*vec_size+k] = sum_pjxx[k]/sum_pjx;
// ASSERT(means[j*vec_size+k]>=0.0&&means[j*vec_size+k]<100.0);
}
//update delta vector
for(k=0;k<vec_size;++k){
double sum = 0.0;
for(x=0;x<num_samples;++x){
double d = samples[x][k]-means[j*vec_size+k];
sum += p_j_x[j][x]*d*d;
}
delta[j*vec_size+k] = sum / sum_pjx;
// if(delta[j*vec_size+k]==0) delta[j*vec_size+k] = 1e-18;
}
delete[] sum_pjxx;
}
}//for(EMiteration
delete[] p_j;
//update cluster_index according as p_j_x
for(x=0;x<num_samples;++x){
int maxCluster = 0;
double maxProb = p_j_x[0][x];
for(j=1;j<num_cluster;++j){
if(maxProb<p_j_x[j][x]){
maxCluster = j;
maxProb = p_j_x[j][x];
}
}
cluster_index[x] = maxCluster;
}
for(x=0;x<num_samples;++x){
delete[] p_x_j[x];
}
delete[] p_x_j;//P(sample_x|Cluster_j)
for(j=0;j<num_cluster;++j){
delete []p_j_x[j];
}
delete[]p_j_x;//P(Cluster_j|sample_x)
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -