📄 chaccluster.cpp
字号:
#pragma warning (disable: 4083 4786)
#include "CHACCluster.h"
typedef map<int, vector<int> > MAP_classmapsample;
CHACCluster::CHACCluster()
{
}
//将整个数组中的第x位置1
void CHACCluster::setbit(int x, vector<int>& a)
{
a[x/32] = a[x/32] | (1<<(32-x%32-1));
}
//求得整个数组中第x位是1,还是0
int CHACCluster::bit(int x, vector<int>& a)
{
int b;
b= (a[x/32] & (1<<(32-x%32-1))) && 1;
return b;
}
//将整个数组中的第x位清0
void CHACCluster::clrbit(int x, vector<int>& a )
{
a[x/32] = a[x/32] & ~(1<<(32-x%32-1));
}
void CHACCluster::ReadData(const vector<vector<int> >& maximaloccurrenceList,
const vector<int>& src_set_classno,
const int sample_num,
const int feature_num,
const int class_num)
{
data.src_set.clear();
data.src_set_classno.clear();
data.des_set_classno.clear();
data.sample_num=sample_num;
data.feature_num=feature_num;
data.class_num=class_num;
data.eachclass_num=new int[class_num];
int size=sample_num*feature_num;
size = size % 32 ? size / 32+1 : size/32;
data.src_set.resize(size,0); //bit representation
for(int i=0; i< feature_num ;i++)
{
for(int j=0; j<maximaloccurrenceList[i].size(); j++)
{
//to setbit()
int pos=maximaloccurrenceList[i][j]*feature_num+i;
setbit(pos,data.src_set);
}
}
data.src_set_classno=src_set_classno;
//for(i=0 ;i<sample_num;i++)
// cout<< data.src_set_classno[i] << endl;
simmartix = new double*[sample_num];
for(i=0; i<sample_num;i++)
simmartix[i] = new double[sample_num];
for(i=0; i<sample_num;i++)
{
samplemapclass.push_back(i);
}
nowNum=data.sample_num;
//for test
/*for(i =0; i<sample_num;i++)
cout<<samplemapclass[i]<<" ";
cout<<endl;
for(i=0; i<sample_num; i++){
for(int j=0; j<classmapsample[i].size();j++)
cout<<classmapsample[i][j]<<" ";
cout<<endl;
}*/
}
double CHACCluster::compute_sim(int sampleone, int sampletwo)
{
//PBClustering采用Euclidian距离计算相似度
/*int sampleonepos=sampleone*data.feature_num;
int sampletwopos=sampletwo*data.feature_num;
int simnum=0;
for(int i=0; i<data.feature_num; i++)
simnum += pow(bit(sampleonepos+i,data.src_set)-bit(sampletwopos+i,data.src_set), 2.0 );
if(simnum == 0) return 1;
else return 1.0/simnum; */
int simnum=0,one1=0,two1=0;
int sampleonepos=sampleone*data.feature_num;
int sampletwopos=sampletwo*data.feature_num;
for(int i=0; i<data.feature_num; i++)
{
if( bit(sampleonepos+i,data.src_set) & bit(sampletwopos+i,data.src_set)) simnum++;
if(bit(sampleonepos+i,data.src_set)) one1++;
if(bit(sampletwopos+i,data.src_set)) two1++;
}
one1 = one1>two1? one1:two1;
double sim=1.0*simnum/one1;
return sim;
}
void CHACCluster::compute_simailarity()
{
int i,j;
for(i=0; i< data.sample_num; i++)
for(j=0 ;j<data.sample_num; j++)
simmartix[i][j]=0;
for(i=0; i< data.sample_num;i++)
for(j=0; j<data.sample_num;j++)
{
if(i!=j)
simmartix[i][j]=compute_sim(i,j);
}
//for test
for(i=0;i<data.sample_num;i++)
{
for(j=0;j<data.sample_num;j++)
cout<<simmartix[i][j]<<" ";
cout<<endl;
}
}
void CHACCluster::GetCloseted(int &clu1, int &clu2)
{
int i,j;
clu1 = 0;
clu2 = 1;
double maxvalue = simmartix[0][1];
for(i=0; i<nowNum; i++)
for(j=0; j<nowNum; j++)
if(i!=j)
{
if(simmartix[i][j]>maxvalue)
{
maxvalue = simmartix[i][j];
clu1 = i;
clu2 = j;
}
}
}
void CHACCluster::AdaptSimirityMatrix(double **p, int clu1, int clu2)
{
int smallClu,bigClu;
vector<int> v1,v2;
int i,j,k,t;
double simSum;
if(clu1 <clu2)
{
smallClu = clu1;
bigClu = clu2;
}
else
{
smallClu = clu2;
bigClu = clu1;
}
for(i=0; i<nowNum; i++)
if(i!=smallClu && i!= bigClu)
{
if(!v1.empty())
v1.clear();
if(!v2.empty())
v2.clear();
for(j=0; j<data.sample_num; j++)
{
if(samplemapclass[j] == i)
v1.push_back(j);
else if(samplemapclass[j] == smallClu || samplemapclass[j] == bigClu)
v2.push_back(j);
}
simSum = 0;
for(k=0; k<v1.size(); k++)
for(t=0; t<v2.size(); t++)
simSum += compute_sim(k,t);
simSum = simSum /(v1.size()*v2.size());
p[i][smallClu] = simSum;
p[smallClu][i] = simSum;
}
for(j=bigClu+1; j<nowNum; j++)
for(i=0; i<nowNum; i++)
p[i][j-1] = p[i][j];
for(i=bigClu+1; i<nowNum; i++)
for(j=0; j<nowNum; j++)
p[i-1][j] = p[i][j];
}
void CHACCluster::AdaptType(int clu1, int clu2)
{
int i;
int smallClu,bigClu;
int nowType;
if(clu1 <clu2)
{
smallClu = clu1;
bigClu = clu2;
}
else
{
smallClu = clu2;
bigClu = clu1;
}
for(i=0; i<data.sample_num; i++)
{
nowType = samplemapclass[i];
if(nowType == clu1 || nowType == clu2)
samplemapclass[i] = smallClu;
else if(nowType > bigClu)
samplemapclass[i] = nowType-1;
}
}
void CHACCluster::HACCluster()
{
clock_t start_time;
clock_t stop_time;
start_time = clock();
//1. compute the simlaerity of all class
compute_simailarity();
//
while(1)
{
int clu1,clu2;
GetCloseted(clu1,clu2);
AdaptSimirityMatrix(simmartix,clu1,clu2);
for(int i=0;i<nowNum;i++)
{
for(int j=0;j<nowNum;j++)
cout<<simmartix[i][j]<<" ";
cout<<endl;
}
AdaptType(clu1,clu2);
nowNum--;
if(nowNum == 42)
nowNum = nowNum;
if(data.class_num == nowNum )
break;
}
stop_time=clock();
cluster_time = (stop_time-start_time)/(double)CLOCKS_PER_SEC;
cout<<"done!"<<endl;
cout<<"time:"<<cluster_time<<endl;
cout<<"参数("<<"类别数:"<<data.class_num<<")"<<endl;
//填入结果集
cout<<endl;
cout<<endl;
short classnum=0;
classmapsample.clear();
MAP_classmapsample::iterator cms;
for(int i=0;i<data.sample_num;i++)
{
int classid=samplemapclass[i];
cms=classmapsample.find(classid);
if(cms!=classmapsample.end())
{
cms->second.push_back(i);
}
else
{
vector<int> aa;
aa.push_back(i);
classmapsample.insert(MAP_classmapsample::value_type(classid,aa));
}
}
for(cms=classmapsample.begin();cms!=classmapsample.end();cms++)
data.eachclass_num[classnum] = cms->second.size();
data.des_set_classno = samplemapclass;
}
void CHACCluster::GetResult(string outputFile,double feature_time)
{
ofstream outFile(outputFile.c_str());
if(!outFile) {
cerr << "cannot open OUTPUT file!" << endl;
exit(1);
}
outFile<<"feature time:"<<feature_time<<endl;
outFile<<"cluster time:"<<cluster_time<<endl;
outFile<<"参数("<<"类别数:"<<data.class_num<<")"<<endl;
outFile<<endl;
outFile<<endl;
outFile<<"样本"<<" "<<"聚类结果"<<" "<<"真实分类"<<endl;
for(int i=0;i<data.sample_num;i++)
outFile<<i<<"\t"<<data.des_set_classno[i]<<"\t"<<data.src_set_classno[i]<<endl;
outFile<<"Total time: "<<feature_time+cluster_time<<endl;
outFile.close();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -