⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chaccluster.cpp

📁 包含最大频繁序列的挖掘; 包含层次聚类算法
💻 CPP
字号:
#pragma   warning   (disable:   4083   4786)
#include "CHACCluster.h"


typedef map<int, vector<int> >  MAP_classmapsample;
CHACCluster::CHACCluster()
{
	 
}

//将整个数组中的第x位置1
void CHACCluster::setbit(int x, vector<int>& a)
{
	a[x/32] = a[x/32] | (1<<(32-x%32-1));
}
//求得整个数组中第x位是1,还是0
int CHACCluster::bit(int x, vector<int>& a)
{
	int b;
	b= (a[x/32] & (1<<(32-x%32-1))) && 1;
	return b;
}

//将整个数组中的第x位清0
void CHACCluster::clrbit(int x, vector<int>& a )
{
    a[x/32] = a[x/32] & ~(1<<(32-x%32-1));
}

void CHACCluster::ReadData(const vector<vector<int>  >& maximaloccurrenceList,
		 const vector<int>& src_set_classno,
		 const int sample_num,
		 const int feature_num,
		 const int class_num)
{
	data.src_set.clear();
	data.src_set_classno.clear();
	data.des_set_classno.clear();
	data.sample_num=sample_num;
	data.feature_num=feature_num;
	data.class_num=class_num;
	data.eachclass_num=new int[class_num];
	
	int size=sample_num*feature_num;
    size = size % 32 ? size / 32+1 : size/32;
	data.src_set.resize(size,0); //bit representation
    for(int i=0; i< feature_num ;i++)
	{
		for(int j=0; j<maximaloccurrenceList[i].size(); j++)
		{
			//to setbit()
			int pos=maximaloccurrenceList[i][j]*feature_num+i;
			setbit(pos,data.src_set);
		}
	} 
	data.src_set_classno=src_set_classno;
	//for(i=0 ;i<sample_num;i++)
	//	cout<< data.src_set_classno[i] << endl;
	simmartix = new double*[sample_num];
	for(i=0; i<sample_num;i++)
		simmartix[i] = new double[sample_num];
	for(i=0; i<sample_num;i++)
	{
		samplemapclass.push_back(i);
	}
	nowNum=data.sample_num;
	//for test
	/*for(i =0; i<sample_num;i++)
		cout<<samplemapclass[i]<<" ";
	cout<<endl;
	for(i=0; i<sample_num; i++){
		for(int j=0; j<classmapsample[i].size();j++)
			cout<<classmapsample[i][j]<<" ";
		cout<<endl;
	}*/
}
double CHACCluster::compute_sim(int sampleone, int sampletwo)
{
    //PBClustering采用Euclidian距离计算相似度
    /*int sampleonepos=sampleone*data.feature_num;
	int sampletwopos=sampletwo*data.feature_num;
	int simnum=0;
    for(int i=0; i<data.feature_num; i++)
		simnum += pow(bit(sampleonepos+i,data.src_set)-bit(sampletwopos+i,data.src_set), 2.0 );
	if(simnum == 0) return 1;
	else return 1.0/simnum;	*/
	int simnum=0,one1=0,two1=0;
	int sampleonepos=sampleone*data.feature_num;
	int sampletwopos=sampletwo*data.feature_num;
    for(int i=0; i<data.feature_num; i++)
	{
		if( bit(sampleonepos+i,data.src_set) & bit(sampletwopos+i,data.src_set)) simnum++;
		if(bit(sampleonepos+i,data.src_set)) one1++;
		if(bit(sampletwopos+i,data.src_set)) two1++;
	}
	one1 = one1>two1? one1:two1;
    double sim=1.0*simnum/one1;
	return sim;
}

void CHACCluster::compute_simailarity()
{
	int i,j;
	for(i=0; i< data.sample_num; i++)
		for(j=0 ;j<data.sample_num; j++)
	      simmartix[i][j]=0;

	for(i=0; i< data.sample_num;i++)
		for(j=0; j<data.sample_num;j++)
		{
			if(i!=j)
			   simmartix[i][j]=compute_sim(i,j);
		}
   //for test
		for(i=0;i<data.sample_num;i++)
		{
			for(j=0;j<data.sample_num;j++)
				cout<<simmartix[i][j]<<" ";
			cout<<endl;
		}
}

void CHACCluster::GetCloseted(int &clu1, int &clu2)
{
	int i,j;
	clu1 = 0;
	clu2 = 1;
	double maxvalue = simmartix[0][1];
	for(i=0; i<nowNum; i++)
		for(j=0; j<nowNum; j++)
			if(i!=j)
			{
				if(simmartix[i][j]>maxvalue)
				{
					maxvalue = simmartix[i][j];
					clu1 = i;
					clu2 = j;
				}
			}			
}

void CHACCluster::AdaptSimirityMatrix(double **p, int clu1, int clu2)
{
	int smallClu,bigClu;
	vector<int> v1,v2;
	int i,j,k,t;
	double simSum;
	if(clu1 <clu2)
	{
		smallClu = clu1;
		bigClu = clu2;
	}
	else
	{
		smallClu = clu2;
		bigClu = clu1;
	}
	
	for(i=0; i<nowNum; i++)
		if(i!=smallClu && i!= bigClu)
		{
			if(!v1.empty())
				v1.clear();
			if(!v2.empty())
				v2.clear();
			for(j=0; j<data.sample_num; j++)
			{
				if(samplemapclass[j] == i)
					v1.push_back(j);
				else if(samplemapclass[j] == smallClu || samplemapclass[j] == bigClu)
					v2.push_back(j);
			}
			
			simSum = 0;
			for(k=0; k<v1.size(); k++)
				for(t=0; t<v2.size(); t++)
					simSum += compute_sim(k,t);
				simSum = simSum /(v1.size()*v2.size());
				p[i][smallClu] = simSum;
				p[smallClu][i] = simSum;
				
		}
		
		for(j=bigClu+1; j<nowNum; j++)
			for(i=0; i<nowNum; i++)
				p[i][j-1] = p[i][j];
			for(i=bigClu+1; i<nowNum; i++)
				for(j=0; j<nowNum; j++)
					p[i-1][j] = p[i][j];
}

void CHACCluster::AdaptType(int clu1, int clu2)
{
	int i;
	int smallClu,bigClu;
	int nowType;
	if(clu1 <clu2)
	{
		smallClu = clu1;
		bigClu = clu2;
	}
	else
	{
		smallClu = clu2;
		bigClu = clu1;
	}
	for(i=0; i<data.sample_num; i++)
	{
		nowType = samplemapclass[i];
		if(nowType == clu1 || nowType == clu2)
			samplemapclass[i] = smallClu;
		else if(nowType > bigClu)
			samplemapclass[i] = nowType-1;
	}	
}

void CHACCluster::HACCluster()
{
	clock_t start_time;
	clock_t stop_time;
	start_time = clock();
	//1. compute the simlaerity of all class
	compute_simailarity();
	

    //
	
	while(1)
	{
		int clu1,clu2;
		GetCloseted(clu1,clu2);
		AdaptSimirityMatrix(simmartix,clu1,clu2);
		for(int i=0;i<nowNum;i++)
		{
			for(int j=0;j<nowNum;j++)
				cout<<simmartix[i][j]<<" ";
			cout<<endl;
		}
		AdaptType(clu1,clu2);
		nowNum--;
		if(nowNum == 42)
			nowNum = nowNum;
		if(data.class_num == nowNum )
			break;
	}

	stop_time=clock();
	cluster_time = (stop_time-start_time)/(double)CLOCKS_PER_SEC;
	cout<<"done!"<<endl;
	cout<<"time:"<<cluster_time<<endl;
	cout<<"参数("<<"类别数:"<<data.class_num<<")"<<endl;
	
	//填入结果集
	cout<<endl;
	cout<<endl;
	short classnum=0;
	classmapsample.clear();
	MAP_classmapsample::iterator cms;
    for(int i=0;i<data.sample_num;i++)
	{
		int classid=samplemapclass[i];
		cms=classmapsample.find(classid);
		if(cms!=classmapsample.end())
		{
			cms->second.push_back(i);
		}
		else
		{
			vector<int> aa;
			aa.push_back(i);
			classmapsample.insert(MAP_classmapsample::value_type(classid,aa));
		}			
	}
    for(cms=classmapsample.begin();cms!=classmapsample.end();cms++)
		data.eachclass_num[classnum] = cms->second.size();
	data.des_set_classno = samplemapclass;
}

void CHACCluster::GetResult(string outputFile,double feature_time)
{
	ofstream outFile(outputFile.c_str());
	if(!outFile) {
		cerr << "cannot open OUTPUT file!" << endl;
		exit(1);
	}
	outFile<<"feature time:"<<feature_time<<endl;
	outFile<<"cluster time:"<<cluster_time<<endl;
	outFile<<"参数("<<"类别数:"<<data.class_num<<")"<<endl;
	outFile<<endl;
	outFile<<endl;
	outFile<<"样本"<<"  "<<"聚类结果"<<" "<<"真实分类"<<endl;
	for(int i=0;i<data.sample_num;i++)
		outFile<<i<<"\t"<<data.des_set_classno[i]<<"\t"<<data.src_set_classno[i]<<endl;
    outFile<<"Total time: "<<feature_time+cluster_time<<endl;
	outFile.close();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -