⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster.cpp

📁 K-means clustering using Berkeley DB
💻 CPP
字号:
// cluster.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <db.h>

DB_ENV * DBEnv;				//Berkeley DB环境变量指针
u_int32_t env_flags;		//DBEnv open flags

DB * pdb_ptr;				//pdb_ptr : Primary DB
DB * idb_ptr;				//tdb_ptr : Secondary DB (Index: image_id)

DBC * pcur;					//Primary DB Cursor
DBC * icur;					//Secondary DB (Index: time_stamp) Cursor

typedef struct NearestRecord
{
	int cluster_id;
	float distance;
	int vector[128];
} NRecord;
// Berkeley DB中Primary Database存储的数据结构
typedef struct TypeDBRecordStruct
{
	unsigned int image_id;
	unsigned int cluster_id;
	unsigned char vector[128];
} DBRecord;
// Berkeley DB Primary-Secondary Mapping Callback Functions
int get_record_by_image_id(DB *sdbp, const DBT *pkey, const DBT *pdata, DBT *skey);

using namespace std;

void Process();
void GetCenter(int center[500][128],int stop[500],double sse[500]);
void ImproveCenter(double sse[500], int center[500][128],int stop[500]);
inline float Distance(int vector1[],int vector2[],int dimen);
inline float SDistance(int vector1[],int vector2[],int dimen);
inline NRecord Nearest(int center[500][128],int vector[],int dimen);
void ClusterInit();
int init();

int _tmain(int argc, _TCHAR* argv[])
{

	//ClusterInit();
	init();
	Process();

	return 0;
}

void Process()
{
	

	int iterate,i,j,k,center[500][128],count[500],stop[500],vector[128],dimen=128;
	int iid,cid;
	NRecord *cluster_farest_point;
	cluster_farest_point=new NRecord[500];
	NRecord nearest_cluster;
	double sum[500][128],sse[2][500];

	//////////////////////////////////
	DBT key, data;
	int ret;
	
	memset(&key, 0, sizeof(DBT));
    memset(&data, 0, sizeof(DBT));
    key.size = sizeof(int);
    data.size = sizeof(DBRecord);
	
	DBRecord rec;
	//////////////////////////////////

	fstream log;
	int rec_no = 0;//////////////////////
	key.data = &rec_no;/////////////////

	for(iterate=0;iterate<10;iterate++)
	{

		log.open("iterate_log.txt",ios_base::out|ios_base::app);
		//\***********\///PART2//////////////////////////////////////////start iterate
		cout<<"start iterate "<<iterate<<"..."<<endl;
		log<<"---------iterate "<<iterate<<"---------"<<endl;

		GetCenter(center,stop,sse[0]);////////Get center
		//for(i=0;i<500;i++)cout<<sse[0][i]<<" ";
		//cout<<endl;
		//for(i=0;i<500;i++)cout<<stop[i]<<" ";
		//for(j=0;j<128;j++)cout<<center[0][j]<<" ";
		//cout<<endl;

		for(i=0;i<500;i++)
		{
			cluster_farest_point[i].cluster_id=i;
			cluster_farest_point[i].distance=0;
		}
		for(i=0;i<500;i++)sse[1][i]=0;
		for(i=0;i<500;i++)
		{
			count[i]=0;
			for(j=0;j<128;j++)sum[i][j]=0;
		}

		int t=0;
		//char x;
		
		ret = pcur->get(pcur, &key, &data, DB_FIRST);//////////////////////////////////
		int mark=0;
		do
		{  
			iid = ((DBRecord *)data.data)->image_id;
			for(i=0;i<128;i++)vector[i]=((DBRecord *)data.data)->vector[i];
			//cin>>x;///////////
			nearest_cluster=Nearest(center,vector,dimen);
			//cout<<"nearest_cluster "<<nearest_cluster<<endl;
			((DBRecord *)data.data)->cluster_id=nearest_cluster.cluster_id;
			pcur->put(pcur, &key, &data, DB_CURRENT);
			k=nearest_cluster.cluster_id;
			for(i=0;i<128;i++)sum[k][i]+=vector[i];//////////////////sum for center
			if(cluster_farest_point[k].distance<nearest_cluster.distance)//store the farest point
			{
				cluster_farest_point[k].distance=nearest_cluster.distance;
				for(i=0;i<128;i++)cluster_farest_point[k].vector[i]=nearest_cluster.vector[i];
			}
			count[k]++;
			sse[1][k]+=SDistance(center[k],vector,dimen);//////////////////SSE here
			ret = pcur->get(pcur, &key, &data, DB_NEXT);
			t++;
			if(t % 1000 == 0)cout<<t-1000<<"--"<<t<<endl;

		}while(ret==0);
		

		////////////////////////////////////////////////////////////////////////////////
		fstream radius("cluster_radius.txt",ios_base::out);///////////////////////////output the farnest point of each cluster
		radius<<"Cluster"<<'\t'<<"Max_Radius"<<'\t'<<"Position of the farnest point"<<endl;
		for(i=0;i<500;i++)
		{
			radius<<cluster_farest_point[i].cluster_id<<'\t'<<cluster_farest_point[i].distance<<'\t'<<'\t';
			for(j=0;j<128;j++)
			{ 
				if(cluster_farest_point[i].distance!=0)radius<<cluster_farest_point[i].vector[j]<<" ";
			}
			radius<<endl;
		}
		radius.close();
		///\***********\///PART3//////////////////////////////////////////check state
		for(i=0;i<500;i++)
		{	
			///////////////////////////////////////////////////////////converge threshold
			if(abs(sse[0][i]-sse[1][i])<1000)stop[i]=4;
			else if(abs(sse[0][i]-sse[1][i])<10000)stop[i]=3;
			else if(abs(sse[0][i]-sse[1][i])<100000)stop[i]=2;
			else if(abs(sse[0][i]-sse[1][i])<1000000)stop[i]=1;
			else stop[i]=0;
		}

		/////////////////////////////////////See if there are any empty clusters
		cout<<"empty cluster check..."<<endl;
		int max;
		double temp;
		for(i=0;i<500;i++)
		{
			if(count[i]==0 || cluster_farest_point[i].distance==0)////////
			{
				
				cout<<"find empty cluster "<<i<<"   "<<'\t';
				log<<"find empty cluster "<<i<<"   "<<'\t';
			
				temp=0;//////////////////find max sse
				for(j=0;j<500;j++)
				{
					if(cluster_farest_point[j].cluster_id>=0 && sse[1][j]>temp)
					{
						temp=sse[1][j];
						max=j;
					}
				}
				cout<<"target cluster "<<max<<endl;
				log<<"target cluster "<<max<<endl;

				for(j=0;j<128;j++)center[i][j]=cluster_farest_point[max].vector[j];
				cluster_farest_point[max].cluster_id=-1;
				sse[1][i]=sse[1][max]=ceil(sse[1][max]/2);
				stop[i]=stop[max]=0;
				
			}
		}

		for(i=0;i<500;i++){log<<"sse of cluster "<<i<<'\t'<<sse[1][i]<<endl;}
		cout<<"empty cluste check end"<<endl;
		/////////////////////////////////////////////////////check end

		
		///\***********\///PART3//////////////////////////////////////////improve center
		cout<<"start improve center and state..."<<endl;
		for(i=0;i<500;i++)
		{
			for(j=0;j<128;j++)
			{
				if(count[i])center[i][j]=int(sum[i][j]/count[i]);
			}
		}
		ImproveCenter(sse[1],center,stop);

		/////////////////////////////////////////////////ALL End
		cout<<"---------iterate end---------"<<endl;
		log<<"---------iterate end---------"<<endl;
		log.close();

	}
	pcur->close(pcur);
	icur->close(icur);
	idb_ptr->close(idb_ptr, 0);
	pdb_ptr->close(pdb_ptr, 0);

	delete cluster_farest_point;

}

void GetCenter(int center[500][128],int stop[500],double sse[500])
{
	fstream fileopen("cluster.txt",ios_base::in);
	int i,j,n;
	for(i=0;i<500;i++)
	{
		fileopen>>n;
		fileopen>>sse[n]>>stop[n];
		for(j=0;j<128;j++)fileopen>>center[n][j];
	}

}
void ImproveCenter(double sse[500],int center[500][128],int stop[500])
{
	fstream fileopen("cluster.txt",ios_base::out);
	int i,j;
	for(i=0;i<500;i++)
	{
		fileopen<<i<<'\t'<<sse[i]<<'\t'<<stop[i]<<'\t';
		for(j=0;j<128;j++)fileopen<<center[i][j]<<" ";
		fileopen<<endl;
	}
}

float Distance(int vector1[],int vector2[],int dimen)
{
	int i;
	float t;
	double j=0;
	for(i=0;i<dimen;i++)
	{
		t=float(vector1[i]-vector2[i]);
		j+=pow(t, 2);
	}
	return (float)sqrt(j);
}
float SDistance(int vector1[],int vector2[],int dimen)
{
	int i;
	float t,j=0;
	for(i=0;i<dimen;i++)
	{
		t=float(vector1[i]-vector2[i]);
		j+=(float)pow(t, 2);
	}
	return j;
}
NRecord Nearest(int center[500][128],int vector[],int dimen)
{
	NRecord nr;
	int i,n;
	float k,min=9999999;
	for(i=0;i<500;i++)
	{
		k=Distance(center[i],vector,dimen);
		if(k<min){min=k;n=i;}
	}
	nr.cluster_id=n;
	nr.distance=min;
	for(i=0;i<128;i++)nr.vector[i]=vector[i];
	return nr;
}

void ClusterInit()
{
	//PART1//////////////////////////////////////////////////////////////////get center from txt and put it in cluster file, initial cluster
	fstream fileopen;
	char *path1="center.txt";
	char *path2="cluster.txt";
	int i,j,zero=0,center[500][128];

	fileopen.open(path1,ios_base::in);
	if(fileopen.fail())
	{
		cout<<"cannot open "<<path1<<endl;
		fileopen.clear();
	}
	for(i=0;i<500;i++)
	{
		for(j=0;j<128;j++)fileopen>>center[i][j];
	}
	fileopen.close();

	fileopen.open(path2,ios_base::out);
	if(fileopen.fail())
	{
		cout<<"cannot open "<<path2<<endl;
		fileopen.clear();
	}
	for(i=0;i<500;i++)
	{
		fileopen<<i<<'\t'<<zero<<'\t'<<zero<<'\t';
		for(j=0;j<128;j++)fileopen<<center[i][j]<<" ";
		fileopen<<endl;
	}
	///////////////////////////////////////////////////////////////////////
}
int init()
{
		/* 初始化DB */
	/* Create an environment object and initialize it for error reporting */
	int ret = db_env_create(&DBEnv, 0);
	if(ret != 0)
	{
		fprintf(stderr, "Error creating env handle: %s\n", db_strerror(ret));
		return -1;
	}
	/* Open the environment */
	env_flags = DB_CREATE | DB_INIT_MPOOL;
	ret = DBEnv->open(DBEnv, "f:\\BDB_data", env_flags, 0); /* File mode (default) */
	if (ret != 0)
	{
		fprintf(stderr, "Environment open failed: %s", db_strerror(ret));
		return -1;
	}

	/* 创建数据库 */
	db_create(&pdb_ptr, DBEnv, 0);
	db_create(&idb_ptr, DBEnv, 0);
	
	pdb_ptr->set_re_len(pdb_ptr, sizeof(DBRecord));
	idb_ptr->set_flags(idb_ptr, DB_DUP);

	pdb_ptr->open(pdb_ptr, NULL, "PrimaryDB.db", NULL, DB_QUEUE, DB_CREATE, 0);	//QUEUE TYPE
	idb_ptr->open(idb_ptr, NULL, "image_id.db", NULL, DB_BTREE, DB_CREATE, 0);	//BTREE TYPE
	
	/* 设置Primary DB 与 Secondary DB 的关联 */
	pdb_ptr->associate(pdb_ptr, NULL, idb_ptr, get_record_by_image_id, 0);

	/* 打开游标 */
	pdb_ptr->cursor(pdb_ptr, NULL, &pcur, 0);
	idb_ptr->cursor(idb_ptr, NULL, &icur, 0);
	return 0;
}
int get_record_by_image_id(DB *sdbp, const DBT *pkey, const DBT *pdata, DBT *skey)
{
	DBRecord * record;
    record = (DBRecord *)(pdata->data);
    memset(skey, 0, sizeof(DBT));
	skey->data = &(record->image_id);
    skey->size = 4;
    return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -