📄 cluster.cpp
字号:
// cluster.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include <db.h>
DB_ENV * DBEnv; //Berkeley DB环境变量指针
u_int32_t env_flags; //DBEnv open flags
DB * pdb_ptr; //pdb_ptr : Primary DB
DB * idb_ptr; //tdb_ptr : Secondary DB (Index: image_id)
DBC * pcur; //Primary DB Cursor
DBC * icur; //Secondary DB (Index: time_stamp) Cursor
typedef struct NearestRecord
{
int cluster_id;
float distance;
int vector[128];
} NRecord;
// Berkeley DB中Primary Database存储的数据结构
typedef struct TypeDBRecordStruct
{
unsigned int image_id;
unsigned int cluster_id;
unsigned char vector[128];
} DBRecord;
// Berkeley DB Primary-Secondary Mapping Callback Functions
int get_record_by_image_id(DB *sdbp, const DBT *pkey, const DBT *pdata, DBT *skey);
using namespace std;
void Process();
void GetCenter(int center[500][128],int stop[500],double sse[500]);
void ImproveCenter(double sse[500], int center[500][128],int stop[500]);
inline float Distance(int vector1[],int vector2[],int dimen);
inline float SDistance(int vector1[],int vector2[],int dimen);
inline NRecord Nearest(int center[500][128],int vector[],int dimen);
void ClusterInit();
int init();
int _tmain(int argc, _TCHAR* argv[])
{
//ClusterInit();
init();
Process();
return 0;
}
void Process()
{
int iterate,i,j,k,center[500][128],count[500],stop[500],vector[128],dimen=128;
int iid,cid;
NRecord *cluster_farest_point;
cluster_farest_point=new NRecord[500];
NRecord nearest_cluster;
double sum[500][128],sse[2][500];
//////////////////////////////////
DBT key, data;
int ret;
memset(&key, 0, sizeof(DBT));
memset(&data, 0, sizeof(DBT));
key.size = sizeof(int);
data.size = sizeof(DBRecord);
DBRecord rec;
//////////////////////////////////
fstream log;
int rec_no = 0;//////////////////////
key.data = &rec_no;/////////////////
for(iterate=0;iterate<10;iterate++)
{
log.open("iterate_log.txt",ios_base::out|ios_base::app);
//\***********\///PART2//////////////////////////////////////////start iterate
cout<<"start iterate "<<iterate<<"..."<<endl;
log<<"---------iterate "<<iterate<<"---------"<<endl;
GetCenter(center,stop,sse[0]);////////Get center
//for(i=0;i<500;i++)cout<<sse[0][i]<<" ";
//cout<<endl;
//for(i=0;i<500;i++)cout<<stop[i]<<" ";
//for(j=0;j<128;j++)cout<<center[0][j]<<" ";
//cout<<endl;
for(i=0;i<500;i++)
{
cluster_farest_point[i].cluster_id=i;
cluster_farest_point[i].distance=0;
}
for(i=0;i<500;i++)sse[1][i]=0;
for(i=0;i<500;i++)
{
count[i]=0;
for(j=0;j<128;j++)sum[i][j]=0;
}
int t=0;
//char x;
ret = pcur->get(pcur, &key, &data, DB_FIRST);//////////////////////////////////
int mark=0;
do
{
iid = ((DBRecord *)data.data)->image_id;
for(i=0;i<128;i++)vector[i]=((DBRecord *)data.data)->vector[i];
//cin>>x;///////////
nearest_cluster=Nearest(center,vector,dimen);
//cout<<"nearest_cluster "<<nearest_cluster<<endl;
((DBRecord *)data.data)->cluster_id=nearest_cluster.cluster_id;
pcur->put(pcur, &key, &data, DB_CURRENT);
k=nearest_cluster.cluster_id;
for(i=0;i<128;i++)sum[k][i]+=vector[i];//////////////////sum for center
if(cluster_farest_point[k].distance<nearest_cluster.distance)//store the farest point
{
cluster_farest_point[k].distance=nearest_cluster.distance;
for(i=0;i<128;i++)cluster_farest_point[k].vector[i]=nearest_cluster.vector[i];
}
count[k]++;
sse[1][k]+=SDistance(center[k],vector,dimen);//////////////////SSE here
ret = pcur->get(pcur, &key, &data, DB_NEXT);
t++;
if(t % 1000 == 0)cout<<t-1000<<"--"<<t<<endl;
}while(ret==0);
////////////////////////////////////////////////////////////////////////////////
fstream radius("cluster_radius.txt",ios_base::out);///////////////////////////output the farnest point of each cluster
radius<<"Cluster"<<'\t'<<"Max_Radius"<<'\t'<<"Position of the farnest point"<<endl;
for(i=0;i<500;i++)
{
radius<<cluster_farest_point[i].cluster_id<<'\t'<<cluster_farest_point[i].distance<<'\t'<<'\t';
for(j=0;j<128;j++)
{
if(cluster_farest_point[i].distance!=0)radius<<cluster_farest_point[i].vector[j]<<" ";
}
radius<<endl;
}
radius.close();
///\***********\///PART3//////////////////////////////////////////check state
for(i=0;i<500;i++)
{
///////////////////////////////////////////////////////////converge threshold
if(abs(sse[0][i]-sse[1][i])<1000)stop[i]=4;
else if(abs(sse[0][i]-sse[1][i])<10000)stop[i]=3;
else if(abs(sse[0][i]-sse[1][i])<100000)stop[i]=2;
else if(abs(sse[0][i]-sse[1][i])<1000000)stop[i]=1;
else stop[i]=0;
}
/////////////////////////////////////See if there are any empty clusters
cout<<"empty cluster check..."<<endl;
int max;
double temp;
for(i=0;i<500;i++)
{
if(count[i]==0 || cluster_farest_point[i].distance==0)////////
{
cout<<"find empty cluster "<<i<<" "<<'\t';
log<<"find empty cluster "<<i<<" "<<'\t';
temp=0;//////////////////find max sse
for(j=0;j<500;j++)
{
if(cluster_farest_point[j].cluster_id>=0 && sse[1][j]>temp)
{
temp=sse[1][j];
max=j;
}
}
cout<<"target cluster "<<max<<endl;
log<<"target cluster "<<max<<endl;
for(j=0;j<128;j++)center[i][j]=cluster_farest_point[max].vector[j];
cluster_farest_point[max].cluster_id=-1;
sse[1][i]=sse[1][max]=ceil(sse[1][max]/2);
stop[i]=stop[max]=0;
}
}
for(i=0;i<500;i++){log<<"sse of cluster "<<i<<'\t'<<sse[1][i]<<endl;}
cout<<"empty cluste check end"<<endl;
/////////////////////////////////////////////////////check end
///\***********\///PART3//////////////////////////////////////////improve center
cout<<"start improve center and state..."<<endl;
for(i=0;i<500;i++)
{
for(j=0;j<128;j++)
{
if(count[i])center[i][j]=int(sum[i][j]/count[i]);
}
}
ImproveCenter(sse[1],center,stop);
/////////////////////////////////////////////////ALL End
cout<<"---------iterate end---------"<<endl;
log<<"---------iterate end---------"<<endl;
log.close();
}
pcur->close(pcur);
icur->close(icur);
idb_ptr->close(idb_ptr, 0);
pdb_ptr->close(pdb_ptr, 0);
delete cluster_farest_point;
}
void GetCenter(int center[500][128],int stop[500],double sse[500])
{
fstream fileopen("cluster.txt",ios_base::in);
int i,j,n;
for(i=0;i<500;i++)
{
fileopen>>n;
fileopen>>sse[n]>>stop[n];
for(j=0;j<128;j++)fileopen>>center[n][j];
}
}
void ImproveCenter(double sse[500],int center[500][128],int stop[500])
{
fstream fileopen("cluster.txt",ios_base::out);
int i,j;
for(i=0;i<500;i++)
{
fileopen<<i<<'\t'<<sse[i]<<'\t'<<stop[i]<<'\t';
for(j=0;j<128;j++)fileopen<<center[i][j]<<" ";
fileopen<<endl;
}
}
float Distance(int vector1[],int vector2[],int dimen)
{
int i;
float t;
double j=0;
for(i=0;i<dimen;i++)
{
t=float(vector1[i]-vector2[i]);
j+=pow(t, 2);
}
return (float)sqrt(j);
}
float SDistance(int vector1[],int vector2[],int dimen)
{
int i;
float t,j=0;
for(i=0;i<dimen;i++)
{
t=float(vector1[i]-vector2[i]);
j+=(float)pow(t, 2);
}
return j;
}
NRecord Nearest(int center[500][128],int vector[],int dimen)
{
NRecord nr;
int i,n;
float k,min=9999999;
for(i=0;i<500;i++)
{
k=Distance(center[i],vector,dimen);
if(k<min){min=k;n=i;}
}
nr.cluster_id=n;
nr.distance=min;
for(i=0;i<128;i++)nr.vector[i]=vector[i];
return nr;
}
void ClusterInit()
{
//PART1//////////////////////////////////////////////////////////////////get center from txt and put it in cluster file, initial cluster
fstream fileopen;
char *path1="center.txt";
char *path2="cluster.txt";
int i,j,zero=0,center[500][128];
fileopen.open(path1,ios_base::in);
if(fileopen.fail())
{
cout<<"cannot open "<<path1<<endl;
fileopen.clear();
}
for(i=0;i<500;i++)
{
for(j=0;j<128;j++)fileopen>>center[i][j];
}
fileopen.close();
fileopen.open(path2,ios_base::out);
if(fileopen.fail())
{
cout<<"cannot open "<<path2<<endl;
fileopen.clear();
}
for(i=0;i<500;i++)
{
fileopen<<i<<'\t'<<zero<<'\t'<<zero<<'\t';
for(j=0;j<128;j++)fileopen<<center[i][j]<<" ";
fileopen<<endl;
}
///////////////////////////////////////////////////////////////////////
}
int init()
{
/* 初始化DB */
/* Create an environment object and initialize it for error reporting */
int ret = db_env_create(&DBEnv, 0);
if(ret != 0)
{
fprintf(stderr, "Error creating env handle: %s\n", db_strerror(ret));
return -1;
}
/* Open the environment */
env_flags = DB_CREATE | DB_INIT_MPOOL;
ret = DBEnv->open(DBEnv, "f:\\BDB_data", env_flags, 0); /* File mode (default) */
if (ret != 0)
{
fprintf(stderr, "Environment open failed: %s", db_strerror(ret));
return -1;
}
/* 创建数据库 */
db_create(&pdb_ptr, DBEnv, 0);
db_create(&idb_ptr, DBEnv, 0);
pdb_ptr->set_re_len(pdb_ptr, sizeof(DBRecord));
idb_ptr->set_flags(idb_ptr, DB_DUP);
pdb_ptr->open(pdb_ptr, NULL, "PrimaryDB.db", NULL, DB_QUEUE, DB_CREATE, 0); //QUEUE TYPE
idb_ptr->open(idb_ptr, NULL, "image_id.db", NULL, DB_BTREE, DB_CREATE, 0); //BTREE TYPE
/* 设置Primary DB 与 Secondary DB 的关联 */
pdb_ptr->associate(pdb_ptr, NULL, idb_ptr, get_record_by_image_id, 0);
/* 打开游标 */
pdb_ptr->cursor(pdb_ptr, NULL, &pcur, 0);
idb_ptr->cursor(idb_ptr, NULL, &icur, 0);
return 0;
}
int get_record_by_image_id(DB *sdbp, const DBT *pkey, const DBT *pdata, DBT *skey)
{
DBRecord * record;
record = (DBRecord *)(pdata->data);
memset(skey, 0, sizeof(DBT));
skey->data = &(record->image_id);
skey->size = 4;
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -