⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 offlineclustering.cpp

📁 对数据流,按指数递增方式,形成汇总信息,在离线层,根据用户需求,进行聚类
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <math.h>
#include <memory.h>
#include <string.h>
#include <time.h>
#include <sys/timeb.h>

//-----------------------------------------------
// Constant definitions
//

//
// Error codes.
// In the visible code, ReadOnePieceFromMultiDataFlow returns ERR_FAILURE when
// a data-flow file cannot be opened and ERR_LACKPIECE when a full piece
// cannot be read.
//
#define ERR_SUCCESS    0
#define ERR_FAILURE    -1
#define ERR_LACKPIECE  -2

//
// Data-flow file parameters.
// A data-flow file is named
//   DATAFLOW_FILENAME_COMMON + flow index + DATAFLOW_FILENAME_EXT
// and flow indices start at 0.
//
#define DATAFLOW_NUM 100
#define DATAFLOW_FILENAME_COMMON "dataflow"
#define DATAFLOW_FILENAME_EXT    ".dat"

//
// Data-flow piece parameters.
// PIECE_DIM is the number of double samples that make up one piece of one flow.
//
#define PIECE_DIM 1024

//
// Offline storage parameters.
// M+1 offline blocks are kept per data flow (see OFFLINEDATA), and
// L/2*PIECE_DIM is the size of the time-weight table (see TABLE_INDEX_TIMEVALUE).
//
#define L 1024
#define M 50


//
// Attenuation (decay) coefficient
//
#define ATTENUATION 0.99

//
// Initial number of clusters, and the upper bound on the number of clusters
//
#define INIT_CLUSTER_NUM 3
#define MAX_CLUSTER_NUM  DATAFLOW_NUM

//-----------------------------------------------
// Data structure definitions
//

// Summary statistics of one data flow over a piece (PIECE_COEFFICIENT) or
// over an offline block / segment (BLOCK_COEFFICIENT); both names refer to
// the same struct.
typedef struct _PIECE_COEFFICIENT
{
	double sumxi;                  // time-weighted sum of the samples (as produced by SigmaX)
	double sumxi2;                 // time-weighted sum of squared samples (as produced by SigmaX2)
	double sumxyi[DATAFLOW_NUM];   // sumxyi[j]: time-weighted sum of products with data flow j (as produced by SigmaXY)
}PIECE_COEFFICIENT,BLOCK_COEFFICIENT;

// One cluster of data flows.
typedef struct _CLUSTER
{
	int center;                    // cluster center; presumably the index of the data flow acting as center - TODO confirm
	int    member[DATAFLOW_NUM];   // indices of the data flows assigned to this cluster (first `num` entries are valid)
	double coeffi[DATAFLOW_NUM];   // coeffi[k]: coefficient recorded for member[k] when it was assigned
	int    num;                    // number of valid entries in member[] / coeffi[]
}CLUSTER;

// One slot of the offline store for a single data flow.
typedef struct _OFFLINEBLOCK
{
	BLOCK_COEFFICIENT blockEffi;   // summary statistics stored in this slot
	int infoNum;  // amount of information actually contained in this block
	bool isUsed;                   // false means the slot is free (see FindInfoSavePos)
}OFFLINEBLOCK;

//-----------------------------------------------
// Global variable definitions
//

// One input stream per data flow; opened on first use by
// ReadOnePieceFromMultiDataFlow and kept open between calls.
FILE *g_ArrPFile[DATAFLOW_NUM]={NULL};
// Current piece of every data flow: [flow index][sample index].
double g_ArrDataFlowAndPieceData[DATAFLOW_NUM][PIECE_DIM];
// Summary statistics of the current piece, one entry per data flow.
PIECE_COEFFICIENT g_ArrDataFlowAndPieceCoeffi[DATAFLOW_NUM];

double TABLE_INDEX_TIMEVALUE[L/2*PIECE_DIM]; // time-weight lookup table

OFFLINEBLOCK OFFLINEDATA[DATAFLOW_NUM][M+1];// offline data store: M+1 blocks per data flow

CLUSTER g_Cluster[MAX_CLUSTER_NUM];
int g_iClusterNum=0;  // working variable: current number of clusters
double g_ssq;

// Per-flow summary statistics read by CalCoefficient.
BLOCK_COEFFICIENT g_ArrDataFlowAndSegmentCoeffi[DATAFLOW_NUM];

FILE *g_POutFile=NULL; // output file for results



//-----------------------------------------------
// Function definitions
//

//
// Returns the time-weighted sum of the first `len` elements of `buf`.
// Element k is multiplied by TABLE_INDEX_TIMEVALUE[len-1-k], so the last
// element of the buffer is paired with table entry 0.
//
double SigmaX(double buf[],int len)
{
	double total=0;
	int weightIdx=len-1;   // walks the weight table backwards while k walks buf forwards
	for(int k=0;k<len;++k,--weightIdx)
	{
		total+=buf[k]*TABLE_INDEX_TIMEVALUE[weightIdx];
	}
	return total;
}

//
// Returns the sum of squares of the time-weighted elements of `buf`:
// each of the first `len` elements is scaled by TABLE_INDEX_TIMEVALUE[len-1-k]
// and the squared results are accumulated.
//
double SigmaX2(double buf[],int len)
{
	double total=0;
	int weightIdx=len-1;
	for(int k=0;k<len;++k,--weightIdx)
	{
		const double value=buf[k];
		const double weight=TABLE_INDEX_TIMEVALUE[weightIdx];
		// Multiplication order is kept as value*weight*value*weight on purpose
		// so the floating-point result is unchanged.
		total+=value*weight*value*weight;
	}
	return total;
}

//
// Returns the sum of element-wise products of two time-weighted vectors.
// Both bufx[k] and bufy[k] are scaled by TABLE_INDEX_TIMEVALUE[len-1-k]
// before being multiplied; the first `len` elements are used.
//
double SigmaXY(double bufx[],double bufy[],int len)
{
	double total=0;
	int weightIdx=len-1;
	for(int k=0;k<len;++k,--weightIdx)
	{
		const double weight=TABLE_INDEX_TIMEVALUE[weightIdx];
		// Same left-to-right multiplication order as x*w*y*w.
		total+=bufx[k]*weight*bufy[k]*weight;
	}
	return total;
}

//
// Writes the current local time, with millisecond resolution, to g_POutFile
// as "Now Time:YYYY-MM-DD HH:MM:SS:<ms>" followed by an empty line.
// Nothing is written if g_POutFile is NULL or the time cannot be formatted.
//
void RecNowTime()
{
	if(g_POutFile==NULL)
		return;

	// Take ONE clock reading and derive both the seconds and the millisecond
	// part from it; reading time() and _ftime() separately can straddle a
	// second boundary and produce a timestamp that is off by up to a second.
	struct _timeb tstruct;
	_ftime(&tstruct);

	struct tm *ltime=localtime(&tstruct.time);
	if(ltime==NULL)
		return;

	char rectime[255];
	if(strftime(rectime,sizeof rectime,"%Y-%m-%d %H:%M:%S",ltime)==0)
		return;

	// Milliseconds are appended after a ':' and are not zero-padded.
	fprintf(g_POutFile,"Now Time:%s:%u\n\n",rectime,(unsigned)tstruct.millitm);
}

//
// Reads one piece (PIECE_DIM doubles) from each of the DATAFLOW_NUM data-flow
// files into g_ArrDataFlowAndPieceData. Files are opened on first use and the
// handles are kept in g_ArrPFile, so successive calls read successive pieces.
//
// Returns:
//   ERR_SUCCESS   - a full piece was read from every data flow
//   ERR_FAILURE   - a data-flow file could not be opened
//   ERR_LACKPIECE - a data flow had fewer than PIECE_DIM values left
//
// The function returns at the first failing data flow; flows with a lower
// index have already consumed their piece at that point.
//
int ReadOnePieceFromMultiDataFlow()
{
	for(int flow=0;flow<DATAFLOW_NUM;flow++)
	{
		if(g_ArrPFile[flow]==NULL)
		{
			// The file name is only needed when the file still has to be opened.
			char filename[256];
			sprintf(filename,"%s%d%s",DATAFLOW_FILENAME_COMMON,flow,DATAFLOW_FILENAME_EXT);
			g_ArrPFile[flow]=fopen(filename,"rb");
			if(g_ArrPFile[flow]==NULL)
			{
				printf("error:Open file %s failed.",filename);
				return ERR_FAILURE;
			}
		}

		// Read the whole piece with one call instead of PIECE_DIM single-element
		// reads; on a short read the leading elements are still filled in.
		size_t got=fread(g_ArrDataFlowAndPieceData[flow],sizeof(double),(size_t)PIECE_DIM,g_ArrPFile[flow]);
		if(got<(size_t)PIECE_DIM)
			return ERR_LACKPIECE;
	}
	return ERR_SUCCESS;
}

//
//功能:更新各数据流当前片的信息
//
void UpdateArrDataFlowAndPieceCoeffi()
{
	int i,j;
	for(i=0;i<DATAFLOW_NUM;i++)
	{
		g_ArrDataFlowAndPieceCoeffi[i].sumxi=SigmaX(g_ArrDataFlowAndPieceData[i],PIECE_DIM);
		g_ArrDataFlowAndPieceCoeffi[i].sumxi2=SigmaX2(g_ArrDataFlowAndPieceData[i],PIECE_DIM);
		for(j=0;j<DATAFLOW_NUM;j++)
		{
			g_ArrDataFlowAndPieceCoeffi[i].sumxyi[j]=SigmaXY(g_ArrDataFlowAndPieceData[i],g_ArrDataFlowAndPieceData[j],PIECE_DIM);
		}
	}
}

//
// Finds the slot where new information can be stored.
// Scans the M+1 offline blocks of data flow 0 in index order and returns the
// index of the first block whose isUsed flag is false, or -1 if every block
// is in use.
//
int FindInfoSavePos()
{
	int slot=0;
	while(slot<=M)
	{
		if(!OFFLINEDATA[0][slot].isUsed)
			return slot;
		++slot;
	}
	return -1;
}

//
// Finds the position at which stored information should be merged; the two
// blocks to merge are the returned index and the returned index minus 1.
//
// Scans the M+1 offline blocks of data flow 0 in index order for the first
// run of two or more consecutive blocks with the same infoNum, and returns
// the index of the last block of that run. If the scan reaches the end of the
// array without a differing block ending such a run (including the case where
// no run exists at all), M is returned.
//
int FindInfoConvergePos()
{
	int samev=-1;      // infoNum of the run currently being followed
	bool flag=false;   // true once two consecutive equal infoNum values were seen
	int i;             // declared outside the for statement: it is read after the loop
	for(i=0;i<M+1;i++)
	{
		if(OFFLINEDATA[0][i].infoNum!=samev)
		{
			// First block after a run of equal values: the run ended at i-1.
			if(flag) break;
			samev=OFFLINEDATA[0][i].infoNum;
		}
		else
			flag=true;
	}
	return i-1;
}

//
//
//功能:根据离线信息求与中心的相关系数
//
double CalCoefficient(int ione,int itwo,int len)
{
	double fz,fm,fm2;
	double dSigmaXY,dSigmaY,dSigmaY2,dSigmaX,dSigmaX2;

	dSigmaX=g_ArrDataFlowAndSegmentCoeffi[ione].sumxi;
	dSigmaX2=g_ArrDataFlowAndSegmentCoeffi[ione].sumxi2;
	dSigmaY=g_ArrDataFlowAndSegmentCoeffi[itwo].sumxi;
	dSigmaY2=g_ArrDataFlowAndSegmentCoeffi[itwo].sumxi2;
	dSigmaXY=g_ArrDataFlowAndSegmentCoeffi[ione].sumxyi[itwo];

	fz=dSigmaXY-1.0/len*dSigmaX*dSigmaY;
	fm=dSigmaX2-1.0/len*pow(dSigmaX,2);
	fm=sqrt(fm);
	fm2=dSigmaY2-1.0/len*pow(dSigmaY,2);
	fm2=sqrt(fm2);
	return fz/(fm*fm2);
}

//
// Scores a clustering result (labelled the SSQ criterion in the original
// comment): returns the sum of the stored coefficients of all members of the
// first `cnum` clusters.
//
double EvaluateCluster(CLUSTER cluster[],int cnum)
{
	double total=0;
	for(int c=0;c<cnum;++c)
	{
		const CLUSTER *cur=&cluster[c];
		for(int m=0;m<cur->num;++m)
		{
			total+=cur->coeffi[m];
		}
	}
	return total;
}

//
//功能:根据类中心进行分类
//
double DistributeSamples(CLUSTER cluster[],int cnum,int winlen)
{
	int i,j;
	double maxcoeff,coeff;
	int p;

	for(j=0;j<cnum;j++)
	{
		cluster[j].num=0;
	}

	for(i=0;i<DATAFLOW_NUM;i++)
	{
		maxcoeff=-2;
		p=-1;
		for(j=0;j<cnum;j++)
		{
			coeff=CalCoefficient(i,j,winlen);
			if(coeff>maxcoeff)
			{
				maxcoeff=coeff;
				p=j;
			}
		}
		cluster[p].member[cluster[p].num]=i;
		cluster[p].coeffi[cluster[p].num]=maxcoeff;
		cluster[p].num++;
	}

	//输出
	fprintf(g_POutFile,"Now there is %d clusters.\n",cnum);
	for(i=0;i<cnum;i++)
	{
		fprintf(g_POutFile,"Cluster %d center:%d\n",i,cluster[i].center);
		fprintf(g_POutFile,"Cluster %d has pattern:",i);	
		for(j=0;j<cluster[i].num;j++)
		{
			fprintf(g_POutFile,"%d ",cluster[i].member[j]);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -