// offlineclustering.cpp
#include <stdio.h>
#include <math.h>
#include <memory.h>
#include <string.h>
#include <time.h>
#include <sys/timeb.h>
//-----------------------------------------------
//Constant definitions
//
//
//Error codes (the values returned by ReadOnePieceFromMultiDataFlow)
//
#define ERR_SUCCESS 0
#define ERR_FAILURE -1
#define ERR_LACKPIECE -2
//
//Data-stream file parameters
//A data-stream file is named DATAFLOW_FILENAME_COMMON + stream index + DATAFLOW_FILENAME_EXT
//Stream indices start at 0
//
#define DATAFLOW_NUM 100
#define DATAFLOW_FILENAME_COMMON "dataflow"
#define DATAFLOW_FILENAME_EXT ".dat"
//
//Data-stream piece parameters
//PIECE_DIM is the number of double values that make up one piece of a stream
//
#define PIECE_DIM 1024
//
//Offline storage parameters
//M+1 offline blocks are kept per stream (see OFFLINEDATA);
//L/2*PIECE_DIM is the number of entries in TABLE_INDEX_TIMEVALUE
//NOTE(review): single-letter macro names L and M are replaced everywhere the
//preprocessor sees them as identifiers - keep that in mind when naming variables
//
#define L 1024
#define M 50
//
//Attenuation (decay) coefficient
//NOTE(review): not referenced in the part of the file visible here; presumably
//used to build the time-weight table - TODO confirm
//
#define ATTENUATION 0.99
//
//Initial number of clusters, and the upper bound used to size g_Cluster
//
#define INIT_CLUSTER_NUM 3
#define MAX_CLUSTER_NUM DATAFLOW_NUM
//-----------------------------------------------
//数据结构定义
//
//Summary statistics of one data stream, used both per piece (PIECE_COEFFICIENT)
//and per block/segment (BLOCK_COEFFICIENT).
//For the per-piece case the fields are filled by UpdateArrDataFlowAndPieceCoeffi.
typedef struct _PIECE_COEFFICIENT
{
double sumxi; //time-weighted sum of the stream's samples (SigmaX)
double sumxi2; //time-weighted sum of the squared samples (SigmaX2)
double sumxyi[DATAFLOW_NUM]; //sumxyi[j]: time-weighted sum of products with stream j (SigmaXY)
}PIECE_COEFFICIENT,BLOCK_COEFFICIENT;
//One cluster of data streams.
//member[] and coeffi[] are parallel arrays; only the first num entries are valid.
typedef struct _CLUSTER
{
int center; //value reported as the cluster center in the output file
int member[DATAFLOW_NUM]; //indices of the data streams assigned to this cluster
double coeffi[DATAFLOW_NUM]; //coeffi[k]: coefficient stored for member[k] when it was assigned
int num; //number of members currently stored
}CLUSTER;
//One slot of the offline storage kept per data stream (see OFFLINEDATA).
typedef struct _OFFLINEBLOCK
{
BLOCK_COEFFICIENT blockEffi; //summary statistics stored in this slot
int infoNum; //amount of information actually contained in this slot
bool isUsed; //false while the slot is free (see FindInfoSavePos)
}OFFLINEBLOCK;
//-----------------------------------------------
//全局变量定义
//
//Input file of each data stream; opened on first use by ReadOnePieceFromMultiDataFlow
FILE *g_ArrPFile[DATAFLOW_NUM]={NULL};
//Samples of the piece most recently read from each data stream
double g_ArrDataFlowAndPieceData[DATAFLOW_NUM][PIECE_DIM];
//Summary statistics of the current piece of each data stream
PIECE_COEFFICIENT g_ArrDataFlowAndPieceCoeffi[DATAFLOW_NUM];
double TABLE_INDEX_TIMEVALUE[L/2*PIECE_DIM]; //lookup table of time weights
OFFLINEBLOCK OFFLINEDATA[DATAFLOW_NUM][M+1];//offline data storage, M+1 slots per stream
CLUSTER g_Cluster[MAX_CLUSTER_NUM];
int g_iClusterNum=0; //working variable: current number of clusters
//NOTE(review): not used in the visible part of the file; presumably holds the
//score returned by EvaluateCluster - TODO confirm
double g_ssq;
//Per-stream summary statistics read by CalCoefficient
BLOCK_COEFFICIENT g_ArrDataFlowAndSegmentCoeffi[DATAFLOW_NUM];
FILE *g_POutFile=NULL; //file that receives the program's output
//-----------------------------------------------
//功能函数定义
//
//
//Function: time-weighted sum of the components of a vector.
//  buf  the vector
//  len  number of components to use
//Component k is multiplied by TABLE_INDEX_TIMEVALUE[len-1-k], so the last
//component is paired with table entry 0.
//
double SigmaX(double buf[],int len)
{
    double acc=0;
    int pos=0;
    int widx=len-1; //weight index walks backwards while pos walks forwards

    while(pos<len)
    {
        acc+=buf[pos]*TABLE_INDEX_TIMEVALUE[widx];
        ++pos;
        --widx;
    }
    return acc;
}
//
//Function: time-weighted sum of the squared components of a vector.
//  buf  the vector
//  len  number of components to use
//Component k is weighted with TABLE_INDEX_TIMEVALUE[len-1-k] before squaring.
//
double SigmaX2(double buf[],int len)
{
    double acc=0;

    for(int pos=0,widx=len-1;pos<len;++pos,--widx)
    {
        const double x=buf[pos];
        const double w=TABLE_INDEX_TIMEVALUE[widx];
        //multiplication order kept as x*w*x*w so rounding matches exactly
        acc+=x*w*x*w;
    }
    return acc;
}
//
//Function: time-weighted sum of the component-wise products of two vectors.
//  bufx, bufy  the two vectors
//  len         number of components to use from each
//Component k of both vectors is weighted with TABLE_INDEX_TIMEVALUE[len-1-k].
//
double SigmaXY(double bufx[],double bufy[],int len)
{
    double acc=0;

    for(int pos=0,widx=len-1;pos<len;++pos,--widx)
    {
        const double w=TABLE_INDEX_TIMEVALUE[widx];
        //multiplication order kept as x*w*y*w so rounding matches exactly
        acc+=bufx[pos]*w*bufy[pos]*w;
    }
    return acc;
}
//
//Function: write the current local time, with millisecond resolution, to
//g_POutFile as "Now Time:YYYY-mm-dd HH:MM:SS:<ms>" followed by an empty line.
//The millisecond part is printed without zero padding.
//Nothing is written when g_POutFile is not open or the local time cannot be
//determined.
//
void RecNowTime()
{
    struct _timeb tstruct;
    struct tm *ltime;
    time_t stime;
    char rectime[255];

    //g_POutFile starts out NULL; passing a NULL stream to fprintf is undefined
    if(g_POutFile==NULL)
        return;

    //Take seconds and milliseconds from the same clock reading so they cannot
    //disagree when a second boundary falls between two separate reads.
    _ftime(&tstruct);
    stime=tstruct.time;

    ltime=localtime(&stime);
    if(ltime==NULL) //localtime can fail; strftime must not be given NULL
        return;

    rectime[0]='\0';
    strftime(rectime,sizeof rectime,"%Y-%m-%d %H:%M:%S",ltime);
    fprintf(g_POutFile,"Now Time:%s:%u\n\n",rectime,(unsigned int)tstruct.millitm);
}
//
//Function: read one piece (PIECE_DIM doubles) from each of the DATAFLOW_NUM
//data-stream files into g_ArrDataFlowAndPieceData. A stream's file is opened
//the first time it is needed and its handle is kept in g_ArrPFile for later calls.
//Returns: ERR_FAILURE    a stream file could not be opened;
//         ERR_LACKPIECE  a stream did not deliver a complete piece;
//         ERR_SUCCESS    a complete piece was read from every stream.
//On ERR_LACKPIECE the streams before the failing one have already been advanced
//by one piece.
//
int ReadOnePieceFromMultiDataFlow()
{
    int i;
    for(i=0;i<DATAFLOW_NUM;i++)
    {
        if(g_ArrPFile[i]==NULL)
        {
            //The name is only needed when the file still has to be opened.
            //The buffer is ample: fixed prefix + decimal int + fixed extension.
            char filename[256];
            sprintf(filename,"%s%d%s",DATAFLOW_FILENAME_COMMON,i,DATAFLOW_FILENAME_EXT);
            g_ArrPFile[i]=fopen(filename,"rb");
            if(!g_ArrPFile[i])
            {
                printf("error:Open file %s failed.",filename);
                return ERR_FAILURE;
            }
        }
        //One bulk read per stream; fread reports how many complete elements it
        //delivered, so anything short of PIECE_DIM means the piece is incomplete.
        if(fread(g_ArrDataFlowAndPieceData[i],sizeof(double),PIECE_DIM,g_ArrPFile[i])!=(size_t)PIECE_DIM)
            return ERR_LACKPIECE;
    }
    return ERR_SUCCESS;
}
//
//功能:更新各数据流当前片的信息
//
void UpdateArrDataFlowAndPieceCoeffi()
{
int i,j;
for(i=0;i<DATAFLOW_NUM;i++)
{
g_ArrDataFlowAndPieceCoeffi[i].sumxi=SigmaX(g_ArrDataFlowAndPieceData[i],PIECE_DIM);
g_ArrDataFlowAndPieceCoeffi[i].sumxi2=SigmaX2(g_ArrDataFlowAndPieceData[i],PIECE_DIM);
for(j=0;j<DATAFLOW_NUM;j++)
{
g_ArrDataFlowAndPieceCoeffi[i].sumxyi[j]=SigmaXY(g_ArrDataFlowAndPieceData[i],g_ArrDataFlowAndPieceData[j],PIECE_DIM);
}
}
}
//
//Function: find the slot in which new information can be stored.
//Only the slots of stream 0 are inspected.
//Returns the index of the first slot whose isUsed flag is false, or -1 when
//all M+1 slots are in use.
//
int FindInfoSavePos()
{
    int slot=0;

    //skip over occupied slots
    while(slot<M+1 && OFFLINEDATA[0][slot].isUsed!=false)
    {
        slot++;
    }
    if(slot<M+1)
        return slot;
    return -1;
}
//
//Function: find the two slots whose information should be merged; they are
//the returned index and the returned index minus one.
//Only the slots of stream 0 are inspected. The scan looks for the first run of
//consecutive slots that share the same infoNum and stops at the first slot
//after that run; the result is the index of the last slot of the run.
//If the scan reaches the end of the slots without stopping, M is returned.
//NOTE(review): M is also returned when no two neighbouring slots share an
//infoNum, and a leading infoNum of -1 matches the initial sentinel - callers
//presumably guarantee neither happens; TODO confirm.
//
int FindInfoConvergePos()
{
    int samev=-1;
    bool flag=false;
    //The loop counter is declared outside the for statement because its final
    //value is needed afterwards; a counter declared in the for header is out of
    //scope after the loop under standard scoping rules.
    int i;
    for(i=0;i<M+1;i++)
    {
        if(OFFLINEDATA[0][i].infoNum!=samev)
        {
            if(flag) break; //a run was seen and has just ended
            samev=OFFLINEDATA[0][i].infoNum;
        }
        else
            flag=true; //slot i repeats the previous value: inside a run
    }
    return i-1;
}
//
//
//功能:根据离线信息求与中心的相关系数
//
double CalCoefficient(int ione,int itwo,int len)
{
double fz,fm,fm2;
double dSigmaXY,dSigmaY,dSigmaY2,dSigmaX,dSigmaX2;
dSigmaX=g_ArrDataFlowAndSegmentCoeffi[ione].sumxi;
dSigmaX2=g_ArrDataFlowAndSegmentCoeffi[ione].sumxi2;
dSigmaY=g_ArrDataFlowAndSegmentCoeffi[itwo].sumxi;
dSigmaY2=g_ArrDataFlowAndSegmentCoeffi[itwo].sumxi2;
dSigmaXY=g_ArrDataFlowAndSegmentCoeffi[ione].sumxyi[itwo];
fz=dSigmaXY-1.0/len*dSigmaX*dSigmaY;
fm=dSigmaX2-1.0/len*pow(dSigmaX,2);
fm=sqrt(fm);
fm2=dSigmaY2-1.0/len*pow(dSigmaY,2);
fm2=sqrt(fm2);
return fz/(fm*fm2);
}
//
//Function: score the result of a clustering pass (labelled the SSQ criterion
//in the original comment).
//  cluster  array of clusters
//  cnum     number of clusters in the array
//Returns the sum of the coeffi values stored for every member of every cluster.
//
double EvaluateCluster(CLUSTER cluster[],int cnum)
{
    double total=0;
    int c=0;

    while(c<cnum)
    {
        const CLUSTER *cur=&cluster[c];
        for(int k=0;k<cur->num;k++)
        {
            total+=cur->coeffi[k];
        }
        c++;
    }
    return total;
}
//
//功能:根据类中心进行分类
//
double DistributeSamples(CLUSTER cluster[],int cnum,int winlen)
{
int i,j;
double maxcoeff,coeff;
int p;
for(j=0;j<cnum;j++)
{
cluster[j].num=0;
}
for(i=0;i<DATAFLOW_NUM;i++)
{
maxcoeff=-2;
p=-1;
for(j=0;j<cnum;j++)
{
coeff=CalCoefficient(i,j,winlen);
if(coeff>maxcoeff)
{
maxcoeff=coeff;
p=j;
}
}
cluster[p].member[cluster[p].num]=i;
cluster[p].coeffi[cluster[p].num]=maxcoeff;
cluster[p].num++;
}
//输出
fprintf(g_POutFile,"Now there is %d clusters.\n",cnum);
for(i=0;i<cnum;i++)
{
fprintf(g_POutFile,"Cluster %d center:%d\n",i,cluster[i].center);
fprintf(g_POutFile,"Cluster %d has pattern:",i);
for(j=0;j<cluster[i].num;j++)
{
fprintf(g_POutFile,"%d ",cluster[i].member[j]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -