// isodata.cpp
// Font size: (code-viewer page residue, not part of the program)
#include <stdio.h>
#include <math.h>
#include <memory.h>
// State and operations of an ISODATA clustering run.
//
// All members are public; main() fills `data` and seeds `z` directly before
// calling _main().
class Isodata
{
public:
    // ---- sizes -----------------------------------------------------------
    int sampleNum;   // number of samples held in `data`
    int dataDim;     // number of coordinates per sample

    // ---- tuning parameters (defaults are assigned by the constructor) ----
    int K;           // number of centers
    int thetaN;      // minimum number of samples per class
    double thetaS;   // standard-deviation threshold within a class
    double thetaC;   // minimum distance between two class centers
    int L;           // maximum number of pairs to unite per iteration
    int I;           // number of iterations

    // ---- running state ---------------------------------------------------
    int Nc;          // current (temporary) number of classes
    int loopCount;   // incremented at the start of every _main() pass
    int splitOk;     // non-zero once Split() has split a class

    // ---- buffers ---------------------------------------------------------
    double **data;   // one row per sample; the last column is the class label
    double **z;      // one row per center; the last column is the sample count
    double *D;       // per-class value, filled in by _main()
    double DD;       // compared against D[i] in Split()

    explicit Isodata(int num);
    ~Isodata();

    void _main();
    void Cluster();
    void Split();
    void Unite();
    void WriteResult();
};
// Sets every parameter to its built-in default and allocates the buffers.
//
// num: number of samples; a negative value is treated as 0 so that no
//      allocation is attempted with a negative length.
//
// `data` and `z` each get `sampleNum` rows of `dataDim + 1` doubles. All
// buffers are zero-initialised.
Isodata::Isodata(int num)
{
    sampleNum = (num > 0) ? num : 0;
    dataDim = 2;
    K = 2;
    thetaN = 1;
    thetaS = 1;
    thetaC = 4;
    L = 0;
    I = 4;
    Nc = 1;
    loopCount = 0;
    splitOk = 0;
    // Split() compares D[i] against DD, so it must start with a defined value.
    DD = 0;

    const int rowLen = dataDim + 1;
    int i;

    data = new double*[sampleNum];
    for (i = 0; i < sampleNum; i++)
    {
        data[i] = new double[rowLen]();
    }

    z = new double*[sampleNum];
    for (i = 0; i < sampleNum; i++)
    {
        z[i] = new double[rowLen]();
    }

    // D is indexed by class (0 .. Nc-1) and Nc grows whenever Split() adds a
    // center, so D gets as many slots as z has rows rather than the initial
    // Nc (which is 1).
    const int capacity = (sampleNum > Nc) ? sampleNum : Nc;
    D = new double[capacity]();
}
// Releases every buffer allocated by the constructor: the rows of `data` and
// `z`, the two row tables, and `D`.
Isodata::~Isodata()
{
    for (int row = 0; row < sampleNum; row++)
    {
        delete[] data[row];
        delete[] z[row];
    }
    delete[] data;
    delete[] z;
    // D is allocated in the constructor as well and was previously leaked.
    delete[] D;
}
// Assigns every sample to its nearest center.
//
// Resets the sample count stored in the last column of each of the first Nc
// rows of `z`, then, for each sample, writes the index of the closest center
// (Euclidean distance over the first `dataDim` coordinates) into the last
// column of its `data` row and increments that center's count. On equal
// distances the center with the lower index wins.
void Isodata::Cluster()
{
    int i, j, dim;

    for (j = 0; j < Nc; j++)
        z[j][dataDim] = 0;

    for (i = 0; i < sampleNum; i++)
    {
        // Track the running minimum instead of filling a scratch array: the
        // former array had K entries but was indexed up to Nc - 1, which
        // overflowed as soon as Nc exceeded K.
        int label = 0;
        double best = 0;
        for (j = 0; j < Nc; j++)
        {
            double sum = 0;
            for (dim = 0; dim < dataDim; dim++)
            {
                const double diff = data[i][dim] - z[j][dim];
                sum += diff * diff;
            }
            const double dist = sqrt(sum);
            if (j == 0 || dist < best)
            {
                best = dist;
                label = j;
            }
        }
        data[i][dataDim] = label;
        z[label][dataDim]++;
    }
}
// Tries to split one class whose spread is too large, then continues the run.
//
// For each class the per-coordinate standard deviation of its samples around
// the class center is computed. The first class whose largest deviation
// exceeds thetaS, and which also satisfies
//     (D[i] > DD and count > 2 * (thetaN + 1))  or  Nc <= K / 2,
// is split: a copy of its center is inserted right after it and the two are
// moved apart by half the largest deviation along that coordinate. At most
// one class is split per call. `splitOk` is reset on entry and incremented
// when a split happens.
//
// Afterwards _main() is called if a split happened, and then called if
// loopCount is still below I.
void Isodata::Split()
{
    int i, j;

    // Nc may grow below, so remember how many rows sdv really has.
    const int rows = Nc;
    double **sdv = new double*[rows]; // standard deviation vector per class
    for (i = 0; i < rows; i++)
        sdv[i] = new double[dataDim]();

    for (i = 0; i < sampleNum; i++)
    {
        const int label = (int)data[i][dataDim];
        if (label < 0 || label >= rows)
            continue; // label does not name a current class; sdv has no row for it
        for (j = 0; j < dataDim; j++)
        {
            const double diff = data[i][j] - z[label][j];
            sdv[label][j] += diff * diff;
        }
    }
    for (i = 0; i < rows; i++)
    {
        const double count = z[i][dataDim];
        for (j = 0; j < dataDim; j++)
            sdv[i][j] = (count > 0) ? sqrt(sdv[i][j] / count) : 0; // avoid 0/0 for an empty class
    }

    splitOk = 0;
    for (i = 0; i < Nc && !splitOk; i++)
    {
        // Largest deviation and its coordinate. On a tie the later coordinate
        // wins, which for two coordinates is the former rule "use coordinate 0
        // only when it is strictly larger".
        double maxValue = 0;
        int splitDim = 0;
        for (j = 0; j < dataDim; j++)
        {
            if (sdv[i][j] >= maxValue)
            {
                maxValue = sdv[i][j];
                splitDim = j;
            }
        }

        if (!(maxValue > thetaS))
            continue;
        if (!(((D[i] > DD) && (z[i][dataDim] > 2 * (thetaN + 1))) || (Nc <= K / 2)))
            continue;
        if (Nc >= sampleNum)
            continue; // z has only sampleNum rows; there is no room for another center

        // Open a slot at i + 1. The rows are shifted from the back so that no
        // row is overwritten before it has been moved; a front-to-back copy
        // would replace every later center with a copy of z[i].
        for (int ii = Nc - 1; ii >= i; ii--)
        {
            memcpy(z[ii + 1], z[ii], (dataDim + 1) * sizeof(double));
        }
        z[i + 1][splitDim] = z[i][splitDim] + 0.5 * maxValue;
        z[i][splitDim] = z[i][splitDim] - 0.5 * maxValue;
        Nc++;
        splitOk++;
    }

    // Free the scratch table before recursing so it is not held (or leaked)
    // across the nested passes.
    for (i = 0; i < rows; i++)
        delete[] sdv[i];
    delete[] sdv;

    if (splitOk)
        _main();
    if (loopCount < I)
        _main();
}
// Merges every pair of centers that lie closer together than thetaC, then
// continues the run.
//
// When centers i < j are merged, z[i] becomes the count-weighted mean of the
// two, its count becomes the sum of both counts, row j is removed from `z`
// (later rows move down by one), Nc is decremented, and the sample labels are
// updated to match the new numbering. Afterwards _main() is called if
// loopCount is still below I.
void Isodata::Unite()
{
    const size_t rowBytes = (dataDim + 1) * sizeof(double);

    for (int i = 0; i < Nc - 1; i++)
    {
        int j = i + 1;
        while (j < Nc)
        {
            // Only the current pair's distance is needed, so no Nc x Nc
            // table is allocated (the former table was never freed).
            double dist = 0;
            for (int dim = 0; dim < dataDim; dim++)
            {
                const double diff = z[i][dim] - z[j][dim];
                dist += diff * diff;
            }
            dist = sqrt(dist);

            if (!(dist < thetaC))
            {
                j++;
                continue;
            }

            // unite
            const double ni = z[i][dataDim];
            const double nj = z[j][dataDim];
            const double total = ni + nj;
            if (total > 0) // two empty classes: keep z[i] rather than compute 0/0
            {
                for (int dim = 0; dim < dataDim; dim++)
                    z[i][dim] = (z[i][dim] * ni + z[j][dim] * nj) / total;
            }
            z[i][dataDim] = total;

            for (int row = j; row < Nc - 1; row++)
            {
                memcpy(z[row], z[row + 1], rowBytes);
            }
            Nc--;

            // Class j is now class i, and every class after j moved down by one.
            for (int s = 0; s < sampleNum; s++)
            {
                const int label = (int)data[s][dataDim];
                if (label == j)
                    data[s][dataDim] = i;
                else if (label > j)
                    data[s][dataDim] = label - 1;
            }
            // j is deliberately not advanced: the row that just moved into
            // slot j has not been compared with center i yet.
        }
    }

    if (loopCount < I)
        _main();
}
// Writes the current result to "result.txt" in the working directory: the
// number of classes, the coordinates of each class center, and every sample
// with the class it belongs to. An existing file is overwritten.
//
// If the file cannot be opened, a message is printed to stderr and nothing is
// written.
void Isodata::WriteResult()
{
    FILE *r = fopen("result.txt", "wt");
    if (r == NULL)
    {
        // fprintf on a NULL stream is undefined, so bail out instead.
        fprintf(stderr, "cannot open result.txt for writing\n");
        return;
    }

    int i, j;

    fprintf(r, "Class Number: %d\n", Nc);

    fprintf(r, "\nClass Center:\n");
    for (i = 0; i < Nc; i++)
    {
        fprintf(r, "Class[%d]: ", i);
        for (j = 0; j < dataDim; j++)
            fprintf(r, "%f ", z[i][j]);
        fprintf(r, "\n");
    }

    fprintf(r, "\nItem list:\n");
    for (i = 0; i < sampleNum; i++)
    {
        fprintf(r, "Data[%d]: ", i);
        for (j = 0; j < dataDim; j++)
            fprintf(r, "%f ", data[i][j]);
        fprintf(r, ",belongs to class[%d]\n", (int)data[i][dataDim]);
    }

    fclose(r);
}
void Isodata::_main()
{
loopCount ++;
Cluster();
int i,j;
for (i = 0; i < Nc; i++)
if (z[i][dataDim] == 0)
{
for (j = i; j < Nc - 1; j++)
{
memcpy( z[i], z[j + 1],(dataDim + 1) * sizeof(double) );
}
Nc--;
}
//calc the new z
for (j = 0; j < Nc; j++)
{
z[j][0] = 0;
z[j][1] = 0;
z[j][2] = 0;
}
for (i = 0; i < sampleNum; i++)
{
int label = (int)data[i][dataDim];
z[label][0] += data[i][0];
z[label][1] += data[i][1];
z[label][2] ++;
}
for (j = 0; j < Nc; j++)
{
z[j][0] = z[j][0] / z[j][dataDim];
z[j][1] = z[j][1] / z[j][dataDim];
//printf("%f %f\n",nz[j].x,nz[j].y);
}
//calc dis within and between classes.
double DD = 0;
for (i = 0; i < Nc; i++)
{
D[i] = 0;
}
for (i = 0; i < sampleNum; i++)
{
int label = (int) data[i][dataDim];
D[label] += sqrt( pow( (data[i][0] - z[label][0]), 2 ) + pow( (data[i][1] - z[ label ][1]), 2 ) );
}
for (i = 0; i < Nc; i++)
{
DD += D[i];
D[i] = D[i] / z[i][dataDim];
}
DD = DD / sampleNum;
//options
if (loopCount == I)
{
//thetaC = 0;
//十一步
Unite();
}
else if (Nc <= K / 2)
{
//八步
Split();
}
else if ( ((loopCount % 2) == 0) || (Nc >= 2 * K) )
{
//十一步
Unite();
}
else
{
//八步
Split();
}
WriteResult();
}
void main()
{
FILE * f;
f = fopen( "data.txt","rt");
int sampleNum, i, j;
fscanf(f, "%d", &sampleNum);
//get data
Isodata *iso =new Isodata(sampleNum);
for (i = 0; i < iso->sampleNum; i++)
{
int temp;
for (j = 0; j < iso->dataDim; j++)
{
fscanf(f, "%d", &temp);
iso->data[i][j] = temp;
}
iso->data[i][iso->dataDim] = 0;
}
fclose(f);
for (i = 0; i < iso->Nc; i++)
{
memcpy( iso->z[i], iso->data[i],(iso->dataDim + 1) * sizeof(double) );
}
iso->_main();
}
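// Code-viewer page residue (not part of the program) - keyboard shortcuts:
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Switch theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -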