⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 isodata.cpp

📁 聚类算法研究
💻 CPP
字号:
// isodata.cpp: implementation of the ISODATA class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "gqk_mini2.h"
#include "isodata.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

ISODATA::ISODATA()
{
	
	K =5;
	Nmin = 1;
	Ds = 1;
	Dm = 1;
	L = 1;
	I = 6;

}

ISODATA::~ISODATA()
{

}

void ISODATA::start (int vn , int vs , float vd[] , char rst[])
{
	int  i;

	result = rst;

	initiate (vn , vs , vd);//读入数据,初始化聚类中心,参数设定默认值
	for (i = 1 ; ; i++)
	{
		input ();//显示、修改参数
		allocate ();//完成第2-6步
		if (i == I)
		{
			Dm = 0;
			converge ();
			break;
		}
		if (groupnum <= K / 2)  diverge ();
		else 
		{
			if ((groupnum >= K * 2) | ( i % 2 == 0))
				converge ();
			else 
				diverge ();
		}	
		renum ();		
	}

	free (datafield);
	free (ccdhead);
	for (i = 0 ; i < 100 ; i++)
		free (group[i].center);
}
void ISODATA::initiate (int vn , int vs , float vd[])
{
	int  i , j , size;
	float  d;
	maxindex = groupnum = 1;

	vectornum = vn;
	vectorsize = vs;

	size = vectornum * (vectorsize + 1);
	datafield = (float *) malloc (size * sizeof (float));
	for (i = 0 ; i < vectornum ; i++)
	{
		for (j = 0 ; j < vectorsize ; j++)
		{
			d = vd[ i * vectorsize + j ];

			write (i , j + 1 , d);
		}
		write (i , 0 , -1);
	}
	for (i = 0 ; i < 100 ; i++)
	{
		group[i].flag = 0;
		group[i].center = (float *) malloc ((vectorsize) * sizeof (float));
		group[i].groupsize = 0;
		group[i].variance = 0;
		group[i].D = 0;
	}
	for (i = 0 ; i < groupnum ; i++)
	{
		for (j = 0 ; j < vectorsize ; j++)
		{
			* (group[i].center + j) = data(i , j + 1);///////////////////////////////////////////
		}
		group[i].flag = 1;
	}
}
void  ISODATA::input ()
{
	
	char tmp[500]={'\0'};
	sprintf(tmp,"\r\n\r\n现用的参数取值:\r\n");
	strcat(result,tmp);
	sprintf(tmp,"预期的聚类中心数目:K = %d\r\n 每一聚类中最少的样本数目,即如少于此数就不作为一个孤立的聚类:Nmin=%d\r\n 一个聚类域中样本距离分布的标准差:Ds=%f\r\n 两个聚类中心之间的最小距离,如小于此数,两个聚类进行合并:Dm=%f\r\n 在于此迭代运算中可以合并的聚类中心的最多对数:L=%d\r\n 迭代运算的次数序号:I=%d\r\n",K,Nmin,Ds,Dm,L,I);
	strcat(result,tmp);

	showresult();
}
void  ISODATA::allocate ()
{
	int  i , j , k , num , index;
	float  D , D1 , sum;
	for (i = 0 ; i < 100 ; i++)//为各聚类中心值清零
	{
		group[i].D = 0;
		group[i].groupsize = 0;
		group[i].variance = 0;
	}
	for (i=0 ; i < vectornum ; i++)//按距离分配到各聚类域
	{
		D = distance (group[0].center , vector (i));
		k = 0;
		for (j = 1 ; j < groupnum ; j++)
		{
			D1 = distance (group[j].center , vector (i));
			if (D > D1)
			{
				k = j;
				D = D1;
			}
		}
		write (i , 0 , (float) k);		
		group[k].groupsize++;
	}
	num = groupnum;
	for (i = 0 ; i < num ; i++)//当聚类中样本个数小于规定值时撤销聚类
	{
		if (group[i].groupsize < Nmin)
		{
			group[i].flag = 0;
			group[i].groupsize = 0;
			for (j = 0 ; j < vectornum ; j++)
			{
				if (data (j , 0) == i)
				{
					write (j , 0 , -1.0);
				}
			}
			groupnum--;
		}
	}	
	for (i = 0 ; i < 100 ; i++)//为各聚类中心值清零
	{
		for (j = 0 ; j < vectorsize ; j++)
			* (group[i].center + j) = 0;
	}
	for (i = 0 ; i < vectornum ; i++)//计算新的聚类中心
	{
		index = (int) data (i , 0);
		if (index != -1)
		{
			sum = (float) group[index].groupsize;
			for (j = 0 ; j < vectorsize ; j++)
			{
				* (group[index].center + j) = * (group[index].center + j) + data(i , j + 1) / sum;
			}
		}
	}
	for (i = 0 ; i < vectornum ; i++)//计算各样点到聚类中心距离
	{
		index = (int) data (i , 0);
		if (index != -1)
		{
			sum = (float) group[index].groupsize;
			D = distance (group[index].center , vector (i));
			group[index].D = group[index].D + D / sum;
		}
	}
	Dst = 0;
	for (i = 0 ; i < maxindex ; i++)
	{
		if (group[i].flag != 0)
			Dst = Dst + group[i].D;
	}	
	Dst = Dst / groupnum;
}
void  ISODATA::diverge ()
{
	float  newvar , oldvar , center;
	int  i , j , k , l , flag;
	flag = 0;
	for (i = 0 ; i < maxindex ; i++)
	{
		if (group[i].flag != 0)
		{
			oldvar = 0;//标准差
			for (j = 0 , l = 0 ; j < vectorsize ; j++)
			{//计算同一聚类域中各分量对应的标准差中的最大值		
				newvar = 0;
				center = * (group[i].center + j);
				for (k = 0 ; k < vectornum ; k++)
				{
					if (data (k , 0) == i)
						newvar = newvar +(center - data(k , j+1))*(center - data(k , j+1));
				}
				if (newvar > oldvar)
				{
					oldvar = newvar;
					l = j;
				}
			}
			group[i].variance = (float) sqrt( oldvar / group[i].groupsize);
			if (group[i].variance > Ds)
			{
				if ((groupnum <= K/2) | ((group[i].D > Dst) & (group[i].groupsize > 2 * (Nmin + 1))))
				{
					split (i , l);
					flag = 1;	
				}
			}
		}
	}
	if (flag == 0)
		converge ();
}
void  ISODATA::split (int i , int j)
{
	int  k , l;
	k = maxindex;
	group[k].flag = 1;
	for (l = 0 ; l < vectorsize ; l++)
	{
		* (group[k].center + l) = (* (group[i].center + l));
	}
	* (group[k].center + j) = (* (group[k].center + j) + group[i].variance);
	* (group[i].center + j) = (* (group[i].center + j) - group[i].variance);			
	maxindex++;
	groupnum++;
}
void  ISODATA::converge ()
{	
	int  i , j , k , h , l;
	float  D , c1 , c2 , n1 , n2;
	ccdhead = (struct CCD *) malloc (sizeof (ccd) * L);
	for (i = 0 , k = 0 ; i < maxindex - 1 ; i++)
	{
		if (group[i].flag != 0)
		{
			for (j = i + 1 ; j < maxindex ; j++)
			{
				if (group[j].flag != 0)
				{
					D = distance (group[i].center , group[j].center);
					if (D < Dm)
					{
						if (k < L)
						{
							(ccdhead + k)->dst = D;
							(ccdhead + k)->g1 = i;
							(ccdhead + k)->g2 = j;
							k++;
						}
						else
						{
							insert (i,j,D);							
						}
						break;
					}
				}
			}
		}
	}		
	for (h = 0 ; h < k ;h++)
	{
		i = (ccdhead + h)->g1;
		j = (ccdhead + h)->g2;
		n1 = (float) group[i].groupsize;
		n2 = (float) group[j].groupsize;
		for (l = 0 ; l < vectorsize ; l++)
		{
			c1 = (* (group[i].center + l));
			c2 = (* (group[j].center + l));
			* (group[i].center + l) = (n1 * c1 + n2 * c2) / (n1 + n2);
		}
		group[i].groupsize = (int) (n1 + n2);
		for (l = 0 ; l < vectornum ; l++)
			if (data(l , 0) == j)
				write (l , 0 , (float) i);
		group[j].flag = 0;
		group[j].groupsize = 0;
		groupnum--;
	}
}
void  ISODATA::insert (int i , int j , float D)
{
	int  h , l;
	for (h = 0 ; h < L ; h++)
	{
		if (D < (ccdhead + h)->dst)
		{
			for (l = L - 1 ; l > h ; l--)
			{
				(ccdhead + l)->dst = (ccdhead + l - 1)->dst;
				(ccdhead + l)->g1 = (ccdhead + l - 1)->g1;
				(ccdhead + l)->g2 = (ccdhead + l - 1)->g2;
			}
			(ccdhead + h)->dst = D;
			(ccdhead + h)->g1 = i;
			(ccdhead + h)->g2 = j;
		}
	}
}

void  ISODATA::renum ()
{
	int  i , j , k;
	for (i = 0 ; i < maxindex - 1 ; i++)
	{
		if (group[i].flag == 0)
		{
			for (j = maxindex ; j > i ; j--)
			{
				if (group[j].flag == 1)
				{
					group[i].flag = 1;
					group[j].flag = 0;					
					for (k = 0 ; k < vectorsize ; k++)
					{
						* (group[i].center + k) = (* (group[j].center + k));
						* (group[j].center + k) = 0;
					}
				}
			}
		}
	}
	maxindex = groupnum;
}

void  ISODATA::showresult ()
{
	char tmp[100]={'\0'};

	int  i , j , k , l;
	for (j = 1 , i = 0 ; i < maxindex ; i++)
	{
		if (group[i].flag != 0)
		{
			sprintf (tmp,"第%3d组聚类中心坐标为:" , j ++);
			strcat(result,tmp);

			for (k = 0 ; k < vectorsize ; k++){
				sprintf (tmp," %10f " , *(group[i].center + k));
				strcat(result,tmp);
			}

			sprintf (tmp," \r\n聚类包含的样本点的坐标为:\r\n ");
			strcat(result,tmp);

			for (k = 0 ; k < vectornum ; k++)
			{
				if (data(k , 0) == i)
				{
					for (l = 0 ; l < vectorsize ; l++)
					{
						sprintf (tmp," %10f " , data(k , l + 1));
						strcat(result,tmp);
					}
					sprintf (tmp," \r\n ");
					strcat(result,tmp);
				}
			}
		}
	}
}
float  ISODATA::data (int i , int j)
{
	return  * (datafield + i * (vectorsize + 1) + j);
}
void  ISODATA::write (int i , int j , float data)
{
	* (datafield + i * (vectorsize + 1) + j) = data;
}
float  * ISODATA::vector (int i)
{ 
	return  datafield + i * (vectorsize + 1) + 1;
}
float  ISODATA::distance (float  * x , float  * y)
{
	int  i;
	float  z;
	for (i = 0 , z = 0 ; i < vectorsize ; i++)
		z = z + ((* (x + i)) - (* (y + i))) * ((* (x + i))-(* (y + i)));
	return  (float) sqrt(z);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -