📄 k-均值聚类算法- 月华 - 新浪blog.htm
字号:
src="k-均值聚类算法- 月华 - 新浪BLOG.files/article_index_new.js"></SCRIPT>
</DIV></DIV>
<DIV class=margin></DIV></TD>
<TD class=box_2 vAlign=top>
<DIV class=margin>
<DIV id=labelFM000012>
<DIV class=add><IMG class=ico
src="k-均值聚类算法- 月华 - 新浪BLOG.files/blank.gif" align=absMiddle
border=0><A
href="http://blog.sina.com.cn/control/writing/scriber/article_add.php?mode=1"
target=_blank>发表文章</A> </DIV></DIV></DIV>
<DIV class=margin id=box_2>
<CENTER>
<DIV class=article>
<TABLE id=article48e01819010003qp cellSpacing=0 cellPadding=0
border=0>
<TBODY>
<TR>
<TD align=middle>
<TABLE cellSpacing=0 cellPadding=0 border=0>
<TBODY>
<TR class=sysHand
onclick="javascript:hide('articleBody48e01819010003qp');swap('articleTitle48e01819010003qp','className','up','down');">
<TD class=up id=articleTitle48e01819010003qp>
<DIV class="sysBr500 title"
id=commentText48e01819010003qp>k-均值聚类算法</DIV></TD></TR></TBODY></TABLE></TD></TR>
<TR id=articleBody48e01819010003qp>
<TD class=aBody>
<TABLE cellSpacing=0 cellPadding=0 align=center border=0>
<TBODY>
<TR>
<TD class=author><IMG class=man
src="k-均值聚类算法- 月华 - 新浪BLOG.files/moon.gif"><SPAN
class=time>2006-05-23 19:35:09</SPAN></TD></TR></TBODY></TABLE>
<TABLE class=dashed cellSpacing=0 cellPadding=0 align=center
border=0>
<TBODY>
<TR>
<TD></TD></TR></TBODY></TABLE>
<TABLE class=aSize cellSpacing=0 cellPadding=0 align=center
border=0>
<TBODY>
<TR>
<TD align=right><A
onclick="$('articleText48e01819010003qp').style.fontSize='16px';$('articleText48e01819010003qp').style.lineHeight='27px'"
href="javascript:;">大</A><A
onclick="$('articleText48e01819010003qp').style.fontSize='14px';$('articleText48e01819010003qp').style.lineHeight='24px'"
href="javascript:;">中</A><A
onclick="$('articleText48e01819010003qp').style.fontSize='12px';$('articleText48e01819010003qp').style.lineHeight='21px'"
href="javascript:;">小</A></TD></TR></TBODY></TABLE>
<TABLE class=description cellSpacing=0 cellPadding=0
align=center border=0>
<TBODY>
<TR>
<TD align=middle>
<DIV class="sysBr500 text"
id=articleText48e01819010003qp align=left>
<DIV> </DIV>
<DIV>k-均值聚类是一种迭代的聚类算法,迭代过程中不断地移动簇集中的成员直至得到理想的簇集为止。时间复杂度是o(tkn),t是迭代次数。k-均值找到的是局部最优解,而不是全局最优解。发现的是行状为凹的簇,它也不能很好的处理异常点。虽然k-均值算法产生的结果通常都不错,但时间上不是高效的,并且不具备很好的可收缩性。</DIV>
<DIV>k-均值算法通常用来和新的聚类算法做比较。</DIV>
<DIV>本人花了2个晚上编出k-均值算法。在p4机,2.82G,512M内存上运行,效率还可以。读文件是本人用matlab随机产生的10个簇,共500个点。当然可根据个人情况不同,修改读文件的部分。由于时间仓卒,水平有限,错误之处,在所难免。欢迎批评指证。</DIV>
<DIV>附:源代码</DIV>
<DIV>//////////////////////////////////////////////////////////////////////////<BR>//k-mean
聚类创建日期2006年5月22日<BR>#include <stdio.h><BR>#include
<stdlib.h><BR>#include <string><BR>#include
<time.h><BR>#include <math.h><BR>long int
posnum;<BR>const int M=1000;<BR>const double
NUM_MAX=100000000;<BR>const double
NUM_MIN=0.00000001;<BR>int knum;<BR>int row;<BR>int
*numperclu;//每个簇的个数<BR>//均值聚类数据结构<BR>struct
cluster<BR>{<BR> double
*data;<BR> int
dataindex;<BR> struct cluster
*next;<BR>};<BR>typedef struct cluster
ccnode;<BR>typedef ccnode *ccLink;</DIV>
<DIV>ccLink kcluster;<BR>//读文件,位置矩阵<BR>double**
readdata(char* s)<BR>{<BR> FILE*
fp;<BR> if
((fp=fopen(s,"rb"))==NULL)<BR> {<BR> printf("erro!
cannot open
file!\n");<BR> exit(0);<BR> }<BR> <BR>
int i,j;<BR> <BR> double
**tpos;<BR> tpos=
(double**)malloc(M*sizeof(double*));<BR> for (
i=0;i<M;i++)<BR> { <BR>
tpos[i]
= (double*)malloc(M*sizeof(double));<BR> }</DIV>
<DIV> char
temp[100]={0};<BR> char *p =
NULL; double
x;<BR> <BR> for(i=0;!feof(fp);i++)<BR> {<BR> <BR>
if((fgets(temp,100,fp))==NULL)<BR>
{<BR> break;<BR> }</DIV>
<DIV> </DIV>
<DIV> p = temp; j=0;<BR>
while(1)<BR>
{<BR>
x=atof(p); <BR> tpos[i][j]
= x;<BR>
j++;<BR> <BR>
if((p=strchr(p,' '))==NULL)<BR>
{<BR>
break;<BR> }<BR>
p++;</DIV>
<DIV> }</DIV>
<DIV> }
<BR> <BR> <BR> posnum
= i ; row =
j;<BR> <BR> double
**pos;<BR> pos=
(double**)malloc(posnum*sizeof(double*));<BR> for (
i=0;i<posnum;i++)<BR> { <BR> pos[i]
=
(double*)malloc(row*sizeof(double));<BR>
for(j=0;j<row;j++)<BR> {<BR>
pos[i][j] =
tpos[i][j];<BR> }<BR>
free(tpos[i]);<BR> }<BR> <BR>
free(tpos); <BR> </DIV>
<DIV> fclose(fp); <BR> <BR> return
pos;<BR>}</DIV>
<DIV> void insertcluster(ccLink Head,double
*key,int dataindex)<BR> {<BR> ccLink
pointer;<BR> pointer =
Head;<BR> //
Head->data[0]++;<BR> <BR> int
i;</DIV>
<DIV> while (pointer->next!=NULL)<BR>
{<BR> pointer=pointer->next;<BR>
}</DIV>
<DIV> ccLink New;<BR> New
=
(ccLink)malloc(sizeof(ccnode));<BR>
pointer->next=New;<BR>
New->data=(double*)malloc(row*sizeof(double));</DIV>
<DIV> for (i=0;i<row;i++)<BR>
{<BR> New->data[i]=key[i];<BR>
}<BR>
New->dataindex=dataindex;</DIV>
<DIV>
New->next=NULL;<BR> <BR> }<BR>double
dist(double* d1,double *d2)<BR>{<BR> int
i;double distant=0;<BR> for
(i=0;i<row;i++)<BR>
{<BR>
distant=distant+(d1[i]-d2[i])*(d1[i]-d2[i]);<BR>
}<BR> distant=sqrt(distant);<BR>
return distant;<BR>}<BR>int isexit(double**
temp)<BR>{<BR> int i,j;double distant=0.0;</DIV>
<DIV> for
(i=0;i<knum;i++)<BR> {<BR>/* for
(j=0;j<row;j++)<BR>
{<BR> printf("%lf
",temp[i][j]);<BR> printf("%lf
",kcluster[i].data[j]);<BR>
} <BR>*/<BR>
distant=distant+
dist(temp[i],kcluster[i].data);<BR> }</DIV>
<DIV> printf("%lf\n",distant);<BR>
if (distant<NUM_MIN)<BR>
{<BR> return 0;<BR>
}<BR> else<BR> {<BR> return
1;<BR> }<BR> </DIV>
<DIV>}<BR> //重新计算均值并赋值<BR>void calmean(ccLink
Head)<BR>{<BR> int i;<BR> int
num=0;<BR> double
*sum=(double*)(malloc(row*sizeof(double)));<BR>
for (i=0;i<row;i++)<BR>
{<BR> sum[i]=0;<BR>
}<BR> <BR> ccLink
pointer;<BR> pointer=Head->next;</DIV>
<DIV><BR> for (
;pointer!=NULL;<BR> pointer=pointer->next)<BR> {</DIV>
<DIV> for
(i=0;i<row;i++)<BR>
{<BR>
sum[i]=sum[i]+pointer->data[i];<BR>
}<BR>
num++;<BR> }<BR> <BR> <BR>
for (i=0;i<row;i++)<BR>
{<BR> if
(num!=0)<BR>
{<BR>
sum[i]=sum[i]/num;<BR> <BR>
}<BR> else<BR>
{<BR>
sum[i]=0;<BR>
}<BR>
Head->data[i]=sum[i];<BR> <BR> <BR>
}<BR> free(sum);</DIV>
<DIV>}<BR>double**
initdata()<BR>{ <BR> int
i,j;<BR>
srand((unsigned)time(NULL));</DIV>
<DIV> <BR> double**
pos;<BR> pos = readdata("pos.txt");</DIV>
<DIV> numperclu =
(int*)malloc(knum*sizeof(int));<BR> for
(i=0;i<knum;i++)<BR> {<BR>
numperclu[i]=0;<BR> }</DIV>
<DIV> int num;<BR> int
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -