
c-junzhi.txt

A c-means clustering source program, written to run in MATLAB.
function [cid,nr,centers] = cskmeans(x,k,nc)
% CSKMEANS K-Means clustering - general method.
%
%   This implements the more general k-means algorithm, where 
%   HMEANS is used to find the initial partition and then each
%   observation is examined for further improvements in minimizing
%   the within-group sum of squares.
%
%   [CID,NR,CENTERS] = CSKMEANS(X,K,NC) Performs K-means
%   clustering using the data given in X. 
%   
%   INPUTS: X is the n x d matrix of data,
%   where each row indicates an observation. K indicates
%   the number of desired clusters. NC is a k x d matrix for the
%   initial cluster centers. If NC is not specified, then the
%   centers will be randomly chosen from the observations.
%
%   OUTPUTS: CID provides a set of n indexes indicating cluster
%   membership for each point. NR is the number of observations
%   in each cluster. CENTERS is a matrix, where each row
%   corresponds to a cluster center.
%
%   A short usage sketch follows the end of this function.
%
%   See also CSHMEANS


%   W. L. and A. R. Martinez, 9/15/01
%   Computational Statistics Toolbox 

warning off
[n,d] = size(x);
if nargin < 3
    % Then pick some observations to be the cluster centers.
    ind = ceil(n*rand(1,k));
    % We will add some noise to make it interesting.
    nc = x(ind,:) + randn(k,d);
end

% set up storage
% integer 1,...,k indicating cluster membership
cid = zeros(1,n); 
% Make this different to get the loop started.
oldcid = ones(1,n);
% The number in each cluster.
nr = zeros(1,k); 
% Set up maximum number of iterations.
maxiter = 100;
iter = 1;

while ~isequal(cid,oldcid) && iter < maxiter
    % Remember the previous labels so convergence can be detected.
    oldcid = cid;
    % Implement the hmeans algorithm
    % For each point, find the distance to all cluster centers
    for i = 1:n
        dist = sum((repmat(x(i,:),k,1)-nc).^2,2);
        [m,ind] = min(dist); % assign it to this cluster center
        cid(i) = ind;
    end
    % Find the new cluster centers
    for i = 1:k
        % find all points in this cluster
        ind = find(cid==i);
        % find the centroid (the dim argument keeps single-point clusters correct)
        if ~isempty(ind)
            nc(i,:) = mean(x(ind,:),1);
        end
        % Find the number in each cluster
        nr(i) = length(ind);
    end
    iter = iter + 1;
end

% Now check each observation to see if the error can be minimized some more. 
% Loop through all points.
maxiter = 2; % with maxiter = 2, a single improvement sweep is made over the data
iter = 1;
move = 1;
while iter < maxiter && move ~= 0
    move = 0;
    % Loop through all points.
    for i = 1:n
        % find the distance to all cluster centers
        dist = sum((repmat(x(i,:),k,1)-nc).^2,2);
        r = cid(i);  % This is the cluster id for x
        dadj = nr./(nr+1).*dist'; % All adjusted distances
        [m,ind] = min(dadj); % minimum should be the cluster it belongs to
        if ind ~= r  % if not, then move x
            cid(i) = ind;
            % Update the counts and both affected cluster centers.
            ic = find(cid == ind);
            nc(ind,:) = mean(x(ic,:),1);
            nr(ind) = length(ic);
            ir = find(cid == r);
            nr(r) = length(ir);
            if ~isempty(ir)
                nc(r,:) = mean(x(ir,:),1);
            end
            move = 1;
        end
    end
    iter = iter + 1;
end
centers = nc;
if move == 0
 disp('No points were moved after the initial clustering procedure.')
else
 disp('Some points were moved after the initial clustering procedure.')
end
warning on
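
% A minimal usage sketch, assuming a small synthetic 2-D data set built with
% randn; the data, the choice k = 2, and the explicit starting centers below
% are illustrative only. Save cskmeans.m on the path and run these lines as a
% separate script.
x = [randn(50,2); randn(50,2) + 4];            % two loose groups of points
[cid,nr,centers] = cskmeans(x,2);              % let cskmeans pick random starts
% [cid,nr,centers] = cskmeans(x,2,[0 0; 4 4]); % or supply initial centers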



% Clustering method: the C-means algorithm
clear; clc; % clear the workspace and the command window

disp(strvcat('Feature-point clustering based on the C-means algorithm',datestr(now)));

% S = double(imread('cluster.bmp')); % read the source image
S = double(imread('d:\c1.bmp')); % read the source image
[sr sc] = find(S==0);   % sr,sc hold the coordinates of the points to be clustered (black pixels)
len = length(sr);       % len is the number of points to be clustered
group = zeros(len,1);   % group(i) records the class each feature point belongs to
c_num = 0;              % c_num counts how many clustering runs have been performed

yesno = 'n';
while yesno=='n' | yesno=='N'
    disp('Enter the number of classes:'); C = input('C='); % number of cluster centers

    tic; % start timing

    % i indexes the feature points
    % j,k index the cluster centers
    mr = zeros(C,1); mc = zeros(C,1);     % initialize the cluster-center coordinates
    mr(1,1) = sr(1,1); mc(1,1) = sc(1,1); % the first center starts at the first feature point
    for k = 2:1:C % initial coordinates of the k-th cluster center
        maxd = zeros(len,1);
        for j = 1:1:k-1 % summed distance from every feature point to the first k-1 centers
            i = 1:1:len;
            d = (sr(i)-mr(j)).^2 + (sc(i)-mc(j)).^2;
            maxd = maxd + d;
        end
        d = find(maxd==max(maxd)); % the k-th center is the feature point farthest from the first k-1 centers
        mr(k,1) = sr(d(1),1); mc(k,1) = sc(d(1),1);
    end
  
    mrtemp = zeros(C,1); mctemp = zeros(C,1); N = zeros(C,1);
    for i = 1:1:len % initial partition based on the initial cluster centers
        k = 1:1:C;
        d = (sr(i)-mr(k)).^2 + (sc(i)-mc(k)).^2;
        g = find(d==min(d)); group(i) = g(1); N(g(1),1) = N(g(1),1)+1;
        mrtemp(g(1),1) = mrtemp(g(1),1)+sr(i); mctemp(g(1),1) = mctemp(g(1),1)+sc(i);
    end
    mr(:) = 0; mc(:) = 0; N(N==0) = 1; % avoid division by zero for empty classes
    mr = mrtemp./N; mc = mctemp./N;    % recompute the cluster centers from the initial partition
    Je = 0;
    for i = 1:1:len % sum of squared errors of the initial partition
        Je = Je + (sr(i)-mr(group(i))).^2 + (sc(i)-mc(group(i))).^2;
    end
  
    changed = 1;       % becomes 0 once a full pass reassigns no point
    while changed==1   % stop when a complete pass over the points leaves Je unchanged
        ifbreak = 0;
        for i = 1:1:len
            if N(group(i)) > 1 % only move a point that is not the sole member of its class
                k = 1:1:C; % compute rho, the change in Je from moving the point
                d = (N(k).*((sr(i)-mr(k)).^2+(sc(i)-mc(k)).^2))./(N(k)+1);
                d(group(i)) = (N(group(i)).*((sr(i)-mr(group(i))).^2+(sc(i)-mc(group(i))).^2))./(N(group(i))-1);

                g = find(d==min(d));
                if g(1)~=group(i) % if rho_k <= rho_j, move the feature point from class j to class k
                    mrtemp(group(i)) = mrtemp(group(i))-sr(i); % remove the feature point from its old class
                    mctemp(group(i)) = mctemp(group(i))-sc(i);
                    N(group(i)) = N(group(i))-1;

                    group(i) = g(1); % update the point's class label
                    mrtemp(g(1)) = mrtemp(g(1))+sr(i); % add the feature point to its new class
                    mctemp(g(1)) = mctemp(g(1))+sc(i);
                    N(g(1)) = N(g(1))+1;

                    mr(:) = 0; mc(:) = 0; N(N==0) = 1;
                    mr = mrtemp./N; mc = mctemp./N; % recompute the cluster centers
                    Je = 0;
                    for ii = 1:1:len % recompute the sum of squared errors
                        Je = Je + (sr(ii)-mr(group(ii))).^2 + (sc(ii)-mc(group(ii))).^2;
                    end
                    ifbreak = 1;
                end
            end
        end
        changed = (ifbreak==1);
    end

    time = toc;
    figure; hold on;
    color = '.r.g.b.m.c.y'; col2 = 'rgbmcy'; % marker/color tables (supports up to 6 classes)
    for k = 1:1:C
        rtemp = sr(group==k); ctemp = sc(group==k);
        plot(rtemp,ctemp,[color((k-1)*2+1),color((k-1)*2+2)]);
        rectangle('Position',[round(mr(k)),round(mc(k)),3,3],'Curvature',[1,1],'FaceColor',col2(k));
    end
    title(['Number of classes: ',num2str(C),',  clustering time: ',num2str(time),' s']);
    hold off;

    c_num = c_num+1;
    judge(c_num,1) = C; judge(c_num,2) = Je; % judge stores the class count and the sum of squared errors
    figure; plot(judge(:,1),judge(:,2),'-s','MarkerFaceColor','g'); % Je as a function of C
    disp('Finished clustering?'); yesno = input('Y/N? ','s');
end
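
% A minimal sketch for creating a test image such as cluster.bmp, assuming
% black (value 0) feature points on a white background; the 200x200 size, the
% three made-up group centers, and the point counts are illustrative only.
% Run it once, then point the imread call above at the file it writes.
img = 255*ones(200,200,'uint8');             % white background
ctr = [50 60; 120 150; 160 40];              % made-up group centers (row,col)
for g = 1:size(ctr,1)
    r = round(ctr(g,1) + 8*randn(40,1));     % 40 scattered points per group
    c = round(ctr(g,2) + 8*randn(40,1));
    ok = r>=1 & r<=200 & c>=1 & c<=200;      % keep points inside the image
    img(sub2ind(size(img),r(ok),c(ok))) = 0; % mark feature points in black
end
imwrite(img,'cluster.bmp');                  % file name used by the commented-out imread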
