📄 legclust.m
字号:
% -------------------------------------------------------------------------
% Function: [classno]=LEGClust(x,M,K,h)
% -------------------------------------------------------------------------
% Aim:
% Clustering the data with Layered Entropic Subgraphs Algorithm(LEGClust)
% -------------------------------------------------------------------------
% Input:
% x - data set (m,n); m-objects, n-variables
% M - number of nearest nighbors
% K - minimum number of connections to join clusters in consecutive steps
% h - the smoothing parameter for computing the entropy, if not known, void
% it or set it with []
% -------------------------------------------------------------------------
% Output:
% classno - classification each point belong to
% -------------------------------------------------------------------------
% Example of use:
% [classno]=LEGClust(x,8,3,[])
% -------------------------------------------------------------------------
% References:
% [1] Jorge M. Santos, Joaquim Marques de Sa and Luis A. Alexander,
% "LEGClust - A Clustering Algorithm Based on Layered Entropic Subgraphs",
% IEEE TRANSACTIONS ON PATTERN ANALYIS AND MACHINE INTELLIGENCE, 2008.
% -------------------------------------------------------------------------
% Written by Jia Liu
% School of Computer and Information Technology
% The Beijing Jiaotong University
% March 2008
% Email: bjtu.liujia@gmail.com
%--------------------------------------------------------------------------
%-------------------begin of function LEGClust(x,M,k,h)--------------------
function [classno]=LEGClust(x,M,K,h)
% get the size of the matrix x, save in m and n
[m,n]=size(x);
% if number of the parameters less than 3 or more than 4, report error
if nargin<3 | nargin>4
error('input parameters error!');
% if the h parameter is not known, compute the value with Gethop function
elseif nargin==3 | isempty(h)
[h]=Gethop(x);
end
% compute the distance between oberservations with Eculidean Distances
EuclDist=pdist(x);
EuclDist=squareform(EuclDist);
% sort the EuclDist
[sorted,index]=sort(EuclDist,2);
% get the M-nearest point, (in fact shoud get the M+1 nearest point
% because of the point itself)
M_nearest=index(:,[2:M+1]);
% define a matrix to strore the entroy value
EntroyValue=inf(m,m);
% for each point, compute the entroy betwwen it and its M-nearest points
for i=1:m
% get the M*(M-1)+1 vector which will be used for entroy computation
vtor=zeros(M*(M-1)+1,n);
count=1;
% add the M*(M-1) vectors d{j,k}
for j=1:M
for k=1:M
if j~=k
vtor(count,:)=x(M_nearest(i,k),:)-x(M_nearest(i,j),:);
count=count+1;
end
end
end
% for each point of the M-nearest neighbors computes p(j} and entroy
for j=1:M
vtor(M*(M-1)+1,:)=x(M_nearest(i,j),:)-x(i,:);
% compute the entroy with the Getentroy function
EntroyValue(i,M_nearest(i,j))=Getentroy(vtor,h,n);
end
end
% sort the EntroyValue matrix
[sortedEntroy,indexEntroy]=sort(EntroyValue,2);
% construct the proximity matrix
M_layer=indexEntroy(:,[1:M]);
% define a matrix store the connection between observations
% if point i connect to point j, then the ith row and jth column of the
% matrix will be set as 1, or else set as 0
connections=zeros(m,m);
% form the elementary clusters using the first layers
for i=1:m
connections(i,M_layer(i,1))=1;
end
% define the sequence number of the classification
clusterno=1;
% sign the point whether it is classified
classified=zeros(1,m);
% compute the number of the elementary clusters
for i=1:m
if classified(i)==0
% classify this point
classno(i)=clusterno;
classified(i)=1;
list=i;
while ~isempty(list)
% get the first point of the list
fp=list(1);
% delete the first point
list(1)=[];
%check the ith row
rindex=find(connections(fp,:)==1);
for j=1:length(rindex)
if classified(rindex(j))==0
classno(rindex(j))=clusterno;
classified(rindex(j))=1;
list=[list,rindex(j)];
end
end
%check the ith column
cindex=find(connections(:,fp)==1);
for j=1:length(cindex)
if classified(cindex(j))==0
classno(cindex(j))=clusterno;
classified(cindex(j))=1;
list=[list,cindex(j)];
end
end
end
% add the clusterno
clusterno=clusterno+1;
end
end
% the count of cluster after the first layer connect
clustercount=clusterno-1;
% the number of the layer
layerid=2;
% record the lastest number of clusters
lastcount=clustercount;
% connect the clusters
while(clustercount>1 & layerid<=M)
% connect the next layer
for i=1:m
connections(i,M_layer(i,layerid))=1;
end
% for each cluster, compute the connections with other clusters
% define a clustercount*clustercount matrix to record the number of
% the connections between observations
connect_num=zeros(clustercount,clustercount);
for i=1:clustercount
for j=(i+1):clustercount
ithcluster=find(classno==i);
jthcluster=find(classno==j);
for ith=1:length(ithcluster)
for jth=1:length(jthcluster)
connect_num(i,j)=connect_num(i,j)...
+connections(ithcluster(ith),jthcluster(jth))...
+connections(jthcluster(jth),ithcluster(ith));
end
end
end
end
% find the pair where the number of the clusters >= k
[r,c]=find(connect_num>=K);
r=r';
c=c';
% construct a unidimensial arry
k_vctor=zeros(1,length(r));
for i=1:length(r)
k_vctor=connect_num(r(i),c(i));
end
% sort the k_vector in order to merge the largest connections each time
[sorted_kvctor,index_kvctor]=sort(k_vctor,'descend');
% define a vector to record whether the cluster has been merged
merged=zeros(1,clustercount);
% merge the clusters
while ~isempty(index_kvctor)
% get the first index_vctor
fst_kvctor=index_kvctor(1);
% delete the first element
index_kvctor(1)=[];
% get the numbers of the two clusters
cluster1=r(fst_kvctor);
cluster2=c(fst_kvctor);
% if the two clusters are not merged, then merge them
% and type the merged as 1
if ~merged(cluster1) & ~merged(cluster2)
classno(find(classno==cluster2))=cluster1;
merged(cluster1)=1;
merged(cluster2)=1;
end
end
% sequence the classno to "1,2,3..."format
cnums=unique(classno);
for i=1:length(cnums)
classno(find(classno==cnums(i)))=i;
end
% compare the clustercount with the last clustercount
% if they are equal, stop the process
clustercount=length(cnums);
if clustercount==lastcount
break;
else
% continue to connect the next layer
layerid=layerid+1;
% set the last count as the clustercount
lastcount=clustercount;
end
end
%-------------------end of function LEGClust(x,M,k,h)----------------------
%--------------------------------------------------------------------------
%--------------------------------------------------------------------------
%------------------------begin of function Gethop(x)-----------------------
function [h]=Gethop(x)
% get the size of the matrix x,save in m and n
[m,n]=size(x);
% compute the mean of standard deviations for each dimension
avg=sum(sum((ones(m,1)*mean(x)-x).^2)/(m-1))/m;
h=2*avg*(4/((n+2)*m).^(1/(n+4)));
%-----------------------end of function Gethop(x)--------------------------
%--------------------------------------------------------------------------
%--------------------------------------------------------------------------
%--------begin of function Getentroy(x,i,M_nearest)------------------------
function [entroy]=Getentroy(vctor,h,n)
% get the length of vctor
N=length(vctor);
% compute the entroy with the formula gaven in the reference
summation=0;
for i=1:N
for j=1:N
% the n*n identity matrix
idenmtr=2*h.^2*eye(n);
% the vector x(i)-x(j)
xij=vctor(i,:)-vctor(j,:);
summation=summation+(1/(((2*pi).^(n/2))*sqrt(det(idenmtr))))...
*exp(-1/2*xij*inv(idenmtr)*xij');
end
end
% return the entroy value
entroy=-log(summation/(N*N));
%---------end of function Getentroy(x,i,M_nearest)-------------------------
%--------------------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -