📄 isodata.m
字号:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% ISODATA聚类算法演示程序1.00 %%%
%%% 国防科技大学四院五队顾巍 %%%
%%% 完全按照《现代模式识别》孙即祥著作 2.4.4《动态聚类法》算法3实现 %%%
%%% 使用欧式距离作为测度标准,本方法使用范例见RunDemo.m %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 初始参数的属性
% c 预期的类数
% Nc 初始聚类中心个数
% MinMemb 每一类中允许的最小模式数目
% MinSauare 类内标准差大于这个数jiu分裂
% JionLevel 两类中心的最小距离,低于这个距离就合并
% L 每次迭代可以合并的类的最多对数
% MaxLoops 允许的最大迭代次数
% RawData 初始特征矢量集合,[x1,x2,x3,x4...xn]
% 返回值:架构。
% Final.Err 完成情况的文本描述
% Final.Result 2 * n数组,记录各个样本的分类号码和距离该类心的欧式距离
% Final.ClassCenter nDIm*Nc数组,记录Nc个类心的位置
% Hinal.nLOOP 记录迭代了多少次
function Final = ISODATA(c,Nc,MinMemb,MinSquare,JoinLevel,L,MaxLoops,RawData)
%步骤1,读入数据、初始化
nLOOP = 0; %迭代次数
bSinked = 0; %收敛条件,0不收敛,1收敛
%获得样本维数和样本数目
nSize = size(RawData);
nDim = nSize(1); %向量维数
nTotalData = nSize(2); %样本数目
Final.Err = '正确执行'; %返回值:执行结果
Final.Result = 0; %返回值:分类结果和各个样本距类心距离
Final.ClassCenter = zeros(nDim,c); %返回值:各类的类心
%判断数据合法性
if c>nTotalData
Final.Err = '初始化失败。原因:预期类数大于样本数目,c';
return ;
end
if Nc>=nTotalData
Final.Err = '初始化失败。原因:初始类数大于等于样本数目,Nc';
return;
end
%建立聚类中心,用相距最远的Nc个矢量作为初始的聚类中心
%Z是记录每一个聚类的中心的向量组
Z = zeros(nDim,Nc);
%用拥有最多极值点的样本作为第一类的中心,这种点是超空间的角点
%类似立方体的顶点
MaxN = zeros(1,nTotalData);
%统计各个样本的极值数目
for IndexI = 1:nDim
SubMax = RawData(IndexI,1);
SubN = 1;
for IndexJ = 1:nTotalData
dDot = RawData(IndexI,IndexJ);
if SubMax<=dDot
SubN = IndexJ;
SubMax = dDot;
end
MaxN(SubN) = MaxN(SubN)+1;
end
end
%找到极值最多的样本
SubMax = MaxN(1);
SubMaxN = 1;
for IndexI = 1:nTotalData
if (SubMax<=MaxN(IndexI))
SubMax=MaxN(IndexI);
SubMaxN = IndexI;
end
end
Z(:,1) = RawData(:,SubMaxN);
CenterXY = Z(:,1);
for IndexI = 2:Nc
%找到和当前质心最远的样本作为下一个类IndexI的类心
MaxDis = -1e9;
MaxN = 0;
for IndexJ = 1:nTotalData
dDot = (RawData(:,IndexJ)-CenterXY)'*(RawData(:,IndexJ)-CenterXY);
if MaxDis<=dDot
MaxN = IndexJ;
MaxDis = dDot;
end
end
%记录新的类心
Z(:,IndexI) = RawData(:,MaxN);
%刷新质心
CenterXY = CenterXY + Z(:,IndexI);
CenterXY = CenterXY/2;
end
nZ = zeros(Nc,1);
%Result是记录分类结果的向量组,Result(1,i)表示第i个矢量的类属Result(2,i)表示类距离
Result = zeros(2,nTotalData);
%如果两次循环聚类不变化,说明跌代可以结束
OldResult = Result;
%使用迭代次数以及收敛条件控制的首层循环
while (nLOOP<=MaxLoops & bSinked ~= 1)
%第nLOOP次迭代
nLOOP = nLOOP + 1;
%步骤2,按照最小距离原则进行分类
bSepOK = 0;
nZ = zeros(Nc,1);
while (bSepOK==0)
d = zeros(nTotalData,Nc);
for IndexI = 1:nTotalData %对xi进行分类
dMinN = 0;
dMin = Inf;
for IndexJ = 1:Nc %分别计算和各个类心的距离
dDot = (RawData(:,IndexI)-Z(:,IndexJ))'*(RawData(:,IndexI)-Z(:,IndexJ));
d(IndexI,IndexJ) = sqrt(dDot);
if dMin>d(IndexI,IndexJ)
dMin = d(IndexI,IndexJ);
dMinN = IndexJ;
end
end
Result(1,IndexI) = dMinN;
Result(2,IndexI) = dMin;
nZ(dMinN) = nZ(dMinN)+1;
end
%判断分类是否满足每一类中允许的最小模式数目的约束
OK = 1;
for IndexI = 1:Nc
if (nZ(IndexI)<MinMemb)
TEMP = Z(:,Nc-1);
if IndexI~=Nc-1
TEMP (:,IndexI:Nc-1) = Z(:,IndexI+1:Nc);
end
OK = 0;
Nc = Nc - 1;
nZ = zeros(Nc,1);
break;
end
end
if OK==1
bSepOK = 1;
end
end
%步骤4 计算各类中心、各类中模式到类心的平均距离、总体平均距离
%重新计算类的中心
Z = zeros(nDim,Nc);
for IndexI = 1:nTotalData
TEMP = Result(1,IndexI);%TEMP是第IndexI个样本的类属
Z(:,TEMP) = Z(:,TEMP) + RawData(:,IndexI);
end
for IndexI = 1:Nc
Z(:,IndexI) = Z(:,IndexI)/nZ(IndexI);
end
%计算各类中心到类心的平均距离
dInDis = zeros(1,Nc); %初始化类内平均距离
dTDis = 0; %初始化总平均距离
%计算各类的类内平均距离
for IndexI = 1:nTotalData
TEMP = Result(1,IndexI);%TEMP是第IndexI个样本的类属
dInDis(TEMP) = dInDis(TEMP) + sqrt((RawData(:,IndexI)-Z(:,TEMP))'*(RawData(:,IndexI)-Z(:,TEMP)));
end
for IndexI = 1:Nc
dInDis(IndexI) = dInDis(IndexI)/nZ(IndexI);
dTDis = dTDis + nZ(IndexI)*dInDis(IndexI);
end
%计算总体平均距离
dTDis = dTDis / nTotalData;
%分裂、合并以及 依据nLOOP,Nc判断停止
bNeedDivide = 0;
bNeedJoint = 0;
if (nLOOP == MaxLoops)
JoinLevel = 0;
bNeedJoint = 1;
elseif Nc<=c/2
bNeedDivide = 1;
elseif Nc>=2*c
bNeedJoint = 1;
elseif mod(nLOOP,2)==0
bNeedJoint = 1;
else
bNeedDivide = 1;
end
%%分裂处理
if (bNeedDivide~=0)
%第六步 计算各类类内距的标准差矢量
bNeedDivide = 0;
Delta = zeros(Nc,nDim);
for IndexI = 1:nTotalData
TEMP = Result(1,IndexI);%TEMP是第IndexI个样本的类属
for IndexJ = 1:nDim
Delta(TEMP,IndexJ) = Delta(TEMP,IndexJ) + (RawData(IndexJ,IndexI) - Z(IndexJ,TEMP)).^2;
end
end
for IndexI = 1:Nc
for IndexJ = 1:nDim
Delta(IndexI,IndexJ) = sqrt(Delta(IndexI,IndexJ)./nZ(IndexI));
end
end
%第七步,求出每一类内距标准差矢量的最大分量
DeltaMax = max(Delta');
DeltaMaxN = DeltaMax;
for IndexI = 1:Nc
for IndexJ = 1:nDim
if (Delta(IndexI,IndexJ) ==DeltaMax(IndexI))
DeltaMaxN(IndexI) = IndexJ;
break;
end
end
end
%第八步,判断是否需要分裂
IndexI=0;
for IndexI = 1:Nc
if DeltaMax(IndexI)>MinSquare
if (dInDis(1,IndexI)>dTDis | (Nc ==1)) & (nZ(IndexI)>2*(MinMemb+1)) & (Nc<=c/2)
bNeedDivide = 1;
break;
end
end
end
if (bNeedDivide == 0)
bNeedJoint = 1;
else
%进行分裂操作
TEMP = zeros(nDim,Nc+1);
TEMP(:,1:Nc) = Z;
TEMP(:,Nc+1) = Z(:,IndexI);
DT = Delta';
TEMP(DeltaMaxN(IndexI),IndexI) = TEMP(DeltaMaxN(IndexI),IndexI) + 0.5*DT(DeltaMaxN(IndexI),IndexI);
TEMP(DeltaMaxN(IndexI),Nc+1) = TEMP(DeltaMaxN(IndexI),Nc+1) - 0.5*DT(DeltaMaxN(IndexI),IndexI);
Nc = Nc + 1;
Z = TEMP;
end
end
%合并处理
if (bNeedJoint~=0)
%第九步,合并处理
%计算各个类对中心距离
ED = zeros(Nc,Nc);
for IndexI = 1:Nc
for IndexJ = 1:Nc
if (IndexI<IndexJ)
ED(IndexI,IndexJ) = sqrt((Z(:,IndexI)-Z(:,IndexJ))'*(Z(:,IndexI)-Z(:,IndexJ)));
else
ED(IndexI,IndexJ) = Inf;
end
end
end
%排序,生成等待合并的类序列
%共能存放L个类对
Sorted = zeros(L,2);
%填充L个交换类对
for IndexK = 1:L
%找到最近的类
MinDis = Inf;
MinPos = [0,0];
for IndexI = 1:Nc
for IndexJ = 1:Nc
if (IndexI<IndexJ)
if MinDis>=ED(IndexI,IndexJ)
MinDis = ED(IndexI,IndexJ);
MinPos = [IndexI,IndexJ];
end
end
end
end
%判断最近的距离是否小于JoinLevel
if MinDis>=JoinLevel
break;
end
%追加待合并项、刷新距离矩阵
Sorted(IndexK,1) = MinPos(1);
Sorted(IndexK,2) = MinPos(2);
%已经合并的类再也不能合并了
ED(MinPos(1),:) = Inf;
ED(:,MinPos(2)) = Inf;
end
%合并类
count = 0;
for IndexI = 1:L
%注意,Sorted(x,1)<Sorted(x,2)
if (Sorted(IndexI,1)==0|Sorted(IndexI,2)==0)
break;
end
TEMP = (nZ(Sorted(IndexI,1))*Z(:,Sorted(IndexI,1))+nZ(Sorted(IndexI,2))*Z(:,Sorted(IndexI,2)))/(nZ(Sorted(IndexI,1))+nZ(Sorted(IndexI,2)));
%这是把删除的类标记为Inf
Z(:,Sorted(IndexI,1)) = TEMP;
Z(:,Sorted(IndexI,2)) = Inf;
count = count + 1;
end
%整理新的类空间
TEMP = zeros(nDim,Nc-count);
count = 0;
for IndexI = 1:Nc
if Z(1,IndexI)~=Inf
count = count + 1;
TEMP(:,count) = Z(:,IndexI);
end
end
Z = TEMP;
Nc = count;
end
%如果Result不变化,则跌代结束
if (OldResult==Result)
break;
end
OldResult=Result;
end
Final.Result = Result;
Final.nLOOP = nLOOP;
Final.ClassCenter = Z;
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -