vqlbg_bb.m
来自「一个关于数据聚类和模式识别的程序,在生物化学,化学中因该都可以用到.希望对大家有」· M 代码 · 共 117 行
M
117 行
function [center, U] = vqLBG(dataSet, codeBookSize, plotOpt)
% vqLBG: Vector quantization using LBG method of center splitting
% Usage: [center, U]=vq(dataSet, codeBookSize, plotOpt)
% dataSet : sampleNum x featureDim.
% codeBookSize : codebook size or number of cluster centers (should be the power of 2)
% Roger Jang, 20030330
if nargin==0, selfdemo; return; end;
if nargin<3, plotOpt=0; end;
% ====== Error checking
[sampleNum,featureDim] = size(dataSet);
if sampleNum<=1, msgbox('Error on dataSet!'), return; end
if codeBookSize<1, msgbox('Error on codebook size!'), return; end
if codeBookSize>sampleNum/2, error('codeBookSize must be less than half of the row size of dataSet!'), end
% Initial Centers: mean of all data
center= mean(dataSet);
if plotOpt
plot(dataSet(:,1), dataSet(:,2), 'b.');
centerH=line(center(:,1), center(:,2), 'color', 'r', 'marker', 'o', 'linestyle', 'none', 'linewidth', 2);
axis image
end;
% Do while-loop to increase center number till it is equal to or greater than codebook size.
maxLoopCount = 100;
centerNum = size(center,1);
while (centerNum<codeBookSize)
if plotOpt
fprintf('Hit return to start center splitting...\n'); pause;
end
center=[center-eps; center+eps];
centerNum = size(center,1);
prevDistortion = realmax;
distMat=vecdist(center, dataSet);
for i = 1:maxLoopCount
[center, distortion, distMat, U] = updateCenter(center, dataSet, distMat);
if(abs((prevDistortion-distortion)/prevDistortion) < eps)
break;
else
prevDistortion = distortion;
end
if plotOpt
set(centerH, 'xdata', center(:,1), 'ydata', center(:,2));
drawnow;
end
end;
fprintf('No. of centers = %g, loop count = %g, distortion = %g\n' , centerNum, i, distortion);
end
if plotOpt
color = {'r', 'g', 'c', 'y', 'm', 'b', 'k'};
figure;
plot(dataSet(:, 1), dataSet(:, 2), 'o');
maxU = max(U);
clusterNum = size(center,1);
for i=1:clusterNum,
index = find(U(i, :) == maxU);
colorIndex = rem(i, length(color))+1;
line(dataSet(index, 1), dataSet(index, 2), 'linestyle', 'none', 'marker', '*', 'color', color{colorIndex});
line(center(:,1), center(:,2), 'color', 'r', 'marker', 'o', 'linestyle', 'none', 'linewidth', 2);
end
axis image;
end
% ====== Find new centers (This is the same as the one in kmeans.m.)
function [center, distortion, distMat, U] = updateCenter(center, dataSet, distMat)
centerNum = size(center, 1);
dataNum = size(dataSet, 1);
dim = size(dataSet, 2);
% ====== Find the U (partition matrix)
[a,b] = min(distMat);
index = b+centerNum*(0:dataNum-1);
U = zeros(size(distMat));
U(index) = ones(size(index));
% ====== Check if there is an empty group (and delete them)
index=find(sum(U,2)==0);
emptyGroupNum=length(index);
if emptyGroupNum~=0,
fprintf('Found %d empty group(s)!', emptyGroupNum);
U(index,:)=[];
end
% ====== Find the new centers
center = (U*dataSet)./(sum(U,2)*ones(1,dim));
distMat = vecdist(center, dataSet);
% ====== Add new centers for the deleted group
if emptyGroupNum~=0
distortionByGroup=sum(((distMat.^2).*U)');
[junk, index]=max(distortionByGroup); % Find the indices of the centers to be split
index=index(1:emptyGroupNum);
temp=center; temp(index, :)=[];
center=[temp; center(index,:)-eps; center(index,:)+eps];
distMat = vecdist(center, dataSet);
[a,b] = min(distMat);
index = b+centerNum*(0:dataNum-1);
U = zeros(size(distMat));
U(index) = ones(size(index));
center = (U*dataSet)./(sum(U,2)*ones(1,dim));
distMat = vecdist(center, dataSet);
end
distortion = sum(sum((distMat.^2).*U)); % objective function
% ====== Self demo
function selfdemo
dataNum = 150;
data1 = ones(dataNum, 1)*[0 0] + randn(dataNum, 2)/5;
data2 = ones(dataNum, 1)*[0 1] + randn(dataNum, 2)/5;
data3 = ones(dataNum, 1)*[1 0] + randn(dataNum, 2)/5;
data4 = ones(dataNum, 1)*[1 1] + randn(dataNum, 2)/5;
dataSet = [data1; data2; data3; data4];
codeBookSize=2^6;
plotOpt=1;
codebook = feval(mfilename, dataSet, codeBookSize, plotOpt);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?