📄 mixtures4.m
字号:
function [bestk,bestpp,bestmu,bestcov,dl,countf] = mixtures4(y,kmin,kmax,regularize,th,covoption)
%
% Syntax:
% [bestk,bestpp,bestmu,bestcov,dl,countf] = mixtures3(y,kmin,kmax,regularize,th,covoption)
%
% Inputs:
%
% "y" is the data; for n observations in d dimensions, y must have
% d lines and n columns.
%
% "kmax" is the initial (maximum) number of mixture components
% "kmin" is the minimum number of components to be tested
%
% "covoption" controls the covarince matrix
% covoption = 0 means free covariances
% covoption = 1 means diagonal covariances
% covoption = 2 means a common covariance for all components
% covoption = 3 means a common diagonal covarince for all components
%
% "regularize" is a regularizing factor for covariance matrices; in very small samples,
% it may be necessary to add some small quantity to the diagonal of the covariances
%
% "th" is a stopping threshold
%
% Outputs:
%
% "bestk" is the selected number of components
% "bestpp" is the obtained vector of mixture probabilities
% "bestmu" contains the estimates of the means of the components
% it has bestk columns by d lines
% "bestcov" contains the estimates of the covariances of the components
% it is a three-dimensional (three indexes) array
% such that bestcov(:,:,1) is the d by d covariance of the first
% component and bestcov(:,:,bestk) is the covariance of the last
% component
%
% "dl" contains the successive values of the cost function
% "countf" is the total number of iterations performed
%
% Written by:
% Mario Figueiredo
% Instituto Superior Tecnico
% 1049-001 Lisboa
% Portugal
%
% Email: mtf@lx.it.pt
%
% 2000, 2001, 2002
%
verb=1; % verbose mode; change to zero for silent mode
bins = 40; % number of bins for the univariate data histograms for visualization
dl = []; % vector to store the consecutive values of the cost function
[dimens,npoints] = size(y);
switch covoption
case 0
npars = (dimens + dimens*(dimens+1)/2);
%this is for free covariance matrices
case 1
npars = 2*dimens;
%this is for diagonal covariance matrices
case 2
npars = dimens;
%this is for a common covariance matrix
%independently of its structure)
case 3
npars = dimens;
otherwise
%the default is to assume free covariances
npars = (dimens + dimens*(dimens+1)/2);
end
nparsover2 = npars / 2;
% we choose which axes to use in the plot,
% in case of higher dimensional data (>2)
% Change this to have other axes being shown
axis1 = 1;
axis2 = 2;
% kmax is the initial number of mixture components
k = kmax;
% indic will contain the assignments of each data point to
% the mixture components, as result of the E-step
indic = zeros(k,npoints);
% Initialization: we will initialize the means of the k components
% with k randomly chosen data points. Randperm(n) is a MATLAB function
% that generates random permutations of the integers from 1 to n.
randindex = randperm(npoints);
randindex = randindex(1:k);
estmu = y(:,randindex);
% the initial estimates of the mixing probabilities are set to 1/k
estpp = (1/k)*ones(1,k);
% here we compute the global covariance of the data
globcov = cov(y');
for i=1:k
% the covariances are initialized to diagonal matrices proportional
% to 1/10 of the mean variance along all the axes.
% Of course, this can be changed
estcov(:,:,i) = diag(ones(1,dimens)*max(diag(globcov/10)));
end
% we plot the data and the initial estimates
if (dimens == 1)
[hh,xx] = hist(y,bins);
barplot = bar(xx,hh/npoints/(xx(2)-xx(1)));
set(barplot,'EdgeColor',[1 1 1]);
set(barplot,'FaceColor',[0.75 0.75 0.75]);
set(gca,'FontName','Times'); set(gca,'FontSize',14);
drawnow
hold on
miny = min(y);
maxy = max(y);
plotgrid = miny:(maxy-miny)/200:maxy;
mix = zeros(size(plotgrid));
for comp=1:k
mix = mix + estpp(comp)*uninorm(plotgrid,estmu(comp),estcov(comp));
plot(plotgrid,estpp(comp)*uninorm(plotgrid,estmu(comp),estcov(comp)),'k')
end
plot(plotgrid,mix,'Color','red','LineWidth',3);
text(plotgrid(5),0.9*max(mix),sprintf('k=%d',k),...
'FontName','Times','FontSize',18,'FontWeight','bold');
drawnow
hold off
else
plot(y(axis1,:),y(axis2,:),'.','Color',[0.5 0.5 0.5]);
hold on
axis equal
set(gca,'FontName','Times','FontSize',22);
placex = get(gca,'Xlim'); placey = get(gca,'Ylim');
text(placex(1)+0.2,placey(2)-0.2,sprintf('k=%d',k),...
'FontName','Times','FontSize',22);
for comp=1:k
elipsnorm(estmu([axis1,axis2],comp),...
estcov([axis1,axis2],[axis1,axis2],comp),2)
end
drawnow
hold off
end
% having the initial means, covariances, and probabilities, we can
% initialize the indicator functions following the standard EM equation
% Notice that these are unnormalized values
for i=1:k
semi_indic(i,:) = multinorm(y,estmu(:,i),estcov(:,:,i));
indic(i,:) = semi_indic(i,:)*estpp(i);
end
% we can use the indic variables (unnormalized) to compute the
% loglikelihood and store it for later plotting its evolution
% we also compute and store the number of components
countf = 1;
loglike(countf) = sum(log(sum(realmin+indic)));
dlength = -loglike(countf) + (nparsover2*sum(log(estpp))) + ...
(nparsover2 + 0.5)*k*log(npoints);
dl(countf) = dlength;
kappas(countf) = k;
% the transitions vectors will store the iteration
% number at which components are killed.
% transitions1 stores the iterations at which components are
% killed by the M-step, while transitions2 stores the iterations
% at which we force components to zero.
transitions1 = [];
transitions2 = [];
% minimum description length seen so far, and corresponding
% parameter estimates
mindl = dl(countf);
bestpp = estpp;
bestmu = estmu;
bestcov = estcov;
bestk = k;
k_cont = 1; % auxiliary variable for the outer loop
while(k_cont) % the outer loop will take us down from kmax to kmin components
cont=1; % auxiliary variable of the inner loop
while(cont) % this inner loop is the component-wise EM algorithm with the
% modified M-step that can kill components
if verb==1
% in verbose mode, we keep displaying the minimum of the
% mixing probability estimates to see how close we are
% to killing one component
disp(sprintf('k = %2d, minestpp = %0.5g', k, min(estpp)));
end
% we begin at component 1
comp = 1;
% ...and can only go to the last component, k.
% Since k may change during the process, we can not use a for loop
while comp <= k
% we start with the M step
% first, we compute a normalized indicator function
clear indic
for i=1:k
indic(i,:) = semi_indic(i,:)*estpp(i);
end
normindic = indic./(realmin+kron(ones(k,1),sum(indic,1)));
% now we perform the standard M-step for mean and covariance
normalize = 1/sum(normindic(comp,:));
aux = kron(normindic(comp,:),ones(dimens,1)).*y;
estmu(:,comp)= normalize*sum(aux,2);
if (covoption == 0)|(covoption == 2)
estcov(:,:,comp) = normalize*(aux*y') - estmu(:,comp)*estmu(:,comp)' ...
+ regularize*eye(dimens);
else
estcov(:,:,comp) = normalize*diag(sum(aux.*y,2)) - diag(estmu(:,comp).^2) ;
end
if covoption == 2
comcov = zeros(dimens,dimens);
for comp2 = 1:k
comcov = comcov + estpp(comp2)*estcov(:,:,comp2);
end
for comp2 = 1:k
estcov(:,:,comp2) = comcov;
end
end
if covoption == 3
comcov = zeros(dimens,dimens);
for comp2 = 1:k
comcov = comcov + estpp(comp2)*diag(diag(estcov(:,:,comp2)));
end
for comp2 = 1:k
estcov(:,:,comp2) = comcov;
end
end
% this is the special part of the M step that is able to
% kill components
estpp(comp) = max(sum(normindic(comp,:))-nparsover2,0)/npoints;
estpp = estpp/sum(estpp);
% this is an auxiliary variable that will be used the
% signal the killing of the current component being updated
killed = 0;
% we now have to do some book-keeping if the current component was killed
% that is, we have to rearrange the vectors and matrices that store the
% parameter estimates
if estpp(comp)==0
killed = 1;
% we also register that at the current iteration a component was killed
transitions1 = [transitions1 countf];
if comp==1
estmu = estmu(:,2:k);
estcov = estcov(:,:,2:k);
estpp = estpp(2:k);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -