function [bics,modelout,model,Z,clabs] = mbclust(data,maxclus,useamde)
% Model-based clustering - the entire process
%
% [BICS,BESTMODEL,ALLMODELS,Z,CLABS] = MBCLUST(DATA,MAXCLUS,USEAMDE)
%
% This performs the entire model-based clustering procedure for a given
% set of data. It fits only the 4 basic models, with unequal and unknown
% priors, and returns the BESTMODEL corresponding to the highest BIC.
%
% The output variable BICS contains the value of the BIC for each
% model (row) and number of clusters (column). The output variable
% CLABS contains the class labels for the input data according to the
% optimal clustering chosen by the BIC.
%
% The output variable Z contains the cluster structure from the
% agglomerative model-based clustering. The matrix Z can be used
% with the DENDROGRAM function or the RECTPLOT plotting function.
%
% The output variable ALLMODELS is a structure array containing all of
% the models: ALLMODELS(I) is the I-th model type (1-4), and
% ALLMODELS(I).CLUS(J) is that model fitted with J clusters.
%
% The input variable MAXCLUS denotes the maximum number of clusters to
% check for. If the optional third argument USEAMDE is supplied (any
% value), an adaptive mixtures (AMDE) partition is used to initialize
% the agglomerative clustering; otherwise AGMBCLUST is used directly.
%
% Model-based Clustering Toolbox, January 2003

% model(i) indicates the i-th model type.
% clus(j) indicates a model with j clusters.
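% Example (a hedged sketch: the data are synthetic, the companion toolbox
% functions AGMBCLUST, MBCFINMIX, etc. must be on the path, and the
% PLOTBIC calling convention is assumed):
%
%    data = randn(150,2);                          % illustrative data
%    [bics,best,allmod,Z,clabs] = mbclust(data,5); % up to 5 clusters
%    dendrogram(Z)                                 % agglomerative MB tree
%    plotbic(bics)                                 % assumed signature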
warning off
[n,d] = size(data);
bics = zeros(4,maxclus); % Each row is a BIC for a model. Each col is a BIC for # clusters.
% Initialize the structure using all data points.
% This is the information for the one term/cluster model.
for i = 1:4
model(i).clus(1).pies = 1;
model(i).clus(1).mus = mean(data)';
model(i).clus(1).vars = varupm(data,ones(n,1),1,mean(data)',i);
bics(i,1) = bic(data,1,model(i).clus(1).mus,model(i).clus(1).vars,i);
end
if nargin == 3
% A third input was supplied, so use the AMDE initialization.
disp('Getting the Adaptive Mixtures initial partition.')
% Find an initial partition using AMDE.
[pies,mus,vars,nterms] = amde(data,100);
disp('Getting the agglomerative model-based clustering structure.')
% Do the agglomerative model-based clustering using AMDEMCLUST.
Z = amdemclust(data,pies,mus,vars);
else
disp('Getting the agglomerative model-based clustering structure.')
Z = agmbclust(data);
end
% Based on the agglomerative model-based clustering partition, estimate the models.
for m = 2:maxclus % Loop over the different number of clusters.
% m represents the number of clusters/terms in the model.
% Find the cluster labels when the tree is cut into m clusters.
labs = cluster(Z,'maxclust',m);
% Loop over the 4 different types of models.
for i = 1:4
musin = zeros(d,m); % each column is a term.
piesin = zeros(1,m);
varsin = zeros(d,d,m); % preallocate so no stale pages carry over between loops
% Find all of the points belonging to each cluster.
for j = 1:m
ind = find(labs==j);
musin(:,j) = mean(data(ind,:),1)'; % DIM argument guards the one-point-cluster case
piesin(j) = length(ind)/n;
varsin(:,:,j) = varupm(data(ind,:),ones(length(ind),1),1,musin(:,j),i);
end % j loop
% get the finite mixture only if the previous one did not diverge
tmp = length(model(i).clus);
if ~isempty(model(i).clus(tmp).mus) % then get the model
disp(['Getting the finite mixture estimate for model ' int2str(i) ', ' int2str(m) ' clusters.'])
[model(i).clus(m).pies,model(i).clus(m).mus,model(i).clus(m).vars] = mbcfinmix(data,musin,varsin,piesin,i);
if ~isempty(model(i).clus(m).mus)
bics(i,m) = bic(data,model(i).clus(m).pies,model(i).clus(m).mus,model(i).clus(m).vars,i);
else
bics(i,m) = NaN; % the EM estimate diverged
end
else
bics(i,m) = NaN; % an earlier fit diverged, so skip this model size
end
end % i model type loop
end % for m loop
% Once we have the BIC for each model, get the class labels
% according to the highest BIC.
[maxbic,maxi] = max(bics(:));
[mi,mj] = ind2sub(size(bics),maxi);
disp(['Maximum BIC is ' num2str(maxbic) '. Model number ' int2str(mi) '. Number of clusters is ' int2str(mj)])
% get the best model.
pies = model(mi).clus(mj).pies;
mus = model(mi).clus(mj).mus;
vars = model(mi).clus(mj).vars;
clabs = zeros(1,n);
for i = 1:n
posterior = postm(data(i,:)',pies,mus,vars);
[v, clabs(i)] = max(posterior); % classify it with the highest posterior prob.
end
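% Each observation is assigned by the MAP rule: the term with the largest
% posterior probability wins (MAX breaks ties in favor of the
% lowest-numbered term).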
modelout.pies = pies;
modelout.mus = mus;
modelout.vars = vars;
% Plotting of the BIC curves is left to the PLOTBIC function; the code
% below is kept for reference but commented out.
%plot(1:maxclus,bics(1,:),'*',1:maxclus,bics(2,:),'o',1:maxclus,bics(3,:),'x',1:maxclus,bics(4,:),'+')
%legend({'Model 1','Model 2','Model 3','Model 4'})
%hold on
%plot(1:maxclus,bics(1,:),1:maxclus,bics(2,:),1:maxclus,bics(3,:),1:maxclus,bics(4,:))
%hold off
%axs = axis;
%axis([0 maxclus+1 axs(3) axs(4)])
%set(gca,'XTick',0:(maxclus+1))
warning on
%%%%%%%%%%%%%%%%%%%% FUNCTION - VARUPM %%%%%%%%%%%%%%%%%%%%
function varm = varupm(data,posterior,mix_cof,mu,model)
% Returns the covariance matrix for each term under the requested
% model type (1-4), given the posterior weights and the means.
[nn,c]=size(posterior);
[n,d]=size(data);
switch model
case 1
% diagonal equal covariance matrices
% first find the full one.
var_mat = zeros(d,d,c);
for i=1:c
cen_data=data-ones(n,1)*mu(:,i)';
mat=cen_data'*diag(posterior(:,i))*cen_data;
var_mat(:,:,i)=mat;
end
% common covariance is the sum of these individual ones.
vart = sum(var_mat,3);
const = trace(vart)/(n*d);
varmt = const*eye(d);
varm = zeros(d,d,c);
for i = 1:c
varm(:,:,i) = varmt;
end
case 2
% diagonal, unequal covariance matrices
% first find the full one.
var_mat = zeros(d,d,c);
varm = zeros(d,d,c);
nk = sum(posterior); % one for each term.
for i=1:c
cen_data=data-ones(n,1)*mu(:,i)';
mat=cen_data'*diag(posterior(:,i))*cen_data;
var_mat(:,:,i)=mat;
const = trace(var_mat(:,:,i))/(d*nk(i));
varm(:,:,i) = const*eye(d);
end
case 3
% Full covariance matrix, equal across terms
var_mat = zeros(d,d,c);
for i=1:c
cen_data=data-ones(n,1)*mu(:,i)';
mat=cen_data'*diag(posterior(:,i))*cen_data;
var_mat(:,:,i)=mat;
end
% common covariance is the sum of these individual ones.
varmt = sum(var_mat,3);
% put same covariance in each term
varm = zeros(d,d,c);
for i = 1:c
varm(:,:,i) = varmt/n;
end
case 4
% this is the unconstrained version
var_mat = zeros(d,d,c);
for i=1:c
cen_data=data-ones(n,1)*mu(:,i)';
mat=cen_data'*diag(posterior(:,i))*cen_data;
var_mat(:,:,i)=mat./(mix_cof(i)*n);
end
varm = var_mat;
otherwise
error(['You entered ' int2str(model) ' for the model. Values must be 1, 2, 3 or 4.'])
end
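% Summary of the four covariance parameterizations computed above, where
% W_k is the weighted scatter matrix of term k and n_k is the sum of its
% posterior weights:
%   Model 1: Sigma_k = (trace(sum_j W_j)/(n*d)) * eye(d)   spherical, equal
%   Model 2: Sigma_k = (trace(W_k)/(n_k*d)) * eye(d)       spherical, unequal
%   Model 3: Sigma_k = (sum_j W_j)/n                       full, equal
%   Model 4: Sigma_k = W_k/n_k                             full, unequal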
%%%%%%%%%%%%%%%%%%%% FUNCTION - BIC %%%%%%%%%%%%%%%%%%%%%%%%
function val = bic(data,pies,mus,vars,model)
% function val = bic(data,pies,mus,vars,model)
% This function returns the BIC criterion for evaluating a finite
% mixture model obtained from the EM algorithm. It is an approximation
% to twice the log of the Bayes factor.
% Reference: Fraley, C. and A. E. Raftery, "How Many Clusters? Which
% Clustering Method? Answers Via Model-Based Cluster Analysis,"
% The Computer Journal, 41(8):578-588, 1998.
[n,d] = size(data);
c = length(pies); % number of terms.
% Now find the number of independent parameters in the model.
switch model
case 1
m = d*c + c;
case 2
m = 2*c + d*c-1;
case 3
m = c-1 + d*c + d*(d+1)/2;
case 4
m = c-1 + d*c + c*d*(d+1)/2;
otherwise
error('Model not recognized. Values must be 1, 2, 3 or 4.')
end
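% How these counts arise: each model has (c-1) free mixing proportions
% (they must sum to one) and d*c mean components; only the covariance
% parameters differ:
%   Model 1: (c-1) + d*c + 1           = d*c + c
%   Model 2: (c-1) + d*c + c           = d*c + 2*c - 1
%   Model 3: (c-1) + d*c + d*(d+1)/2
%   Model 4: (c-1) + d*c + c*d*(d+1)/2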
loglike = likelihood(data,mus,vars,pies);
% With this sign convention, larger BIC values indicate better models.
val = 2*loglike - m*log(n);
%%%%%%%%%%%%%% FUNCTION TO EVALUATE LIKELIHOOD %%%%%%%%%%%%%%
function like = likelihood(data,mu,var_mat,mix_cof)
% Returns the log-likelihood of the data under the finite mixture.
[n,d]=size(data);
[d,c]=size(mu);
tmplike = 0;
for i=1:c
% Find the value of the mixture at each data point and for each term.
tmplike = tmplike + mix_cof(i)*evalnorm(data,mu(:,i)',var_mat(:,:,i));
end
% The log-likelihood is the sum of the log of the mixture density at each point.
like = sum(log(tmplike));
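% In symbols, this computes the observed-data log-likelihood
%   log L = sum_{i=1}^n log( sum_{k=1}^c pi_k * N(x_i; mu_k, Sigma_k) )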
%%%%%%%%%%%%%%%%%%%% FUNCTION - EVALNORM %%%%%%%%%%%%%%%%%%%%
function prob = evalnorm(x,mu,cov_mat)
% Evaluates the multivariate normal density with mean MU and
% covariance COV_MAT at each row of X.
[n,d]=size(x);
prob = zeros(n,1);
a=(2*pi)^(d/2)*sqrt(det(cov_mat)); % normalizing constant
covi = inv(cov_mat);
for i = 1:n
xc = x(i,:)-mu;
arg=xc*covi*xc';
prob(i)=exp((-.5)*arg);
end
prob=prob/a;
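% A vectorized alternative to the loop above (a sketch, not part of the
% original toolbox; it assumes COV_MAT is symmetric positive definite):
%   xc   = x - ones(n,1)*mu;           % center every observation at once
%   arg  = sum((xc/cov_mat).*xc,2);    % row-wise quadratic forms
%   prob = exp(-0.5*arg)/a;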
%%%%%%%%%%%%%%%%%%%% FUNCTION - POSTM %%%%%%%%%%%%%%%%%%%%
function posterior = postm(x,pies,mus,vars)
% Returns the posterior probability that the point X belongs to
% each term of the mixture.
nterms = length(pies);
totprob=0;
posterior=zeros(1,nterms);
for i=1:nterms % loop to find the total probability in the denominator (Hand, pg. 37)
posterior(i)=pies(i)*evalnorm(x',mus(:,i)',vars(:,:,i));
totprob=totprob+posterior(i);
end
posterior=posterior/totprob;