emclust.m

来自「模式识别工具箱。非常丰富的底层函数和常见的统计识别工具」· M 代码 · 共 216 行

216 行

%EMCLUST Expectation-Maximization clustering%%  [LABELS,W_EM] = EMCLUST (A,W_CLUST,K,LABTYPE,FID)%% INPUT%   A         Dataset, possibly labeled%   W_CLUST   Cluster model mapping, untrained (default: nmc)%   K         Number of clusters (default: 2)%   LABTYPE   Label type: 'crisp' or 'soft' (default: label type of A)%   FID       File ID to write progress to (default [], see PRPROGRESS)%% OUTPUT%   LABELS    Integer labels for the objects in A pointing to their cluster%   W_EM      EM clustering mapping%% DESCRIPTION% The untrained classifier mapping W_CLUST is used to update an initially% labeled dataset A by iterating the following two steps:%   1. Train W   :  W_EM = A*W_CLUST%   2. Relabel A :  A    = dataset(A,labeld(A*W_EM*classc))% This is repeated until the labeling does not change anymore. The final% classification matrix is returned in B. The final crisp labeling is returned% in LABELS. W_EM may be used for assigning new objects.%% If K is given, a random initialisation for K clusters is made and labels% of A are neglected. %% LABTYPE determines the type of labeling: 'crisp' or 'soft'. Default: label% type of A. It is assumed W_CLUST can handle the LABTYPE requested.% Only in case LABTYPE is 'soft' the tradition EM algorithm is followed.% In case LABTYPE is 'crisp' EMCLUST follows a generalised k-means% algorithm.%% SEE ALSO% MAPPINGS, DATASETS, KMEANS, PRPROGRESS% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org% Faculty EWI, Delft University of Technology% P.O. Box 5031, 2600 GA Delft, The Netherlands% $Id: emclust.m,v 1.7 2007/11/20 10:28:15 duin Exp $function [new_lab,w_em] = emclust (a,w_clust,n,type,fid)	prtrace(mfilename);	n_ini		= 500;			% Maximum size of subset to use for initialisation.	epsilon = 1e-6;			% Stop when average labeling change drops below this.	% Check arguments.	if (nargin < 5), fid = []; end	if (nargin < 4)		prwarning(3,'No label type specified, using label type of dataset A.');		type = []; 	end	if (nargin < 3) | isempty(n)		prwarning(3,'No number of clusters specified, using number of classes in A.');		n = []; 	end	if (nargin < 2) | isempty(w_clust)		prwarning(2,'No clustering mapping specified, assuming NMC.');		w_clust = nmc;   	end  isuntrained(w_clust);   % Assert that clustering mapping is untrained.  % Determine number of clusters N and initialisation method.	a = testdatasize(a); 	islabtype(a,'crisp','soft');	[m,k,c] = getsize(a); 	rand_init = 1;	if (isempty(n))		if (c == 1)						% For one class, find two clusters.			n = 2;		else			n = c;										rand_init = 0; 			% Use given classification as initialisation.		end	end	if (n < 1),  error('Number of clusters should be at least one.'); end	if (n == 1), prwarning(4,'Clustering with 1 cluster is trivial.'); end	% Set label type, if given.	if ~isempty(type), a = setlabtype(a,type); end	a = setprior(a,[]); % make sure that priors will be deleted		% Initialise by performing KCENTRES on...	if (rand_init)		len1 = prprogress(fid,'emclust init: ');		if (m > n_ini)       						% ... a random subset of A.      prwarning(2,'Initializing by performing KCENTRES on a subset of %d samples.', n_ini);			a_ini = +gendat(+a,n_ini);			else      prwarning(2,'Initializing by performing KCENTRES on the training set.');			a_ini = +a;								% ... the entire set A.		end		not_found = 1;		itern = 0;		while(not_found)			% try to find an initialisation with all class sizes > 1			itern = itern + 1;			if itern > 100				error('Not possible to find desired number of components')			end			% add some noise to data to avoid problems			% 50 trials			assign  = kcentres(+distm(a_ini.*(ones(size(a_ini))+0.001*randn(size(a_ini)))),n,50);		% Train initial classifier on labels generated by KCENTRES and find		% initial hard labels. Use NMC instead of W_CLUST to make sure that we     % always have enough data to estimate the parameters.			a_ini = dataset(a_ini,assign); 			a_ini = setprior(a_ini,getprior(a_ini,0));			d = a*(a_ini*nmc);  		if (islabtype(a,'soft'))				new_lab = +d;				not_found = 0;			else				new_lab = d*labeld;				if all(classsizes(dataset(d,new_lab)) > 1)					not_found = 0;				end			end		end		lablist_org = [];		closemess(fid,len1)	else		lablist_org = getlablist(a);		a = setlablist(a,[1:c]');		new_lab = getlabels(a);		% Use given labeling.	end	% Ready for the work.	iter = 0;	change = 1;  if (islabtype(a,'soft'))		len1 = prprogress(fid,'emclust optim: iter, change (mse):');		len2 = prprogress(fid,' %i, %f ',0,0);		a = setlabels(a,new_lab);		a = setprior(a,getprior(a,0));		laba = getlabels(a);		lab = new_lab;  	while (change > epsilon)       	% EM loop, run until labeling is stable.  		w_em = a*w_clust;             % 1. Train classifier, density output.  		b = a*(w_em*classc);          % 2. Assign probability to training samples.  		a = settargets(a,b);          % 3. Insert probabilities as new labels.  		change = mean(mean((+b-lab).^2)); lab = b;          			closemess(fid,len2);			len2 = prprogress(fid,' %i, %f ',iter,change);			iter = iter+1;			if iter > 500				closemess(fid,len1+len2);				prwarning(1,'emclust stopped after 500 iterations')				change = 0;			end		end	else  % crisp labels		len1 = prprogress(fid,'emclust optim: iter, change (#obj), #clust:');		len2 = prprogress(fid,' %i, %i %i ',0,0,0);		lab = ones(m,1);		while (any(lab ~= new_lab)) % EM loop, run until labeling is stable.			a = setlabels(a,new_lab); 		% 0. Set labels and store old labels.			a = setprior(a,getprior(a,0));%    Set priors to class frequencies			lab = new_lab;								% 			a = remclass(a,1);            %    demand class sizes > 2 objects			itern = 0;			while getsize(a,3) < n        %    increase number of classes if necessary				itern = itern + 1;				if itern > 100					error('Not possible to find desired number of components')				end				laba = getlablist(a);				labmax = max(laba);				N = classsizes(a);				[Nmax,cmax] = max(N);        % find largest class				aa = seldat(a,cmax);         % select just that one				new_lab_aa = kmeans(aa,2);   % split it by kmeans				N1 = sum(new_lab_aa == 1);   				N2 = sum(new_lab_aa == 2);				if (N1 > 1 & N2 > 1) % use it if both classes have more than one sample					J = findlabels(a,laba(cmax,:));					a = setlabels(a,new_lab_aa + labmax,J);				end			end				  		w_em = a*w_clust;             % 1. Compute classifier, crisp output.  		b = a*w_em;               		% 2. Classify training samples.  		new_lab = labeld(b);      		% 3. Insert classification as new labels.			closemess(fid,len2);			len2 = prprogress(fid,' %i, %i %i ', ...				iter,length(find(lab ~= new_lab)),length(unique(new_lab)));			iter = iter+1;             %DXD Added also the iter for the crisp labels			if iter > 50				closemess(fid,len1+len2);				prwarning(1,'emclust stopped after 50 iterations')				change = 0;			end  	end  end		if ~isempty(lablist_org) % substitute original labels if desired		new_lab = lablist_org(new_lab);		wlab = getlabels(w_em);		wlab = lablist_org(wlab);		w_em = setlabels(w_em,wlab);	end			if change > 0		closemess(fid,len1+len2);	endreturn;

emclust.m - 源码说明

本页面展示了「模式识别工具箱。非常丰富的底层函数和常见的统计识别工具」中的 emclust.m 源码文件，采用 M 编程语言编写，共 216 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与模式识别相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?