📄 mfbox_softcluster.m
字号:
function [partition,cost,adjustedpoints]=mfbox_softcluster(X,varargin)%% Perform softclustering of the given points or distance matrix%% usage: % [partition,cost,adjustedpoints]=mfbox_softcluster(X[,argID,value[...]])%% with input and output arguments ([]'s are optional):% X (matrix) data matrix (any dimension)% or distance matrix%% Copyright by Peter Gruber and Fabian Theis% Signal Processing & Information Theory group% Institute of Biophysics, University of Regensburg, Germany% Homepage: http://research.fabian.theis.name% http://www-aglang.uni-regensburg.de%% This file is free software, subject to the % GNU GENERAL PUBLIC LICENSE, see gpl.txterror(nargchk(1,Inf,nargin));verbose = false;plotting = false;startstep = Inf;cluster = 2;numofev = Inf;maxiter = 10000;distfun = 'euclidian';initial = [];dotwostep = 0;minormax = 1;normalize = 0;search = false;postprocess = false;SX = [];XS = [];U = [];UV = [];normalizefact = 0;for i=1:2:(nargin-1) op = varargin{i}; pa = varargin{i+1}; switch lower(op) case 'startstep' startstep = pa; case 'cluster' cluster = pa; case 'distfunction' distfun = pa; case 'numofev' numofev = pa; case 'maxiter' maxiter = pa; case 'dotwostep' dotwostep = pa; case 'normalize' normalize = pa; case 'search' search = pa; case 'postprocess' postprocess = pa; case 'minormax' switch pa case 'max' minormax = -1; otherwise minormax = 1; end case 'initialize' if (~ischar(pa)) initial = pa; else switch pa case 'random' initial = -1; end end case 'plotting' p = find(strcmp(pa,{'off','on'})); plotting = p(1)-1; case 'verbose' p = find(strcmp(pa,{'off','on'})); verbose = p(1)-1; otherwise warning('Illegal parameter %s ',op); endendif (any(strcmp(class(X),{'function_handle','function handle'}))) distfun = 'fun';elseif (any(strcmp(class(X),{'cell'}))) distfun = 'eig';elseif ((length(size(X))==2)&&(size(X,1)==size(X,2))) if (~any(strcmp(distfun,{'mat','eig'}))) distfun = 'mat'; endelse sx = size(X); X = reshape(X,[],sx(end));endswitch distfun case 'euclidian' N = size(X,2); XS = sum(X.^2); if (normalize>0) normalizefact = normalize; s = sum(X,2); p = X*X'; normalize = normalize*2*((N-1)*sum(XS)-s'*s+trace(p))/(N^2-N); end case 'projective' N = size(X,2); X = X./repmat(sqrt(sum(X.^2)),size(X,1),1); if (normalize>0) normalizefact = normalize; normalize = normalize*(1-trace((X*X')^2)/(N^2-N)); end case 'eig' if (any(strcmp(class(X),{'cell'}))) U = X{1}; V = X{2}; N = size(U,1); if (size(V,1)~=size(V,2)) V = diag(V); end else N = size(X,1); [U,V] = eig(X); end [d,x] = sort(abs(real(diag(V)))); d = diag(V); d = d(x); if (numofev<N), d(1:(N-numofev)) = 0; end x = x(abs(d)>eps); d = d(abs(d)>eps); U = U(:,x); V = diag(d); UV = U*V; if (normalize>0) normalizefact = normalize; n = 0; for i=1:N p = zeros(N,1); p(i) = 1; n = n+((p'*UV)*(U'*(1-p))); end normalize = normalize*n/(N^2-N); end case 'mat' N = size(X,1); if (normalize>0) normalizefact = normalize; p = find(eye(size(X))==0); normalize = 0.5*mean(X(p)); SX = X; SX(p) = SX(p)-(normalize*normalizefact); end case 'fun' N = size(X(1),1); if (normalize>0) normalizefact = normalize; n = 0; for i=1:N p = zeros(N,1); p(i) = 1; n = n+(X(p)*(1-p)); end normalize = normalize*n/(N^2-N); end if (verbose), fprintf('Using distfunction with %d points\n',N); endendif (search) newva = {}; maxcl = 30; for i=1:2:(nargin-1) op = varargin{i}; pa = varargin{i+1}; switch op case 'cluster' maxcl = pa; case 'search' case 'initialize' otherwise newva{end+1} = op; newva{end+1} = pa; end end partition = ceil(rand(N,1)*2); want = 0; is = 1; while ((want~=is)&&(want<maxcl)) want = is; [partition,cost,adjustedpoints] = mfbox_softcluster(X,'cluster',min(2*want,want+10), ... 'normalize',1/want,'initialize',partition,newva{:}); is = length(unique(partition)); if ((verbose)&&(want~=is)), fprintf('Selecting components: going from %d to %d\n',want,is); end end returnendstartstep = min(max(1,startstep),floor(N/2));dotwostep = min(startstep,dotwostep);t = zeros(cluster,N);v = zeros(N,cluster);cost = ones(1,maxiter)*NaN;adjustedpoints = zeros(1,maxiter);p = zeros(N,cluster);switch length(initial) case N for j=1:N, p(j,floor(initial(j))) = 1; end case 1 for j=1:N, p(j,ceil(rand*cluster)) = 1; end otherwise p(:,1) = 1;endi = 1;step = startstep;minp = p;mini = 1;multistepstart = [];multistepsteps = [];adjustedpoints(i) = 0;canundo = false;[x,cost(i),adjustedpoints(i)] = dostep(distfun,SX,XS,X,U,UV,cluster,step,minormax,false,normalize*normalizefact,p,true);while (i<maxiter) running = true; if (canundo&&(minormax*(cost(i)-cost(i-1))>0)) if (verbose), fprintf('Undo at %d (costs %f,%f)\n',i,cost(i),cost(i-1)); end cost(i) = NaN; adjustedpoints(i) = NaN; i = i-1; %undo last step p = op; running = false; end canundo = true; if (running) op = p; i = i+1; [p,cost(i),adjustedpoints(i),running] = dostep(distfun,SX,XS,X,U,UV,cluster,step,minormax,false,normalize*normalizefact,p,false); j = i; while ((j>1)&&(cost(j)==cost(j-1))) j = j-1; end if (j<i-2) if (verbose), fprintf('Loop of length %d detected\n',i-j); end running = false; end end if (minormax*cost(i)<min(minormax*cost(1:(i-1)))) minp = p; mini = i; end if (plotting&&strcmp(distfun,'euclidian')&&((size(X,1)==2)||(size(X,1)==3))) colorizescplot(X,p*(1:cluster)'); pause; end if (running) if (verbose), fprintf('+'); end elseif (step>1) step = max(1,floor(step/2)); if (verbose), fprintf('Adjust stepsize (%d) at iteration %d (cost %f)\n',step,i,cost(i)); end elseif ((dotwostep>0)&&(all(multistepstart>cost(i))||any(multistepsteps(multistepstart<=cost(i))<dotwostep))) tp = p; c = cost(i); j = 0; twostep = max([min(2*multistepsteps(multistepstart<=cost(i))),1]); multistepsteps = [multistepsteps,twostep]; multistepstart = [multistepstart,cost(i)]; multistepsteps(multistepstart<=cost(i)) = twostep; if (verbose), fprintf('Try multistep %d at iteration %d (cost %f)\n',twostep,i,cost(i)); end tp = dostep(distfun,SX,XS,X,U,UV,cluster,twostep,minormax,true,normalize*normalizefact,tp,false); while ((j<dotwostep))% [ttp,ttc] = dostep(distfun,SX,XS,X,U,UV,cluster,twostep,minormax,false,normalize*normalizefact,tp,false); tp = dostep(distfun,SX,XS,X,U,UV,cluster,twostep,minormax,true,normalize*normalizefact,tp,false); j = j+1; end i = i+1; p = tp; [tp,tc] = dostep(distfun,SX,XS,X,U,UV,cluster,twostep,minormax,false,normalize*normalizefact,tp,true); cost(i) = tc; adjustedpoints(i) = 0; step = startstep; canundo = false; if (verbose), fprintf('End multistep at iteration %d (cost %f)\n',j,cost(i)); end else i = mini; p = minp; if (verbose), fprintf('Extremum reached at iteration %d (cost %f)\n',i,cost(i)); end break; endendp = p(:,any(p>0,1));partition = p*(1:size(p,2))';cost = cost(1:i);adjustedpoints = adjustedpoints(1:i);if ((normalize>0)&&(postprocess)) newva = {}; for i=1:2:(nargin-1) op = varargin{i}; pa = varargin{i+1}; switch op case 'cluster' case 'search' case 'normalize' case 'initialize' case 'postprocess' otherwise newva{end+1} = op; newva{end+1} = pa; end end want = length(unique(partition)); is = want; normalize = normalizefact*0.9; p = partition; c = []; a = []; while ((want==is)&&(normalize>0)) cost = [cost,NaN,c]; adjustedpoints = [adjustedpoints,NaN,a]; op = p; if (verbose), fprintf('Running postprocessing with %1.4f\n',normalize); end [p,c,a] = mfbox_softcluster(X,'cluster',want+2,'initialize',op,'normalize',normalize,'postprocess',false,newva{:}); is = length(unique(p)); normalize = normalize*0.9; end if (verbose), fprintf('Postprocessing changed %d/%d points\n',sum(p~=partition),length(partition)); end partition = op;endif (plotting) subplot(1,1,1); if (any(strcmp(distfun,{'projective','euclidian'}))) subplot(2,2,1); colorizescplot(X,partition); end subplot(2,2,2); plot(cost); subplot(2,2,3); plot(adjustedpoints);endreturnfunction [p,cost,adjp,suc]=dostep(distfun,SX,XS,X,U,UV,cluster,step, ... minormax,retract,normalize,p,onlycost)t = p'; op = p;N = size(p,1);if (retract), mom = -minormax; else mom = minormax; endsuc = false;adjp = 0;switch distfun case 'euclidian' for j=1:cluster, t(j,:) = sum(p(:,j))*XS+ ... repmat(XS*p(:,j),1,N)-2*(X*p(:,j))'*X; end if (normalize>0), t = t-(normalize*normalizefact)*(repmat(sum(p,1)',1,N)-p'); end case 'projective' for j=1:cluster, v = sum(p(:,j)); for k=1:N, t(j,k) = ... v-(p(:,j)'*(X'*X(:,k)).^2); end, end if (normalize>0), t = t-(normalize*normalizefact)*(repmat(sum(p,1)',1,N)-p'); end case 'eig' t = (p'*UV)*U'; if (normalize>0), t = t-(normalize*normalizefact)*(repmat(sum(p,1)',1,N)-p'); end case 'mat' if (normalize>0), t = p'*SX; else t = p'*X; end case 'fun' t = X(p); if (normalize>0), t = t-(normalize*normalizefact)*(repmat(sum(p,1)',1,N)-p'); endendcost = sum(sum(t.*p'));t = reshape(t(p'~=1)',cluster-1,N)-repmat(t(p'==1)',cluster-1,1);if (onlycost), return; endif (cluster>2) tt = t; tt(mom*tt>=0) = minormax*Inf; [mt,x] = min(minormax*tt); mt = minormax*mt; %richtig ?else mt = t; x = ones(1,N);endfmt = find((mom*mt)<0);if (isempty(fmt)), p = op; return; endsuc = true;r = min(sum(mt(fmt)<Inf),step);if (r>1) [smt,sx] = sort(mt(fmt)); % only need the step smallest, use more efficient code! y = fmt(sx(1:r));elseif (r>0) [smt,y] = min(mt(fmt)); y = fmt(y);else y = [];end[ox,dummy] = ind2sub([cluster,r],find(p(y,:)'>0.5));x = x(y)'; x(x>=ox) = 1+x(x>=ox);adjp = r;p(sub2ind([N,cluster],y(:),ox)) = 0;p(sub2ind([N,cluster],y(:),x)) = 1;return
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -