📄 mfbox_hnmf.m
字号:
function [S,A,W]=mfbox_hnmf(D,varargin)% NMF using hyperplane clustering%% usage: [S,A]=mfbox_hnmf(D[,argID,value[...]]);%% with input and output arguments ([]'s are optional)% D data matrix (dim x samples)% [argID, Additional parameters are given as argID, value% value] pairs. See below for list of valid IDs and values.%% Here are the valid argument IDs and corresponding values:% % 'pcadim' (integer) number of dimensions to reduce data to after% simplex projection (default dim)% 'extractdim' (integer) extract at most this number of components (default dim)% 'tolerance' (double) allow neagtive values in components: (default 0)% Inf: no restriction% 0: dont allow any% else maximum negative value allowed% 'fraction' (double) fraction of data to use in each convex hull run (default 0.1)% 'restart' (integer) how often to restart the cluster algorithm (default 10)% % Copyright by Peter Gruber and Fabian J. Theis% Signal Processing & Information Theory group% Institute of Biophysics, University of Regensburg, Germany% Homepage: http://research.fabian.theis.name% http://www-aglang.uni-regensburg.de%% This file is free software, subject to the % GNU GENERAL PUBLIC LICENSE, see gpl.txtif ((nargin==0)||(mod(nargin,2)==0)) error('Wrong number of input arguments!\n');end[d,n] = size(D);clusterrestart = 10;fraction = 0.1;reddim = Inf;extrdim = 0;tol = 0;weightmethod = 0;for i=1:2:(nargin-1) cmd = varargin{i}; dat = varargin{i+1}; switch cmd case 'pcadim' reddim = dat; case 'extractdim' extrdim = dat; case 'weighting' switch dat case 'volume' weightmethod = 0; case 'points' weightmethod = 1; end case 'tolerance' tol = dat; case 'fraction' fraction = dat; case 'restart' clusterrestart = dat; otherwise error('Unknown parameter %s!\n',cmd); endendreddim = min([reddim,7,d]); % qhull is very slow in higher dimensionsextrdim = max(extrdim,reddim);fraction = max(fraction,min(100/n,1));minD = min(0,min(D(:)));D = D-minD;if (reddim<d) [Xf,v] = projectonsimplex(D); [re,rd] = eig(cov(Xf')); R = eye(reddim-1,d-1)*re; X = R*Xf;else [X,v] = projectonsimplex(D); R = [];endn = size(X,2);if (size(X,1)==1) C = [min(X,[],2);max(X,[],2)];else border = []; weight = []; for i=0:min(fraction,0.5):1 [b,w] = calcbordercones(X(:,ceil(n*rand(1,ceil(fraction*n))))); border = [border;b]; weight = [weight;w(:)]; end if (weightmethod==1) weight = sum((1+abs(border*[X;ones(1,n)])).^(-4),2); end E = Inf; for i=1:clusterrestart [c,r,e] = hyperplanecluster('batch',border,extrdim,200,weight); if (e<E), E = e; C = c; end end C = uncone(C);endM = sum(X,2)'/size(X,2);A = findintersections(C,M); % A(i,:) coordinate of the i-th corner in simplexif (tol<Inf) n = sqrt(sum(C.^2,2)); c_aufp = repmat((1-n)./(n.^2),1,size(C,2)).*C; c_norm = diag(sign(sum((repmat(M,size(C,1),1)-c_aufp).*c_aufp,2))./ ... sqrt(sum(c_aufp.^2,2)))*c_aufp; x = min(c_norm*X,[],2)+abs(tol); C = diag(sign(x)./(1+abs(x)))*c_norm; A = findintersections(C,M);endif (~isempty(R)), A = undoprojectonsimplex((A*R)',v);else A = undoprojectonsimplex(A',v);endA = A-repmat(min(min(A(:)),0),size(A));A = A./repmat(sqrt(sum(A.^2,1)),size(A,1),1);W = pinv(A);S = W*(D+minD);returnfunction [b,w]=calcbordercones(p)s = convhulln(p');n = size(p,1);[l,r] = size(s);b = zeros(l,n+1);w = zeros(l,1);for i=1:l v = p(:,s(i,:)); b(i,:) = null(orth([v;ones(1,n)])')'; v = v(:,2:r)-repmat(v(:,1),1,r-1); w(i) = abs(det(orth(v)'*v));endfunction s=uncone(m)d = size(m,2);v = m(:,1:(d-1));t = m(:,d);s = v./repmat(-sign(t).*sqrt(sum(v.^2,2)).*(sqrt((t.^2)./(1-t.^2))+1),1,d-1);function z=findintersections(a,m)[n,d] = size(a);s = subsets(1:n,d);z = zeros(size(s,1),d);for i=1:size(s,1) nz = sqrt(sum(a(s(i,:),:).^2,2)); z(i,:) = ((a(s(i,:),:)./repmat(nz,1,d))^(-1)*(-1+1./nz))';endw = ones(size(s,1),1)*true;for i=1:n v = a(i,:); b = v*(-1+1/norm(v))/norm(v); c = ((z-repmat(b,size(z,1),1))*v').*repmat(sign((m-b)*v'),size(z,1),1); w = w&(c>=-eps);endz = z(w,:);function [s,v]=projectonsimplex(m)d = size(m,1);sm = sum(m,1);m = m(:,sm~=0)./repmat(sm(sm~=0),d,1);v = orth(eye(d)-ones(d)/d)';s = v*m;function m=undoprojectonsimplex(s,v)m = v'*s;m = m+(1/size(m,1));function [centroids,clusters,err]=hyperplanecluster(method,D,n,maxiter,weight,verbose)data = spnorm(D); [dlen,dim] = size(data);if (prod(size(n))==1) temp = randperm(dlen); centroids = data(temp(1:n),:);else centroids = n; n = size(centroids,1);endcentroids = spnorm(centroids);if (nargin<4), maxiter = 100; endif (nargin<5), weight = ones(1,dlen); endif (nargin<6), verbose = 0; endlr = 0.5; % initial learning rate for sequential k-meansclusters = zeros(1,dlen); switch method case 'seq' len = maxiter*dlen; l_rate = linspace(lr,0,len); order = randperm(dlen); for iter=1:len x = data(order(rem(iter,dlen)+1),:); % pick one sample vector dx = spdist(x,centroids); % difference [dist,nearest] = min(dx); % find nearest centroid x = x*diag(sign(diag(x'*centroids(nearest,:)))); o = spdist(x,centroids(nearest,:)); if (o~=0), x = spnorm(x-(o*centroids(nearest,:))); end centroids(nearest,:) = spnorm(centroids(nearest,:)+l_rate(iter)*x); end case 'batch' old_clusters = zeros(1,dlen); for iter=0:maxiter [dummy,clusters] = min(spdist(centroids,data)); for i=1:n f = find(clusters==i); if (~isempty(f)), centroids(i,:) = spcenter(data(f,:),weight(f)); end end if ((iter>0)&&all(old_clusters==clusters)) break end old_clusters = clusters; end otherwise error('Unknown method\n');endcentroids = spnorm(centroids);[qerrs,clusters] = min(spdist(centroids,data));err = sum(qerrs);function d=spdist(x,y)d = sqrt(1-(x*y').^2);function x=spnorm(x)x = x./repmat(sqrt(sum(x.^2,2)),1,size(x,2));function s=spcenter(x,w)c = x'*diag(w)*x;[E,D] = eig(c);[dummy,q] = sort(diag(D));s = E(:,q(end))';function s=subsets(m,l)if (l==1) s = m(:);elseif (l==(length(m))) s = m(:)';else s = []; for i=2:(length(m)-l+2) r = subsets(m(i:end),l-1); r = [repmat(m(i-1),size(r,1),1),r]; s = [s;r]; endend
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -