📄 dendrogram_version1.m
字号:
function dendrogram_version1(OriginalData, Z, slidervalue, varargin)
% Reference:
% Evrim Acar, Rasmus Bro, Bonnie Schmidt, New Exploratory Metabonomic
% Tools, Submitted to Journal of Chemometrics, 2007.
%
%
% dendrogram_version1(OriginalData, Z, slidervalue, varargin)
%
% Inputs: OriginalData
% Z: Clustering after using a linkage method
% slidervalue: Slidervalue chosen in previous dendrogram
% vargin: 'skip_color' 'skip_label' 'classcolor' 'colorthreshold' 'labels'
%
%
% Copyright 1993-2004 The MathWorks, Inc.
% Modified by Evrim Acar, 2007
%
% Last modified: 05/21/2007
save dendrogram_version1.mat OriginalData Z varargin
Z_temp=Z;
m = size(Z,1)+1;
p = size(OriginalData,1);
orientation = 't';
horz = true;
class_color=false;
color = false;
obslabels = [];
threshold = 0.7 * max(Z(:,3));
flag_skipcolor=0;
flag_skiplabel=0;
if (nargin > 3)
okargs = {'skip_color' 'skip_label' 'classcolor' 'colorthreshold' 'labels'};
for j=1:2:nargin-3
pname = varargin{j};
pval = varargin{j+1};
k = strmatch(lower(pname), okargs);
if isempty(k)
error('stats:dendrogram:BadParameter',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('stats:dendrogram:BadParameter',...
'Unknown parameter name: %s.',pname);
else
switch(k)
case 1 % skip_color
flag_skipcolor=1;
case 2 % skip_label
flag_skiplabel=1;
case 3 %class_color
class_color=true;
input_class=pval;
case 4 % colorthreshold
color = true;
if ischar(pval)
if ~strncmp(lower(pval),'default',length(pval))
warning('stats:dendrogram:BadThreshold',...
'Unknown threshold specified, using default');
end
end
if isnumeric(pval)
threshold = pval;
end
case 5 % labels
if ischar(pval)
pval = cellstr(pval);
end
if ~iscellstr(pval)
error('stats:dendrogram:BadLabels',...
'Value of ''labels'' parameter is invalid');
end
if ~isvector(pval) || numel(pval)~=m
error('stats:dendrogram:InputSizeMismatch',...
'Must supply a label for each observation.');
end
obslabels = pval(:);
end
end
end
end
% For each node currently labeled m+k, replace its index by
% min(i,j) where i and j are the nodes under node m+k.
Z = transz(Z);
T = (1:m)';
% If there are more than p nodes, the dendrogram looks crowded.
% The following code will make the last p link nodes into leaf nodes,
% and only these p nodes will be visible.
if (m > p) & (p ~= 0)
Y = Z((m-p+1):end,:); % get the last nodes
R = unique(Y(:,1:2));
Rlp = R(R<=p);
Rgp = R(R>p);
W(Rlp) = Rlp; % use current node number if <=p
W(Rgp) = setdiff(1:p, Rlp); % otherwise get unused numbers <=p
W = W(:);
T(R) = W(R);
% Assign each leaf in the original tree to one of the new node numbers
for i = 1:p
c = R(i);
T = clusternum(Z,T,W(c),c,m-p+1,0); % assign to its leaves.
end
% Create new, smaller tree Z with new node numbering
Y(:,1) = W(Y(:,1));
Y(:,2) = W(Y(:,2));
Z = Y;
m = p; % reset the number of node to be 30 (row number = 29).
end
A = zeros(4,m-1);
B = A;
n = m;
X = 1:n;
Y = zeros(n,1);
r = Y;
% arrange Z into W so that there will be no crossing in the dendrogram.
W = zeros(size(Z));
W(1,:) = Z(1,:);
nsw = zeros(n,1); rsw = nsw;
nsw(Z(1,1:2)) = 1; rsw(1) = 1;
k = 2; s = 2;
while (k < n)
i = s;
while rsw(i) | ~any(nsw(Z(i,1:2)))
if rsw(i) & i == s, s = s+1; end
i = i+1;
end
W(k,:) = Z(i,:);
nsw(Z(i,1:2)) = 1;
rsw(i) = 1;
if s == i, s = s+1; end
k = k+1;
end
g = 1;
for k = 1:m-1 % initialize X
i = W(k,1);
if ~r(i),
X(i) = g;
g = g+1;
r(i) = 1;
end
i = W(k,2);
if ~r(i),
X(i) = g;
g = g+1;
r(i) = 1;
end
end
[u,perm]=sort(X); % perm is the third output value
label = num2str(perm');
if ~isempty(obslabels)
label = cellstr(label);
% label(:) = {''}; % to make sure non-singletons get an empty label
singletons = find(histc(T,1:m)==1);
for j=1:length(singletons)
sj = singletons(j);
label(perm==sj) = obslabels(T==sj);
end
end
% set up the color
numColors = 0;
groups = 0;
cmap = [0 0 1];
theGroups = zeros(m-1,1);
%if data is labeled with classes, class color option can be used to color
%leaves by class color.
if class_color
numColors=size(unique(input_class),2);
leaf_nodes=m;
for i=1:size(Z_temp,1)
Elem= [cluster_elements(Z_temp,Z_temp(i,1),leaf_nodes) cluster_elements(Z_temp,Z_temp(i,2),leaf_nodes)];
variable1=unique(input_class(Elem));
for index=1:size(variable1,2)
for index_elem=1:size(Elem,2)
if input_class(Elem(index_elem))==variable1(index)
Variables{index}=Elem(index_elem);
end
end
array_numbers(index,1)=variable1(index);
array_numbers(index,2)=size(find(input_class(Elem)==variable1(index)),2);
end
[max_val, max_index]=max(array_numbers(:,2));
deneme=zeros(size(array_numbers,1),1);
deneme(max_index)=1;
min_val_indices=find(deneme==0);
min_val=sum(array_numbers(min_val_indices,2));
if (size(unique(input_class(Elem)),2)==1)
theGroups(i)=unique(input_class(Elem));
groups=groups+1;
elseif (min_val<=slidervalue)
Row=[];
for index_min=1:size(min_val_indices,1)
Row=[Row find_rownb(Z_temp, Variables{min_val_indices(index_min)})];
end
change_ids=unique(Row);
change_color=cluster_elements_color(Z_temp, i, leaf_nodes);
if (isempty(change_ids)==0)
uplimit=max(size(change_ids));
for cc_i=1:uplimit
change_color=[change_color cluster_elements_color(Z_temp, change_ids(cc_i), leaf_nodes)];
end
end
theGroups(change_color)=array_numbers(max_index,1);
groups=groups+1;
end
clear array_numbers
clear min_val
clear min_val_indices
clear change_ids
clear Variables
clear change_color
end
if (groups)
cmap = hsv(numColors);
cmap(end+1,:) = [0 0 0];
else
groups = 1;
end
elseif color
groups = sum(Z(:,3)< threshold);
if groups > 1 & groups < (m-1)
for count = groups:-1:1
if (theGroups(count) == 0)
P = zeros(m-1,1);
P(count) = 1;
P = colorcluster(Z,P,Z(count,1),count);
P = colorcluster(Z,P,Z(count,2),count);
numColors = numColors + 1;
theGroups(logical(P)) = numColors;
end
end
cmap = hsv(numColors);
cmap(end+1,:) = [0 0 0];
else
groups = 1;
end
end
handle_figure=figure;
fillscreen(handle_figure)
figure1=subplot(2,2,[1 3]);
col = zeros(m-1,3);
h = zeros(m-1,1);
for n = 1:(m-1)
i = Z(n,1); j = Z(n,2); w = Z(n,3);
A(:,n) = [X(i) X(i) X(j) X(j)]';
B(:,n) = [Y(i) w w Y(j)]';
X(i) = (X(i)+X(j))/2; Y(i) = w;
if (n < m-1)
if (theGroups(n)~=0)
col(n,:) = cmap(theGroups(n),:);
else
col(n,:) = cmap(end,:);
end
else
if ((size(unique(theGroups),1)==1) & flag_skipcolor==0)
col(n,:)=cmap(theGroups(1),:);
else
col(n,:) = cmap(end,:);
end
end
end
ymin = min(Z(:,3));
ymax = max(Z(:,3));
margin = (ymax - ymin) * 0.05;
n = size(label,1);
if(~horz)
for count = 1:(m-1)
h(count) = line(A(:,count),B(:,count),'color',col(count,:));
end
lims = [0 m+1 max(0,ymin-margin) (ymax+margin)];
set(gca, 'Xlim', [.5 ,(n +.5)], 'XTick', 1:n, 'XTickLabel', label, ...
'Box', 'off');
mask = logical([0 0 1 1]);
if strcmp(orientation,'b')
set(gca,'XAxisLocation','top','Ydir','reverse');
end
else
for count = 1:(m-1)
h(count) = line(B(:,count),A(:,count),'color',col(count,:));
linelabel(h(count),{['Cluster#:', num2str(count) ' Cost:' num2str(Z(count,3))]});
end
lims = [max(0,ymin-margin) (ymax+margin) 0 m+1 ];
set(gca, 'Ylim', [.5 ,(n +.5)], 'YTick', 1:n, 'YTickLabel', label, ...
'Box', 'off');
mask = logical([1 1 0 0]);
if strcmp(orientation, 'l')
set(gca,'YAxisLocation','right','Xdir','reverse');
end
end
if margin==0
if ymax~=0
lims(mask) = ymax * [0 1.25];
else
lims(mask) = [0 1];
end
end
axis(lims);
position1=[0.175 0.01 0.23 0.03];
u1=uicontrol(gcf,'style','pushbutton','string','SELECT CLUSTERS (Push button first and then Select two lines)','Units','Normal','position',position1,...
'callback', {@Showdiff_call,OriginalData, Z_temp, h, handle_figure});
if (flag_skipcolor==0)
class_id=unique(input_class);
class_count=0;
for class_index=1:size(class_id,2)
class_count=class_count+1;
class_array(class_count,1)=class_id(class_index);
class_array(class_count,2)=size(find(input_class==class_id(class_index)),2);
end
if (class_count>1)
max_class=max(class_array(:,2));
max_sliderval=size(input_class,2)-max_class;
stepsize=1/max_sliderval;
position2=[0.129 0.95 0.336 0.02];
u2=uicontrol(gcf,'style','slider','Units','Normal','Tooltipstring', num2str(slidervalue), 'position',position2,'min',0,'max',max_sliderval, 'sliderstep', [stepsize stepsize],...
'callback', {@slider_call});
set(u2, 'Value', slidervalue);
position3=[0.115 0.95 0.01 0.02];
u3=uicontrol(gcf,'style','text','string',num2str(0),'fontsize', 10, 'Units','Normal','position',position3);
position4=[0.467 0.95 0.02 0.02];
u4=uicontrol(gcf,'style','text','string',num2str(max_sliderval),'fontsize', 10, 'Units','Normal','position',position4);
end
end
% -------------------------------------------------------------------------
function T = clusternum(X, T, c, k, m, d)
% assign leaves under cluster c to c.
d = d+1;
n = m; flag = 0;
while n > 1
n = n-1;
if X(n,1) == k % node k is not a leave, it has subtrees
T = clusternum(X, T, c, k, n,d); % trace back left subtree
T = clusternum(X, T, c, X(n,2), n,d);
flag = 1; break;
end
end
n = size(X,1);
if flag == 0 & d ~= 1 % row m is leaf node.
T(X(m,1)) = c;
T(X(m,2)) = c;
end
% -------------------------------------------------------------------------
function T = colorcluster(X, T, k, m)
% find local clustering
n = m;
while n > 1
n = n-1;
if X(n,1) == k % node k is not a leave, it has subtrees
T = colorcluster(X, T, k, n); % trace back left subtree
T = colorcluster(X, T, X(n,2), n);
break;
end
end
T(m) = 1;
% -------------------------------------------------------------------------
function Z = transz(Z)
%TRANSZ Translate output of LINKAGE into another format.
% This is a helper function used by DENDROGRAM and COPHENET.
% In LINKAGE, when a new cluster is formed from cluster i & j, it is
% easier for the latter computation to name the newly formed cluster
% min(i,j). However, this definition makes it hard to understand
% the linkage information. We choose to give the newly formed
% cluster a cluster index M+k, where M is the number of original
% observation, and k means that this new cluster is the kth cluster
% to be formmed. This helper function converts the M+k indexing into
% min(i,j) indexing.
m = size(Z,1)+1;
for i = 1:(m-1)
if Z(i,1) > m
Z(i,1) = traceback(Z,Z(i,1));
end
if Z(i,2) > m
Z(i,2) = traceback(Z,Z(i,2));
end
if Z(i,1) > Z(i,2)
Z(i,1:2) = Z(i,[2 1]);
end
end
function a = traceback(Z,b)
m = size(Z,1)+1;
if Z(b-m,1) > m
a = traceback(Z,Z(b-m,1));
else
a = Z(b-m,1);
end
if Z(b-m,2) > m
c = traceback(Z,Z(b-m,2));
else
c = Z(b-m,2);
end
a = min(a,c);
% -------------------------------------------------------------------------
function Showdiff_call(no_user1, no_user2, X, Z, h, handle_figure)
nb_of_clicks=2;
if isa(X,'dataset')
A=X.data;
if (isempty(X.axisscale)==0)
axis_scales=X.axisscale;
else
axis_scale={};
end
if (isempty(X.title)==0)
axis_title=X.title;
else
axis_title={};
end
else
A=X;
end
h_line = get_line(handle_figure,nb_of_clicks);
for i=1:nb_of_clicks
eval(['row' num2str(i) '=find(h==h_line(i));']);
end
col={'b','g','m','r','k','c'};
figure2=subplot(2,2,2,'replace');
plot_data_v1(A, Z, row1, row2, col, axis_scales, axis_title);
% -------------------------------------------------------------------------
function slider_call(no_user1,no_user2)
slider_value = get(gcbo,'Value');
close;
load dendrogram_version1.mat
dendrogram_version1(OriginalData, Z, slider_value, varargin{1}, varargin{2}, varargin{3}, varargin{4})
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -