% corr_stats.m
% (source-viewer residue, not part of the program: "Font size:" control label)
function [c_var, c_couples] = corr_stats(X, cut_1, cut_2, is_corrmatrix)
% -------------------------------------------------------------------------
% this code is part of the 'Reduction Testbench' suite
% developed by A. Manganaro, R. Todeschini, A. Ballabio, D. Mauri
% 2006 - Milano Chemometrics and QSAR Research Group
% -------------------------------------------------------------------------
%
%
% [c_var, c_couples] = corr_stats(X, cut_1, cut_2, is_corrmatrix)
%
% corr_stats gives summary statistics about the pairwise correlation of
% the variables in X and draws two figures:
%   1) a bar chart counting the couples of variables whose correlation
%      falls in each 0.1-wide interval between -1 and 1;
%   2) the mean, maximum and minimum absolute correlation of every
%      variable, with the variables ordered by decreasing mean.
% The figures always show all variables/couples; the cutoffs only trim the
% returned arrays.
%
% Input:
% X = data set [n x p] n objects, p variables (n >= 2, p >= 2), or the
%     [p x p] correlation matrix when is_corrmatrix is set
% cut_1 = cutoff value for the variables' list: rows of c_var whose mean
%     absolute correlation is below cut_1 are removed. A value <= 0
%     disables the cut. Optional, default 0.
% cut_2 = cutoff value for the couples' list: rows of c_couples whose
%     absolute correlation is below cut_2 are removed. A value <= 0
%     disables the cut. Optional, default 0.
% is_corrmatrix = if set to 'y' (any text starting with y/Y) or to a
%     nonzero number/logical, X is seen as the dataset's correlation
%     matrix; otherwise the correlation is computed with corrcoef(X).
%     Optional, default 0.
%
% Output:
% c_var = array (at most p x 4), one row per variable:
%     [variable index, mean |r|, max |r|, min |r|]
%     computed against all the other variables, sorted by decreasing mean
% c_couples = array (at most p*(p-1)/2 x 4), one row per couple (i > j):
%     [|r(i,j)|, r(i,j), i, j]
%     sorted by decreasing absolute correlation
% Both outputs are the scalar 0 when the input is rejected.

% NOTE(review): echo is switched off here and unconditionally on again at
% the end, whatever the caller's previous setting was (and it stays off on
% the early returns below). Kept as in the original - confirm it is wanted.
echo off;

% Trailing arguments are optional
if (nargin < 2), cut_1 = 0; end
if (nargin < 3), cut_2 = 0; end
if (nargin < 4), is_corrmatrix = 0; end

% Interpret the flag. A text flag must be inspected explicitly: testing
% the text itself is true for ANY non-empty text, including 'n'.
if ischar(is_corrmatrix)
    use_corrmatrix = strncmpi(is_corrmatrix, 'y', 1);
else
    use_corrmatrix = ~isempty(is_corrmatrix) && all(is_corrmatrix(:) ~= 0);
end

[n, p] = size(X);
if ( (p < 2) || (n < 2) )
    disp('Wrong matrix dimension - execution aborted');
    c_couples = 0; c_var = 0;
    return;
end
if ( use_corrmatrix && (n ~= p) )
    disp('Correlation matrix must be square - execution aborted');
    c_couples = 0; c_var = 0;
    return;
end

% Calculate the correlation matrix
if (use_corrmatrix)
    C = X;
else
    C = corrcoef(X);
end
% Zero the diagonal (self-correlations) through its linear indices
C(1:(p+1):(p*p)) = 0;

% Puts the information of C in the c_couples array, where all the
% rows have the form [abs(c(i,j)), c(i,j), i, j] with i > j.
% find() scans the strict upper triangle column by column, which yields
% the couples in the order (2,1), (3,1), (3,2), (4,1), ... once the row
% subscript is read as j and the column subscript as i.
[j_idx, i_idx] = find(triu(ones(p), 1));
r = C(sub2ind([p p], i_idx, j_idx));
c_couples = [abs(r), r, i_idx, j_idx];

% Sorts the c_couples array on decreasing absolute correlation
% (ascending sort, then flipped upside down)
c_couples = flipud(sortrows(c_couples, 1));

% Calculate the mean, maximum and minimum absolute correlation of each
% variable against all the others. Running the loop backwards makes the
% first assignment allocate the whole array at once.
for i = p:-1:1
    others = abs(C(i, [1:(i-1), (i+1):p]));
    c_var(i,:) = [i, (sum(others)/(p-1)), max(others), min(others)];
end

% Sorts the c_var array on decreasing mean correlation value
c_var = flipud(sortrows(c_var, 2));

%%%% Fig_1: graph of c_couples %%%%
% Bar k counts the couples with correlation in (x_k, x_k + 0.1]; the first
% interval is closed on the left so that r == -1 is counted as well.
c_graph_x = -1:0.1:1;
n_bins = length(c_graph_x);
c_graph = zeros(1, n_bins);
r_sorted = c_couples(:,2);
for b = 1:n_bins
    lower_edge = c_graph_x(b);
    in_bin = (r_sorted > lower_edge) & (r_sorted <= (lower_edge + 0.1));
    if (b == 1)
        in_bin = in_bin | (r_sorted == lower_edge);
    end
    c_graph(b) = sum(in_bin);
end
figure('Name','Correlation between couples of variables');
xlim([-1 1]);
ylim([0 max(c_graph)+2]);
title('Variables correlation frequencies');
ylabel('No of couples of variables');
xlabel('Correlation values');
hold on;
bar(c_graph_x, c_graph,'g');
hold off;

%%%% Fig_2: graph of c_var %%%%
% The x position is the rank of the variable (by decreasing mean); the
% text label next to each point is the original variable index.
figure('Name','Correlation of single variables');
title('Mean correlation of single variables (on absolute values)');
ylabel('Correlation value');
xlabel('Variables');
ylim([0 1]);
xlim([0 p+1]);
hold on;
for i = 1:p
    % dotted vertical segment spanning max..min for this variable
    line([i i],[c_var(i,3) c_var(i,4)],'Color',[0.83 0.83 0.83],'LineStyle',':');
end
line([0.1 p+1],[0 0],'Color',[0.9 0.9 0.9],'LineStyle','-');
g1 = plot(c_var(:,2),'Marker','o','MarkerEdgeColor','b','MarkerFaceColor','k');
g2 = plot(c_var(:,3),'o--r','MarkerFaceColor','r');
g3 = plot(c_var(:,4),'o--g','MarkerFaceColor','g');
for i = 1:p
    text(i,c_var(i,2)+0.1,num2str(c_var(i,1)));
end
legend([g1 g2 g3],'Mean value','Max value','Min Value');
hold off;

% Cuts all the rows under the cutoff. Both arrays are sorted in decreasing
% order, so everything from the first row below the cutoff is dropped.
if (cut_1 > 0)
    trim_point = find(c_var(:,2) < cut_1, 1);
    if ~(isempty(trim_point))
        c_var = c_var(1:trim_point-1,:);
    end
end
if (cut_2 > 0)
    trim_point = find(c_couples(:,1) < cut_2, 1);
    if ~(isempty(trim_point))
        c_couples = c_couples(1:trim_point-1,:);
    end
end
echo on;
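% -------------------------------------------------------------------------
% Source-viewer residue (not part of the program): keyboard shortcut help
%   Copy code             Ctrl + C
%   Search code           Ctrl + F
%   Full-screen mode      F11
%   Switch theme          Ctrl + Shift + D
%   Show shortcuts        ?
%   Increase font size    Ctrl + =
%   Decrease font size    Ctrl + -
% -------------------------------------------------------------------------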