欢迎来到虫虫下载站 | 资源下载 资源专辑 关于我们
虫虫下载站

corr_stats.m

Variable Reduction Testbench通过对变量进行相关性分析来实现减少变量的目的。
M
字号:

function [c_var, c_couples] = corr_stats(X, cut_1, cut_2, is_corrmatrix)

% -------------------------------------------------------------------------
% this code is part of the 'Reduction Testbench' suite
% developed by A. Manganaro, R. Todeschini, A. Ballabio, D. Mauri
% 2006 - Milano Chemometrics and QSAR Research Group
% -------------------------------------------------------------------------
%
%
% [c_var, c_couples] = corr_stats(X, cut_1, cut_2, is_corrmatrix)
%
% corr_stats gives useful statistics about the correlation of
% the variables in X, and opens two figures:
%   1) a bar chart of how many couples of variables fall in each
%      0.1-wide correlation interval between -1 and 1
%   2) mean / max / min absolute correlation of every variable
%
% Input:
% X = data set [n x p]  n objects, p variables
%   (or the [p x p] correlation matrix, see is_corrmatrix)
% cut_1 = cutoff value for the variables' list; rows of c_var after the
%   first one whose mean absolute correlation is below cut_1 are removed.
%   Values <= 0, [] or an omitted argument disable the cut
% cut_2 = cutoff value for the couples' list; same rule applied to the
%   absolute correlation stored in c_couples
% is_corrmatrix = if set to 'y' (case insensitive, only the first
%   character is checked), X is seen as the dataset's correlation matrix.
%   Any other string, [] or an omitted argument means X is raw data.
%   Numeric/logical values are accepted too: all-nonzero means 'y'
%
% Output:
% c_var = array (up to p x 4), one row per variable:
%   [variable index, mean abs corr, max abs corr, min abs corr]
%   sorted by decreasing mean absolute correlation
% c_couples = array (up to p*(p-1)/2 x 4), one row per couple i>j:
%   [abs(c(i,j)), c(i,j), i, j]
%   sorted by decreasing absolute correlation
%
% If the input dimensions are not acceptable a message is displayed and
% both outputs are set to 0.


% NOTE: echo is switched off here and switched on again on normal
% completion, exactly as in the original release; the early returns
% below leave it off.
echo off;

% Optional arguments: fall back to "no cutoff" / "raw data"
if ( (nargin < 2) || isempty(cut_1) )
    cut_1 = 0;
end
if ( (nargin < 3) || isempty(cut_2) )
    cut_2 = 0;
end
if ( (nargin < 4) || isempty(is_corrmatrix) )
    use_corrmatrix = false;
elseif ischar(is_corrmatrix)
    % Only a string starting with 'y'/'Y' selects the correlation-matrix
    % mode; a plain truth test would accept 'n' as well.
    use_corrmatrix = strncmpi(is_corrmatrix, 'y', 1);
else
    use_corrmatrix = all(is_corrmatrix(:) ~= 0);
end

[n,p] = size(X);

if ( (p<2) || (n<2) )
    disp('Wrong matrix dimension - execution aborted');
    c_couples=0; c_var=0; return;
end

% A correlation matrix has to be square, otherwise the indexing below
% would read (or silently create) elements that do not exist
if ( use_corrmatrix && (n ~= p) )
    disp('Wrong matrix dimension - execution aborted');
    c_couples=0; c_var=0; return;
end

% Calculate the correlation matrix
if (use_corrmatrix)
    C = X;
else
    C = corrcoef(X);
end

% Self-correlations are not of interest
C(logical(eye(p))) = 0;

% Puts the information of C in the c_couples array, where all the
% rows have the form [abs(c(i,j)), c(i,j), i, j]
c_couples = zeros(p*(p-1)/2, 4);
idx = 1;
for i=2:p
    for j=1:(i-1)
        c_couples(idx,:) = [abs(C(i,j)), C(i,j), i, j];
        idx = idx + 1;
    end
end

% Sorts the c_couples vector on the correlation value (descending).
% sortrows + reverse is kept on purpose: it fixes the order of ties.
c_couples = sortrows(c_couples,1);
c_couples = reverse(c_couples);

% Calculate the vector of mean correlation value, maximum value and
% minimum value for each variable, on absolute values
c_var = zeros(p, 4);
for i=1:p
    % every column except the variable itself; the index ranges are
    % simply empty for i==1 and i==p
    cur_row = abs(C(i,[1:(i-1) (i+1):p]));
    c_var(i,:) = [i (sum(cur_row)/(p-1)) max(cur_row) min(cur_row)];
end

% Sorts the c_var vector on the mean correlation value (descending)
c_var = sortrows(c_var,2);
c_var = reverse(c_var);


%%%% Fig_1: graph of c_couples %%%%

% Twenty 0.1-wide intervals covering [-1, 1]. The first interval is
% closed on the left so that couples with correlation exactly -1 are
% counted; all the others are (lo, hi]. Neighbouring intervals share the
% very same edge value, so no couple can be counted twice or skipped.
r = c_couples(:,2);
r(r > 1) = 1;       % rounding overshoot goes to the outermost intervals
r(r < -1) = -1;
edges = linspace(-1, 1, 21);
n_bins = length(edges) - 1;
c_graph_x = zeros(1, n_bins);
c_graph = zeros(1, n_bins);
for k=1:n_bins
    lo = edges(k);
    hi = edges(k+1);
    if (k==1)
        in_bin = (r>=lo) & (r<=hi);
    else
        in_bin = (r>lo) & (r<=hi);
    end
    c_graph(k) = sum(in_bin);
    c_graph_x(k) = (lo + hi)/2;   % bar drawn at the interval centre
end

figure('Name','Correlation between couples of variables');
xlim([-1 1]);
ylim([0 max(c_graph)+2]);
title('Variables correlation frequencies');
ylabel('No of couples of variables');
xlabel('Correlation values');

hold on;
bar(c_graph_x, c_graph,'g');
hold off;


%%%% Fig_2: graph of c_var %%%%

figure('Name','Correlation of single variables');
title('Mean correlation of single variables (on absolute values)');
ylabel('Correlation value');
xlabel('Variables');
ylim([0 1]);
xlim([0 p+1]);

hold on;

% dotted vertical segment from max to min for every variable
for i=1:p
    line([i i],[c_var(i,3) c_var(i,4)],'Color',[0.83 0.83 0.83],'LineStyle',':');
end
line([0.1 p+1],[0 0],'Color',[0.9 0.9 0.9],'LineStyle','-');
g1 = plot(c_var(:,2),'Marker','o','MarkerEdgeColor','b','MarkerFaceColor','k');
g2 = plot(c_var(:,3),'o--r','MarkerFaceColor','r');
g3 = plot(c_var(:,4),'o--g','MarkerFaceColor','g');
% label every point with the original variable index
for i=1:p
    text(i,c_var(i,2)+0.1,num2str(c_var(i,1)));
end

legend([g1 g2 g3],'Mean value','Max value','Min Value');

hold off;


% Cuts all the rows under the cutoff (the arrays are sorted in
% descending order, so everything from the first row below the cutoff
% onwards is dropped)
if (cut_1>0)
    trim_point = find(c_var(:,2)<cut_1, 1);
    if ~(isempty(trim_point))
        c_var = c_var(1:trim_point-1,:);
    end
end

if (cut_2>0)
    trim_point = find(c_couples(:,1)<cut_2, 1);
    if ~(isempty(trim_point))
        c_couples = c_couples(1:trim_point-1,:);
    end
end

echo on;




function rev_X = reverse(X)

% reverse is a subfunction that reverses the order of the rows of X
% (for a column vector this reverses the vector; a single row is
% returned unchanged)
%
% Input:
% X = data vector or matrix
%
% Output:
% rev_X = X with its rows in the opposite order; [] when X has no rows

n = size(X, 1);

% Keep the historical result for row-less input
if (n == 0)
    rev_X = [];
    return;
end

% One indexing operation instead of rebuilding the result row by row
rev_X = X(n:-1:1, :);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -