method_simplecorr.m

来自「Variable Reduction Testbench通过对变量进行相关性分析」· M 代码 · 共 165 行

M
165
字号

function [ranking, corr_values, cut_off] = method_simplecorr(X, is_corrmatrix)

% -------------------------------------------------------------------------
% this code is part of the 'Reduction Testbench' suite
% developed by A. Manganaro, R. Todeschini, A. Ballabio, D. Mauri
% 2006 - Milano Chemometrics and QSAR Research Group
% -------------------------------------------------------------------------
%
%
% [ranking, corr_values, cut_off] = method_simplecorr(X, is_corrmatrix)
%
% method_simplecorr uses the Simple Correlation method to evaluate an
% elimination ranking of variables without any loss of data; it is
% suggested to delete the variables ranked before the cut-off shown in the
% plot. This routine also outputs a plot of the ranking.
%
% For every pair of variables whose absolute correlation exceeds the
% threshold returned by init_read, the variable with the higher mean
% absolute correlation (mean of its row of the correlation matrix) is
% marked for exclusion.
%
% Input:
% X = data set [n x p]  n objects, p variables
% is_corrmatrix = (optional) if set to 'y', X is seen as the dataset's
%   correlation matrix (and must then be square). A char value is true
%   only when it starts with 'y' or 'Y'; a numeric/logical value is true
%   when all of its elements are non-zero. Omitted or empty means X is a
%   raw data set.
%
% Output:
% ranking = elimination ranking [1 x p] of the variables; the variables
%   marked for exclusion come first, each group ordered by decreasing
%   mean correlation
% corr_values = values [1 x p] of the mean absolute correlation of each
%   variable, in the same order as ranking
% cut_off = number of variables suggested to be retained
%
% If the input has an invalid size a message is displayed and ranking,
% corr_values and cut_off are all returned as 0.


echo off;

% Every output gets a value up front so that the abort paths below never
% leave an output argument unassigned.
ranking = 0;
corr_values = 0;
cut_off = 0;

[n,p] = size(X);

if ( (p<2) || (n<2) )
    disp('Wrong matrix dimension - execution aborted');
    return;
end

% Interprets the correlation-matrix flag. A plain "if (is_corrmatrix)"
% would also be true for 'n', because any non-empty char is non-zero.
if (nargin < 2) || isempty(is_corrmatrix)
    use_corrmatrix = false;
elseif ischar(is_corrmatrix)
    use_corrmatrix = (lower(is_corrmatrix(1)) == 'y');
else
    use_corrmatrix = all(is_corrmatrix(:) ~= 0);
end

if ( use_corrmatrix && (n ~= p) )
    disp('Correlation matrix must be square - execution aborted');
    return;
end

% Reads threshold from file
[SimpleCorr_T, R2_T, KIF_T] = init_read;
corr_threshold = SimpleCorr_T;

% Calculate the correlation matrix. This is done before the modal
% progress window is opened, so that a failure here cannot leave that
% window stuck on screen.
if (use_corrmatrix)
    C = X;
else
    C = corrcoef(X);
end

% Mean absolute correlation of each variable, computed once instead of
% being re-evaluated for every pair inside the search loop
mean_corr = zeros(1,p);
for idx = 1:p
    mean_corr(idx) = mean(abs(C(idx,:)));
end

% Sets the progressbar
screensize = [0 0 1 1];
width = screensize(3)/3;
height = screensize(4)/25;
left = screensize(3)/2 - width/2;
bottom = screensize(4)/2 - height/2;
progress_win = figure('Units','normalized','Position',[[left bottom] width height],...
                          'MenuBar','none','Resize','off','Name','Algorithm working...',...
                          'NumberTitle','off','WindowStyle','modal');
progress_axes = axes('Position',[0.02 0.15 0.96 0.70],'XLim',[0 1],'YLim',[0 1],'Box','on',...
                         'xtick',[],'ytick',[]);
progress_patch = patch('XData',[0 1 1 0],'YData',[0 0 1 1],'FaceColor',[1 0 0]);
drawnow;

set(progress_win,'Pointer','watch');


% Creates the class vector (1 = variable marked for exclusion)
var_class = zeros(1,p);


for idx = 1:p

    % Updates the progressbar
    left_fraction = (p-idx+1)/p;
    set(progress_win,'Name',['Algorithm working... ',int2str(p-idx+1),' variables left']);
    set(progress_patch,'XData',[0 left_fraction left_fraction 0],'Facecolor',...
      [1 (1-left_fraction) 0]);
    drawnow;

    % Checks for variables with correlation over the threshold
    if (var_class(idx)==0)
        for k=1:p
            if ( (idx~=k) & (abs(C(idx,k))>corr_threshold) & (var_class(k)==0) )

                % If indices are not pointing at the correlation matrix
                % diagonal, and the correlation between the two variables
                % is over the given threshold, and the other variable has
                % not already been excluded, the variable with the highest
                % mean correlation value is chosen to be excluded.
                % NOTE(review): the scan goes on after idx itself has been
                % excluded, so idx can still cause later k to be excluded;
                % kept as in the original algorithm - confirm intent.

                if mean_corr(idx) > mean_corr(k)
                    var_class(idx) = 1;
                else
                    var_class(k) = 1;
                end

            end
        end
    end

end

% Number of excluded variables. Counted from the class vector rather than
% once per matching pair: a variable that loses several comparisons must
% still be counted only once.
n_excluded = sum(var_class);


% Orders the results. The +100 offset pushes every excluded variable above
% all retained ones in the sort key; the mean correlations travel in their
% own column so they are returned without any rounding from the offset.
var_id = 1:p;
r = [var_id' (mean_corr + 100*var_class)' mean_corr'];
r = sortrows(r,2);
ranking = reverse(r(:,1))';
corr_values = reverse(r(:,3))';


% Closes the progressbar
set(progress_win,'Pointer','arrow');
close(progress_win);



%%%% Fig_1: graph of resulting ranking %%%%

fig_1 = figure('Name','Variables elimination ranking - Simple Pairwise Correlation');
title('Variables ranking by Simple Pairwise Correlation method');
xlabel('Variables ranking');
ylabel('Mean correlation value of the variable');
ylim([0 max(corr_values)+(10*max(corr_values)/100)]);
xlim([0 length(corr_values)+1]);

hold on;

% Dashed line separating the excluded variables (left) from the retained ones
line([(n_excluded+0.5) (n_excluded+0.5)],ylim,'LineStyle','--','Color','r');
plot(corr_values,'o-b','MarkerFaceColor','b');
for i=1:length(ranking)
    text(i,corr_values(i)+(5*max(corr_values)/100),[' ',num2str(ranking(i))]);
end
text((n_excluded+0.6),(5*max(corr_values)/100),['variables = ',num2str(p-n_excluded)]);

hold off;

cut_off = p-n_excluded;

echo on;




function rev_X = reverse(X)

% reverse is a subfunction that reverses the order of the rows of X
% (for a column vector: the order of its elements). A single-row input is
% returned unchanged.
%
% Input:
% X = data vector (or matrix) [n x p]
%
% Output:
% rev_X = X with its rows in reverse order [n x p]

% One indexed copy instead of growing the result row by row in a loop
rev_X = X(size(X,1):-1:1,:);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?