method_r2.m
来自「Variable Reduction Testbench通过对变量进行相关性分析」· M 代码 · 共 252 行
M
252 行
function [ranking, load_x, cut_off] = method_R2(X, method)
% -------------------------------------------------------------------------
% this code is part of the 'Reduction Testbench' suite
% developed by A. Manganaro, R. Todeschini, A. Ballabio, D. Mauri
% 2006 - Milano Chemometrics and QSAR Research Group
% -------------------------------------------------------------------------
%
%
% [ranking, load_x, cut_off] = method_R2(X, method)
%
% method_R2 uses the VIF (R2) index method to evaluate an elimination ranking
% of variables without any loss of data
% This routine also outputs a plot of the ranking
%
% Input:
% X = data set [n x p] n objects, p variables
% method = if set to 'i' enables the iterative version of R2
%
% Output:
% ranking = elimination ranking [1 x p] of the variables
% load_x = values [1 x p] of R2 associated with each variable
% cut_off = number of variables suggested to be retained
echo off;
[n,p] = size(X);
if ( (p<2) | (n<2) )
disp('Wrong matrix dimension - execution aborted');
ranking=0; load_x=0; return;
end
% Reads threshold from file
[SimpleCorr_T, R2_T, KIF_T] = init_read;
r2_threshold = R2_T;
% Sets the progressbar
screensize = [0 0 1 1];
width = screensize(3)/3;
height = screensize(4)/25;
left = screensize(3)/2 - width/2;
bottom = screensize(4)/2 - height/2;
progress_win = figure('Units','normalized','Position',[[left bottom] width height],...
'MenuBar','none','Resize','off','Name','Algorithm working...',...
'NumberTitle','off','WindowStyle','modal');
progress_axes = axes('Position',[0.02 0.15 0.96 0.70],'XLim',[0 1],'YLim',[0 1],'Box','on',...
'xtick',[],'ytick',[]);
progress_patch = patch('XData',[0 1 1 0],'YData',[0 0 1 1],'FaceColor',[1 0 0]);
drawnow;
p_tot = p;
ranking = [];
load_x = [];
if ((nargin>1) & (method=='i'))
% Iterative version
X_vars = [];
for idx=1:p
X_vars(idx) = idx;
end
while (p>1)
% Updates the progressbar
set(progress_win,'Name',['Algorithm working... ',int2str(p),' variables left']);
set(progress_patch,'XData',[0 p/p_tot p/p_tot 0],'Facecolor',...
[1 (1-p/p_tot) 0]);
drawnow;
R2 = [];
for idx=1:p
% Current var becomes the Y var
y = X(:,idx);
% xs is the original data matrix without the Y var
if (idx==1)
xs = X(:,2:end);
elseif (idx==p);
xs = X(:,1:(p-1));
else
xs = X(:,[1:(idx-1) (idx+1):p]);
end
% Adds the intercept column
xs = [ones(n,1) xs];
% Calculates b (Ordinary Least Squares coefficient)
b = inv(xs'*xs) * xs' * y;
% Calculates the Yc
yc = xs * b;
% Calculates the TSS and RSS parameters
RSS = sum( (y - yc).^2 );
TSS = sum( ( y - (mean(y)*ones(n,1)) ).^2 );
% Calculates R2
R2(idx) = 1 - RSS/TSS;
% Calculates VIF (putting it in variable R2)
if (R2(idx)==1)
R2(idx) = 1 / 0.0001;
else
R2(idx) = 1 / (1-R2(idx));
end
pack;
end
% Selects the var with the greater R2
cur_var_idx = find(R2==max(R2));
cur_var_idx = cur_var_idx(1);
ranking = [ranking X_vars(cur_var_idx)];
load_x = [load_x R2(cur_var_idx)];
% Builds the new dataset without the chosen var
if (cur_var_idx==1)
X = X(:,2:end);
X_vars = X_vars(:,2:end);
elseif (cur_var_idx==p);
X = X(:,1:(p-1));
X_vars = X_vars(:,1:(p-1));
else
X = X(:,[1:(cur_var_idx-1) (cur_var_idx+1):p]);
X_vars = X_vars(:,[1:(cur_var_idx-1) (cur_var_idx+1):p]);
end
p = p-1;
end
% Inserts into the ranking the last variable
ranking = [ranking X_vars(p)];
load_x = [load_x R2(p)];
else
% Non-iterative version
for idx=1:p
% Updates the progressbar
set(progress_win,'Name',['Algorithm working... ',int2str(p-idx),' variables left']);
set(progress_patch,'XData',[0 (p-idx)/p_tot (p-idx)/p_tot 0],'Facecolor',...
[1 (1-(p-idx)/p_tot) 0]);
drawnow;
% Current var becomes the Y var
y = X(:,idx);
% xs is the original data matrix without the Y var
if (idx==1)
xs = X(:,2:end);
elseif (idx==p);
xs = X(:,1:(p-1));
else
xs = X(:,[1:(idx-1) (idx+1):p]);
end
% Adds the intercept column
xs = [ones(n,1) xs];
% Calculates b (Ordinary Least Squares coefficient)
b = inv(xs'*xs) * xs' * y;
% Calculates the Yc
yc = xs * b;
% Calculates the TSS and RSS parameters
RSS = sum( (y - yc).^2 );
TSS = sum( ( y - (mean(y)*ones(n,1)) ).^2 );
% Calculates R2
R2(idx) = 1 - RSS/TSS;
% Calculates VIF (putting it in variable R2)
if (R2(idx)==1)
R2(idx) = 1 / 0.0001;
else
R2(idx) = 1 / (1-R2(idx));
end
end
% Sorts the resulting ranking
for idx=1:p
temp_rank(idx,1) = idx;
temp_rank(idx,2) = R2(idx);
end
temp_rank = sortrows(temp_rank,2);
for idx=1:length(temp_rank)
inv_rank(idx,:) = temp_rank(length(temp_rank)+1-idx,:);
end
temp_rank = inv_rank;
ranking = temp_rank(:,1);
load_x = temp_rank(:,2);
end
% Calculates the cut-off value
cut_off = 0;
for idx=1:p_tot
if (load_x(idx)>r2_threshold)
cut_off = cut_off + 1;
end
end
% Closes the progressbar
set(progress_win,'Pointer','arrow');
close(progress_win);
%%%% Fig_1: graph of resulting ranking %%%%
fig_1 = figure('Name','Variables elimination ranking - method VIF');
title('Variables ranking by VIF method');
xlabel('Variables ranking');
ylabel('VIF value');
ylim([0 max(load_x)+(10*max(load_x)/100)]);
xlim([0 length(load_x)+1]);
hold on;
line([(cut_off+0.5) (cut_off+0.5)],ylim,'LineStyle','--','Color','r');
plot(load_x,'o-b','MarkerFaceColor','b');
for i=1:length(ranking)
text(i,load_x(i)++(5*max(load_x)/100),[' ',num2str(ranking(i))]);
end
text((cut_off+0.6),(5*max(load_x)/100),['variables = ',num2str(p_tot-cut_off)]);
hold off;
cut_off = p_tot-cut_off;
echo on;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?