method_kif.m
来自「Variable Reduction Testbench通过对变量进行相关性分析」· M 代码 · 共 327 行
M
327 行
function [ranking, KIF, cut_off] = method_KIF(X, is_corrmatrix, enable_logging, method)
% -------------------------------------------------------------------------
% this code is part of the 'Reduction Testbench' suite
% developed by A. Manganaro, R. Todeschini, A. Ballabio, D. Mauri
% 2006 - Milano Chemometrics and QSAR Research Group
% -------------------------------------------------------------------------
%
%
% [ranking, KIF, cut_off] = method_KIF(X, is_corrmatrix, enable_logging, method)
%
% method_KIF uses the K index method to evaluate an elimination ranking
% of variables without any loss of data; it is suggested to delete
% variables until they have a KIF value >0.5
% This routine also outputs a plot of the ranking
%
% Input:
% X = data set [n x p] n objects, p variables
% is_corrmatrix = if set to 'y', X is seen as the dataset's correlation
% matrix
% enable_logging = if set to 'y', logging to file is enabled
% method = if set to 'n' enables the non-iterative version of KIF
%
% Output:
% ranking = elimination ranking [1 x p] of the variables
% KIF = values [1 x p] of the KIF associated with the elimination of
% each variable
% cut_off = number of variables suggested to be retained
try
echo off;
if (enable_logging=='y')
logging = 1;
tot_initial_time = cputime;
else
logging = 0;
end
[n,p] = size(X);
if ( (p<2) | (n<2) )
disp('Wrong matrix dimension - execution aborted');
ranking=0; KIF=0; return;
end
% Reads threshold from file
[SimpleCorr_T, R2_T, KIF_T] = init_read;
kif_threshold = KIF_T;
if (logging)
% Opens the log file
cur_time = clock;
file_name = ['log_k_red_' num2str(cur_time(1)) '-' num2str(cur_time(2)) '-' num2str(cur_time(3)) '-h'...
num2str(cur_time(4)) 'm' num2str(cur_time(5)) '.txt'];
[log_id mess] = fopen(file_name, 'wt');
if (log_id==-1)
disp('Error opening the log file:');
disp(mess);
return;
end
fprintf(log_id,'%s\n\n',['K index reduction, matrix size ' num2str(n) ' x ' num2str(p)]);
end
% Sets the progressbar
screensize = [0 0 1 1];
width = screensize(3)/3;
height = screensize(4)/25;
left = screensize(3)/2 - width/2;
bottom = screensize(4)/2 - height/2;
progress_win = figure('Units','normalized','Position',[[left bottom] width height],...
'MenuBar','none','Resize','off','Name','Algorithm working...',...
'NumberTitle','off','WindowStyle','modal');
progress_axes = axes('Position',[0.02 0.15 0.96 0.70],'XLim',[0 1],'YLim',[0 1],'Box','on',...
'xtick',[],'ytick',[]);
progress_patch = patch('XData',[0 1 1 0],'YData',[0 0 1 1],'FaceColor',[1 0 0]);
drawnow;
set(progress_win,'Pointer','watch');
p_tot = p;
% Initialize variables
rank_idx = 1;
for idx=1:p
var_id(idx)=idx;
end
% Calculate the correlation matrix
if (is_corrmatrix)
C = X;
else
initial_time = cputime;
C = corrcoef(X);
if (logging)
elapsed_time = cputime - initial_time;
fprintf(log_id,'%s\n\n',['Correlation matrix calculated in : ' num2str(elapsed_time) ' s' ]);
end
end
if ((nargin>3) & (method=='n'))
% non-iterative method
K_val = [0];
% Calculate Kp
eig_val = eig(C);
Kp = calc_K(eig_val, p);
% Calculate Kpj for each j = 1..p and stores the values in K_val
Cj = C(2:end,2:end);
eig_val = eig(Cj);
K_val(1) = calc_K(eig_val, p-1);
for idx=2:(p-1)
Cj = C([1:(idx-1) (idx+1):p],[1:(idx-1) (idx+1):p]);
eig_val = eig(Cj);
K_val(idx) = calc_K(eig_val, p-1);
end
Cj = C(1:(p-1),1:(p-1));
eig_val = eig(Cj);
K_val(p) = calc_K(eig_val, p-1);
% Calculates KIF value for each var
for i=1:p
KIF(i) = (Kp*(p-1)-K_val(i)*(p-2)) / (2+K_val(i)*(p-2)-Kp*(p-1));
end
% Orders the results upon the Kpj value
r = [K_val' KIF' var_id'];
r = sortrows(r,1);
ranking = r(:,3)';
KIF = r(:,2)';
else
% iterative method
while (p > 2)
% Updates the progressbar
set(progress_win,'Name',['Algorithm working... ',int2str(p),' variables left']);
set(progress_patch,'XData',[0 p/p_tot p/p_tot 0],'Facecolor',[1 (1-p/p_tot) 0]);
drawnow;
initial_time = cputime;
p_minus_one = p-1;
K_val = [0];
% Calculate Kp
eig_val = eig(C);
Kp = calc_K(eig_val, p);
% Calculate Kpj for each j = 1..p and stores the values in K_val
Cj = C(2:end,2:end);
eig_val = eig(Cj);
K_val(1) = calc_K(eig_val, p_minus_one);
for idx=2:(p_minus_one)
Cj = C([1:(idx-1) (idx+1):p],[1:(idx-1) (idx+1):p]);
eig_val = eig(Cj);
K_val(idx) = calc_K(eig_val, p_minus_one);
end
Cj = C(1:p_minus_one,1:p_minus_one);
eig_val = eig(Cj);
K_val(p) = calc_K(eig_val, p_minus_one);
% Finds the lower Kpj
K_min_idx = find(K_val==min(K_val));
Kpj = K_val(K_min_idx(1));
% Updates ranking
ranking(rank_idx) = var_id(K_min_idx(1));
% Updates KIF
KIF(rank_idx) = (Kp*(p-1)-Kpj*(p-2)) / (2+Kpj*(p-2)-Kp*(p-1));
% Deletes from the dataset the variable associated with the lower Kpj
% deleting from C the j-th column and row
if (K_min_idx(1)==1)
var_id = var_id(:,2:end);
C = C(2:end,2:end);
elseif (K_min_idx(1)==p)
var_id = var_id(:,1:(p-1));
C = C(1:(p-1),1:(p-1));
else
var_id = [var_id(:,1:K_min_idx(1)-1),var_id(:,K_min_idx(1)+1:p)];
C = C([1:(K_min_idx(1)-1) (K_min_idx(1)+1):p],[1:(K_min_idx(1)-1) (K_min_idx(1)+1):p]);
end
if (logging)
% Packs memory and saves data in a temporary file
pack;
save('temp_k.mat');
% Updates the log file
elapsed_time = cputime - initial_time;
fprintf(log_id,'%s\n',['Elapsed cpu time for var no.' num2str(p) ': ' num2str(elapsed_time) ' s' ]);
end
% Updates indexes
rank_idx = rank_idx + 1;
p = p_minus_one;
end
% Adds to ranking and KIF the last two variables
ranking = [ranking, var_id];
KIF = [KIF, [0,0]];
end
% Calculates the cut-off value
cut_off = 0;
for idx=1:p_tot
if (KIF(idx)>kif_threshold)
cut_off = cut_off + 1;
end
end
% Closes the progressbar
set(progress_win,'Pointer','arrow');
close(progress_win);
if (logging)
% Closes the log file
tot_elapsed_time = cputime - tot_initial_time;
fprintf(log_id,'\n%s\n',['Total cpu time elapsed: ' num2str(tot_elapsed_time) ' s (about '...
num2str(round(tot_elapsed_time/60)) ' min)' ]);
fclose(log_id);
end
%%%% Fig_1: graph of resulting ranking %%%%
fig_1 = figure('Name','Variables elimination ranking - KIF method');
title('Variables ranking by KIF method');
ylabel('KIF value');
xlabel('Variables ranking');
xlim([0 length(KIF)+1]);
hold on;
line([(cut_off+0.5) (cut_off+0.5)],ylim,'LineStyle','--','Color','r');
plot(KIF,'o-b','MarkerFaceColor','b');
for i=1:length(ranking)
text(i,KIF(i)+0.03,num2str(ranking(i)));
end
text((cut_off+0.6),(5*max(KIF)/100),['variables = ',num2str(p_tot-cut_off)]);
hold off;
cut_off = p_tot-cut_off;
echo on;
catch
% If execution is aborted, displays a warning message, closes the
% progressbar window and closes the log file
disp(' ');
disp(' Execution aborted!');
disp(' Temporary workspace saved in temp_k.mat');
disp(' ');
close(progress_win);
if (logging)
fprintf(log_id,'\n\n%s\n','WARNING: execution aborted');
fclose(log_id);
end
end
function K = calc_K (eig_val, p_val)
% calc_K is a subfunction that calculates the K index
%
% Input:
% eig_val = vector [1 x p_val] of the p eigenvalues
% p_val = p number of the eigenvalues
%
% Output:
% K = K index value
num = 0;
% Calculates the numerator of K
for m=1:p_val
num = num + abs( (eig_val(m)-1) );
end
% Calculate K
K = num / (2*(p_val-1));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?