method_kif.m

来自「Variable Reduction Testbench通过对变量进行相关性分析」· M 代码 · 共 327 行

M
327
字号

function [ranking, KIF, cut_off] = method_KIF(X, is_corrmatrix, enable_logging, method)

% -------------------------------------------------------------------------
% this code is part of the 'Reduction Testbench' suite
% developed by A. Manganaro, R. Todeschini, A. Ballabio, D. Mauri
% 2006 - Milano Chemometrics and QSAR Research Group
% -------------------------------------------------------------------------
%
%
% [ranking, KIF, cut_off] = method_KIF(X, is_corrmatrix, enable_logging, method)
%
% method_KIF uses the K index method to evaluate an elimination ranking 
% of variables without any loss of data; it is suggested to delete
% variables until they have a KIF value >0.5
% This routine also outputs a plot of the ranking
%
% Input:
% X = data set [n x p]  n objects, p variables
% is_corrmatrix = if set to 'y', X is seen as the dataset's correlation
%   matrix
% enable_logging = if set to 'y', logging to file is enabled
% method = if set to 'n' enables the non-iterative version of KIF
%
% Output:
% ranking = elimination ranking [1 x p] of the variables
% KIF = values [1 x p] of the KIF associated with the elimination of
%       each variable
% cut_off = number of variables suggested to be retained


try
    
    echo off;
    
    if (enable_logging=='y')
        logging = 1;    
        tot_initial_time = cputime;
    else
        logging = 0;
    end

    [n,p] = size(X);

    if ( (p<2) | (n<2) )
        disp('Wrong matrix dimension - execution aborted');
        ranking=0; KIF=0; return;
    end

    % Reads threshold from file
    [SimpleCorr_T, R2_T, KIF_T] = init_read;
    kif_threshold = KIF_T;

    if (logging)
        % Opens the log file
        cur_time = clock;
        file_name = ['log_k_red_' num2str(cur_time(1)) '-' num2str(cur_time(2)) '-' num2str(cur_time(3)) '-h'...
                num2str(cur_time(4)) 'm' num2str(cur_time(5)) '.txt'];
        [log_id mess] = fopen(file_name, 'wt');
        if (log_id==-1)
            disp('Error opening the log file:');
            disp(mess);
            return;
        end
        fprintf(log_id,'%s\n\n',['K index reduction, matrix size ' num2str(n) ' x ' num2str(p)]);
    end


    % Sets the progressbar
    screensize = [0 0 1 1];
    width = screensize(3)/3;
    height = screensize(4)/25;
    left = screensize(3)/2 - width/2;
    bottom = screensize(4)/2 - height/2;
    progress_win = figure('Units','normalized','Position',[[left bottom] width height],...
                          'MenuBar','none','Resize','off','Name','Algorithm working...',...
                          'NumberTitle','off','WindowStyle','modal');
    progress_axes = axes('Position',[0.02 0.15 0.96 0.70],'XLim',[0 1],'YLim',[0 1],'Box','on',...
                         'xtick',[],'ytick',[]);
    progress_patch = patch('XData',[0 1 1 0],'YData',[0 0 1 1],'FaceColor',[1 0 0]);
    drawnow;

    set(progress_win,'Pointer','watch');

    p_tot = p;


    % Initialize variables
    rank_idx = 1;
    for idx=1:p
        var_id(idx)=idx;
    end


    % Calculate the correlation matrix
    if (is_corrmatrix)
        C = X;
    else
        initial_time = cputime;
        C = corrcoef(X);
        if (logging)
            elapsed_time = cputime - initial_time;
            fprintf(log_id,'%s\n\n',['Correlation matrix calculated in : ' num2str(elapsed_time) ' s' ]);
        end
    end
    
    
if ((nargin>3) & (method=='n'))
    
    % non-iterative method    
    
    
    K_val = [0];
    
    % Calculate Kp
    eig_val = eig(C);
    Kp = calc_K(eig_val, p);

    
    % Calculate Kpj for each j = 1..p and stores the values in K_val

    Cj = C(2:end,2:end);
    eig_val = eig(Cj);
    K_val(1) = calc_K(eig_val, p-1);
    
    for idx=2:(p-1)
        Cj = C([1:(idx-1) (idx+1):p],[1:(idx-1) (idx+1):p]);
        eig_val = eig(Cj);
        K_val(idx) = calc_K(eig_val, p-1);
    end

    Cj = C(1:(p-1),1:(p-1));
    eig_val = eig(Cj);
    K_val(p) = calc_K(eig_val, p-1);
    
    % Calculates KIF value for each var
    
    for i=1:p
        KIF(i) = (Kp*(p-1)-K_val(i)*(p-2)) / (2+K_val(i)*(p-2)-Kp*(p-1));
    end
    
    % Orders the results upon the Kpj value
    
    r = [K_val' KIF' var_id'];
    r = sortrows(r,1);
    
    ranking = r(:,3)';
    KIF =  r(:,2)';
    
else

    % iterative method
    
    
    while (p > 2)

        % Updates the progressbar
        set(progress_win,'Name',['Algorithm working... ',int2str(p),' variables left']);
        set(progress_patch,'XData',[0 p/p_tot p/p_tot 0],'Facecolor',[1 (1-p/p_tot) 0]);
        drawnow;
    
        initial_time = cputime;
        p_minus_one = p-1;
        K_val = [0];
    
    
        % Calculate Kp
        eig_val = eig(C);
        Kp = calc_K(eig_val, p);

    
        % Calculate Kpj for each j = 1..p and stores the values in K_val

        Cj = C(2:end,2:end);
        eig_val = eig(Cj);
        K_val(1) = calc_K(eig_val, p_minus_one);
    
        for idx=2:(p_minus_one)
            Cj = C([1:(idx-1) (idx+1):p],[1:(idx-1) (idx+1):p]);
            eig_val = eig(Cj);
            K_val(idx) = calc_K(eig_val, p_minus_one);
        end

        Cj = C(1:p_minus_one,1:p_minus_one);
        eig_val = eig(Cj);
        K_val(p) = calc_K(eig_val, p_minus_one);
    
    
        % Finds the lower Kpj
        K_min_idx = find(K_val==min(K_val));
        Kpj = K_val(K_min_idx(1));

    
        % Updates ranking
        ranking(rank_idx) = var_id(K_min_idx(1));
    
        % Updates KIF
        KIF(rank_idx) = (Kp*(p-1)-Kpj*(p-2)) / (2+Kpj*(p-2)-Kp*(p-1));
    
    
        % Deletes from the dataset the variable associated with the lower Kpj
        % deleting from C the j-th column and row
        if (K_min_idx(1)==1)
            var_id = var_id(:,2:end);
            C = C(2:end,2:end);
        elseif (K_min_idx(1)==p)
            var_id = var_id(:,1:(p-1));
            C = C(1:(p-1),1:(p-1));
        else
            var_id = [var_id(:,1:K_min_idx(1)-1),var_id(:,K_min_idx(1)+1:p)];
            C = C([1:(K_min_idx(1)-1) (K_min_idx(1)+1):p],[1:(K_min_idx(1)-1) (K_min_idx(1)+1):p]);
        end
    
    
        if (logging)
            % Packs memory and saves data in a temporary file
            pack;
            save('temp_k.mat');
    
            % Updates the log file
            elapsed_time = cputime - initial_time;
            fprintf(log_id,'%s\n',['Elapsed cpu time for var no.' num2str(p) ': ' num2str(elapsed_time) ' s' ]);
        end
        
    
        % Updates indexes
        rank_idx = rank_idx + 1;
        p = p_minus_one;
    
    end


    % Adds to ranking and KIF the last two variables
    ranking = [ranking, var_id];
    KIF = [KIF, [0,0]];

end    

    % Calculates the cut-off value
    cut_off = 0;
    for idx=1:p_tot
        if (KIF(idx)>kif_threshold)
            cut_off = cut_off + 1;
        end
    end


    % Closes the progressbar
    set(progress_win,'Pointer','arrow');
    close(progress_win);

    if (logging)
        % Closes the log file
        tot_elapsed_time = cputime - tot_initial_time;
        fprintf(log_id,'\n%s\n',['Total cpu time elapsed: ' num2str(tot_elapsed_time) ' s (about '...
                num2str(round(tot_elapsed_time/60)) ' min)' ]);
        fclose(log_id);
    end



    %%%% Fig_1: graph of resulting ranking %%%%

    fig_1 = figure('Name','Variables elimination ranking - KIF method');
    title('Variables ranking by KIF method');
    ylabel('KIF value');
    xlabel('Variables ranking');
    xlim([0 length(KIF)+1]);

    hold on;

    line([(cut_off+0.5) (cut_off+0.5)],ylim,'LineStyle','--','Color','r');
    plot(KIF,'o-b','MarkerFaceColor','b');

    for i=1:length(ranking)
        text(i,KIF(i)+0.03,num2str(ranking(i)));
    end
    text((cut_off+0.6),(5*max(KIF)/100),['variables = ',num2str(p_tot-cut_off)]);

    hold off;

    cut_off = p_tot-cut_off;

    echo on;

    

catch
    
    % If execution is aborted, displays a warning message, closes the
    % progressbar window and closes the log file

    disp(' ');
    disp(' Execution aborted!');
    disp(' Temporary workspace saved in temp_k.mat');
    disp(' ');
    close(progress_win);
    if (logging)
        fprintf(log_id,'\n\n%s\n','WARNING: execution aborted');
        fclose(log_id);
    end
end




function K = calc_K (eig_val, p_val)

% calc_K is a subfunction that calculates the K index
%
% Input:
% eig_val = vector [1 x p_val]  of the p eigenvalues
% p_val = p number of the eigenvalues
%
% Output:
% K = K index value

num = 0;

% Calculates the numerator of K
for m=1:p_val
    num = num + abs( (eig_val(m)-1)  );
end

% Calculate K
K = num / (2*(p_val-1));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?