⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pca.m

📁 《机器学习》课上的作业
💻 M
字号:
function [eigvector, eigvalue, elapse] = PCA(data, ReducedDim)
%PCA    Principal Component Analysis
%
%    Usage:
%       [eigvector, eigvalue] = PCA(data, ReducedDim)
%       [eigvector, eigvalue] = PCA(data)
%       [eigvector, eigvalue, elapse] = PCA(...)
%
%             Input:
%               data       - Data matrix. Each row vector of data is a data
%                            point. Sparse input is converted to full.
%
%               ReducedDim - The dimensionality of the reduced subspace.
%                            If 0 (or empty, or omitted, or larger than the
%                            number of features), all the dimensions will
%                            be kept.
%                            Default is 0.
%
%             Output:
%               eigvector - Each column is an embedding function, for a new
%                           data point (row vector) x,  y = x*eigvector
%                           will be the embedding result of x.
%               eigvalue  - Column vector of eigenvalues of the centered
%                           scatter matrix data'*data (not divided by the
%                           number of samples), sorted in descending
%                           order. Directions whose eigenvalue is below
%                           1e-12 of the largest one are discarded, so
%                           fewer than ReducedDim components may be
%                           returned. If the centered data is all zero,
%                           both outputs are empty.
%               elapse    - CPU time (seconds, measured with cputime)
%                           spent in the computation.
%
%    Examples:
%             fea = rand(7,10);
%             [eigvector,eigvalue] = PCA(fea,4);
%             Y = fea*eigvector;
%
%
%   version 2.1 --June/2007
%   version 2.0 --May/2007
%   version 1.1 --Feb/2006
%   version 1.0 --April/2004
%
%   Written by Deng Cai (dengcai2 AT cs.uiuc.edu)
%

if nargin < 2 || isempty(ReducedDim)
    ReducedDim = 0;
end

[nSmp,nFea] = size(data);
if (ReducedDim > nFea) || (ReducedDim <= 0)
    ReducedDim = nFea;
end

tmp_T = cputime;

% Nothing to decompose: return an empty basis instead of running the
% eigen-solver on empty matrices.
if nSmp == 0 || nFea == 0
    eigvector = zeros(nFea,0);
    eigvalue = zeros(0,1);
    elapse = cputime - tmp_T;
    return;
end

if issparse(data)
    data = full(data);      % centering would destroy sparsity anyway
end

% Center every feature (column) at zero.
sampleMean = mean(data,1);
data = (data - repmat(sampleMean,nSmp,1));

% When there are clearly more features than samples it is cheaper to
% solve the eigen-problem of A*A' (nSmp x nSmp) and convert the result
% back to eigenvectors of A'*A afterwards.
useGram = nFea/nSmp > 1.0713;
if useGram
    ddata = data*data';
else
    ddata = data'*data;
end
ddata = max(ddata, ddata');     % make the matrix exactly symmetric

dimMatrix = size(ddata,2);
if dimMatrix > 1000 && ReducedDim < dimMatrix/10  % using eigs to speed up!
    option = struct('disp',0);
    [eigvector, eigvalue] = eigs(ddata,ReducedDim,'la',option);
else
    [eigvector, eigvalue] = eig(ddata);
end
eigvalue = diag(eigvalue);
clear ddata;

% Sort in descending order. This is applied to both solver paths so the
% result never depends on the order in which the solver reports its
% eigenvalues (a no-op when they already come out sorted).
[unused, index] = sort(-eigvalue); %#ok<ASGLU>
eigvalue = eigvalue(index);
eigvector = eigvector(:, index);

% Remove the eigenvectors whose energy is almost zero. If every
% eigenvalue is zero (constant data) the ratio test would be 0/0 = NaN
% and keep everything, which later turns into NaN eigenvectors; in that
% case all directions are dropped instead.
maxEigValue = max(abs(eigvalue));
if maxEigValue > 0
    keep = ~(abs(eigvalue)/maxEigValue < 1e-12);
else
    keep = false(size(eigvalue));
end
eigvalue = eigvalue(keep);
eigvector = eigvector(:, keep);

if useGram
    eigvector = data'*eigvector;        % Eigenvectors of A^T*A
    % Normalize every column to unit length.
    eigvector = eigvector*diag(1./(sum(eigvector.^2,1).^0.5));
end

if ReducedDim < length(eigvalue)
    eigvalue = eigvalue(1:ReducedDim);
    eigvector = eigvector(:, 1:ReducedDim);
end

elapse = cputime - tmp_T;

%以下为数据示例
% fea = rand(7,10)
% [eigvector,eigvalue] = PCA(fea,4)
% Y = fea*eigvector 
% 
% fea = 
% 
%     0.0305    0.8594    0.4899    0.6820    0.7224    0.4538    0.8314    0.6280    0.3724    0.7379
%     0.7441    0.8055    0.1679    0.0424    0.1499    0.4324    0.8034    0.2920    0.1981    0.2691
%     0.5000    0.5767    0.9787    0.0714    0.6596    0.8253    0.0605    0.4317    0.4897    0.4228
%     0.4799    0.1829    0.7127    0.5216    0.5186    0.0835    0.3993    0.0155    0.3395    0.5479
%     0.9047    0.2399    0.5005    0.0967    0.9730    0.1332    0.5269    0.9841    0.9516    0.9427
%     0.6099    0.8865    0.4711    0.8181    0.6490    0.1734    0.4168    0.1672    0.9203    0.4177
%     0.6177    0.0287    0.0596    0.8175    0.8003    0.3909    0.6569    0.1062    0.0527    0.9831 
% 
% eigvector = 
% 
%    -0.1487    0.1730   -0.3812    0.2153
%    -0.1381   -0.5340    0.5429    0.2571
%    -0.4056   -0.1441    0.0047   -0.5249
%     0.4681    0.1735    0.5405   -0.3343
%    -0.1373    0.4380    0.1915   -0.1696
%    -0.0795   -0.2602   -0.1359   -0.0552
%     0.2845    0.0474    0.1770    0.5382
%    -0.4609    0.2519    0.1666    0.4194
%    -0.5001    0.1770    0.3892   -0.0415
%     0.0814    0.5268    0.0462    0.0352 
% 
% eigvalue = 
% 
%     1.5668
%     1.4181
%     0.9042
%     0.8643 
% 
% Y = 
% 
%    -0.3170    0.4447    1.3333    0.3162
%    -0.3083   -0.0766    0.4278    0.7718
%    -1.0658    0.1451    0.4726   -0.2309
%    -0.2380    0.5501    0.5203   -0.2640
%    -1.1723    1.3025    0.6794    0.4791
%    -0.5088    0.3902    1.2730   -0.0102
%     0.3133    1.0587    0.5222    0.1090 


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -