📄 mdpca.m

📁 偏最小二乘算法在MATLAB中的实现
💻 M
字号:
function [scores,loads,estdata] = mdpca(data,pcs,flag,tol)
%MDPCA Principal Components for matrices with missing data
%  The inputs are the input matrix (data), the number of 
%  principal components assumed to be significant (pcs), the
%  value of the flag used for missing data (flag), e.g. set 
%  flag = 9999 and put 9999 for each missing value, and an
%  optional variable that sets the convergence tolerance (tol).
%  If tol is not specified it is set to 1e-5. The outputs are
%  the estimated principal components scores (scores) and
%  loadings (loads) and an estimate of the data matrix with
%  the missing values filled in (estdata). I/O format is: 
%  [scores,loads,estdata] = mdpca(data,pcs,flag,tol);
%
%  This function works by calculating a PCA model and then 
%  replacing the missing data with values that are most 
%  consistant with the model. A new PCA model is then 
%  calculated, and the process is repeated until the estimates 
%  of the missing data converge. This is one of many ways to
%  treat the missing data problem.

%  Copyright
%  Barry M. Wise
%  1992
%  Modified May 1994

if nargin == 3
  tol = 1e-5;
end
[m,n] = size(data);
mdlocs = zeros(m,n);
for i = 1:m
  for j = 1:n
    if data(i,j) == flag;
      mdlocs(i,j) = 1;
      data(i,j) = 0;
    end
  end
end
totssq = sum(sum(data.^2));
change = 1;
count = 0;
clc
while change > tol
  home
  count = count + 1;
  s = sprintf('Now working on iteration number %g',count);
  disp(s)
  if n < m
    cov = (data'*data)/(m-1);
    [u,s,v] = svd(cov);
  else
    cov = (data*data')/(m-1);
    [u,s,v] = svd(cov);
    v = data'*v;
    for i = 1:m
      v(:,i) = v(:,i)/norm(v(:,i));
    end
  end
  loads = v(:,1:pcs);
  scores = data*loads;
  estdata = data;
  for i = 1:m
    vars = find(mdlocs(i,:));
    if isempty(vars) == 0
      rm = replace(loads,vars);
      estdata(i,:) = data(i,:)*rm;
    end
  end
  dif = data - estdata;
  change = sum(sum(dif.^2));
  s = sprintf('Sum of squared differences in missing data estimates = %g',change);
  disp(s)
  data = estdata;
  if count == 50
    disp('Algorithm failed to converge after 50 iterations')
    change = 0;
  end
end
disp('  ')
disp('Now forming final PCA model')
disp('   ')
ssq = [[1:pcs]' zeros(pcs,2)];
for i = 1:pcs
  resmat = data - scores(:,1:i)*loads(:,1:i)';
  resmat = resmat - resmat.*mdlocs;
  ssqres = sum(sum(resmat.^2));
  ssq(i,3) = (1 - ssqres/totssq)*100;
end
ssq(1,2) = ssq(1,3);
for i = 2:pcs
  ssq(i,2) = ssq(i,3) - ssq(i-1,3);
end
disp('    Percent Variance Captured') 
disp('           by PCA Model')
disp('     Based on Known Data Only')
disp('  ')
disp('    PC#      %Var      %TotVar')
disp(ssq)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -