📄 pca.m
字号:
function [scores,loads,ssq,res,q,tsq] = pca(data,plots,scl,lv)
%PCA Principal components analysis
% This function uses the svd to perform pca on a data matrix.
% It is assumed that samples are rows and variables are columns.
% The inputs are the input matrix (data), an optional variable
% (plots) that controls the graphs produced (see below), an
% optional vector (scl) for plotting scores against and
% an optional variable (lv) which specifies the number of
% principal components to use in the model and which suppresses
% the prompt for number of PCs. The outputs are the scores
% (scores), loadings (loads), variance info (ssq), residuals (res),
% calculated q limit (q), and t^2 limit (tsq). The I/O format is
% [scores,loads,ssq,res,q,tsq] = pca(data,plots,scl,lvs);
%
% Set plots = 0 to suppress all plots, plots = 1 for plots with
% no confidence limits and plots = 2 for plots with limits.
% Note: with plots = 0 and lv specified, this routine requires
% no interactive user input. If you would like to scale the data
% before processing use the functions auto or scale.
% Copyright
% Barry M. Wise
% 1991, 1992
% Modified by B.M. Wise, November 1993
if nargin < 2
plots = 1;
end
if plots > 2
error('Plot option must be 0, 1 or 2')
elseif plots < 0
error('Plot option must be 0, 1 or 2')
end
[m,n] = size(data);
if n < m
cov = (data'*data)/(m-1);
[u,s,v] = svd(cov);
temp2 = (1:n)';
escl = 1:n;
else
cov = (data*data')/(m-1);
[u,s,v] = svd(cov);
v = data'*v;
for i = 1:m
v(:,i) = v(:,i)/norm(v(:,i));
end
temp2 = (1:m)';
escl = 1:m;
end
temp = diag(s)*100/(sum(diag(s)));
ssq = [temp2 diag(s) temp cumsum(temp)];
% This section calculates the standard errors of the
% eigenvalues and plots them
if plots == 2
eigmax = ssq(:,2)/(1-(1.96*sqrt(2/m)));
eigmin = ssq(:,2)/(1+(1.96*sqrt(2/m)));
clg
plot(escl,ssq(:,2),escl,eigmax,'--b',escl,eigmin,'--b',escl,ssq(:,2),'og')
title('Eigenvalue vs. PC Number showing 95% Confidence Limits')
xlabel('PC Number')
ylabel('Eigenvalue')
elseif plots == 1
clg
plot(escl,ssq(:,2),escl,ssq(:,2),'og')
title('Eigenvalue vs. PC Number')
xlabel('PC Number')
ylabel('Eigenvalue')
end
% Print out the amount of variance captured
disp(' ')
disp(' Percent Variance Captured by PCA Model')
disp(' ')
disp(' PC# Eigval %Var %TotVar')
disp(ssq)
if nargin < 4
input('How many principal components do you want to keep? ');
lv = ans;
else
sf = sprintf('Now calculating statistics based on %g PC model',lv);
disp(sf)
end
if lv > n
error('No. of PCs must be <= no. of variables')
end
if lv > m
error('No. of PCs must be <= no. of samples')
end
% Form the PCA Model Based on the Number of PCs Chosen
loads = v(:,1:lv);
scores = data*loads;
%I = eye(n);
% Calculate the standard error on the PC loadings if needed
if plots == 2
loaderr = zeros(n,lv);
if n > m, nn = m; else, nn = n; end
for i = 1:lv
dif = (ssq(:,2)-ones(nn,1)*ssq(i,2)).^2;
dif = sort(dif);
sig = sum((ones(nn-1,1)*ssq(i,2))./dif(2:nn,1));
loaderr(:,i) = ((ssq(i,2)/m)*loads(:,i).^2)*sig;
end
loadmax = loads+loaderr;
loadmin = loads-loaderr;
end
% Calculate the residuals matrix and Q values
resmat = (data - scores*loads')';
res = (sum(resmat.^2))';
% Create the scale vectors to plot against
if plots >= 1.0
if nargin < 3
scl = 1:m;
scllim = [1 m];
else
scllim = [min(scl) max(scl)];
end
scl2 = 1:n;
temp = [1 1];
for i = 1:lv
pclim = sqrt(s(i,i))*temp*1.96;
plot(scl,scores(:,i),scllim,pclim,'--b',scllim,-pclim,'--b')
hold on, plot(scl,scores(:,i),'+g'), hold off
xlabel('Sample Number')
str = sprintf('Score on PC# %g',i);
ylabel(str)
title('Sample Scores with 95% Limits')
pause
if plots == 2 plot(scl2,loads(:,i),scl2,loads(:,i),'og',scl2,loadmax(:,i),'--b',scl2,loadmin(:,i),'--b',[1 n],[0 0])
elseif plots == 1
plot(scl2,loads(:,i),scl2,loads(:,i),'og',[1 n],[0 0])
end
xlabel('Variable Number')
str = sprintf('Loadings for PC# %g',i);
ylabel(str)
if plots == 2
str = sprintf('Variable # vs. Loadings for PC# %g Showing Standard Errors',i);
title(str)
else
str = sprintf('Variable Number vs. Loadings for PC# %g',i);
title(str)
end
pause
end
end
% Calculate Q limit using unused eigenvalues
temp = diag(s);
if n < m
emod = temp(lv+1:n,:);
else
emod = temp(lv+1:m,:);
end
th1 = sum(emod);
th2 = sum(emod.^2);
th3 = sum(emod.^3);
h0 = 1 - ((2*th1*th3)/(3*th2^2));
if h0 <= 0.0
h0 = .0001;
disp(' ')
disp('Warning: Distribution of unused eigenvalues indicates that')
disp(' you should probably retain more PCs in the model.')
end
q = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
disp(' ')
disp('The 95% Q limit is')
disp(q)
if plots >= 1
lim = [q q];
plot(scl,res,scl,res,'+g',scllim,lim,'--b')
str = sprintf('Process Residual Q with 95 Percent Limit Based on %g PC Model',lv);
title(str)
xlabel('Sample Number')
ylabel('Residual')
pause
end
% Calculate T^2 limit using ftest routine
if lv > 1
if m > 300
tsq = (lv*(m-1)/(m-lv))*ftest(.95,300,lv);
else
tsq = (lv*(m-1)/(m-lv))*ftest(.95,m-lv,lv);
end
disp(' ')
disp('The 95% T^2 limit is')
disp(tsq)
% Calculate the value of T^2 by normalizing the scores to
% unit variance and summing them up
if plots >= 1.0
temp2 = scores*inv(diag(ssq(1:lv,2).^.5));
tsqvals = sum((temp2.^2)');
tlim = [tsq tsq];
plot(scl,tsqvals,scl,tsqvals,'+g',scllim,tlim,'--b')
str = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',lv);
title(str)
xlabel('Sample Number')
ylabel('Value of T^2')
end
else
disp('T^2 not calculated when number of latent variables = 1')
tsq = 1.96^2;
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -