📄 modlmker.m
字号:
function model = modlmker(x,y)
%MODLMKER Develops PCA, PCR, PLS and RR models.
% This function can be used to develop PCA, PCR, PLS and ridge
% regression models with a variety of scaling and cross-
% validation options. The function is interactive and prompts
% the user for input on the model building options. The inputs
% are the matrix of predictor variables (x) and predicted
% variable (y) when a regression model is desired. The output
% the PCA or regression model in compact form (model) that includes
% information about how the model was developed, the scaling parameters,
% the final loadings or regression vector and a date and time stamp.
% In the case of regression models the function
% also plots actual vs. fitted values and leverage vs.
% studentized residuals for outlier detection.
%
% For regression models the I/O format is model = modlmker(x,y);
% For PCA models the I/O format is model = modlmker(x);
% See also MODLPRED, MODLRDER and MODLDEMO
% Copyright
% Barry M. Wise
% Eigenvector Technologies
% 1994
% Modified February 1995
% Modified August 1995
[smx,snx] = size(x);
if nargin == 2
[smy,sny] = size(y);
if smy ~= smx
error('X and Y blocks must have the same number of samples')
end
else
smy = 0; sny = 0;
end
repf = 1;
while repf == 1
disp(' ')
disp('How would you like to scale the data? Your choices are:')
disp(' ')
disp('1 - No scaling')
disp('2 - Mean centering')
disp('3 - Auto scaling')
disp(' ')
sopt = input('What scaling option would you like? ');
if sopt == 2
[ax,mx] = mncn(x); sx = ones(1,smx);
if nargin == 2
[ay,my] = mncn(y); sy = ones(1,sny);
end
elseif sopt == 3
[ax,mx,sx] = auto(x);
if nargin == 2
[ay,my,sy] = auto(y);
end
else
ax = x; mx = 0; sx = 1;
if nargin == 2
ay = y; my = 0; sy = 1;
end
if sopt ~= 1
disp('Option not in range 1-3, defaulting to no scaling')
end
end
if nargin == 2
disp(' ')
disp('What regression technique would you like to use?')
disp('Your choices are:')
disp(' ')
disp('1 - Principal Components Regression')
disp('2 - Partial Least Squares Regression')
if sny == 1
if smx > snx
disp('3 - Ridge Regression')
end
end
disp(' ')
ropt = input('Please enter your regression choice ');
disp(' ')
if ropt ~= 3
disp('What method of cross-validation would you like?')
disp(' ')
disp('1 - Venetian blinds')
disp('2 - Leave one out')
disp('3 - Contiguous test blocks')
disp('4 - SDEP - Repeated Cross Validations')
disp(' ')
copt = input('Please enter your cross-validation choice ');
elseif ropt == 3
disp('What method of model selection would you like?')
disp(' ')
disp('1 - Hoerl Kennard estimate of ridge parameter')
disp('2 - Venetian blind cross-validation')
disp(' ')
copt = input('Please enter your model selection method choice ');
else
error('Regression method must be in range 1-3')
end
if ropt == 1
pc = input('How many PCs would you like to consider? ');
if copt == 1
split = input('How many times would you like to form the test set? ');
[press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcv(ax,ay,split,pc);
elseif copt == 2
[press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcv1(ax,ay,pc);
elseif copt == 3
split = input('How many times would you like to form the test set? ');
[press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcvblk(ax,ay,split,pc);
elseif copt == 4
split = input('How many divisions of the data would you like? ');
cvnum = input('How many times would you like to repeat the CV? ');
[press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcvrnd(ax,ay,split,cvnum,pc);
end
elseif ropt == 2
pc = input('How many LVs would you like to consider? ');
if copt == 1
split = input('How many times would you like to form the test set? ');
[press,cumpress,minpc,b,rinv,w,p,qlim,t2lim,tvar] = plscv(ax,ay,split,pc);
elseif copt == 2
[press,cumpress,minpc,b,rinv,w,p,qlim,t2lim,tvar] = plscv1(ax,ay,pc);
elseif copt == 3
split = input('How many times would you like to form the test set? ');
[press,cumpress,minpc,b,rinv,w,p,qlim,t2lim,tvar] = plscvblk(ax,ay,split,pc);
elseif copt == 4
split = input('How many divisions of the data would you like? ');
cvnum = input('How many times would you like to repeat the CV? ');
[press,cumpress,minpc,b,rinv,w,p,qlim,t2lim,tvar] = plscvrnd(ax,ay,split,cvnum,pc);
end
else
tmax = input('What is the maximum value of theta to consider? ');
divs = input('How many divisions of theta to consider? ');
if copt == 1
[b,theta] = ridge(ax,ay,tmax,divs);
elseif copt == 2
split = input('How many times would you like to form the test set? ');
[b,theta,cumpress] = ridgecv(ax,ay,tmax,divs,split);
end
end
if ropt ~= 3
if sny == 1
if sopt ~= 3
rmsecv = sqrt(cumpress(minpc)/smx);
else
rmsecv = sqrt(cumpress(minpc)*(sy.^2)/smx);
end
else
rmsecv = zeros(sny,1);
[mpress,npress] = size(press);
for yvar = 1:sny
rmsecv(yvar) = sum(press(yvar:mpress,minpc));
end
if sopt ~= 3
rmsecv = sqrt(rmsecv/smx);
else
rmsecv = sqrt(rmsecv.*(sy.^2)'/smx);
end
end
end
rmsf = 0;
if ropt == 3
if copt == 1
rmsf = 1;
else
rmsecv = sqrt(min(cumpress)*(sy^2)/smx);
end
end
if rmsf == 0
if sny == 1
disp(' ')
s = sprintf('Root Mean Square Error of Cross Validation = %g',...
rmsecv);
disp(s)
else
disp(' ')
disp('Root Mean Square Errors of Cross Validation')
s = sprintf('for the %g Y-block Variables are:',sny);
disp(s)
disp(rmsecv)
end
end
disp(' ')
pause
% Plot fitted versus true value
fy = ax*b;
fys = rescale(fy,my,sy);
plot(y,fys,'+')
for i = 1:smx
for j = 1:sny
s = int2str(i);
text(y(i,j),fys(i,j),s)
end
end
z = axis;
hold on, plot(z(1:2),z(1:2),'-g'), hold off
xlabel('Actual Value')
ylabel('Predicted Value'), drawnow
% Calculate RMSEC
rmsec = sqrt(sum((fys-y).^2)/smx);
if sny == 1
s = sprintf('Root Mean Square Error of Calibration is %g',rmsec);
disp(s), disp(' ')
else
disp('Root Mean Square Errors of Calibration')
s = sprintf('for the %g Y-block Variables are: ',sny);
disp(s), disp(rmsec')
end
% Calculate R+
if ropt == 1
t = ax*p;
rinv = p(:,1:minpc)*inv(t(:,1:minpc)'*t(:,1:minpc))*t(:,1:minpc)';
elseif ropt == 2
% rinv calculated in pls routine
else
rinv = inv(ax'*ax + eye(snx)*theta)*ax';
end
lev = zeros(smx,1);
for i = 1:smx
lev(i) = ax(i,:)*rinv(:,i);
end
res = y - fys;
sigcv = zeros(1,sny);
tres = zeros(smx,sny);
ecv = zeros(smx,sny);
for i = 1:sny
sigcv(i) = sqrt(inv(smx-1)*res(:,i)'*(((ones(smx,1)-lev).^(-2)).*res(:,i)));
tres(:,i) = res(:,i)./(sigcv(i)*sqrt(ones(smx,1)-lev));
ecv(:,i) = res(:,i).*(ones(smx,1)./(ones(smx,1)-lev));
end
rmsecv = sqrt(sum(ecv.^2)/smx);
if sny == 1
disp('Root Mean Square Error of Leave One Out')
s = sprintf('Cross Validation is %g',rmsecv);
disp(s)
else
disp('Root Mean Square Errors of Leave One Out')
s = sprintf('Cross Validation for the %g Y-block variables are:',sny);
disp(s), disp(rmsecv')
end
disp(' ')
pause
plot(lev,tres,'+')
for i = 1:smx
for j = 1:sny
s = int2str(i);
text(lev(i),tres(i,j),s)
end
end
xt = get(gca,'XTick');
[mxt,nxt] = size(xt);
hold on, plot([xt(1) xt(nxt)],[0 0],'-g'), hold off
xlabel('Leverage')
ylabel('Studentized Residuals')
disp(' ')
else
[scores,loads,ssq,res,q,tsq] = pca(ax);
[smx,spca] = size(scores);
end
repf = input('Would you like to repeat the analysis? Yes = 1 ');
end
if snx+sny > 16
model = zeros(1,snx+sny); sm = snx+sny;
else
model = zeros(1,16); sm = 16;
end
if nargin == 2
model(1,1:13) = [clock smx snx smy sny ropt sopt copt];
if ropt == 3
model(1,14) = theta;
else
model(1,14:16) = [minpc qlim t2lim];
end
else
model(1,1:16) = [clock smx snx smy 0 0 sopt 0 spca q tsq];
end
if sopt ~= 1
model = [model; zeros(1,sm)];
model(2,1:snx) = mx;
model(2,snx+1:snx+sny) = my;
if sopt == 3
model = [model; zeros(1,sm)];
model(3,1:snx) = sx;
model(3,snx+1:snx+sny) = sy;
end
end
if nargin == 2
reg = zeros(sny,sm);
reg(1:sny,1:snx) = b';
model = [model; reg];
if ropt == 1
model = [model; [p' tvar' zeros(minpc,sm-snx-1)]];
end
if ropt == 2
model = [model; [p' tvar' zeros(minpc,sm-snx-1)]; [w' zeros(minpc,sm-snx)]];
end
else
reg = zeros(1,sm);
reg(1,1:spca) = ssq(1:spca,2)';
model = [model; reg];
reg = zeros(spca,sm);
reg(1:spca,1:snx) = loads';
model = [model; reg];
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -