function [act_out,pred_out,er,imp,tp,tq,vp,vq]=pcreg(p,q,n,split_option,reg_option)
%
% pcreg.m
%
% Principal Components Regression programme
%
% Call variables:
%
% p            - Independent (process) variables, transformed to
%                principal components
% q            - Dependent (quality) variable
% n            - Number of components to include
% split_option - Data splitting option:
%                1 - split 50:50 down the middle
%                2 - random split
%                3 - inner/outer block split
%                4 - alternate split
%
% poly_deg     - Degree of polynomials (if any) to include
%                (not an argument of this version)
% pc_coeffs    - Coefficients of the principal components
%                (not an argument of this version)
% reg_option   - Regression option:
%                1 - include the first n components
%                3 - optimum number of components
%                (option 2, best combination of components, is not
%                implemented in this version)
%
%
% Returns:
%
% pred_out - The predicted values of the dependent variable
% act_out  - The actual values of the dependent variable
% er       - Measure of the prediction error (sum of squared errors)
% tp       - }
% tq       - } Training and validation process
% vp       - } and quality data sets, for saving
% vq       - } and analysis at a later date.
% imp      - Measure of a particular variable's importance
%            (not computed in this version; always returned as 0)
%
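%
% Example (a minimal sketch; 'scores' and 'y' are hypothetical
% variables holding the principal component scores and the measured
% quality variable):
%
%   [act, pred, er, imp, tp, tq, vp, vq] = pcreg(scores, y, 4, 1, 1);
%   plot(act, pred, 'o')    % predicted vs actual on the validation set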
%
% First, scale the quality variable (subtract the mean, divide by the
% standard deviation)
%
[q mn sd]=mcen(q);
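%
% mcen is assumed to be a local helper that mean-centres q and scales
% it to unit standard deviation, returning the mean and standard
% deviation so that the scaling can be undone later; roughly:
%
%   mn = mean(q);
%   sd = std(q);
%   q  = (q - mn)./sd;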
%
% Now build up a training and validation data set
% tp = training process data
% tq = training quality data
%
% vp = validating process data
% vq = validating quality data
%
%
% 50:50 split:
%
if split_option == 1
   %
   % Determine in round numbers where to split the data
   %
   [D, L] = size(p);
   t_size = ceil(D/2);
   tp = p(1:t_size,:);
   tq = q(1:t_size,:);
   vp = p(t_size+1:D,:);     % start one row after the training block
   vq = q(t_size+1:D,:);     % so the two sets do not overlap
end
%
% Randomly split
%
if split_option == 2
   %
   % Work through the data:
   %   if randn >= 0 -> training data
   %   if randn <  0 -> validation data
   %
   [D, L] = size(p);
   t_count = 1;
   v_count = 1;
   for i = 1:D
      a = randn;
      if a >= 0
         tp(t_count,:) = p(i,:);
         tq(t_count,:) = q(i,:);
         t_count = t_count + 1;
      else
         vp(v_count,:) = p(i,:);
         vq(v_count,:) = q(i,:);
         v_count = v_count + 1;
      end
   end
end
%
% Inner/Outer block split
%
if split_option == 3
   %
   % Split the data into four consecutive blocks: the two outer
   % blocks form the training set, the inner half the validation set
   %
   [D, L] = size(p);
   bsize = round(D/4);
   %
   % Make sure that the matrix dimensions are not exceeded
   %
   b1 = bsize;
   b2 = bsize*3;
   b3 = D;
   t1 = p(1:b1,:);
   t2 = p(b1+1:b2,:);        % start one row after b1 ...
   t3 = p(b2+1:b3,:);        % ... and after b2, to avoid overlap
   q1 = q(1:b1,:);
   q2 = q(b1+1:b2,:);
   q3 = q(b2+1:b3,:);
   tp = [t1; t3];
   tq = [q1; q3];
   vp = t2;
   vq = q2;
end
%
% Alternate split
%
if split_option == 4
   %
   % Step through the data, alternating training/validation
   %
   t_count = 1;
   v_count = 1;
   train = 0;
   [D, L] = size(p);
   for i = 1:D
      if train == 1
         tp(t_count,:) = p(i,:);
         tq(t_count,:) = q(i,:);
         t_count = t_count + 1;
         train = 0;
      else
         vp(v_count,:) = p(i,:);
         vq(v_count,:) = q(i,:);
         v_count = v_count + 1;
         train = 1;
      end
   end
end
[D, old_size] = size(tp);    % old_size = total number of components available
%
% Now carry out the desired regression
%
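%
% In both cases the coefficient vector a is the ordinary least-squares
% solution of t*a ~= tq, i.e. a = (t'*t)\(t'*tq); the backslash
% operator used below computes the same solution more stably than
% forming inv(t'*t) explicitly.
%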
if reg_option == 1
   %
   % Include the first n components
   %
   tp = tp(:,1:n);
   vp = vp(:,1:n);
   %
   % Perform the regression
   %
   a = tp\tq;
   act_out = vq;
   pred_out = vp*a;
elseif reg_option == 3
   %
   % Optimum number of components
   %
   % The total number of available components is old_size
   % (i.e. the column count of tp before any truncation)
   %
   min_err = 1e50;
   for i = 1:old_size
      t = tp(:,1:i);
      v = vp(:,1:i);
      %
      % Do the regression with the first i components
      %
      a = t\tq;
      act_out = vq;
      pred_out = v*a;
      e = sse(act_out,pred_out);
      if e < min_err
         opt_a = a;
         opt_tp = t;
         opt_vp = v;
         min_err = e;
      end
   end
   tp = opt_tp;
   vp = opt_vp;
   pred_out = vp*opt_a;
end
imp = 0;                       % Variable importance not computed in this version
er = sse(act_out,pred_out);    % Sum of squared prediction errors (on the scaled data)
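%
% sse is assumed to be a local helper returning the sum of squared
% differences between the actual and predicted outputs, roughly:
%
%   e = sum((act_out - pred_out).^2);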
%
% Re-scale the quality variables back to the original units
%
tq = tq.*sd + mn;
vq = vq.*sd + mn;
act_out = act_out.*sd + mn;
pred_out = pred_out.*sd + mn;