% maingclearnapp.m
% (recovered from a code-sharing page titled "a support vector machine
%  tool"; the original listing was 272 lines of M code)
% Geometrical Correlation Learning to mine geometric information
% Main program for prediction tasks without known answers
% prediction results stored in 'Resulta' & 'Resultb' for GcLearn LR & SVR
% Kaijun WANG: kjwang@mail.xidian.edu.cn, sunice9@yahoo.com, Aug. 2006.
clear;
% Choices for different designs or observations
gm=2; % 1: run linear regression; 3: run SVR; 2: run both
plots=1; % 1: plotting data of response variable & predictions
% set breakpoints in line 150 and 247 to see figures
% (NOTE(review): line numbers refer to the original 272-line listing)
useNewParam=1; % 1: search parameters anew; 0: using parameters obtained from last running
scale=1; % 1: scale data to 0 mean & variance 1; 2: [-1,1]
vmax='allcoef'; % max number of predictor variables ('allcoef' = use all)
nus=[5:14]; % searching scope of geometrization parameter nu
nosort=0; % 1 for data under correct value correlations (skips datasort)
check=0; % 1: check whether any code mistake in programs (self-test on servo.data)
ik=1; % run / k-fold counter; fixed to 1 in this standalone script
%(1) loading data sets
disp(' ===> running starts ');
Xtrain=load('traindata.txt'); % training set: rows = samples, columns = variables
Ytest=load('testdata.txt'); % test set in the same column layout
Z=1; % Z: target variable (column index) for predictions
if check
% self-check mode: reuse one known data set for both roles
Xtrain=load('servo.data'); Z=5;
Ytest=load('servo.data'); end
[ntrain,nx]=size(Xtrain); % number of variables nx
[ntest,ny]=size(Ytest);
order=nx+1; % column nx+1 will carry the estimated Z after sorting
Xlabel=1:ntrain; % positions of training samples
Ylabel=1:ntest; % positions of test samples
if ny<nx % test set may lack the (unknown) target column
% undefined identifier used deliberately to abort on a column mismatch
if ny<nx-1 ErrorTwoDataFileNotMatch; end
if Z~=nx
% shift columns right so that column Z is freed for the target
Ytest(:,[Z+1:nx])=Ytest(:,[Z:nx-1]);
Ytest(:,Z)=1; % placeholder for the unknown target values
end
end
[Xtrain,md,mi]=datanormalize(Xtrain,scale,0); % data are scaled; md/mi: scale factors & offsets
Ytest=datanormalize(Ytest,scale,md,mi); % same normalization applied to test set
if gm<3
% --- (4) general linear regression analysis ---
[ph,pp]=sumdata(Xtrain); % computing X(i)*X(j)
regionc{1}=[1 1 0 0 ntrain]; % single region covering all ntrain rows
for i=Z
% a linear relational model with its coefficients stored in 'coef'
[coef,sigmc]=modeloutdata(regionc{1},Xtrain,ph,pp,i,vmax);
end
sigmc=sigmc/ntrain; % fitting error averaged per training sample
% prediction error (PE) between test data and prediction values
bx=find(coef~=0); % variables with nonzero coefficients
bx=setdiff(bx,Z); % drop the target column; coef(Z) serves as the constant term
Y_hat=Ytest(:,bx)*coef(bx)'+coef(Z);
residual=Ytest(:,Z)-Y_hat;
C(1)=(norm(residual)^2)/ntest; % Mean Square Error
C(2)=C(1)*md(Z)^2; % scaled back to original units
C(3)=mean(abs(residual))*md(Z); % mean absolute error
% --- (5-7) Mining geometric information and Applying it to linear regression---
%(5) mining value correlations by sorting estimated values of variable Z
X_hat=Xtrain(:,bx)*coef(bx)'+coef(Z); % estimated Z for training set
if nosort
% NOTE(review): 'exm' and 'datas' are not defined anywhere in this
% script; this branch only runs when nosort=1 -- verify before enabling
if exm<10
XYsort=datas;
else
XYsort=[Xtrain;Ytest]; % stack: test rows follow training rows
Xlabel=1:ntrain;
Ylabel=(1:ntest)+ntrain;
end
Xsort=Xtrain; Ysort=Ytest;
XYsort(Xlabel,nx+1)=X_hat; % extra column nx+1 carries estimated Z
XYsort(Ylabel,nx+1)=Y_hat;
else
% XYsort=[column 1 - nx: Xsort+Ysort, column nx+1: estimated Z]
% U records the pre-sort order so predictions can be restored later
[XYsort,Xlabel,Ylabel,Xsort,Ysort,U]=datasort(Xtrain,Ytest,X_hat,Y_hat,order);
end
if plots
cla
plot(Xlabel,Xsort(:,Z),'r.');hold on; % training targets
plot(Xlabel,XYsort(Xlabel,nx+1),'m.');hold on; % their linear estimates
plot(Ylabel,Ysort(:,Z),'ko');hold on; % test targets (placeholders unless check mode)
plot(Ylabel,XYsort(Ylabel,nx+1),'g^');hold on; % GLR predictions
end
%(6) Mining geometric information applied to linear geometric regression
pp=XYsort(:,1:nx);
pp(Ylabel,Z)=XYsort(Ylabel,nx+1); % using prediction values for the unknown targets
%[ph,sigma]=crossvalid(Xsort,Z,[1 4],1,'gclearn','find_nu','zvalue');
if useNewParam ph=nus; else ph=nu1(ik); end % nu search range vs previously saved nu
% Constructing optimal curve manifolds and finding optimal relational model
[coef,ph,R]=gclearngress(pp,Z,ph,vmax,Xlabel,'minsigm','zcurve','gclearn');
nu1(ik)=ph; % optimal geometrization parameter
disp([' ---> runs/k-fold= ' num2str(ik) ' optimal nu= ' num2str(ph)]);
bx=find(coef~=0);
bx=setdiff(bx,Z); % optimal model with coefficients 'bx'
%(7) prediction error between test data Ysort(:,Z) and Z_hat predicted
Ycurve=R(Ylabel,1:nx); % Ycurve formed from Ytest
Z_hat=Ycurve(:,bx)*coef(bx)'+coef(Z); % predicting with model 'coef'
[Q,ph]=sort(U); % ph: sort index of U -- presumably the original test order; verify
Q=md(Z)*Z_hat+mi(Z); % undo normalization of predictions
Resulta=Q(ph); % final GcLearn LR predictions, original sample order
residual=Ysort(:,Z)-Z_hat;
A(1)=(norm(residual)^2)/ntest; % Mean Square Error
A(2)=md(Z)^2*A(1); % scaled back
A(3)=md(Z)*mean(abs(residual));% mean absolute error
if plots
plot(Ylabel,Z_hat,'b.');hold on; % GcLearn LR predictions
Xcurve=R(Xlabel,1:nx);
X_hat=Xcurve(:,bx)*coef(bx)'+coef(Z);
[ph,pp]=sort([Xlabel; Ylabel]); % interleave training & test positions
Q=[X_hat; Z_hat];
Q=Q(pp);
plot(ph,Q,'b-');hold on; % curve through all fitted/predicted values
%plot(Xlabel,X_hat,'-');hold on;
legend('training data','predictions of training data','test data','GLR predictions','GcLearn predictions','curve manifold',2);
xlim([-2 2+ntrain+ntest]);
end
end
%--- (8-9) Mining geometric information and Applying it to SVM regression ---
if gm>1
%(8) support vector machine regression "epsilon-SVR" from Libsvm Software
bx=setdiff(1:nx,Z); % predictor columns: every variable except the target
S=ceil(ik/8); % row of the parameter cache 'nopts' for this run
T=mod(ik-1,8)+1;
T=2*(T-1)+1; % each run occupies two adjacent columns: (C, gamma)
XYsort=[]; Xsort=[]; Ysort=[]; Ycurve=[]; X_hat=[]; % reset state from the LR section
% searching optimal parameters of C, gamma for epsilon-SVR
if useNewParam
R=3; % searching by 3-fold validation
pp='-s 3 -e 0.003'; % libsvm options: -s 3 = epsilon-SVR, epsilon = 0.003
pam=[2.^(11:-2:-3); 2.^(5:-2:-9)]; % parameter grid: -c -g
[pamopt,Q]=parameterlivsvm(Xtrain(:,1:nx),Z,bx,pp,pam,R,0);
% refine: finer grid at 80%-120% around the coarse optimum Q
ph=[(0.8:0.1:1.2)*Q(1); (0.8:0.1:1.2)*Q(2)];
[pamopt,Q]=parameterlivsvm(Xtrain(:,1:nx),Z,bx,pp,ph,R,0);
% saving optimal parameters
if ik==1 nopts=zeros(1,16); end
nopts(S,T:(T+1))=Q(1:2);
else
% rebuild the libsvm option string from the cached (C, gamma)
pamopt=['-s 3 -e 0.003 -c ' num2str(nopts(S,T)) ' -g ' num2str(nopts(S,T+1))];
end
% training with optimal parameters
model = svmtrain(Xtrain(:,Z), Xtrain(:,bx),pamopt);
% prediction errors from model
[Y_hat,Pe] = svmpredictnoprint(Ytest(:,Z), Ytest(:,bx), model);
D(1)=Pe(2); % Mean Square Error
D(2)=Pe(2)*md(Z)^2; % MSE is scaled back
residual=Ytest(:,Z)-Y_hat;
D(3)=md(Z)*mean(abs(residual)); % mean absolute error
% (9) Mining geometric information and Geometrical learning
[X_hat,sigmd]=svmpredictnoprint(Xtrain(:,Z),Xtrain(:,bx),model); % SVR fit on training set
if nosort
% NOTE(review): 'exm' and 'datas' are not defined anywhere in this
% script; this branch only runs when nosort=1 -- verify before enabling
if exm<10
XYsort=datas;
else
XYsort=[Xtrain;Ytest]; % stack: test rows follow training rows
Xlabel=1:ntrain;
Ylabel=(1:ntest)+ntrain;
end
Xsort=Xtrain; Ysort=Ytest;
XYsort(Xlabel,nx+1)=X_hat; % extra column nx+1 carries estimated Z
XYsort(Ylabel,nx+1)=Y_hat;
else
% sorting estimated Z in training set and test set
[XYsort,Xlabel,Ylabel,Xsort,Ysort,U]=datasort(Xtrain,Ytest,X_hat,Y_hat,order);
end
if plots
cla
plot(Xlabel,Xsort(:,Z),'r.');hold on; % training targets
plot(Xlabel,XYsort(Xlabel,nx+1),'m.');hold on; % SVR fit on training set
plot(Ylabel,Ysort(:,Z),'ko');hold on; % test targets
plot(Ylabel,XYsort(Ylabel,nx+1),'g^');hold on; % plain SVR predictions
end
% finding optimal nu with min fitting error (or 1-fold cross validation)
if useNewParam
nu3(ik)=crossvalid(Xsort,Z,nus,1,'epsilsv','find_nu','zcurve',pamopt);
end
disp([' ---> runs/k-fold= ' num2str(ik) ' optimal nu= ' num2str(nu3(ik))]);
% projecting data to curve manifolds as per optimal nu
[R,Q,S,T]=datageometrize(XYsort(:,1:nx),nu3(ik),0);
Ycurve=R(Ylabel,:); % curve-manifold points for the test rows
S=[S;S(length(S))+1]; % append end marker for the last segment -- presumably; verify
R=sumcurve(Q,S,T,'each');
Xcurve=R(Xlabel,:); % curve-manifold points for the training rows
% training models using curve manifolds from training set
model= svmtrain(Xcurve(:,Z),Xcurve(:,bx), pamopt);
% predicting for test set by the learned model
[Z_hat,Pe] = svmpredictnoprint(Ysort(:,Z), Ycurve(:,bx), model);
[Q,ph]=sort(U); % ph: sort index of U -- presumably the original test order; verify
Q=md(Z)*Z_hat+mi(Z); % undo normalization of predictions
Resultb=Q(ph); % final GcLearn SVR predictions, original sample order
B(1)=Pe(2); % Mean Square Error
B(2)=Pe(2)*md(Z)^2; % MSE is scaled back
residual=Ysort(:,Z)-Z_hat;
B(3)=md(Z)*mean(abs(residual)); % mean absolute error
if plots
plot(Ylabel,Z_hat,'b.');hold on; % GcLearn SVR predictions
[X_hat,Pe] = svmpredictnoprint(Xsort(:,Z), Xcurve(:,bx), model);
plot(Xlabel,X_hat,'-');hold on; % fit along the curve manifold
legend('training data','predictions of training data','test data','epsilon-SVR predictions','GcLearn predictions','curve manifold',2);
xlim([-2 2+ntrain+ntest]);
end
end
% --- Report results; in check mode compare both methods against the truth ---
% Fix: the user-facing message printed 'gresssion' -- corrected to 'regression'.
disp('==> Running is over, prediction results stored in Resulta for');
disp(' GcLearn linear regression and in Resultb for GcLearn SVR');
% following codes are only for checking code mistakes
if check
if gm<3
Pea=[C A]; % [plain LR errors (1:3), GcLearn LR errors (4:6)]
Pra=(Pea(1:3)-Pea(4:6))./Pea(1:3); % proportional error reduction by GcLearn
% NOTE(review): Resulta is de-normalized (md,mi applied) while Ytest(:,Z)
% is still normalized here -- verify the scales match in check mode
residual=abs(Resulta-Ytest(:,Z));
meanErrorA=mean(residual) % no semicolon: value printed for inspection
end
if gm>1
Peb=[D B]; % [plain SVR errors (1:3), GcLearn SVR errors (4:6)]
Prb=(Peb(1:3)-Peb(4:6))./Peb(1:3); % proportional error reduction by GcLearn
residual=abs(Resultb-Ytest(:,Z));
meanErrorB=mean(residual) % printed for inspection
end
if gm<3
disp(' ');
disp('==> Results for GcLearn linear regression');
disp('* Prediction errors (Pe) in mean squared errors:');
disp(' (1st row for pure linear regression, 2nd row for GcLearn)');
disp('* Proportional reduction of Pe (Pr %) by GcLearn');
Pe=Pea([2 5])',Pr=100*Pra(2) % scaled-back MSEs and percent reduction
end
if gm>1
disp(' ');
disp('==> Results for GcLearn SVM regression');
disp('* Prediction errors (Pe) in mean squared errors');
disp(' (1st row for SVR, 2nd row for GcLearn SVR)');
disp('* Proportional reduction of Pe (Pr %) by GcLearn');
Pe=Peb([2 5])',Pr=100*Prb(2) % scaled-back MSEs and percent reduction
end
end
% release memory held by intermediate variables
XYsort=[];Xsort=[];Ysort=[];X_hat=[];Y_hat=[];Z_hat=[];Xcurve=[];
Xlabel=[];Ylabel=[];Xtrain=[];Ytest=[];dtrain=[];dtest=[];Ycurve=[];
datas=[];model=[];residual=[];Q=[];R=[];S=[]; ph=[]; pp=[];
% end of maingclearnapp.m
% (a code-viewer keyboard-shortcut panel trailing the extracted listing
%  was removed here -- it was page chrome, not part of the program)