% maingclearnapp.m
% Geometrical Correlation Learning (GcLearn) to mine geometric information.
% Main program for prediction tasks without known answers.
% Prediction results are stored in 'Resulta' (GcLearn linear regression)
% and 'Resultb' (GcLearn SVR).
% Kaijun WANG: kjwang@mail.xidian.edu.cn, sunice9@yahoo.com, Aug. 2006.

clear;
% Configuration switches for different designs or observations
gm=2;                        % 1: run linear regression; 3: run SVR; 2: run both
plots=1;                     % 1: plot the response variable & its predictions 
                                  % set breakpoints in line 150 and 247 to see figures
useNewParam=1;    % 0: reuse parameters obtained from the last run
scale=1;                    % 1: scale data to 0 mean & variance 1; 2: scale to [-1,1]
vmax='allcoef';          % max number of predictor variables ('allcoef' presumably keeps all -- confirm in modeloutdata)
nus=[5:14];                % search scope of geometrization parameter nu
nosort=0;                   % 1: data already under correct value correlations (skips datasort)
check=0;                   % 1: self-check mode using the servo benchmark with known answers
ik=1;                         % run / k-fold counter (fixed at 1 in this stand-alone script)


%(1) loading data sets
% Xtrain: training samples (rows = samples, columns = variables).
% Ytest: test samples; may lack the target column when answers are unknown.
disp(' ===> running starts ');
Xtrain=load('traindata.txt'); 
Ytest=load('testdata.txt'); 
Z=1;                               % Z: column index of the target variable for predictions

% self-check mode: use the servo benchmark where true answers are known
if check 
   Xtrain=load('servo.data'); Z=5; 
   Ytest=load('servo.data'); end

[ntrain,nx]=size(Xtrain);  % number of variables nx
[ntest,ny]=size(Ytest); 
order=nx+1;                % column nx+1 will later hold estimated target values
Xlabel=1:ntrain;
Ylabel=1:ntest; 

% If the test set has one column fewer than the training set, insert a
% placeholder column at position Z for the (unknown) target variable so
% both matrices share the same layout.
if ny<nx 
  if ny<nx-1, error('maingclearnapp: test data must have nx or nx-1 columns to match the training data'); end
  if Z~=nx
    Ytest(:,[Z+1:nx])=Ytest(:,[Z:nx-1]); 
    Ytest(:,Z)=1; 
  end
end
[Xtrain,md,mi]=datanormalize(Xtrain,scale,0);  % data are scaled
Ytest=datanormalize(Ytest,scale,md,mi);   %  same normalization 


if gm<3
% --- (4) general linear regression analysis ---
% sumdata precomputes the cross-products X(i)*X(j) consumed by modeloutdata
[ph,pp]=sumdata(Xtrain);           % computing X(i)*X(j)
regionc{1}=[1 1 0 0 ntrain];       % region descriptor covering all ntrain samples
for i=Z
 % a linear relational model with its coefficients stored in 'coef'
 [coef,sigmc]=modeloutdata(regionc{1},Xtrain,ph,pp,i,vmax);
end
sigmc=sigmc/ntrain;                % fitting error averaged per training sample

% prediction error (PE) between test data and prediction values
bx=find(coef~=0);                  % predictors with nonzero coefficients
bx=setdiff(bx,Z);                  % exclude the target variable itself
Y_hat=Ytest(:,bx)*coef(bx)'+coef(Z);  % coef(Z) acts as the intercept term
residual=Ytest(:,Z)-Y_hat;
C(1)=(norm(residual)^2)/ntest;        % Mean Square Error
C(2)=C(1)*md(Z)^2;                     % MSE scaled back to original units
C(3)=mean(abs(residual))*md(Z);  % mean absolute error (original units)


% --- (5-7) Mining geometric information and Applying it to linear regression---
%(5) mining value correlations by sorting estimated values of variable Z
X_hat=Xtrain(:,bx)*coef(bx)'+coef(Z);   % estimated Z for training set
if nosort
  % NOTE(review): 'exm' and 'datas' are not defined anywhere in this script;
  % the nosort=1 path appears to rely on an external driver script -- confirm
  % before enabling nosort (note also that U stays undefined on this path).
  if exm<10 
    XYsort=datas; 
  else 
    XYsort=[Xtrain;Ytest];
    Xlabel=1:ntrain;
    Ylabel=(1:ntest)+ntrain;
  end
  Xsort=Xtrain; Ysort=Ytest;
  XYsort(Xlabel,nx+1)=X_hat;
  XYsort(Ylabel,nx+1)=Y_hat;
else 
% XYsort=[column 1 - nx: Xsort+Ysort, column nx+1: estimated Z]
  [XYsort,Xlabel,Ylabel,Xsort,Ysort,U]=datasort(Xtrain,Ytest,X_hat,Y_hat,order);
end

if plots
cla
% red dots: training data; magenta dots: their GLR estimates;
% black circles: test data; green triangles: their GLR predictions
plot(Xlabel,Xsort(:,Z),'r.');hold on;
plot(Xlabel,XYsort(Xlabel,nx+1),'m.');hold on;
plot(Ylabel,Ysort(:,Z),'ko');hold on;
plot(Ylabel,XYsort(Ylabel,nx+1),'g^');hold on;
end


%(6)   Mining geometric information applied to linear geometric regression
pp=XYsort(:,1:nx); 
pp(Ylabel,Z)=XYsort(Ylabel,nx+1);  % using prediction values
%[ph,sigma]=crossvalid(Xsort,Z,[1 4],1,'gclearn','find_nu','zvalue');
% ph: candidate nu values on a fresh search, else the previously found optimum
if useNewParam ph=nus; else ph=nu1(ik); end

% Constructing optimal curve manifolds and finding optimal relational model
[coef,ph,R]=gclearngress(pp,Z,ph,vmax,Xlabel,'minsigm','zcurve','gclearn');
nu1(ik)=ph;                                    % optimal geometrization parameter
disp([' ---> runs/k-fold= ' num2str(ik) ' optimal nu= ' num2str(ph)]);
bx=find(coef~=0); 
bx=setdiff(bx,Z);                            % optimal model with coefficients 'bx'

%(7) prediction error between test data Ysort(:,Z) and Z_hat predicted
Ycurve=R(Ylabel,1:nx);                  % Ycurve formed from Ytest
Z_hat=Ycurve(:,bx)*coef(bx)'+coef(Z); % predicting with model 'coef'
[Q,ph]=sort(U);                         % ph restores the original test-sample order
Q=md(Z)*Z_hat+mi(Z);                    % undo normalization of the predictions
Resulta=Q(ph);                          % final predictions in original order
residual=Ysort(:,Z)-Z_hat;
A(1)=(norm(residual)^2)/ntest;    % Mean Square Error
A(2)=md(Z)^2*A(1);                  % MSE scaled back to original units
A(3)=md(Z)*mean(abs(residual));% mean absolute error (original units)

if plots
plot(Ylabel,Z_hat,'b.');hold on;        % blue dots: GcLearn predictions
Xcurve=R(Xlabel,1:nx);
X_hat=Xcurve(:,bx)*coef(bx)'+coef(Z);
[ph,pp]=sort([Xlabel; Ylabel]);         % interleave train/test plotting positions
Q=[X_hat; Z_hat];
Q=Q(pp);
plot(ph,Q,'b-');hold on;                % blue line: the fitted curve manifold
%plot(Xlabel,X_hat,'-');hold on;
legend('training data','predictions of training data','test data','GLR predictions','GcLearn predictions','curve manifold',2);
xlim([-2 2+ntrain+ntest]);
end

end


%--- (8-9) Mining geometric information and Applying it to SVM regression --- 
if gm>1
%(8) support vector machine regression "epsilon-SVR" from Libsvm Software
bx=setdiff(1:nx,Z);       % all variables except the target are predictors
S=ceil(ik/8);             % row index into the saved-parameter table 'nopts'
T=mod(ik-1,8)+1;
T=2*(T-1)+1;              % column index: each run stores one (C, gamma) pair
XYsort=[]; Xsort=[]; Ysort=[]; Ycurve=[]; X_hat=[];  % clear results of the LR stage

% searching optimal parameters of C, gamma for epsilon-SVR
if useNewParam
 R=3;                                             % searching by 3-fold validation
 pp='-s 3 -e 0.003'; 
 pam=[2.^(11:-2:-3); 2.^(5:-2:-9)]; % parameter grid: -c -g
 % coarse grid search, then a finer search around the best (C, gamma) found
 [pamopt,Q]=parameterlivsvm(Xtrain(:,1:nx),Z,bx,pp,pam,R,0);
 ph=[(0.8:0.1:1.2)*Q(1); (0.8:0.1:1.2)*Q(2)];
 [pamopt,Q]=parameterlivsvm(Xtrain(:,1:nx),Z,bx,pp,ph,R,0);
 
 % saving optimal parameters for reuse when useNewParam=0
 if ik==1 nopts=zeros(1,16); end
 nopts(S,T:(T+1))=Q(1:2);
else
 pamopt=['-s 3 -e 0.003 -c ' num2str(nopts(S,T)) ' -g ' num2str(nopts(S,T+1))];
end

% training with optimal parameters
model = svmtrain(Xtrain(:,Z), Xtrain(:,bx),pamopt);

% prediction errors of plain epsilon-SVR on the test set
[Y_hat,Pe] = svmpredictnoprint(Ytest(:,Z), Ytest(:,bx), model);
D(1)=Pe(2);                             % Mean Square Error 
D(2)=Pe(2)*md(Z)^2;               % MSE is scaled back 
residual=Ytest(:,Z)-Y_hat;
D(3)=md(Z)*mean(abs(residual)); % mean absolute error

% (9) Mining geometric information and Geometrical learning
% X_hat: SVR estimates of the target on the training set itself
[X_hat,sigmd]=svmpredictnoprint(Xtrain(:,Z),Xtrain(:,bx),model);

if nosort
  % NOTE(review): 'exm' and 'datas' are not defined anywhere in this script;
  % the nosort=1 path appears to rely on an external driver script -- confirm
  % before enabling nosort (note also that U stays undefined on this path).
  if exm<10 
    XYsort=datas; 
  else 
    XYsort=[Xtrain;Ytest];
    Xlabel=1:ntrain;
    Ylabel=(1:ntest)+ntrain;
  end
  Xsort=Xtrain; Ysort=Ytest;
  XYsort(Xlabel,nx+1)=X_hat;
  XYsort(Ylabel,nx+1)=Y_hat;
else 
% sorting estimated Z in training set and test set
  [XYsort,Xlabel,Ylabel,Xsort,Ysort,U]=datasort(Xtrain,Ytest,X_hat,Y_hat,order);
end

if plots
cla
% red dots: training data; magenta dots: their SVR estimates;
% black circles: test data; green triangles: their SVR predictions
plot(Xlabel,Xsort(:,Z),'r.');hold on;
plot(Xlabel,XYsort(Xlabel,nx+1),'m.');hold on;
plot(Ylabel,Ysort(:,Z),'ko');hold on;
plot(Ylabel,XYsort(Ylabel,nx+1),'g^');hold on;
end

% finding optimal nu with min fitting error (or 1-fold cross validation)
if useNewParam
   nu3(ik)=crossvalid(Xsort,Z,nus,1,'epsilsv','find_nu','zcurve',pamopt);
end
disp([' ---> runs/k-fold= ' num2str(ik) ' optimal nu= ' num2str(nu3(ik))]);

% projecting data to curve manifolds as per optimal nu
[R,Q,S,T]=datageometrize(XYsort(:,1:nx),nu3(ik),0);
Ycurve=R(Ylabel,:);            % curve-manifold coordinates of the test samples
S=[S;S(length(S))+1];          % append a closing segment boundary
R=sumcurve(Q,S,T,'each');
Xcurve=R(Xlabel,:);            % curve-manifold coordinates of the training samples

% training models using curve manifolds from training set
model= svmtrain(Xcurve(:,Z),Xcurve(:,bx), pamopt);

% predicting for test set by the learned model
[Z_hat,Pe] = svmpredictnoprint(Ysort(:,Z), Ycurve(:,bx), model);
[Q,ph]=sort(U);                % ph restores the original test-sample order
Q=md(Z)*Z_hat+mi(Z);           % undo normalization of the predictions
Resultb=Q(ph);                 % final predictions in original order
B(1)=Pe(2);                             % Mean Square Error 
B(2)=Pe(2)*md(Z)^2;               % MSE is scaled back 
residual=Ysort(:,Z)-Z_hat;
B(3)=md(Z)*mean(abs(residual)); % mean absolute error

if plots
plot(Ylabel,Z_hat,'b.');hold on;     % blue dots: GcLearn SVR predictions
[X_hat,Pe] = svmpredictnoprint(Xsort(:,Z), Xcurve(:,bx), model);
plot(Xlabel,X_hat,'-');hold on;      % line: curve-manifold fit on training data
legend('training data','predictions of training data','test data','epsilon-SVR predictions','GcLearn predictions','curve manifold',2);
xlim([-2 2+ntrain+ntest]);
end

end 

% Final report: predictions are available in 'Resulta' (GcLearn linear
% regression) and 'Resultb' (GcLearn SVR), in the original test-sample order.
disp('==> Running is over, prediction results stored in Resulta for');
disp(' GcLearn linear regression and in Resultb for GcLearn SVR');

% following codes are only for checking code mistakes (check=1 runs the
% servo benchmark, where the true answers are known)
if check 
if gm<3
  Pea=[C A];                           % [plain-LR errors, GcLearn-LR errors]
  Pra=(Pea(1:3)-Pea(4:6))./Pea(1:3);   % proportional error reduction by GcLearn
  % NOTE(review): Resulta is de-normalized while Ytest(:,Z) is still
  % normalized here -- verify this comparison is intended.
  residual=abs(Resulta-Ytest(:,Z));
  meanErrorA=mean(residual)            % no semicolon: displayed on purpose
end
if gm>1
  Peb=[D B];                           % [plain-SVR errors, GcLearn-SVR errors]
  Prb=(Peb(1:3)-Peb(4:6))./Peb(1:3);   % proportional error reduction by GcLearn
  residual=abs(Resultb-Ytest(:,Z));
  meanErrorB=mean(residual)            % no semicolon: displayed on purpose
end

if gm<3
disp('  ');
disp('==> Results for GcLearn linear regression');
disp('* Prediction errors (Pe) in mean squared errors:');
disp('   (1st row for pure linear regression, 2nd row for GcLearn)');
disp('* Proportional reduction of Pe (Pr %) by GcLearn');
Pe=Pea([2 5])',Pr=100*Pra(2)           % no semicolons: displayed on purpose
end

if gm>1
disp('  ');
disp('==> Results for GcLearn SVM regression');
disp('* Prediction errors (Pe) in mean squared errors');
disp('   (1st row for SVR, 2nd row for GcLearn SVR)');
disp('* Proportional reduction of Pe (Pr %) by GcLearn');
Pe=Peb([2 5])',Pr=100*Prb(2)           % no semicolons: displayed on purpose
end
end

% release workspace memory held by intermediate variables
XYsort=[];Xsort=[];Ysort=[];X_hat=[];Y_hat=[];Z_hat=[];Xcurve=[];
Xlabel=[];Ylabel=[];Xtrain=[];Ytest=[];dtrain=[];dtest=[];Ycurve=[];
datas=[];model=[];residual=[];Q=[];R=[];S=[]; ph=[]; pp=[]; 
