
📄 maingclearn.m

📁 A support vector machine tool
💻 MATLAB (M-file)
% Geometrical Correlation Learning to mine geometric information
% Main program for validation and comparison
% Kaijun WANG: kjwang@mail.xidian.edu.cn, sunice9@yahoo.com, Aug. 2006.
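%
% Workflow: (0) loop over noise levels, (1) load or (2) generate data,
% (3) normalize, (4) baseline linear regression, (5-7) GcLearn applied to
% linear regression, (8-9) epsilon-SVR baseline and GcLearn applied to SVR,
% (10) collect prediction errors, (11) print and plot the results.
%
% Requires on the MATLAB path: LIBSVM (svmtrain, plus the wrapper
% svmpredictnoprint used here) and the helper functions dataload,
% datanormalize, datagenerate, datasort, sumdata, modeloutdata,
% gclearngress, crossvalid, parameterlivsvm, datageometrize, sumcurve,
% plotout and ploterrorbar.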

clear;
exm=2;       % 1: pyrimidines data set, 2: servo, 3: autoprice,
             % 4: cpu, 5: autompg;
             % 11: artificial data from the model
             %     Z(t) = 2*X(t) + Y(t) + 2.0, X(t) = 10 + 4*sin(0.08*t),
             %     Y(t) = 10 + 4*cos(0.04*t + 4), t = [1,300];
             % 12: artificial data from the model
             %     Z(t) = X(t)^2 + 4*Y(t) - 3*X(t)*Y(t) + 10,
             %     X(t) = -0.1*t + 10, Y(t) = 3 + 2*sin(0.1*t - 3.0), t = [1,300], ...
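% A minimal sketch of how the example-11 data could be produced (the actual
% generator is datagenerate.m; adding Gaussian noise with variance nk is an
% assumption about its behaviour):
%   t = (1:300)';
%   X = 10 + 4*sin(0.08*t);
%   Y = 10 + 4*cos(0.04*t + 4);
%   Z = 2*X + Y + 2.0 + sqrt(nk)*randn(size(t));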

nus=[5:14];              % searching scope of geometrization parameter nu

% Choices for different designs or observations
gm=2;                     % 1: run linear regression; 3: run SVR; 2: run both
plots=0;                  % 1: plot the response variable and its predictions
                          %    (set breakpoints at lines 150 and 247 to see the figures)
useNewParam=1;            % 1: search for new parameters; 0: reuse parameters from the last run
nois=[0.2:0.4:2.2];       % variances of the Gaussian noise added to the artificial data (exm>10)
repeats=20;               % number of repeated runs for each noise level
kfold=10;                 % k-fold cross-validation, each 1/kfold of the data used as a test set
randoms=0;                % 1: the 1/kfold test set is randomly selected from the data
scale=1;                  % 1: scale data to mean 0 and variance 1; 2: scale to [-1,1]
vmax='allcoef';           % max number of predictor variables ('allcoef': use all)
ks=0;                     % counter over the noise levels (column index of the result arrays)

if exm<11 nois=1; repeats=1; end
if exm>10 nosort=1; else nosort=0; end  % 1: no sorting needed
disp([' == == ==> Example  ' num2str(exm)]);

%(0) loop over the noise levels
for nk=nois
  ks=ks+1;
  iterations=1:repeats;
  disp([' ===> running at noise level ' num2str(nk) ', total repeats: ' num2str(repeats)]);

%(1) loading data sets
if exm<11                 % Z: target variable for prediction
  [datas,dtrain,dtest,Z,xname]=dataload(exm,kfold,randoms);
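  % Assumed layout (from the indexing below): datas is the full data matrix,
  % dtrain/dtest are cell arrays of row indices giving each fold's training
  % and test samples, and xname is a label used in the reports.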
  [datas,md]=datanormalize(datas,scale,0); % variable values are scaled
  if iscell(dtrain) iterations=1:length(dtrain); end
end


%(2) runs for k-fold validation or repeats
for ik=iterations
 if exm>10
   % generating a training and a test data set from true models
   [Xtrain,true,xname]=datagenerate(exm,nk);
   Ytest=datagenerate(exm,nk); 
   
%(3) data values are scaled to prevent possible numeric problems
   [Xtrain,md,mi]=datanormalize(Xtrain,scale,0);  
   Ytest=datanormalize(Ytest,scale,md,mi);   % normalize with the same md & mi
   Z=1;                                      % Z: column index of the target/response variable
 else
   Xlabel=dtrain{ik}; 
   Xtrain=datas(Xlabel,:);
   Ylabel=dtest{ik};    
   Ytest=datas(Ylabel,:);
   dtrain{ik}=[]; dtest{ik}=[];
 end
   
% number of variables nx
[ntrain,nx]=size(Xtrain);
ntest=size(Ytest,1); 
order=nx+1; 
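% 'order' = nx+1: the extra column of XYsort that will hold the estimated
% response once the samples have been sorted (see step 5 below)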


if gm<3
% --- (4) general linear regression analysis ---
[ph,pp]=sumdata(Xtrain);           % computing X(i)*X(j)
regionc{1}=[1 1 0 0 ntrain];
for i=Z
 % a linear relational model with its coefficients stored in 'coef'
 [coef,sigmc]=modeloutdata(regionc{1},Xtrain,ph,pp,i,vmax);
end
sigmc=sigmc/ntrain;

% prediction error (PE) between test data and prediction values
bx=find(coef~=0);
bx=setdiff(bx,Z);
Y_hat=Ytest(:,bx)*coef(bx)'+coef(Z);
residual=Ytest(:,Z)-Y_hat;
C(ik,1)=(norm(residual)^2)/ntest;        % Mean Square Error
C(ik,2)=C(ik,1)*md(Z)^2;                     % scaled back
C(ik,3)=mean(abs(residual))*md(Z);  % mean absolute error
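% Columns of C: (1) MSE on the normalized scale, (2) MSE rescaled to the
% original units via the scale factor md(Z) from datanormalize, (3) MAE in
% original units. The GcLearn errors A(ik,:) below use the same layout.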


% --- (5-7) Mining geometric information and applying it to linear regression ---
%(5) mining value correlations by sorting estimated values of variable Z
X_hat=Xtrain(:,bx)*coef(bx)'+coef(Z);   % estimated Z for training set
if nosort
  if exm<10 
    XYsort=datas; 
  else 
    XYsort=[Xtrain;Ytest];
    Xlabel=1:ntrain;
    Ylabel=(1:ntest)+ntrain;
  end
  Xsort=Xtrain; Ysort=Ytest;
  XYsort(Xlabel,nx+1)=X_hat;
  XYsort(Ylabel,nx+1)=Y_hat;
else 
% XYsort = [columns 1..nx: sorted Xtrain & Ytest, column nx+1: estimated Z]
  [XYsort,Xlabel,Ylabel,Xsort,Ysort]=datasort(Xtrain,Ytest,X_hat,Y_hat,order);
end
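% After sorting, Xlabel/Ylabel give the positions of the training and test
% samples inside XYsort, whose last column (nx+1) carries the estimated Z
% that defines the ordering.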

if plots
cla
plot(Xlabel,Xsort(:,Z),'r.');hold on;
plot(Xlabel,XYsort(Xlabel,nx+1),'m.');hold on;
plot(Ylabel,Ysort(:,Z),'ko');hold on;
plot(Ylabel,XYsort(Ylabel,nx+1),'g^');hold on;
end


%(6)   Mining geometric information applied to linear geometric regression
pp=XYsort(:,1:nx); 
pp(Ylabel,Z)=XYsort(Ylabel,nx+1);  % replace the test-set targets by their predicted values
%[ph,sigma]=crossvalid(Xsort,Z,[1 4],1,'gclearn','find_nu','zvalue');
if useNewParam ph=nus; else ph=nu1(ik); end

% Constructing optimal curve manifolds and finding optimal relational model
[coef,ph,R]=gclearngress(pp,Z,ph,vmax,Xlabel,'minsigm','zcurve','gclearn');
nu1(ik)=ph;                                    % optimal geometrization parameter
disp([' ---> runs/k-fold= ' num2str(ik) ' optimal nu= ' num2str(ph)]);
bx=find(coef~=0); 
bx=setdiff(bx,Z);                            % predictors with nonzero coefficients kept in the optimal model (target column excluded)

%(7) prediction error between test data Ysort(:,Z) and Z_hat predicted
Ycurve=R(Ylabel,1:nx);                  % Ycurve formed from Ytest
Z_hat=Ycurve(:,bx)*coef(bx)'+coef(Z); % predicting with model 'coef'
residual=Ysort(:,Z)-Z_hat;
A(ik,1)=(norm(residual)^2)/ntest;    % Mean Square Error
A(ik,2)=md(Z)^2*A(ik,1);                  % scaled back
A(ik,3)=md(Z)*mean(abs(residual));% mean absolute error

if plots
plot(Ylabel,Z_hat,'b.');hold on;
Xcurve=R(Xlabel,1:nx);
X_hat=Xcurve(:,bx)*coef(bx)'+coef(Z);
[ph,pp]=sort([Xlabel; Ylabel]);
Q=[X_hat; Z_hat];
Q=Q(pp);
plot(ph,Q,'b-');hold on;
%plot(Xlabel,X_hat,'-');hold on;
legend('training data','predictions of training data','test data','GLR predictions','GcLearn predictions','curve manifold',2);
xlim([-2 2+ntrain+ntest]);
end

end


%--- (8-9) Mining geometric information and applying it to SVM regression ---
if gm>1
%(8) support vector machine regression "epsilon-SVR" from the LIBSVM software
bx=setdiff(1:nx,Z);
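% (S,T) index the table 'nopts' of saved epsilon-SVR parameters: one row
% per block of 8 runs, with two consecutive columns [C, gamma] per run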
S=ceil(ik/8);
T=mod(ik-1,8)+1;
T=2*(T-1)+1;
XYsort=[]; Xsort=[]; Ysort=[]; Ycurve=[]; X_hat=[];

% searching optimal parameters of C, gamma for epsilon-SVR
if useNewParam
 R=3;                                             % searching by 3-fold cross-validation
 pp='-s 3 -e 0.003'; 
 pam=[2.^(11:-2:-3); 2.^(5:-2:-9)]; % parameter grid: -c -g
 [pamopt,Q]=parameterlivsvm(Xtrain(:,1:nx),Z,bx,pp,pam,R,0);
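 % refine: repeat the search on a finer grid within +/-20% of the
 % coarse optimum (C, gamma) found above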
 ph=[(0.8:0.1:1.2)*Q(1); (0.8:0.1:1.2)*Q(2)];
 [pamopt,Q]=parameterlivsvm(Xtrain(:,1:nx),Z,bx,pp,ph,R,0);
 
 % saving optimal parameters
 if ik==1 nopts=zeros(1,16); end
 nopts(S,T:(T+1))=Q(1:2);
else
 pamopt=['-s 3 -e 0.003 -c ' num2str(nopts(S,T)) ' -g ' num2str(nopts(S,T+1))];
end

% training with optimal parameters
model = svmtrain(Xtrain(:,Z), Xtrain(:,bx),pamopt);

% prediction errors from model
[Y_hat,Pe] = svmpredictnoprint(Ytest(:,Z), Ytest(:,bx), model);
D(ik,1)=Pe(2);                             % Mean Square Error 
D(ik,2)=Pe(2)*md(Z)^2;               % MSE is scaled back 
residual=Ytest(:,Z)-Y_hat;
D(ik,3)=md(Z)*mean(abs(residual)); % mean absolute error

% (9) Mining geometric information and Geometrical learning
[X_hat,sigmd]=svmpredictnoprint(Xtrain(:,Z),Xtrain(:,bx),model);
if nosort
  if exm<10 
    XYsort=datas; 
  else 
    XYsort=[Xtrain;Ytest];
    Xlabel=1:ntrain;
    Ylabel=(1:ntest)+ntrain;
  end
  Xsort=Xtrain; Ysort=Ytest;
  XYsort(Xlabel,nx+1)=X_hat;
  XYsort(Ylabel,nx+1)=Y_hat;
else 
% sorting estimated Z in training set and test set
  [XYsort,Xlabel,Ylabel,Xsort,Ysort]=datasort(Xtrain,Ytest,X_hat,Y_hat,order);
end

if plots
cla
plot(Xlabel,Xsort(:,Z),'r.');hold on;
plot(Xlabel,XYsort(Xlabel,nx+1),'m.');hold on;
plot(Ylabel,Ysort(:,Z),'ko');hold on;
plot(Ylabel,XYsort(Ylabel,nx+1),'g^');hold on;
end

% finding optimal nu with min fitting error (or 1-fold cross validation)
if useNewParam
   nu3(ik)=crossvalid(Xsort,Z,nus,1,'epsilsv','find_nu','zcurve',pamopt);
end
disp([' ---> runs/k-fold= ' num2str(ik) ' optimal nu= ' num2str(nu3(ik))]);

% projecting data to curve manifolds as per optimal nu
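% (Assumed behaviour of the helpers:) datageometrize projects the sorted data
% onto curve manifolds for the chosen nu, and sumcurve rebuilds the manifold
% coordinates segment by segment; rows indexed by Xlabel/Ylabel then give the
% training- and test-set portions of the curves.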
[R,Q,S,T]=datageometrize(XYsort(:,1:nx),nu3(ik),0);
Ycurve=R(Ylabel,:);
S=[S;S(length(S))+1];
R=sumcurve(Q,S,T,'each');
Xcurve=R(Xlabel,:);

% training models using curve manifolds from training set
model= svmtrain(Xcurve(:,Z),Xcurve(:,bx), pamopt);

% predicting for test set by the learned model
[Z_hat,Pe] = svmpredictnoprint(Ysort(:,Z), Ycurve(:,bx), model);
B(ik,1)=Pe(2);                             % Mean Square Error 
B(ik,2)=Pe(2)*md(Z)^2;               % MSE is scaled back 
residual=Ysort(:,Z)-Z_hat;
B(ik,3)=md(Z)*mean(abs(residual)); % mean absolute error

if plots
plot(Ylabel,Z_hat,'b.');hold on;
[X_hat,Pe] = svmpredictnoprint(Xsort(:,Z), Xcurve(:,bx), model);
plot(Xlabel,X_hat,'-');hold on;
legend('training data','predictions of training data','test data','epsilon-SVR predictions','GcLearn predictions',2);
xlim([-2 2+ntrain+ntest]);
end

end % end gm
end % end runs of ik


%(10) computing prediction errors
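% Pea/Peb: rows 1-3 hold the baseline errors (linear regression / SVR) and
% rows 4-6 the corresponding GcLearn errors, one column per noise level;
% Pra/Prb give the proportional reduction of the error achieved by GcLearn.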
if gm<3
  Pea(:,ks)=mean([C A])';
  deviata(:,ks)=std([C A])';
  Pra(:,ks)=(Pea(1:3,ks)-Pea(4:6,ks))./Pea(1:3,ks);
end
if gm>1
  Peb(:,ks)=mean([D B])';
  deviatb(:,ks)=std([D B])';
  Prb(:,ks)=(Peb(1:3,ks)-Peb(4:6,ks))./Peb(1:3,ks);
end
end % end nk (noise)

XYsort=[];Xsort=[];Ysort=[];X_hat=[];Y_hat=[];Z_hat=[];Xcurve=[];
Xlabel=[];Ylabel=[];Xtrain=[];Ytest=[];dtrain=[];dtest=[];Ycurve=[];
datas=[];model=[];residual=[];Q=[];R=[];S=[]; ph=[]; pp=[]; 


% (11) printing and plotting results 
disp('  ');
disp(xname);
if gm<3
disp('==> Results for GcLearn linear regression');
disp('* Prediction errors (Pe) in mean squared errors:');
disp('   (1st row for pure linear regression, 2nd row for GcLearn)');
disp('*  Proportional reduction of Pe (Pr %) by GcLearn');
if exm<10
  Pe=Pea([2 5]),Pr=100*Pra(2)
else
  nois
  Pe=[Pea([2 5],:);100*Pra(2,:)]
end
R=Pea([2 5],:);
S=deviata([2 5],:);
end

if gm>1
disp('  ');
disp('==> Results for GcLearn SVM regression');
disp('* Prediction errors (Pe) in mean squared errors');
disp('   (1st row for SVR, 2nd row for GcLearn SVR)');
disp('*  Proportional reduction of Pe (Pr %) by GcLearn');
if exm<10
  Pe=Peb([2 5]),Pr=100*Prb(2)
else
  nois
  Pe=[Peb([2 5],:);100*Prb(2,:)]
end
R=[R;Peb([2 5],:)];
S=[S;deviatb([2 5],:)];
end

if exm>10
plotout(R,[],nois,exm,xname,0,0);
ploterrorbar(R,S,nois,xname,1,0);
end
