📄 gaselctr.m

📁 PLS_Toolbox是用于故障检测与诊断方面的matlab工具箱
💻 M
字号:
function [fit,pop] = gaselctr(x,y,np,mg,mt,wn,cn,bf,cr,ml,cv,sp,it)
%GASELCTR genetic algorithm for variable selection with PLS
%  GASELCTR uses a genetic algorithm optimization to
%  minimize cross validation error for variable selection.
%  Inputs are: (x) the predictor block, (y) the predicted
%  block [all scaling should be done prior to running the
%  GA], (np) the population size [16 to 256 and must be
%  divisible by 4], (mg) the maximum number of generations
%  [25 to 500], (mt) the mutation rate [typically 0.001 to
%  0.01], (wn) the number of variables in a window (window
%  width), (cn) per cent of population the same at
%  convergence, (bf) per cent terms included at initiation
%  [10 to 50], (cr) breeding cross over rule [1 = single,
%  2 = double], (ml) maximum number of latent variables for
%  the PLS models, (cv) cross validation option [0 = random,
%  1 = contiguous blocks], (sp) number of subsets to divide
%  data into for cross validation, (it) number of iterations
%  for cross validation at each generation. The outputs are:
%  (pop) the unique populations at either convergence
%  or maximum generations, and (fit) the fitness [cross
%  valdation error] for each population in (pop).
%
%I/O: [fit,pop] = gaselctr(x,y,np,mg,mt,wn,cn,bf,cr,ml,cv,sp,it);
%
%Example:
%  [fit,pop] = gaselctr(mncn(x),mncn(y),32,100,0.005,5,80,50,2,4,0,5,1)
%  
%See also: GENALG

% Copyright Eigenvector Research, Inc. 1995-2000
% Modified 2/9/97,2/10/98 NBG
% Modified 4/30/98 BMW
% 10/27/00, nbg change in main loop to check if pop(i,:) is all zeros

% Set Metaparameters
nopop     = np;   %Population size
maxgen    = mg;   %Max number of generations
mut       = mt;   %Mutation Rate
window    = wn;   %Window width for spectral channels
converge  = cn;   % % of pop the same at convergence
begfrac   = bf/100;   %Fraction of terms included in beginning
cross     = cr;   %Double or single cross over, 1 = single
reg       = 1;    %Regression method, 0 = MLR, 1 = PLS
maxlv     = ml;   %No. LVs, only needed with reg = 1
cvopt     = cv;   %CV option, 0 = random, 1 = contiguous
split     = sp;   %No. subsets to divide data into
iter      = it;   %No. iterations of CV at each generation

[m,n]     = size(x);
fig       = figure;
  gcount    = 1;
  specsplit = ceil(n/window);
  %Check to see that nopop is divisible by 4
  dp1        = nopop/4;
  if ceil(dp1) ~= dp1
    nopop   = ceil(dp1)*4;
    disp('Population size not divisible by 4')
    s       = sprintf('Resizing to a population of %g',nopop);
    disp(s)
  end
  %Generate initial population
  pop     = rand(nopop,specsplit);
  for i = 1:nopop
    for j = 1:specsplit
      if pop(i,j) < begfrac
        pop(i,j) = 1;
      else
        pop(i,j) = 0;
      end
    end
	if sum(pop(i,:)')<0.5
	  colm        = round(rand(1)*specsplit);
	  if colm <0.5
	    colm      = 1;
	  end
	  pop(i,colm) = 1;
	end
  end

%Set limit on number of duplicates in population
maxdups = ceil(nopop*converge/100);
%Iterate until dups > maxdups
dat = [x y];

  dups     = 0;
  cavterms = zeros(1,maxgen);
  cavfit   = zeros(1,maxgen);
  cbfit    = zeros(1,maxgen);

% Main Loop

  while dups < maxdups
    drawnow

    %Shuffle data and form calibration and test sets
    s        = sprintf('At generation %g the number of duplicates is %g',gcount,dups);
    disp(s)
    avterms  = mean(sum(pop'));
    cavterms(gcount) = avterms;
    s        = sprintf('The average number of terms is %g',avterms);
    disp(s)
    dups     = 0; 
    if reg == 1
      fit    = zeros(maxlv,nopop); 
    else
      fit    = zeros(1,nopop);
    end
    %Test each model in population
	drawnow
    for kk = 1:iter       %Number of iterations
      if cvopt == 0
        dat  = shuffle(dat);
      else
        di   = shuffle([2:m]');
        dat  = [dat(di(1):m,:); dat(1:di(1)-1,:)];
      end
      for i = 1:nopop
	    drawnow
      %Check to see that model isn't a repeat
        dflag = 0;
        if i > 1
          for ii = 1:i-1
	           dif = sum(abs(pop(i,:) - pop(ii,:)));
            if dif == 0
              dflag = 1;
              fit(:,i) = fit(:,ii);
            end
          end
        end
        if dflag == 1;
          if kk == 1
            dups = dups + 1;
          end
        else
          %Select the proper columns for use in modeling
          inds = find(pop(i,:))*window;
          [smi,sni] = size(inds);
          if inds(1) <= n
            ninds = [inds(1)-window+1:inds(1)];
          else
            ninds = [inds(1)-window+1:n];
          end
          for aaa = 2:sni
            if inds(aaa) <= n
              ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
            else
              ninds = [ninds [inds(aaa)-window+1:n]];
            end
          end
          xx = dat(:,ninds);
          [mxx,nxx] = size(xx);
          yy = dat(:,n+1);
          if reg == 1
	  	    lvs = min([nxx,maxlv]);
            %[press,cumpress,minlv] = plscvbkf(xx,yy,split,lvs); 
            [press,cumpress] = crossval(xx,yy,'sim','con',lvs,split,[],[],0);   
            fit(1:lvs,i) = fit(1:lvs,i) + (sqrt(cumpress/m)/iter)';
		    if lvs < maxlv
		      fit(lvs+1:maxlv,i) = Inf*ones(maxlv-lvs,1);
		    end
          else
            press = mlrcvblk(xx,yy,split);
			fit(i) = fit(i) + sqrt(press/m)/iter;  
          end
        end
      end
    end
    %Sort models based on fitness
	drawnow
	if reg == 1
	  if maxlv ==1
	    mfit       = fit;
	  else
        mfit       = min(fit);
	  end
	else
      mfit = fit;
	end
    [mfit,ind]     = sort(mfit);
    s              = sprintf('The best fitness is %g',mfit(1));
    disp(s)
    cbfit(gcount)  = mfit(1);
    s              = sprintf('The average fitness is %g',mean(mfit));
    disp(s)
    cavfit(gcount) = mean(mfit);
    pop            = pop(ind,:);
    figure(fig)
    subplot(2,2,1)
    sumpop         = sum(pop');
    plot(sumpop,mfit,'og'), mnfit = min(mfit); mxfit = max(mfit);
    dfit           = mxfit - mnfit; if dfit == 0, dfit=1; end
    axis([min(sumpop)-1 max(sumpop)+1 mnfit-dfit/10 mxfit+dfit/10])
    if window > 1
      xlabel('Number of Windows')
      s = sprintf('Fitness vs. # of Windows at Generation %g',gcount);
    else
      xlabel('Number of Variables')
	  s = sprintf('Fitness vs. # of Variables at Generation %g',gcount);
    end  
    title(s)
    ylabel('Fitness')
	set(gca,'FontSize',9)
    set(get(gca,'Ylabel'),'FontSize',9)
    set(get(gca,'Title'),'FontSize',9)
    set(get(gca,'Xlabel'),'FontSize',9)
    subplot(2,2,2)
    plot(1:gcount,cavfit(1:gcount),1:gcount,cbfit(1:gcount))
    xlabel('Generation')
    ylabel('Average and Best Fitness')
    title('Evolution of Average and Best Fitness')
	set(gca,'FontSize',9)
    set(get(gca,'Ylabel'),'FontSize',9)
    set(get(gca,'Title'),'FontSize',9)
    set(get(gca,'Xlabel'),'FontSize',9)
    subplot(2,2,3)
    plot(cavterms(1:gcount))
    xlabel('Generation')
    if window > 1
      ylabel('Average Windows Used')
	  title('Evolution of Number of Windows')
    else
      ylabel('Average Variables Used')
	  title('Evolution of Number of Variables')	
    end
	set(gca,'FontSize',9)
    set(get(gca,'Ylabel'),'FontSize',9)
    set(get(gca,'Title'),'FontSize',9)
    set(get(gca,'Xlabel'),'FontSize',9)
    subplot(2,2,4)
    bar(sum(pop))
    if window > 1
      xlabel('Window Number')
      ylabel('Models Including Window')
	  s = sprintf('Models with Window at Generation %g',gcount);
    else
      xlabel('Variable Number')
      ylabel('Models Including Variable')
      s = sprintf('Models with Variable at Generation %g',gcount);
    end
    title(s)
    axis([0 ceil(n/window)+1 0 nopop+2])
	set(gca,'FontSize',9)
    set(get(gca,'Ylabel'),'FontSize',9)
    set(get(gca,'Title'),'FontSize',9)
    set(get(gca,'Xlabel'),'FontSize',9)
    drawnow
    % Check to see if maxgen has been met
    if gcount >= maxgen
      dups = maxdups;
    end
    % Breed best half of population and replace worst half
    pop(1:nopop/2,:) = shuffle(pop(1:nopop/2,:));
    pop((nopop/2)+1:nopop,:) = pop(1:nopop/2,:);
    for i = 1:nopop/4
      for j = 1:cross
        %Select twist point at random
        tp = ceil(rand(1)*(specsplit-1));
        %Twist pairs and replace
	    p1 = (nopop/2)+(i*2)-1;
	    p2 = (nopop/2)+(i*2);
	    p1rep = [pop(p1,1:tp) pop(p2,tp+1:specsplit)];
	    p2rep = [pop(p2,1:tp) pop(p1,tp+1:specsplit)];
        pop(p1,:) = p1rep;
        pop(p2,:) = p2rep;
      end
    end
    %Mutate the population if dups < maxdups
    if dups < maxdups
      [mi,mj] = find(rand(nopop,specsplit)<mut);
      [ms,ns] = size(mi);
      for i=1:ms
        if pop(mi(i),mj(i)) == 1
          pop(mi(i),mj(i)) = 0;
        else
          pop(mi(i),mj(i)) = 1;
        end
      end
      for i=1:nopop                %nbg 10/27/00, start, check if any row of pop all zero, 
        if isempty(find(pop(i,:)))
          pop(i,1+round(rand(1)*(size(pop,2)-1))) = 1;
        end
      end                          %nbg 10/27/00, end
    end 
    gcount = gcount + 1;
  end
%End of Main Loop

if dups >= maxdups
  drawnow
  %Extract unique models from final population
  fpop = zeros(nopop-dups,specsplit);
  unique = 0; dups = 0;
  for i = 1:nopop
    dflag = 0;
    if i > 1
      for ii = 1:i-1
        dif = sum(abs(pop(i,:) - pop(ii,:)));
        if dif == 0
          dflag = 1;
        end
      end 
    end
    if dflag == 1
      dups = dups + 1;
    else
	  unique = unique + 1;
	  fpop(unique,:) = pop(i,:);
    end
  end
  s = sprintf('There are %g unique models in final population',unique);
  disp(s)
  %Testing final population
  if reg == 1
    fit = zeros(maxlv,unique); 
  else
    fit = zeros(1,unique);
  end
  disp('Now testing models in final population')
  for kk = 1:3*iter       %Number of iterations 
    if cvopt == 0
      dat = shuffle(dat);
    else
      di = shuffle([2:m]');
      dat = [dat(di(1):m,:); dat(1:di(1)-1,:)];
    end
    for i=1:unique
      %Select the proper columns for use in modeling
      inds      = find(fpop(i,:))*window;
      [smi,sni] = size(inds);
      if inds(1)<=n
        ninds = [inds(1)-window+1:inds(1)];
      else
        ninds = [inds(1)-window+1:n];
      end
      for aaa=2:sni
        if inds(aaa) <= n
          ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
        else
          ninds = [ninds [inds(aaa)-window+1:n]];
        end
      end
      xx = dat(:,ninds);
      [mxx,nxx] = size(xx);
      yy = dat(:,n+1);
      if reg==1
        lvs = min([nxx,maxlv]);
        %[press,cumpress,minlv] = plscvbkf(xx,yy,split,lvs); 
        [press,cumpress] = crossval(xx,yy,'sim','con',lvs,split,[],[],0);   
        fit(1:lvs,i) = fit(1:lvs,i) + (sqrt(cumpress/m)/(iter*3))';
	      if lvs < maxlv
	  	    fit(lvs+1:maxlv,i) = Inf*ones(maxlv-lvs,1);
        end
      else
        press = mlrcvblk(xx,yy,split);
        fit(i) = fit(i) + sqrt(min(press)/m)/(iter*3);
      end
	    if kk == iter*3
        if reg == 1
          [mf,ind] = min(fit(:,i));
          s = sprintf('Number %g fitness is %g at %g LVs',i,mf,ind);
          %s = sprintf('Number %g fitness is %g at %g LVs',i,min(fit(:,i)),minlv);
        else
          s = sprintf('Number %g fitness is %g',i,min(fit(:,i)));	  
	      end
	      disp(s)
      end
    end
  end
  if reg==1
    if size(fit,1)==1 %modified 2/10/98
      mfit = fit;
    else
      mfit = min(fit,[],1);
    end %end modification 2/10/98
  else
    mfit = fit;
  end
  [mfit,ind] = sort(mfit);
  s          = sprintf('The best fitness is %g',mfit(1));
  disp(s)
  s          = sprintf('The average fitness is %g',mean(mfit));
  disp(s)
  fpop       = fpop(ind,:);
  ffit       = mfit;
  % Translate the population (in terms of windows) into the
  % actual variables used in the final population.
  if window == 1
    gpop = fpop;
  else
    gpop = zeros(unique,n);
    for jk = 1:unique
      inds = find(fpop(jk,:))*window;
      [smi,sni] = size(inds);
      if inds(1) <= n
        ninds = [inds(1)-window+1:inds(1)];
      else
        ninds = [inds(1)-window+1:n];
      end
      for aaa = 2:sni
        if inds(aaa) <= n
          ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
        else
          ninds = [ninds [inds(aaa)-window+1:n]];
        end
	  end
      [snmi,snni] = size(ninds);
	  gpop(jk,ninds) = ones(1,snni);
	end
  end
end  
  
fit = ffit;
pop = gpop;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -