⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gaselect.m

📁 偏最小二乘算法在MATLAB中的实现
💻 M
字号:
function gaselect(fig,action)
%GASELECT Subroutine of GENALG for Variable Selection
% Please see GENALG

% Copyright
% Eigenvector Technologies
% 1995

% Variables set by GUI
handl     = get(fig,'userdata'); 
nopop     = 4*round(get(handl(2,1),'Value')/4);       %Size of population
maxgen    = round(get(handl(3,1),'value'));           %Max number of generations
mut       = 1e-3*round(get(handl(4,1),'value')/1e-3); %Mutation Rate
window    = round(get(handl(5,1),'value'));           %Window width for spectral channels
converge  = round(get(handl(6,1),'value'));           % % of pop the same at convergence
begfrac   = round(get(handl(7,1),'value'))/100;       %Fraction of terms included in beginning
cross     = handl(8,3);                               %Double or single cross over, 1 = single
reg       = handl(9,3);                               %Regression method, 0 = MLR, 1 = PLS
maxlv     = round(get(handl(10,1),'value'));          %No. LVs, only needed with reg = 1
cvopt     = handl(11,3);                              %CV option, 0 = random, 1 = contiguous
split     = round(get(handl(12,1),'value'));          %No. subsets to divide data into
iter      = round(get(handl(13,1),'value'));          %No. iterations of CV at each generation
x         = get(handl(2,1),'UserData');               % x-block data
y         = get(handl(3,1),'UserData');               % y-block data
[m,n]     = size(x);
if strcmp(action,'gogas')
  gcount    = 1;
  specsplit = ceil(n/window);
  set(handl(2,2),'UserData',specsplit);
  %Check to see that nopop is divisible by 4
  dp        = nopop/4;
  if ceil(dp) ~= dp
    nopop   = ceil(dp)*4;
    disp('Population size not divisible by 4')
    s       = sprintf('Resizing to a population of %g',nopop);
    disp(s)
  end
  %Generate initial population
  pop     = rand(nopop,specsplit);
  for i = 1:nopop
    for j = 1:specsplit
      if pop(i,j) < begfrac
        pop(i,j) = 1;
      else
        pop(i,j) = 0;
      end
    end
	if sum(pop(i,:)')<0.5
	  colm        = round(rand(1)*specsplit);
	  if colm <0.5
	    colm      = 1;
	  end
	  pop(i,colm) = 1;
	end
  end
end

%Set limit on number of duplicates in population
maxdups = ceil(nopop*converge/100);
%Iterate until dups > maxdups
dat = [x y];
if gcount == 1
  dups     = 0;
  cavterms = zeros(1,maxgen);
  cavfit   = zeros(1,maxgen);
  cbfit    = zeros(1,maxgen);
end
if strcmp(action,'resum')
  specsplit= get(handl(2,2),'UserData');
  fit      = get(handl(2,3),'UserData');
  pop      = get(handl(2,4),'UserData');
  gcount   = get(handl(2,5),'UserData');
  dups     = get(handl(3,2),'UserData');
  cavfit   = get(handl(3,3),'UserData');
  cbfit    = get(handl(3,4),'UserData');
  cavterms = get(handl(3,5),'UserData');
end
% Main Loop
if strcmp(action,'gogas')|strcmp(action,'resum')
  while dups < maxdups
    drawnow
    if get(handl(15,1),'UserData') == 1
      set(handl(2,3),'UserData',fit);
      set(handl(2,4),'UserData',pop);
	  set(handl(2,5),'UserData',gcount);
	  set(handl(3,2),'UserData',dups);
      set(handl(3,3),'UserData',cavfit);
      set(handl(3,4),'UserData',cbfit);
      set(handl(3,5),'UserData',cavterms);
      set(handl(17,1),'Visible','On');
	  set(handl([3 4 6 12 13],[1 3 4]),'Visible','On');
	  if get(handl(9,2),'Value')
	    set(handl(10,[1 3 4]),'Visible','On')
	  end
	  break
    end
    %Shuffle data and form calibration and test sets
    s        = sprintf('At generation %g the number of duplicates is %g',gcount,dups);
    disp(s)
    avterms  = mean(sum(pop'));
    cavterms(gcount) = avterms;
    s        = sprintf('The average number of terms is %g',avterms);
    disp(s)
    dups     = 0; 
    if reg == 1
      fit    = zeros(maxlv,nopop); 
    else
      fit    = zeros(1,nopop);
    end
    %Test each model in population
	drawnow
    for kk = 1:iter       %Number of iterations
      if cvopt == 0
        dat  = shuffle(dat);
      else
        di   = shuffle([2:m]');
        dat  = [dat(di(1):m,:); dat(1:di(1)-1,:)];
      end
      for i = 1:nopop
	    drawnow
      %Check to see that model isn't a repeat
        dflag = 0;
        if i > 1
          for ii = 1:i-1
	           dif = sum(abs(pop(i,:) - pop(ii,:)));
            if dif == 0
              dflag = 1;
              fit(:,i) = fit(:,ii);
            end
          end
        end
        if dflag == 1;
          if kk == 1
            dups = dups + 1;
          end
        else
          %Select the proper columns for use in modeling
          inds = find(pop(i,:))*window;
          [smi,sni] = size(inds);
          if inds(1) <= n
            ninds = [inds(1)-window+1:inds(1)];
          else
            ninds = [inds(1)-window+1:n];
          end
          for aaa = 2:sni
            if inds(aaa) <= n
              ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
            else
              ninds = [ninds [inds(aaa)-window+1:n]];
            end
          end
          xx = dat(:,ninds);
          [mxx,nxx] = size(xx);
          yy = dat(:,n+1);
          if reg == 1
	  	    lvs = min([nxx,maxlv]);
            [press,cumpress,minlv] = plscvbkf(xx,yy,split,lvs);    
            fit(1:lvs,i) = fit(1:lvs,i) + (sqrt(cumpress/m)/iter)';
		    if lvs < maxlv
		      fit(lvs+1:maxlv,i) = Inf*ones(maxlv-lvs,1);
		    end
          else
            press = mlrcvblk(xx,yy,split);
			fit(i) = fit(i) + sqrt(press/m)/iter;  
          end
        end
      end
    end
    %Sort models based on fitness
	drawnow
	if reg == 1
      mfit           = min(fit);
	else
      mfit = fit;
	end
    [mfit,ind]     = sort(mfit);
    s              = sprintf('The best fitness is %g',mfit(1));
    disp(s)
    cbfit(gcount)  = mfit(1);
    s              = sprintf('The average fitness is %g',mean(mfit));
    disp(s)
    cavfit(gcount) = mean(mfit);
    pop            = pop(ind,:);
    figure(handl(1,1))
    subplot(2,2,1)
    sumpop         = sum(pop');
    plot(sumpop,mfit,'og'), mnfit = min(mfit); mxfit = max(mfit);
    dfit           = mxfit - mnfit; if dfit == 0, dfit=1; end
    axis([min(sumpop)-1 max(sumpop)+1 mnfit-dfit/10 mxfit+dfit/10])
    if window > 1
      xlabel('Number of Windows')
      s = sprintf('Fitness vs. # of Windows at Generation %g',gcount);
    else
      xlabel('Number of Variables')
	  s = sprintf('Fitness vs. # of Variables at Generation %g',gcount);
    end  
    title(s)
    ylabel('Fitness')
    subplot(2,2,2)
    plot(1:gcount,cavfit(1:gcount),1:gcount,cbfit(1:gcount))
    xlabel('Generation')
    ylabel('Average and Best Fitness')
    title('Evolution of Average and Best Fitness')
    subplot(2,2,3)
    plot(cavterms(1:gcount))
    xlabel('Generation')
    if window > 1
      ylabel('Average Windows Used')
	  title('Evolution of Number of Windows')
    else
      ylabel('Average Variables Used')
	  title('Evolution of Number of Variables')	
    end
    subplot(2,2,4)
    bar(sum(pop))
    if window > 1
      xlabel('Window Number')
      ylabel('Models Including Window')
	  s = sprintf('Models with Window at Generation %g',gcount);
    else
      xlabel('Variable Number')
      ylabel('Models Including Variable')
      s = sprintf('Models with Variable at Generation %g',gcount);
    end
    title(s)
    axis([0 ceil(n/window)+1 0 nopop+2])
    drawnow
    % Check to see if maxgen has been met
    if gcount >= maxgen
      dups = maxdups;
    end
    % Breed best half of population and replace worst half
    pop(1:nopop/2,:) = shuffle(pop(1:nopop/2,:));
    pop((nopop/2)+1:nopop,:) = pop(1:nopop/2,:);
    for i = 1:nopop/4
      for j = 1:cross
        %Select twist point at random
        tp = ceil(rand(1)*(specsplit-1));
        %Twist pairs and replace
	    p1 = (nopop/2)+(i*2)-1;
	    p2 = (nopop/2)+(i*2);
	    p1rep = [pop(p1,1:tp) pop(p2,tp+1:specsplit)];
	    p2rep = [pop(p2,1:tp) pop(p1,tp+1:specsplit)];
        pop(p1,:) = p1rep;
        pop(p2,:) = p2rep;
      end
    end
    %Mutate the population if dups < maxdups
    if dups < maxdups
      [mi,mj] = find(rand(nopop,specsplit)<mut);
      [ms,ns] = size(mi);
      for i = 1:ms
        if pop(mi(i),mj(i)) == 1
          pop(mi(i),mj(i)) = 0;
        else
          pop(mi(i),mj(i)) = 1;
        end
      end
    end 
    gcount = gcount + 1;
  end
end
%End of Main Loop

if dups >= maxdups
  set(handl([15 17],1),'Visible','Off');
  drawnow
  %Extract unique models from final population
  fpop = zeros(nopop-dups,specsplit);
  unique = 0; dups = 0;
  for i = 1:nopop
    dflag = 0;
    if i > 1
      for ii = 1:i-1
        dif = sum(abs(pop(i,:) - pop(ii,:)));
        if dif == 0
          dflag = 1;
        end
      end 
    end
    if dflag == 1
      dups = dups + 1;
    else
	  unique = unique + 1;
	  fpop(unique,:) = pop(i,:);
    end
  end
  s = sprintf('There are %g unique models in final population',unique);
  disp(s)
  %Testing final population
  if reg == 1
    fit = zeros(maxlv,unique); 
  else
    fit = zeros(1,unique);
  end
  disp('Now testing models in final population')
  for kk = 1:3*iter       %Number of iterations 
    if cvopt == 0
      dat = shuffle(dat);
    else
      di = shuffle([2:m]');
      dat = [dat(di(1):m,:); dat(1:di(1)-1,:)];
    end
    for i = 1:unique
      %Select the proper columns for use in modeling
      inds = find(fpop(i,:))*window;
      [smi,sni] = size(inds);
      if inds(1) <= n
        ninds = [inds(1)-window+1:inds(1)];
      else
        ninds = [inds(1)-window+1:n];
      end
      for aaa = 2:sni
        if inds(aaa) <= n
          ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
        else
          ninds = [ninds [inds(aaa)-window+1:n]];
        end
      end
      xx = dat(:,ninds);
      [mxx,nxx] = size(xx);
      yy = dat(:,n+1);
      if reg == 1
        lvs = min([nxx,maxlv]);
        [press,cumpress,minlv] = plscvbkf(xx,yy,split,lvs);    
        fit(1:lvs,i) = fit(1:lvs,i) + (sqrt(cumpress/m)/(iter*3))';
	    if lvs < maxlv
	  	 fit(lvs+1:maxlv,i) = Inf*ones(maxlv-lvs,1);
        end
      else
        press = mlrcvblk(xx,yy,split);
        fit(i) = fit(i) + sqrt(min(press)/m)/(iter*3);
      end
	  if kk == iter*3
        if reg == 1
          s = sprintf('Number %g fitness is %g at %g LVs',i,min(fit(:,i)),minlv);
        else
          s = sprintf('Number %g fitness is %g',i,min(fit(:,i)));	  
	    end
	    disp(s)
      end
    end
  end
  if reg == 1
    mfit       = min(fit);
  else
    mfit = fit;
  end
  [mfit,ind] = sort(mfit);
  s          = sprintf('The best fitness is %g',mfit(1));
  disp(s)
  s          = sprintf('The average fitness is %g',mean(mfit));
  disp(s)
  fpop       = fpop(ind,:);
  ffit       = mfit;
  % Translate the population (in terms of windows) into the
  % actual variables used in the final population.
  if window == 1
    gpop = fpop;
  else
    gpop = zeros(unique,n);
    for jk = 1:unique
      inds = find(fpop(jk,:))*window;
      [smi,sni] = size(inds);
      if inds(1) <= n
        ninds = [inds(1)-window+1:inds(1)];
      else
        ninds = [inds(1)-window+1:n];
      end
      for aaa = 2:sni
        if inds(aaa) <= n
          ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
        else
          ninds = [ninds [inds(aaa)-window+1:n]];
        end
	  end
      [snmi,snni] = size(ninds);
	  gpop(jk,ninds) = ones(1,snni);
	end
  end
  
  
  set(handl(4,2),'UserData',ffit)
  set(handl(4,5),'UserData',gpop)  
  set(handl(16,1),'Visible','On','String','Save & Quit');
end


	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -