📄 gaselect.m
字号:
function gaselect(fig,action)
%GASELECT Subroutine of GENALG for Variable Selection
% Please see GENALG
% Copyright
% Eigenvector Technologies
% 1995
% Variables set by GUI
handl = get(fig,'userdata');
nopop = 4*round(get(handl(2,1),'Value')/4); %Size of population
maxgen = round(get(handl(3,1),'value')); %Max number of generations
mut = 1e-3*round(get(handl(4,1),'value')/1e-3); %Mutation Rate
window = round(get(handl(5,1),'value')); %Window width for spectral channels
converge = round(get(handl(6,1),'value')); % % of pop the same at convergence
begfrac = round(get(handl(7,1),'value'))/100; %Fraction of terms included in beginning
cross = handl(8,3); %Double or single cross over, 1 = single
reg = handl(9,3); %Regression method, 0 = MLR, 1 = PLS
maxlv = round(get(handl(10,1),'value')); %No. LVs, only needed with reg = 1
cvopt = handl(11,3); %CV option, 0 = random, 1 = contiguous
split = round(get(handl(12,1),'value')); %No. subsets to divide data into
iter = round(get(handl(13,1),'value')); %No. iterations of CV at each generation
x = get(handl(2,1),'UserData'); % x-block data
y = get(handl(3,1),'UserData'); % y-block data
[m,n] = size(x);
if strcmp(action,'gogas')
gcount = 1;
specsplit = ceil(n/window);
set(handl(2,2),'UserData',specsplit);
%Check to see that nopop is divisible by 4
dp = nopop/4;
if ceil(dp) ~= dp
nopop = ceil(dp)*4;
disp('Population size not divisible by 4')
s = sprintf('Resizing to a population of %g',nopop);
disp(s)
end
%Generate initial population
pop = rand(nopop,specsplit);
for i = 1:nopop
for j = 1:specsplit
if pop(i,j) < begfrac
pop(i,j) = 1;
else
pop(i,j) = 0;
end
end
if sum(pop(i,:)')<0.5
colm = round(rand(1)*specsplit);
if colm <0.5
colm = 1;
end
pop(i,colm) = 1;
end
end
end
%Set limit on number of duplicates in population
maxdups = ceil(nopop*converge/100);
%Iterate until dups > maxdups
dat = [x y];
if gcount == 1
dups = 0;
cavterms = zeros(1,maxgen);
cavfit = zeros(1,maxgen);
cbfit = zeros(1,maxgen);
end
if strcmp(action,'resum')
specsplit= get(handl(2,2),'UserData');
fit = get(handl(2,3),'UserData');
pop = get(handl(2,4),'UserData');
gcount = get(handl(2,5),'UserData');
dups = get(handl(3,2),'UserData');
cavfit = get(handl(3,3),'UserData');
cbfit = get(handl(3,4),'UserData');
cavterms = get(handl(3,5),'UserData');
end
% Main Loop
if strcmp(action,'gogas')|strcmp(action,'resum')
while dups < maxdups
drawnow
if get(handl(15,1),'UserData') == 1
set(handl(2,3),'UserData',fit);
set(handl(2,4),'UserData',pop);
set(handl(2,5),'UserData',gcount);
set(handl(3,2),'UserData',dups);
set(handl(3,3),'UserData',cavfit);
set(handl(3,4),'UserData',cbfit);
set(handl(3,5),'UserData',cavterms);
set(handl(17,1),'Visible','On');
set(handl([3 4 6 12 13],[1 3 4]),'Visible','On');
if get(handl(9,2),'Value')
set(handl(10,[1 3 4]),'Visible','On')
end
break
end
%Shuffle data and form calibration and test sets
s = sprintf('At generation %g the number of duplicates is %g',gcount,dups);
disp(s)
avterms = mean(sum(pop'));
cavterms(gcount) = avterms;
s = sprintf('The average number of terms is %g',avterms);
disp(s)
dups = 0;
if reg == 1
fit = zeros(maxlv,nopop);
else
fit = zeros(1,nopop);
end
%Test each model in population
drawnow
for kk = 1:iter %Number of iterations
if cvopt == 0
dat = shuffle(dat);
else
di = shuffle([2:m]');
dat = [dat(di(1):m,:); dat(1:di(1)-1,:)];
end
for i = 1:nopop
drawnow
%Check to see that model isn't a repeat
dflag = 0;
if i > 1
for ii = 1:i-1
dif = sum(abs(pop(i,:) - pop(ii,:)));
if dif == 0
dflag = 1;
fit(:,i) = fit(:,ii);
end
end
end
if dflag == 1;
if kk == 1
dups = dups + 1;
end
else
%Select the proper columns for use in modeling
inds = find(pop(i,:))*window;
[smi,sni] = size(inds);
if inds(1) <= n
ninds = [inds(1)-window+1:inds(1)];
else
ninds = [inds(1)-window+1:n];
end
for aaa = 2:sni
if inds(aaa) <= n
ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
else
ninds = [ninds [inds(aaa)-window+1:n]];
end
end
xx = dat(:,ninds);
[mxx,nxx] = size(xx);
yy = dat(:,n+1);
if reg == 1
lvs = min([nxx,maxlv]);
[press,cumpress,minlv] = plscvbkf(xx,yy,split,lvs);
fit(1:lvs,i) = fit(1:lvs,i) + (sqrt(cumpress/m)/iter)';
if lvs < maxlv
fit(lvs+1:maxlv,i) = Inf*ones(maxlv-lvs,1);
end
else
press = mlrcvblk(xx,yy,split);
fit(i) = fit(i) + sqrt(press/m)/iter;
end
end
end
end
%Sort models based on fitness
drawnow
if reg == 1
mfit = min(fit);
else
mfit = fit;
end
[mfit,ind] = sort(mfit);
s = sprintf('The best fitness is %g',mfit(1));
disp(s)
cbfit(gcount) = mfit(1);
s = sprintf('The average fitness is %g',mean(mfit));
disp(s)
cavfit(gcount) = mean(mfit);
pop = pop(ind,:);
figure(handl(1,1))
subplot(2,2,1)
sumpop = sum(pop');
plot(sumpop,mfit,'og'), mnfit = min(mfit); mxfit = max(mfit);
dfit = mxfit - mnfit; if dfit == 0, dfit=1; end
axis([min(sumpop)-1 max(sumpop)+1 mnfit-dfit/10 mxfit+dfit/10])
if window > 1
xlabel('Number of Windows')
s = sprintf('Fitness vs. # of Windows at Generation %g',gcount);
else
xlabel('Number of Variables')
s = sprintf('Fitness vs. # of Variables at Generation %g',gcount);
end
title(s)
ylabel('Fitness')
subplot(2,2,2)
plot(1:gcount,cavfit(1:gcount),1:gcount,cbfit(1:gcount))
xlabel('Generation')
ylabel('Average and Best Fitness')
title('Evolution of Average and Best Fitness')
subplot(2,2,3)
plot(cavterms(1:gcount))
xlabel('Generation')
if window > 1
ylabel('Average Windows Used')
title('Evolution of Number of Windows')
else
ylabel('Average Variables Used')
title('Evolution of Number of Variables')
end
subplot(2,2,4)
bar(sum(pop))
if window > 1
xlabel('Window Number')
ylabel('Models Including Window')
s = sprintf('Models with Window at Generation %g',gcount);
else
xlabel('Variable Number')
ylabel('Models Including Variable')
s = sprintf('Models with Variable at Generation %g',gcount);
end
title(s)
axis([0 ceil(n/window)+1 0 nopop+2])
drawnow
% Check to see if maxgen has been met
if gcount >= maxgen
dups = maxdups;
end
% Breed best half of population and replace worst half
pop(1:nopop/2,:) = shuffle(pop(1:nopop/2,:));
pop((nopop/2)+1:nopop,:) = pop(1:nopop/2,:);
for i = 1:nopop/4
for j = 1:cross
%Select twist point at random
tp = ceil(rand(1)*(specsplit-1));
%Twist pairs and replace
p1 = (nopop/2)+(i*2)-1;
p2 = (nopop/2)+(i*2);
p1rep = [pop(p1,1:tp) pop(p2,tp+1:specsplit)];
p2rep = [pop(p2,1:tp) pop(p1,tp+1:specsplit)];
pop(p1,:) = p1rep;
pop(p2,:) = p2rep;
end
end
%Mutate the population if dups < maxdups
if dups < maxdups
[mi,mj] = find(rand(nopop,specsplit)<mut);
[ms,ns] = size(mi);
for i = 1:ms
if pop(mi(i),mj(i)) == 1
pop(mi(i),mj(i)) = 0;
else
pop(mi(i),mj(i)) = 1;
end
end
end
gcount = gcount + 1;
end
end
%End of Main Loop
if dups >= maxdups
set(handl([15 17],1),'Visible','Off');
drawnow
%Extract unique models from final population
fpop = zeros(nopop-dups,specsplit);
unique = 0; dups = 0;
for i = 1:nopop
dflag = 0;
if i > 1
for ii = 1:i-1
dif = sum(abs(pop(i,:) - pop(ii,:)));
if dif == 0
dflag = 1;
end
end
end
if dflag == 1
dups = dups + 1;
else
unique = unique + 1;
fpop(unique,:) = pop(i,:);
end
end
s = sprintf('There are %g unique models in final population',unique);
disp(s)
%Testing final population
if reg == 1
fit = zeros(maxlv,unique);
else
fit = zeros(1,unique);
end
disp('Now testing models in final population')
for kk = 1:3*iter %Number of iterations
if cvopt == 0
dat = shuffle(dat);
else
di = shuffle([2:m]');
dat = [dat(di(1):m,:); dat(1:di(1)-1,:)];
end
for i = 1:unique
%Select the proper columns for use in modeling
inds = find(fpop(i,:))*window;
[smi,sni] = size(inds);
if inds(1) <= n
ninds = [inds(1)-window+1:inds(1)];
else
ninds = [inds(1)-window+1:n];
end
for aaa = 2:sni
if inds(aaa) <= n
ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
else
ninds = [ninds [inds(aaa)-window+1:n]];
end
end
xx = dat(:,ninds);
[mxx,nxx] = size(xx);
yy = dat(:,n+1);
if reg == 1
lvs = min([nxx,maxlv]);
[press,cumpress,minlv] = plscvbkf(xx,yy,split,lvs);
fit(1:lvs,i) = fit(1:lvs,i) + (sqrt(cumpress/m)/(iter*3))';
if lvs < maxlv
fit(lvs+1:maxlv,i) = Inf*ones(maxlv-lvs,1);
end
else
press = mlrcvblk(xx,yy,split);
fit(i) = fit(i) + sqrt(min(press)/m)/(iter*3);
end
if kk == iter*3
if reg == 1
s = sprintf('Number %g fitness is %g at %g LVs',i,min(fit(:,i)),minlv);
else
s = sprintf('Number %g fitness is %g',i,min(fit(:,i)));
end
disp(s)
end
end
end
if reg == 1
mfit = min(fit);
else
mfit = fit;
end
[mfit,ind] = sort(mfit);
s = sprintf('The best fitness is %g',mfit(1));
disp(s)
s = sprintf('The average fitness is %g',mean(mfit));
disp(s)
fpop = fpop(ind,:);
ffit = mfit;
% Translate the population (in terms of windows) into the
% actual variables used in the final population.
if window == 1
gpop = fpop;
else
gpop = zeros(unique,n);
for jk = 1:unique
inds = find(fpop(jk,:))*window;
[smi,sni] = size(inds);
if inds(1) <= n
ninds = [inds(1)-window+1:inds(1)];
else
ninds = [inds(1)-window+1:n];
end
for aaa = 2:sni
if inds(aaa) <= n
ninds = [ninds [inds(aaa)-window+1:inds(aaa)]];
else
ninds = [ninds [inds(aaa)-window+1:n]];
end
end
[snmi,snni] = size(ninds);
gpop(jk,ninds) = ones(1,snni);
end
end
set(handl(4,2),'UserData',ffit)
set(handl(4,5),'UserData',gpop)
set(handl(16,1),'Visible','On','String','Save & Quit');
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -