📄 qstudy.m
字号:
function q_value = qstudy(x)%global tempq;dir_q = x(1);% 相对速度angle_t = x(2);%目标角速度angle_q = x(3);%导弹目标视线角Net_qvalue_mangle_m = x(4);%导弹角速度yt = x(5);%目标y轴距离xt = x(6);%目标x轴距离ym = x(7);%导弹y轴距离xm = x(8);%导弹x轴距离rel_v = x(9);%r*视线旋转角速度distance = x(10);%导弹目标距离front_q_m = x(11);cyc = x(12);% front_q_m = x(12); first_dir_t = x(13); first_dir_m = x(14);angle_t_first = x(15);%目标角速度angle_q_first = x(16);%导弹目标视线角Net_qvalue_mangle_m_first = x(17);%导弹角速度yt_first = x(18);%目标y轴距离xt_first = x(19);%目标x轴距离ym_first = x(20);%导弹y轴距离xm_first = x(21);%导弹x轴距离Efsl = x(22);%学习速率StudyRate = 0.001,折算因子gama=0gama = 0.95;StudyRate = 0.001;%-----------神经元网络的实现函数------------------%%***********导弹权值和阈值初始化模块**********************%输入层至隐含层的权值和阈值初始化WInToH = abs(randn(6,6)); %输入层至隐含层连接权InYValue = randn(6,1);%数组InputValue用于保存隐层阈值HideYuValue = zeros(6,1);%HideInValue是隐层输入值,HideToOutYuValue输出层阈值HideInValue = zeros(6,1);HideOutValue = zeros(6,1);%隐含层至输出层的权值和阈值初始化WHiToOut = abs(randn(1,6)); %隐含层至输出层连接权HiYValue = randn(1,1);HideToOutYuValue = HiYValue;ExportInValue = zeros(1,1);%定义输出层净输入数组ExportOutValue = zeros(1,1);%输出层的输出NerOutQ =zeros(4,4);%用于保存所有状态-动作对的Q值% NerOutQ =zeros(3,3);%用于保存所有状态-动作对的Q值%NerfirstQ = zeros(4,4);%初始化所有状态-动作空间对应的Q值dError= zeros(1,1);SL1 = randn(1,1)*1;SL2 = randn(1,1)*1;ExpQ = 1;CycJudge = 0;ErrorJudge=1;Efsl = 1;tempangle_m =0;%==========================================================================%=========================动作选择模块======================================if(angle_q>=0) if(angle_m>=angle_q) %u_m_h = [-0.8 -0.5 -0.3 -0.1]; if(angle_t<=angle_q) % u_t_h=[-0.1 -0.3 -0.5 -0.8]; u_m_h = [-0.8 -0.5 -0.3 -0.1]; u_m_h = [-0.1 -0.05 0 -0.1]; elseif((angle_t>angle_q)&&(angle_m>angle_t)) u_m_h=[0.1 0.2 0.3 0.5]; u_m_h=[0.1 0.2 -0.3 -0.5]; elseif((angle_t>angle_q)&&(angle_m<=angle_t)) u_m_h=[0 0.1 0.15 0.2]; u_m_h=[0.1 0.2 0.3 0.5]; else% u_m_h=[-0.05 -0.1 -0.15 -0.2]; u_m_h=[0.05 0.1 0.15 0.2]; end elseif(angle_m<angle_q) % u_m_h=[0.1 0.3 0.5 0.8]; if(angle_t>angle_q) u_m_h=[0.1 0.3 0.5 0.8]; u_m_h=[0.3 0.3 0.3 0.3];%%%%%%%%%%%%%%%%%%%%%0,0,0时候改动 elseif((angle_t<angle_q)&&(angle_t<=angle_m)) u_m_h = [0.5 0.5 0.5 0.5];%%%%%%%%%%%0,0,0时候改动 elseif((angle_t<angle_q)&&(angle_t>angle_m)) u_m_h = [0.1 0.3 0.5 0.8]; end else(angle_m==angle_q) u_m_h=[0.1 0.2 0 -0.1]; endendif(angle_q<0) if(angle_m<=angle_q) %u_m_h = [-0.8 -0.5 -0.3 -0.1]; if(angle_t>=angle_q) % u_t_h=[-0.1 -0.3 -0.5 -0.8]; u_m_h = [0.8 0.5 0.3 0.1]; elseif((angle_t<angle_q)&&(angle_m<=angle_t)) u_m_h=[0.05 0.1 0.15 0.2]; elseif((angle_t<angle_q)&&(angle_m>angle_t)) u_m_h=[0 -0.1 -0.15 -0.2]; else u_m_h=[0.05 0.1 0.15 0.2]; end elseif(angle_m>angle_q) % u_m_h=[0.1 0.3 0.5 0.8]; if(angle_t<angle_q) u_m_h=[-0.1 -0.3 -0.5 -0.8]; elseif((angle_t>angle_q)&&(angle_t>angle_m)) u_m_h = [-0.2 -0.1 0 0.1]; elseif((angle_t>angle_q)&&(angle_t<=angle_m)) u_m_h = [-0.1 -0.3 -0.5 -0.8]; end else(angle_m==angle_q) u_m_h=[-0.1 -0.2 0 0.1]; endendif(angle_q>=0) if(angle_m>angle_q)% u_m_h = [-0.8 -0.5 -0.3 -0.1]; if(angle_t<=angle_q) u_t_h=[0.3 0.2 -0.5 -0.8];%%%%%%%%%%%%%%%%%%%%%%%%%%%0,0,0时候改动% u_t_h=[0.2 0.5 0.8 1]; else(angle_t>angle_q) u_t_h=[-0.2 -0.4 -0.6 -1]; u_t_h=[0.2 0.4 0.6 1]; end elseif(angle_m<angle_q)% u_m_h=[0.1 0.3 0.5 0.8]; if(angle_t<angle_q) u_t_h = [0.2 0.4 0.6 1]; else(angle_t>angle_q) u_t_h = [0.1 0.3 0.5 0.8]; end elseif(angle_m==angle_q)% u_m_h=[0.1 0 0 -0.1]; u_t_h=[1 0 0 -1]; endendif(angle_q<0) if(angle_m>angle_q)% u_m_h=[-0.8 -0.5 -0.3 -0.1]; if(abs(angle_t)<=angle_q) u_t_h=[-0.1 -0.3 -0.5 -0.8]; else u_t_h =[0.2 0.4 0.6 0.8]; end elseif(angle_m<angle_q)% u_m_h=[0.1 0.3 0.5 0.8]; if(abs(angle_t)<angle_q) u_t_h=[-0.2 -0.4 -0.6 -0.8]; else u_t_h=[0.1 0.3 0.5 0.8]; end elseif(angle_m==angle_q)% u_m_h=[0.1 0 0 -0.1]; u_t_h=[1 0 0 -1]; endend% % % if(angle_q>=0)% % % if(angle_m>angle_q)% % % %u_m_h = [-0.8 -0.5 -0.3 -0.1];% % % if(angle_t<=angle_q)% % % % u_t_h=[-0.1 -0.3 -0.5 -0.8];% % % u_m_h = [-0.7 -0.4 -0.2 0];% % % % u_m_h=[-0.2 -0.4 -0.6 -1];% % % elseif((angle_t>angle_q)&&(angle_m>angle_t))% % % u_m_h=[-0.05 -0.1 -0.15 -0.2];% % % u_m_h=[-0.1 -0.3 -0.5 -0.8];% % % elseif((angle_t>angle_q)&&(angle_m<angle_t))% % % u_m_h=[0 0.1 0.15 0.2];% % % u_m_h=[0 0.3 0.5 0.7];% % % % u_m_h=[-0.1 -0.3 -0.5 -0.8];% % % else% % % u_m_h=[-0.05 -0.1 -0.15 -0.2];% % % u_m_h=[-0.1 -0.3 -0.5 -0.8];% % % end% % % elseif(angle_m<angle_q)% % % % u_m_h=[0.1 0.3 0.5 0.8];% % % if(angle_t>angle_q)% % % u_m_h=[0.1 0.3 0.5 0.8];% % % u_m_h = [0.2 0.4 0.6 1];% % % u_m_h = [0.8 0.8 0.8 0.8];% % % elseif((angle_t<angle_q)&&(angle_t<angle_m))% % % u_m_h = [0.2 0.1 0 -0.1];% % % u_m_h = [0 -0.1 -0.2 -0.5];% % % u_m_h = [0 0.1 0.2 -0.1];% % % elseif((angle_t<angle_q)&&(angle_t>angle_m))% % % u_m_h = [0.1 0.3 0.5 0.8];% % % u_m_h = [0.5 0.6 0.7 1];% % % else% % % u_m_h = [1 1 1 1];% % % end% % % else(angle_m==angle_q)% % % u_m_h=[0.1 0.2 0 -0.1];% % % u_m_h=[1 0.5 0 -1];% % % end% % % end% % % if(angle_q<0)% % % if(angle_m<angle_q)% % % %u_m_h = [-0.8 -0.5 -0.3 -0.1];% % % if(angle_t>=angle_q)% % % % u_t_h=[-0.1 -0.3 -0.5 -0.8];% % % u_m_h = [0.8 0.5 0.3 0.1];% % % u_m_h=[-0.1 -0.15 -0.2 -0.25];% % % elseif((angle_t<angle_q)&&(angle_m<angle_t))% % % u_m_h=[0.05 0.1 0.15 0.2];% % % u_m_h=[-0.1 -0.15 -0.2 -0.25];% % % elseif((angle_t<angle_q)&&(angle_m>angle_t))% % % u_m_h=[0 -0.1 -0.15 -0.2];% % % u_t_h=[0.1 0.15 0.2 0.25];% % % else% % % u_m_h=[0.05 0.1 0.15 0.2];% % % u_m_h=[-0.1 -0.15 -0.2 -0.25];% % % end% % % elseif(angle_m>angle_q)% % % % u_m_h=[0.1 0.3 0.5 0.8];% % % if(angle_t<angle_q)% % % u_m_h=[-0.1 -0.3 -0.5 -0.8];% % % u_m_h=[0.1 0.15 0.2 0.25];% % % elseif((angle_t>angle_q)&&(angle_t>angle_m))% % % u_m_h = [-0.2 -0.1 0 0.1];% % % % u_m_h = [-0.1 -0.05 0 0.05];% % % u_m_h = [0.1 0.2 0.3 0.5];% % % elseif((angle_t>angle_q)&&(angle_t<angle_m))% % % u_m_h = [-0.1 -0.15 -0.2 -0.25];% % % % u_m_h = [0.1 0.15 0.2 0.25];% % % u_m_h = [0.2 0.3 0.4 0.5];% % % end% % % else(angle_m==angle_q)% % % u_m_h=[-0.1 -0.2 0 0.1];% % % u_m_h=[1 0.5 0 -1];% % % end% % % end% % % % % % % % % % % % % % % if(angle_q>=0)% % % if(angle_m>angle_q)% % % % u_m_h = [-0.8 -0.5 -0.3 -0.1];% % % if(angle_t<=angle_q)% % % u_t_h=[-0.05 -0.15 -0.25 -0.35];% % % else(angle_t>angle_q)% % % u_t_h=[-0.2 -0.3 -0.4 -5];% % % end% % % elseif(angle_m<angle_q)% % % % u_m_h=[0.1 0.3 0.5 0.8];% % % if(angle_t<angle_q)% % % u_t_h = [0.2 0.3 0.4 0.5];% % % else(angle_t>angle_q)% % % u_t_h = [0.1 0.2 0.3 0.5];% % % end% % % elseif(angle_m==angle_q)% % % % u_m_h=[0.1 0 0 -0.1];% % % u_t_h=[1 0 0 -1];% % % end% % % end% % % if(angle_q<0)% % % if(angle_m>angle_q)% % % % u_m_h=[-0.8 -0.5 -0.3 -0.1];% % % if(abs(angle_t)<=angle_q)% % % u_t_h=[-0.05 -0.15 -0.25 -0.3];% % % else% % % u_t_h =[0.2 0.4 0.5 0.6];% % % end% % % elseif(angle_m<angle_q)% % % % u_m_h=[0.1 0.3 0.5 0.8];% % % if(abs(angle_t)<angle_q)% % % u_t_h=[-0.2 -0.3 -0.4 -0.5];% % % else% % % u_t_h=[0.1 0.2 0.3 0.4];% % % end% % % elseif(angle_m==angle_q)% % % % u_m_h=[0.1 0 0 -0.1];% % % u_t_h=[1 0 0 -1];% % % end% % % end% u_t_h = [0.6 0.3 0 -0.3];%防御导弹和目标对象的航迹角fai_m = u_m_h ;fai_t = u_t_h*1.57;%==========================================================================xite = 1.5;alfa = 0.05;gama = 0.95;M = 100;N = 7;C = 7;w = zeros(N,6);w_1 = w;w_2 = w;d_w = w;u_1 = 0;y_1 = 0;ts = 0.001;CycJudge = 0;error = 1;ad = zeros(7,6);while (error > 0.05) CycJudge = CycJudge + 1;% if CycJudge >1000%当网络的学习次数超过学习极限时停止学习% break;% end for iJudge_m = 1:4; for iJudge_t =1:4; %=============================网络的输入============================= Input1 = xt; Input2 = xm; Input3 = yt; Input4 = ym; NetIn = [Input1,Input2,Input3,Input4,fai_m(iJudge_m),fai_t(iJudge_t)]; xmin = -10.; xmax = 1.0; s = round((NetIn-xmin)*M/(xmax-xmin)); sum = 0; for i=1:1:C for ii = 1:1:6 ad(i,ii) = mod(s(ii)+i,N)+1; sum = sum +w(ad(i,ii)); end end NerOutQ(iJudge_m,iJudge_t) = sum; end end %=======================按微分对策方法求的最佳动作=================== %选择两者中动作产生Q值小的那一个 Net_qvalue_m = zeros(4,1); i_t =[1;1;1;1]; jTemp = 0; for iJudge = 1:4;%判断每行中的最大值 Net_qvalue_m(iJudge) = NerOutQ(1,1); for jJudge = 1:4; if(Net_qvalue_m(iJudge) < NerOutQ(iJudge,jJudge)) Net_qvalue_m(iJudge) = NerOutQ(iJudge,jJudge); kk = iJudge; else kk=1; end end end for jJudge = 1:4;%判断每行中的最小值 Net_qvalue_m(iJudge) = NerOutQ(1,1); for iJudge = 1:4; if(Net_qvalue_m(iJudge) > NerOutQ(iJudge,jJudge)) Net_qvalue_m(iJudge) = NerOutQ(iJudge,jJudge); hh = jJudge; else hh=1; end end end front_q_m_new = NerOutQ(kk,hh); %=====================执行动作后新状态的q值=================== %===============================回报函数计算模块====================== %设定几个判定值 d1 = 10; d2 = 1000; if ( distance > d2 ) ReturnR = 1; % elseif( (distance > d1) && (distance <= d2) && (rel_v < 0) ) % ReturnR = -0.2; % elseif( (distance > d2)&&(rel_v > 0) ) % ReturnR = 0.2; elseif( (distance < d1) ) ReturnR = -1; else ReturnR = 0; end %====================================================================== %====================================================================== %计算t+1时刻的状态信息以及Q值 Miu = ReturnR + gama * front_q_m_new; error = front_q_m - Miu; d_w = xite*error/C; for i = 1:1:C for ii =1:1:6 ad(i,ii) = mod(s(ii)+1,N)+1; w(ad(i,ii)) = w_1(ad(i,ii))+d_w+alfa*(w_1(ad(i,ii))-w_2(ad(i,ii))); end end %%%%%%%%%%%%%%%%%%%%%%%% w_2 = w_1; w_1 = w;endfront_q_m = NerOutQ(kk,hh);%计算此时得到的航迹角速度%q_value(1) = q_action_m + x(1);%front_dir_m = fai_m(kk) + front_dir_m;%%front_dir_m = fai_m(kk);front_dir_m = fai_m(kk);front_dir_t = fai_t(hh);% front_dir_m = fai_m(1);% front_dir_t = fai_t(1);q_value(1) = front_dir_m;% q_value(1) = 3.5 *dir_q;front_dir_t = fai_t(hh) ;%%front_dir_t = fai_t(hh);% front_dir_t = fai_t(hh);q_value(2) = front_dir_t;q_value(4) = front_q_m_new;%q_value(4) = maxmin;% q_value(5) = Net_qvalue_t;beida = 0.85;cyc = cyc + 1;Efsl = 1/(CycJudge.^beida);q_value(3) = Efsl;q_value(5) = cyc;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -