📄 learn_struct_bnpc.m.svn-base
字号:
function [Phase_3, Phase_2, Phase_1, UPhase_3] = learn_struct_bnpc(data,node_sizes,epsilon,star)% G = learn_struct_bnpc(Data,node_sizes,epsilon,star)%% Data(i,m) is node i in case m.% node_sizes and epsilon are optionnals.% star = 0 to use try_to_separate_B instead of try_to_separate_B_star%% see "Learning bayesian Networks from Data: A Efficient Approach Based on Information Theorie"% Jie Cheng, David Bell and Weird Liu.%% Things to do : rewrite function orient_edges ?%% V0.91 : 18 sept 2003 (olivier.francois@insa-rouen.fr)verbose=1;%if nargin < 5, mwst=0; endif nargin < 4, star=1; endif nargin < 3, epsilon=0.05; end if nargin < 2, node_sizes=max(data'); endif verbose fprintf('================== phase I : \n');endtmp1=cputime;[Phase_1 II JJ score_mat score_mat2] = phaseI(data, node_sizes, epsilon);tmp1=cputime-tmp1;if verbose fprintf('Execution time : %2.5f\n',tmp1); fprintf('\n================== phase II : \n');endtmp1=cputime;Phase_2 = phaseII(Phase_1, data, node_sizes, epsilon, II, JJ, score_mat);tmp1=cputime-tmp1;if verbose fprintf('Execution time : %2.5f\n',tmp1); fprintf('\n================== phase III : \n');endtmp1=cputime;[Phase_3 UPhase_3] = phaseIII(Phase_2, data, node_sizes, epsilon, score_mat2, star);tmp1=cputime-tmp1;if verbose fprintf('Execution time : %2.5f\n',tmp1);end%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%function [G, II, JJ, score_mat, sc2] = phaseI(data,node_sizes,alpha)% [G, II , JJ, score_mat, s2] = phaseI(data,node_sizes,epsilon)% % G is an acyclic graph% [II JJ] is the list of important edges not processed in phase I (for phase II)% score_mat is the mutual information score matrix%% data(i,m) is node i in case m.% alpha is the significant level for CI tests ( default=0.05 ).% node_sizes is the vector of sizes ( default=max(data') ).%% see "Learning bayesian Networks from Data: A Efficient Approach Based on Information Theorie"% Jie Cheng, David Bell and Weird Liu.% 0.if nargin < 3, alpha=0.05; end if nargin < 2, node_sizes=max(data'); end[N m] = size(data);score_mat = zeros(N);edges=0;% 1.G = zeros(N);L=[];% 2. Use of Chi2 instead of MI ... allow using a confidence level alpha instead of an arbitrary epsilonfor i=1:(N-1) for j=(i+1):N [I score_mat(i,j)] = cond_indep_chisquare(i,j,[],data,'LRT',alpha,node_sizes); endendsc2=score_mat;[tmp ordre]=sort(-score_mat(:));ordre2=ordre(find(-tmp>alpha));[II JJ]=ind2sub([N N],ordre2);pointer=1 ;fini=length(II);% 3.edges=2;for pointer=1:min(2,fini), %fprintf('%d-%d\n',II(pointer),JJ(pointer)); G(II(pointer),JJ(pointer))=1; G(JJ(pointer),II(pointer))=1; score_mat(II(pointer),JJ(pointer))=-inf;endpointer=min(2,fini);arret=0;while pointer<fini & ~arret % 4. pointer=pointer+1; C = ~reachability_graph(G); if C(II(pointer),JJ(pointer)) %fprintf('%d-%d\n',II(pointer),JJ(pointer)); G(II(pointer),JJ(pointer))=1; G(JJ(pointer),II(pointer))=1; score_mat(II(pointer),JJ(pointer))=-inf; edges=edges+1; if edges==N-1 arret=1; end end % 5.end[tmp ordre]=sort(-score_mat(:));ordre2=ordre(find(-tmp>alpha));[II JJ]=ind2sub([N N],ordre2);%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%function G = phaseII(G1,data,node_sizes,alpha,II, JJ, score_mat)% G = phaseII(G1,data,node_sizes,epsilon,II,JJ, score_mat)% % G1, II, JJ,score_mat are given by phaseI.% data(i,m) is node i in case m.% node_sizes is the vector of sizes ( default=max(data') ).% alpha is the significant level for CI tests ( default=0.05 ).%% see "Learning bayesian Networks from Data: A Efficient Approach Based on Information Theorie"% Jie Cheng, David Bell and Weird Liu.st{1}='added';st{2}='';% 0.[N m] = size(data);G=G1;% 6.II=II(end:-1:1);JJ=JJ(end:-1:1);pointer=length(II);while pointer>0 % 7. trysep = try_to_separate_A(G,II(pointer),JJ(pointer),data,alpha,node_sizes); %fprintf('Try to separate %d and %d : %s\n',II(pointer),JJ(pointer),st{trysep+1}); if ~trysep G(II(pointer),JJ(pointer))=1; G(JJ(pointer),II(pointer))=1; %fprintf('%d-%d\n',II(pointer),JJ(pointer)); end % 8. pointer=pointer-1;end%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%function [G,U] = phaseIII(G1,data,node_sizes,alpha,s2,star)% [G] = phaseIII(G1,data,node_sizes,alpha,s2,star)% % data(i,m) is node i in case m.% if star~=0, use try_to_separate_B_star instead of try_to_separate_B (default star=1).% G is the non-oriented graph and G1 the oriented result.%% see "Learning bayesian Networks from Data: A Efficient Approach Based on Information Theorie"% Jie Cheng, David Bell and Weird Liu.% 0.if nargin < 2, disp('Not enough arguments');return; endif nargin < 3, node_sizes=max(data'); endif nargin < 4, alpha = 0.05; endif nargin < 5, star=1; endG=G1;N=length(G);% reachability_matrix of GM = expm(full(G)) - eye(length(G)); M = (M>0);% 9.fprintf('Thinning - separateA\n');% Edges are examined in the inverse order of their Chi2 (or MI) scores2(find(~G))=0;[tmp ordre]=sort(s2(:));ordre2=ordre(find(tmp>0));[I J]=ind2sub([N N],ordre2);ii=1:length(I);for i=ii, %fprintf('%d-%d : ',I(i),J(i)); G(I(i),J(i))=0; G(J(i),I(i))=0; trysep = try_to_separate_A(G,I(i),J(i),data,alpha,node_sizes); if ~trysep, G(I(i),J(i))=1; G(J(i),I(i))=1; %fprintf(' keep\n'); %else %fprintf('delete\n'); endend% 10.fprintf('Thinning - separateB'); if star; fprintf('star'); end; fprintf('\n');s2(find(~G))=0;[tmp ordre]=sort(s2(:));ordre2=ordre(find(tmp>0));[I J]=ind2sub([N N],ordre2);ii=1:length(I);for i=ii, %fprintf('%d-%d : ',I(i),J(i)); G(I(i),J(i))=0; G(J(i),I(i))=0; if star==0 trysep = try_to_separate_B(G,I(i),J(i),data,node_sizes,alpha); else trysep = try_to_separate_B_star(G,I(i),J(i),data,node_sizes,alpha); end if ~trysep, G(I(i),J(i))=1; G(J(i),I(i))=1; %fprintf(' keep\n'); %else %fprintf('delete\n'); endend%11.fprintf('Thinning - orient_edges\n');U=G;G=orient_edges(U,data,node_sizes,alpha);%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%function [I, N1, N2] = try_to_separate_A(G,node1,node2,data,alpha,node_sizes)% [I, N1, N2] = try_to_separate_A(G,node1,node2,data,alpha,node_sizess)% % G is the current partially directed graph.% I is a boolean : I=1 <==> separated.% N1 : neighbors of node1 that are on an adjacency path between node1 and node2 (ditto for N2).%% see "Learning bayesian Networks from Data: A Efficient Approach Based on Information Theorie"% Jie Cheng, David Bell and Weird Liu.% 0.if node1==node2 | length(G)<2 disp('Error: Check your arguments in try_to_separate_A.');I=0; returnendN=size(data,1);if nargin==4, alpha=0.05; node_sizes=max(data'); endif nargin==5, node_sizes=max(data'); end% 1.N1=find(G(node1,:)==1);N01=N1;GG1=G(setdiff(1:N,node1),setdiff(1:N,node1));node22=node2-(node2>node1);% reachability_matrix of GG1M = expm(full(GG1)) - eye(length(GG1)); M = (M>0);% N1 is the neighbors of node1 that are on the adjacency between node1 and node2for i=N1 j=i-(i>node1); if M(j,node22)~=1, N01=setdiff(N01,i); N1=setdiff(N1,i); end % 2. if ~G(node1,i), N1=setdiff(N1,i); endendN2=find(G(node2,:)==1);N02=N2;GG2=G(setdiff(1:N,node2),setdiff(1:N,node2));node12=node1-(node2<node1);% reachability_matrix of GG2M = expm(full(GG2)) - eye(length(GG2)); M = (M>0);% N2 is the neighbors of node2 that are on the adjacency between node1 and node2for i=N2 j=i-(i>node2); if M(node12,j)~=1, N02=setdiff(N02,i); N2=setdiff(N2,i); end % 2. if ~G(node2,i), N2=setdiff(N2,i); endend%fprintf('%d : N1=',node1); fprintf('%d',N1); fprintf('\n');%fprintf('%d : N2=',node2); fprintf('%d',N2); fprintf('\n');% 3.if length(N1)>length(N2), tmp=N1; N1=N2; N2=tmp; clear tmp, end% 4.C=N1;for test=1:2 if test==2, C=N2; end % 5. [I v1] = cond_indep_chisquare(node1,node2,C,data,'LRT',alpha,node_sizes); if I, %fprintf('%d-%d separated (%2.5f) by C=',node1,node2,v1); fprintf('%d',C); fprintf('\n'); return, %else %fprintf('%d-%d not separated (%2.5f) by C=',node1,node2,v1); fprintf('%d',C); fprintf('\n'); end; % 6. step6=1; while step6 step6=0; if length(C)>=1 v=[]; for i=C Ci = setdiff(C,i); [I(i) v(i)] = cond_indep_chisquare(node1,node2,Ci,data,'LRT',alpha,node_sizes); end [vm ind] = min(v); % 7. if I(ind) I = 1; return else if vm < v1 v1 = vm; C = setdiff(C,ind); % goto step 6. step6 = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -