📄 gener_mar_dataset.m.svn-base
字号:
function [data, comp_data, bnet_miss] = gener_MAR_dataset(bnet_orig, m, bnet_miss, upd)
% GENER_MAR_DATASET  Sample a complete dataset and an incomplete (MAR) version of it.
%
% [data, comp_data, bnet_miss] = gener_MAR_dataset(bnet_orig, m, bnet_miss, upd)
%
% Inputs :
% - bnet_orig : the BNT network the complete samples are drawn from
% - m         : number of samples to draw
% optional :
% - bnet_miss : a missingness network previously returned by this function
%               (it must carry the field 'list' = {l1; l2; lp})
% - upd       : 1 if you want to be asked for additional "simple rules"
%               (default 0)
%
% Outputs :
% - data      : result of mat_to_bnt on the sampled data in which the values
%               flagged as missing have been set to 0
% - comp_data : the complete sampled data, as returned by bnt_to_mat
% - bnet_miss : network over 2*N nodes; node N+i has two states,
%               1 = node i present, 2 = node i missing
%
% When bnet_miss is not given, it is built interactively by gener_MAR_bnet
% and can be saved to a file 'MAR-bnet-<date>.mat'.
%
% A "simple rule" (l1(k), l2(k), lp(k)) means : when node l1(k) is missing,
% node l2(k) is also made missing with probability lp(k).
%
% Example :
%   [data, comp_data, bnet_miss] = gener_MAR_dataset(bnet_orig, m, bnet_miss, 1);
%
% version 1.1 : mar. 15th 2005, olivier.francois@insa-rouen.fr

% INIT
if nargin<2, error('gener_MAR_dataset needs at least bnet_orig and m'); end
N = size(bnet_orig.dag,2);
% NOTE: the former "number of nodes must be even" check was dropped : nothing
% below requires an even N for bnet_orig (only bnet_miss has 2*N nodes).
if nargin<4, upd = 0; end

% first rules
if nargin<3,
  l1=[]; l2=[]; lp=[];
  b=-1;
  while ~(b==0 | b==1),
    b = input('Would you like to make a node missing when another one is missing (1 for yes, 0 for no) ? ');
  end
else
  b=-1;
  if upd,
    while ~(b==0 | b==1),
      b = input('Would you like to add rules (1 for yes, 0 for no) ? ');
    end
  end
  % start from the rules already stored in the given network
  l1=bnet_miss.list{1}; l2=bnet_miss.list{2}; lp=bnet_miss.list{3};
end

while b==1,
  n = input('The firts node ? ');
  s = input('The node that have to be missing when this one is missing ? ');
  p = input('The probability of the second node to be missing ? ');
  l1 = [l1, n]; l2 = [l2, s]; lp = [lp, p];
  b=-1;
  while ~(b==0 | b==1),
    b = input('Another one (1 for yes, 0 for no) ? ');
  end
end
bb = length(l1);   % number of simple rules (0 = none)

if nargin>=3,
  bnet_miss = gener_MAR_bnet(upd, bnet_orig, bnet_miss);
  % keep the rules entered during this call inside the returned network
  bnet_miss.list = {l1; l2; lp};
else
  bnet_miss = gener_MAR_bnet(1, bnet_orig);
  bnet_miss.list = {l1; l2; lp};
  %%%%%%%%%%% SAVING FILE
  ss = 1;
  if nargin == 2 | upd==1,
    ss = input('Would you like to save the bnet of the MAR process you have made (1 for yes) ? ');
  end
  if ss == 1,
    ddd = datestr(now);
    ddd([12 15 18]) = '-';   % replace the blank and the two ':' of 'dd-mmm-yyyy HH:MM:SS'
    fnout = ['MAR-bnet-' ddd '.mat'];
    save(fnout, 'bnet_miss');
    fprintf(' The bnet for MAR process was saved as : %s\n',fnout);
  end
end %if nargin

% Generation of a complete dataset
if N>9 & m>2000, disp(' ! It could take a long time...'); end
data = cell(N,m);
for l = 1:m,
  data(:,l) = sample_bnet(bnet_orig);
end
disp('Complete data have been creating.');

% Generation of a MAR dataset
miss_array = cell(2*N,m);
vide = cell(1,N);
l = 1;
while l <= m,
  % evidence : nodes 1..N observed with the complete sample, nodes N+1..2N left empty
  ev(1:N) = data(:,l);
  ev(N+1:2*N) = vide;
  miss_array(:,l) = sample_bnet(bnet_miss, 'evidence', ev);

  % apply simple rule of missingness
  ev2 = cell2mat(miss_array(N+1:2*N, l));
  if bb,
    missl1 = myintersect(find(ev2==2), l1);
    if ~isempty(missl1),
      for i=1:length(l1),
        if ev2(l1(i))==2,
          if rand<lp(i),
            ev2(l2(i)) = 2;
            miss_array{N+l2(i),l} = [2];
          end
        end
      end
    end
  end

  % verification that we have not a completely missing sample :
  % 3-ev2 is 1 for a missing node and 2 for a present one, so the product
  % equals 1 only when every node is missing. In that case l is not
  % incremented and the missingness of sample l is drawn again.
  ev2 = 3-ev2;
  if prod(ev2)==1,
    fprintf(' - %d, one completly missing sample removed', l);
  else
    l = l+1;
  end
  if mod(l,100)==0, fprintf('\n - %d',l); end
end
fprintf('\n');

data = bnt_to_mat(data);
comp_data = data;
miss_array = bnt_to_mat(miss_array(N+1:2*N, :));
miss_array = 2-miss_array;     % 1 = present, 0 = missing
data = data.*miss_array;       % missing entries become 0
data = mat_to_bnt(data, 0);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function bnet_miss = gener_MAR_bnet(upd, bnet_orig, bnet_miss)
% GENER_MAR_BNET  Build (2 arguments) or edit (3 arguments) the missingness network.
%
% bnet_miss = gener_MAR_bnet(upd, bnet_orig)            builds a new network
% bnet_miss = gener_MAR_bnet(upd, bnet_orig, bnet_miss) edits an existing one
%
% The network has 2*N nodes : nodes 1..N copy bnet_orig (same CPTs), node N+i
% is binary (1 = node i present, 2 = node i missing). The parents of node N+i
% are node i, the parents of i in bnet_orig, and the nodes N+k for every
% parent k of i.
%
% The behaviour depends on the number of arguments only; upd itself is not read.
% All probabilities are asked interactively.

%%%%%%%%%%%% INIT
dag = bnet_orig.dag;
N = size(dag,2);
ns = bnet_orig.node_sizes;
if nargin==2,
  ns_miss = zeros(1,2*N);
  ns_miss(1:N) = ns;
  ns_miss(N+1:2*N) = 2*ones(1,N); % 1= node i-N present, 2= node i-N missing
  dag_miss = zeros(2*N,2*N);
  dag_miss(1:N,1:N) = dag;
  dag_miss(N+1:2*N, N+1:2*N) = dag;
  dag_miss(1:N, N+1:2*N) = dag;
  for i=1:N, dag_miss(i, i+N) = 1; end
  bnet_miss = mk_bnet(dag_miss, ns_miss);
  CPT = CPT_from_bnet(bnet_orig);
  for i=1:N
    bnet_miss.CPD{i} = tabular_CPD(bnet_miss, i, CPT{i});
  end
elseif nargin==3,
  ns_miss = bnet_miss.node_sizes;
  dag_miss = bnet_miss.dag;
end

%%%%%%%%%%%% Base probability of missing value (new network only)
if nargin==2,
  disp('Probability MUST be between 0 and 1.');
  p=-1;
  while p<0 | p>1,
    p = input('Base probability of a value to be missing ? ');
  end
  for i=1:N
    fam = find(dag_miss(:,N+i)==1)';
    semisize = prod(ns_miss(fam)); % as node N+i is binary to say i is present or missing
    CPT = zeros(1,2*semisize);
    CPT(1:semisize) = 1-p;
    CPT(semisize+1:2*semisize) = p;
    bnet_miss.CPD{N+i} = tabular_CPD(bnet_miss, N+i, CPT);
  end
end

b=-1;
while ~(b==0 | b==1),
  b = input('Would you like to change a probability of a node to be missing (1 for yes, 0 for no) ? ');
end
if b,
  disp(' BE CAREFULL !! New rules can overwrite old ones partialy or fully !! So the order of entries is important');
end

%%%%%%%%%%%% Update CPT with MAR process
while b
  fprintf('Nodes are from 1 to %d. ',N);
  i=0;
  while i<1 | i>N | round(i)~=i,
    i = input('Which node ? ');
  end
  fam = find(dag(:,i)==1)';                 % parents of i in the original network
  fam_miss = find(dag_miss(:,N+i)==1)';     % parents of the indicator node N+i
  familly = [fam_miss, N+i];
  % cas holds one state per node of familly; -1 means "any state"
  cas = -ones(1, length(fam_miss)+1);

  fprintf('States are from 1 to %d (-1 for any states, -2 to cancel). For which state of the variable %d ?', ns(i), i);
  state=-3;
  while state<-2 | state>ns(i) | round(state)~=state | state==0,
    state = input(' ');
  end

  if isempty(fam),
    % node i has no parent in bnet_orig
    if state==-1,
      p=-1;
      while p<0 | p>1,
        p = input(' - A priori probability for this node to be missing ? ');
      end
      semisize = prod(ns_miss(fam_miss));
      CPT = zeros(1,2*semisize);
      CPT(1:semisize) = 1-p;
      CPT(semisize+1:2*semisize) = p;
      bnet_miss.CPD{N+i} = tabular_CPD(bnet_miss, N+i, CPT);
    elseif state~=-2
      cas = state;
      CPT = CPT_from_bnet(bnet_miss);
      CPT = CPT{N+i};
      p=-1;
      while p<0 | p>1,
        p = input(' - A priori probability for this node to be missing in this state ? ');
      end
      ind = subv2ind(ns_miss(familly),[cas, 1]); CPT(ind) = 1-p;
      ind = subv2ind(ns_miss(familly),[cas, 2]); CPT(ind) = p;
      bnet_miss.CPD{N+i} = tabular_CPD(bnet_miss, N+i, CPT);
    end
  else
    if state>-2,
      place = find(fam_miss==i);
      cas(place) = state;
      for k = fam,
        fprintf(' - For the parent named %d, states are from 1 to %d (-1 for any states of this parent). ',k, ns(k));
        % accept -1 (any state) or an integer in 1..ns(k); 0 is not a valid state
        state=-3;
        while state<-1 | state>ns(k) | round(state)~=state | state==0,
          state = input('Which state ? ');
        end
        % k is a node number, so it is searched by value inside fam_miss
        place = find(fam_miss==k);
        cas(place) = state;
        if state~=-1,
          % a parent with a given state is necessarily present
          place = find(fam_miss==k+N);
          cas(place) = 1;
        end
      end

      p=-1;
      while p<0 | p>1,
        p = input('Probability in this case of the value to be missing ? ');
      end
      CPT = CPT_from_bnet(bnet_miss);
      CPT = CPT{N+i};

      % positions still at -1 (once the last one is fixed) are enumerated exhaustively
      cas(end) = 1;
      subcas_names = find(cas==-1);
      proba = [1-p, p];   % proba(1) : i is present, proba(2) : i is missing
      for etat = 1:2,
        cas(end) = etat;
        if isempty(subcas_names),
          ind = subv2ind(ns_miss(familly),cas);
          CPT(ind) = proba(etat);
        else
          subcas = ones(1, length(subcas_names));
          continu = 1;
          while continu
            cas(subcas_names) = subcas;
            ind = subv2ind(ns_miss(familly),cas);
            CPT(ind) = proba(etat);
            [subcas, continu] = next_case(subcas, ns_miss(familly(subcas_names)));
          end
        end
      end

      % sanity check : summing over the last dimension must give 1 everywhere
      mass = sum(CPT, length(size(CPT)));
      while length(size(mass))>2,
        mass = prod(mass, length(size(mass)));
      end
      mass = prod(prod(mass));
      if abs(mass-1) > 1e-10, disp('not a proba...'); end   % tolerance for rounding errors

      bnet_miss.CPD{N+i} = tabular_CPD(bnet_miss, N+i, CPT);
    end %if state~=-2 for the node
  end %if isempty(fam),

  b=-1;
  while ~(b==0 | b==1),
    b = input('Would you like to change a probability of a node to be missing (1 for yes, 0 for no) ? ');
  end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -