learn_dhmm_entropic.m
function [hmm, LL] = learn_dhmm_entropic(data, hmm, varargin)
% LEARN_DHMM_ENTROPIC Find the MAP params of an HMM with discrete outputs with an entropic prior using EM
%
% [hmm, LL] = learn_dhmm_entropic(data, hmm, ...)
%
% This has the same interface as learn_dhmm_simple.
%
% Extra optional params:
% trimtrans  - trim uninformative outgoing transitions? [0]
% trimobs    - trim uninformative observations? [0]
% trimstates - trim low occupancy states? [0]
%
% Based on "Structure learning in conditional probability models via an entropic prior
% and parameter extinction", M. Brand, Neural Computation 11 (1999): 1155--1182.
% For the annealed case, see "Pattern discovery via entropy minimization",
% M. Brand, AI & Statistics 1999. Equation numbers refer to this paper.

check_score_increases = 0;
max_iter = 100;
thresh = 1e-4;
verbose = 0;
dirichlet = 0;
trimtrans = 0;
trimobs = 0;
trimstates = 0;
anneal = 0;
cool_rate = 0.7;
init_temp = 1;
final_temp = 0.1;

if nargin >= 3
  args = varargin;
  for i=1:2:length(args)
    switch args{i}
     case 'max_iter', max_iter = args{i+1};
     case 'thresh', thresh = args{i+1};
     case 'verbose', verbose = args{i+1};
     case 'dirichlet', dirichlet = args{i+1};
     case 'trimtrans', trimtrans = args{i+1};
     case 'trimobs', trimobs = args{i+1};
     case 'trimstates', trimstates = args{i+1};
     case 'anneal', anneal = args{i+1};
     case 'anneal_rate', anneal_rate = args{i+1};
     case 'init_beta', init_beta = args{i+1};
    end
  end
end

previous_loglik = -inf;
previous_logpost = -inf;
converged = 0;
num_iter = 1;
LL = [];

if ~iscell(data)
  data = num2cell(data, 2); % each row gets its own cell
end
numex = length(data);

startprob = hmm.startprob;
endprob = hmm.endprob;
transmat = hmm.transmat;
obsmat = hmm.obsmat;

if anneal
%   temp = [];
%   i = 1;
%   temp(i) = init_temp;
%   while temp(i) > final_temp
%     i = i + 1;
%     temp(i) = temp(i-1)*cool_rate;
%   end
%   temp_schedule = [temp 0:-0.5:-1]; % infty -> 1, then 0 for max lik and then -1 for min entropy
  temp_schedule = 2:-0.5:-1;
else
  temp_schedule = -1;
end

Q = hmm.nstates;
O = hmm.nobs;

% record what has already been trimmed
trimmed_trans = zeros(1,Q);
trimmed_obs = zeros(1,Q);
trimmed_states = zeros(1,Q);

total_amt_data = sum(cellfun('length', data));

for anneal_iter = 1:length(temp_schedule)
  temp = temp_schedule(anneal_iter) % no semicolon: echo the current temperature
  Z = -temp;
  converged = 0;
  previous_loglik = -inf;
  previous_logpost = -inf;
  inner_iter = 1;
  while (inner_iter <= max_iter) & ~converged
    % E step
    [loglik, exp_num_trans, exp_num_visits1, exp_num_emit, exp_num_visitsT] = ...
        compute_ess_dhmm(startprob, transmat, obsmat, data, dirichlet);

    % M step
    startprob = normalise(exp_num_visits1);
    endprob = normalise(exp_num_visitsT);
    %transmat = mk_stochastic(exp_num_trans);
    %obsmat = mk_stochastic(exp_num_emit);
    for i=1:Q
      [transmat(i,:), logpost_trans(i)] = entropic_map(exp_num_trans(i,:), Z);
      [obsmat(i,:), logpost_obs(i)] = entropic_map(exp_num_emit(i,:), Z);
    end

    % Only trim if we are in the min entropy setting.
    % If Z << 0, we would trim everything!
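    % Trimming (eqn 32 of Brand, AI & Stats 1999): a parameter theta(i,j) is set
    % to exactly zero when theta(i,j) <= exp(-(1/Z) * d loglik / d theta(i,j)),
    % i.e., when the entropy saved by deleting it outweighs the likelihood lost.
    % For a multinomial, d loglik / d theta(i,j) = expected_count(i,j) / theta(i,j),
    % which is what 'grad' computes in the loops below.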
    if trimtrans & (Z==1)
      for i=find(~trimmed_trans)
        % grad(j) = d log lik / d theta(i -> j)
        % transmat(i,j) = 0 => exp_num_trans(i,j) = 0,
        % so we can safely replace 0s by 1s in the denominator
        denom = transmat(i,:) + (transmat(i,:)==0);
        grad = exp_num_trans(i,:) ./ denom;
        trim = find(transmat(i,:) <= exp(-(1/Z)*grad)); % eqn 32
        if ~isempty(trim)
          transmat(i,trim) = 0;
          trimmed_trans(i) = 1;
          if verbose, disp(['trimming transitions ' num2str(i) ' -> ' num2str(trim)]), end
        end
      end
    end

    if trimobs & (Z==1)
      for i=find(~trimmed_obs)
        denom = obsmat(i,:) + (obsmat(i,:)==0);
        grad = exp_num_emit(i,:) ./ denom;
        trim = find(obsmat(i,:) <= exp(-(1/Z)*grad)); % eqn 32
        if ~isempty(trim)
          obsmat(i,trim) = 0;
          trimmed_obs(i) = 1;
          if verbose, disp(['trimming observations ' num2str(i) ' -> ' num2str(trim)]), end
        end
      end
    end

    if trimstates & (Z==1)
      prob_occ = sum(exp_num_emit, 2) / total_amt_data;
      assert(approxeq(sum(prob_occ), 1))
      trim = find((prob_occ < 1/(5*Q)) & ~trimmed_states(:));
      if ~isempty(trim)
        %if verbose, disp(['trimming states ' num2str(trim(:)')]), end
        disp(['trimming states ' num2str(trim(:)')])
        trimmed_states(trim) = 1;
        for i=trim(:)'
          %transmat(:,i) = 0;
          transmat(i,:) = 0;
          obsmat(i,:) = 0;
          endprob(i) = 1;
        end
      end
    end

    % log-prior = log exp(-H(theta)) = sum_i theta_i log(theta_i)
    logprior_trans = sum(sum(transmat .* log(transmat + (transmat==0))));
    logprior_obs = sum(sum(obsmat .* log(obsmat + (obsmat==0))));

    if 0
      % Here is a way to compute the expected complete-data log-likelihood,
      % using the fact that, for a multinomial,
      % L = log prod_i theta_i ^ c_i = sum_i c_i log theta_i
      % where c_i = counts (so theta_i = 0 => c_i = 0).
      loglik_start = sum(exp_num_visits1(:) .* log(startprob + (startprob==0)));
      loglik_trans = sum(sum(exp_num_trans .* log(transmat + (transmat==0))));
      loglik_obs = sum(sum(exp_num_emit .* log(obsmat + (obsmat==0))));
      % The sum of these != loglik, because loglik marginalizes over the hidden vars.
      % The expected complete-data log un-normalized posterior is also computed by entropic_map.
      assert(approxeq(sum(logpost_trans) + sum(logpost_obs), ...
                      loglik_trans + Z*logprior_trans + loglik_obs + Z*logprior_obs))
      % Actually, Z is not part of the prior; also, if Z < 0, this quantity will
      % be positive, but it should be negative!
    end

    % observed-data log un-normalized posterior, i.e., log [ P(obs|theta) P(theta) ]
    % This should be negative, and increase at every step.
    logpost = loglik + logprior_trans + logprior_obs;

    if verbose
      fprintf(1, 'iteration %d, loglik = %7.4f, logpost = %7.4f, Z=%5.3f\n', ...
              num_iter, loglik, logpost, Z);
    end
    num_iter = num_iter + 1;
    inner_iter = inner_iter + 1;

    converged = em_converged(loglik, previous_loglik, thresh, check_score_increases);
    %converged = em_converged(logpost, previous_logpost, thresh, check_score_increases);
    previous_loglik = loglik;
    previous_logpost = logpost;
    LL = [LL loglik];
  end % while not converged
end % next temperature

hmm.startprob = startprob;
hmm.endprob = endprob;
hmm.transmat = transmat;
hmm.obsmat = obsmat;
if trimstates
  hmm.eff_num_states = sum(~trimmed_states);
end
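Example usage, as a minimal sketch: the struct fields and the helpers normalise and mk_stochastic are the ones the function above already relies on; Q, O, and the data sequences are hypothetical.

% Minimal usage sketch. Field names match those read by learn_dhmm_entropic;
% Q, O, and the sequences are made-up examples.
Q = 5;   % initial number of states (entropic trimming may remove some)
O = 3;   % size of the discrete observation alphabet

hmm.nstates   = Q;
hmm.nobs      = O;
hmm.startprob = normalise(rand(1,Q));
hmm.endprob   = normalise(rand(1,Q));
hmm.transmat  = mk_stochastic(rand(Q,Q));   % rows sum to 1
hmm.obsmat    = mk_stochastic(rand(Q,O));

% each cell holds one observation sequence with symbols in 1..O
data = {[1 2 3 1 1 2], [3 3 2 1], [2 1 1 3 2 2 1]};

[hmm2, LL] = learn_dhmm_entropic(data, hmm, 'verbose', 1, 'anneal', 1, ...
                                 'trimtrans', 1, 'trimobs', 1, 'trimstates', 1);
plot(LL)  % log-likelihood trace across all EM iterations / temperatures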