
📄 learn_dhmm_entropic.m

📁 The uploaded file is a MATLAB routine for hidden Markov models: EM training of a discrete-output HMM under an entropic prior (MAP estimation with parameter trimming).
💻 MATLAB (M-file)
function [hmm, LL] = learn_dhmm_entropic(data, hmm, varargin)
% LEARN_DHMM_ENTROPIC Find the MAP params of an HMM with discrete outputs and an entropic prior using EM
%
% [hmm, LL] = learn_dhmm_entropic(data, hmm, ...)
%
% This has the same interface as learn_dhmm_simple.
%
% Extra optional params
% trimtrans - trim uninformative outgoing transitions? [0]
% trimobs - trim uninformative observations? [0]
% trimstates - trim low occupancy states? [0]
%
% Based on "Structure learning in conditional probability models via an entropic prior
% and parameter extinction", M. Brand, Neural Computation 11 (1999): 1155--1182.
% For the annealed case, see "Pattern discovery via entropy minimization",
% M. Brand, AI & Statistics 1999. Equation numbers refer to this paper.

check_score_increases = 0;
max_iter = 100;
thresh = 1e-4;
verbose = 0;
dirichlet = 0;
trimtrans = 0;
trimobs = 0;
trimstates = 0;
anneal = 0;
anneal_fully = 0;
anneal_rate = 1.2;
init_beta = 0.1;

if nargin >= 3
  args = varargin;
  for i=1:2:length(args)
    switch args{i}
     case 'max_iter', max_iter = args{i+1};
     case 'thresh', thresh = args{i+1};
     case 'verbose', verbose = args{i+1};
     case 'dirichlet', dirichlet = args{i+1};
     case 'trimtrans', trimtrans = args{i+1};
     case 'trimobs', trimobs = args{i+1};
     case 'trimstates', trimstates = args{i+1};
     case 'anneal', anneal = args{i+1};
     case 'anneal_rate', anneal_rate = args{i+1};
     case 'anneal_fully', anneal_fully = args{i+1};
     case 'init_beta', init_beta = args{i+1};
    end
  end
end

previous_loglik = -inf;
previous_logpost = -inf;
converged = 0;
num_iter = 1;
LL = [];

if ~iscell(data)
  data = num2cell(data, 2); % each row gets its own cell
end
numex = length(data);

startprob = hmm.startprob;
endprob = hmm.endprob;
transmat = hmm.transmat;
obsmat = hmm.obsmat;

if anneal
  % schedule taken from Ueda and Nakano, "Deterministic Annealing EM algorithm",
  % Neural Networks 11 (1998): 271-282, p276
  b = [];
  temp = [];
  i = 1;
  b(i) = init_beta;
  temp(i) = 1/b(i);
  while b(i) < 1
    i = i + 1;
    b(i) = b(i-1)*anneal_rate;
    temp(i) = 1/b(i);
  end
  temp_schedule = temp;
end

Q = hmm.nstates;
O = hmm.nobs;

% record what has already been trimmed
trimmed_trans = zeros(1,Q);
trimmed_obs = zeros(1,Q);
trimmed_states = zeros(1,Q);

while (num_iter <= max_iter) & ~converged
  % Z = 1 is the min entropy case, Z = 0 is ML, Z = -1 is max ent
  % Z << 0 is the high temperature case
  if anneal
    if num_iter <= length(temp_schedule)
      temp = temp_schedule(num_iter);
    else
      temp = temp_schedule(end);
    end
    T0 = 0;
    if temp <= 1.0
      Z = 1;
    else
      Z = T0 - temp;
    end
  else
    Z = 1;
  end

  % E step
  [loglik, exp_num_trans, exp_num_visits1, exp_num_emit, exp_num_visitsT] = ...
      compute_ess_dhmm(startprob, transmat, obsmat, data, dirichlet);

  % M step
  startprob = normalise(exp_num_visits1);
  endprob = normalise(exp_num_visitsT);

  %transmat = mk_stochastic(exp_num_trans);
  for i=1:Q
    %ndx = find(transmat(i,:)==0);
    %assert(all(exp_num_trans(i,ndx)==0))
    [transmat(i,:), logpost_trans(i)] = entropic_map(exp_num_trans(i,:), Z);
    %assert(all(transmat(i,ndx)==0))

    % only trim if we are in the min entropy setting
    % If Z << 0, we would trim everything!
    if trimtrans & ~trimmed_trans(i) & (Z==1)
      % grad(j) = d log lik / d theta(i -> j)
      % transmat(i,j) = 0 => exp_num_trans(i,j) = 0
      % so we can safely replace 0s by 1s in the denominator
      denom = transmat(i,:) + (transmat(i,:)==0);
      grad = exp_num_trans(i,:) ./ denom;
      trim = find(transmat(i,:) <= exp(-(1/Z)*grad)); % eqn 32
      if ~isempty(trim)
        transmat(i,trim) = 0;
        trimmed_trans(i) = 1;
        if verbose, disp(['trimming transitions ' num2str(i) ' -> ' num2str(trim)]), end
      end
    end
  end

  %obsmat = mk_stochastic(exp_num_emit);
  for i=1:Q
    [obsmat(i,:), logpost_obs(i)] = entropic_map(exp_num_emit(i,:), Z);
    if trimobs & ~trimmed_obs(i) & (Z==1)
      denom = obsmat(i,:) + (obsmat(i,:)==0);
      grad = exp_num_emit(i,:) ./ denom;
      trim = find(obsmat(i,:) <= exp(-(1/Z)*grad)); % eqn 32
      if ~isempty(trim)
        obsmat(i,trim) = 0;
        trimmed_obs(i) = 1;
        if verbose, disp(['trimming observations ' num2str(i) ' -> ' num2str(trim)]), end
      end
    end
  end

  if trimstates & (Z==1)
    prob_occ = sum(exp_num_emit, 2);
    % prob_occ is a column vector; compare as rows so the mask lines up with trimmed_states
    trim = find((prob_occ(:)' < 1e-10) & ~trimmed_states);
    if ~isempty(trim)
      if verbose, disp(['trimming states ' num2str(trim)]), end
      trimmed_states(trim) = 1;
      for i=trim(:)'
        transmat(:,i) = 0;
        transmat(i,:) = 0;
        obsmat(i,:) = 0;
      end
    end
  end

  % log-prior = log exp(-H(theta)) = sum_i theta_i log(theta_i)
  logprior_trans = sum(sum(transmat .* log(transmat + (transmat==0))));
  logprior_obs = sum(sum(obsmat .* log(obsmat + (obsmat==0))));

  if 0
    % Here is a way to compute the expected complete-data log-likelihood
    % using the fact that, for a multinomial,
    % L = log prod_i theta_i ^ c_i = sum_i c_i log theta_i
    % where c_i = counts (so theta_i = 0 => c_i = 0)
    loglik_start = sum(exp_num_visits1(:) .* log(startprob + (startprob==0)));
    loglik_trans = sum(sum(exp_num_trans .* log(transmat + (transmat==0))));
    loglik_obs = sum(sum(exp_num_emit .* log(obsmat + (obsmat==0))));
    % The sum of these != loglik, because loglik marginalizes over the hidden vars.
    % The expected complete-data log un-normalized posterior is also computed by entropic_map.
    assert(approxeq(sum(logpost_trans) + sum(logpost_obs), ...
                    loglik_trans + Z*logprior_trans + loglik_obs + Z*logprior_obs))
    % Actually, Z is not part of the prior; also, if Z < 0, this quantity will
    % be positive, but it should be negative!
  end

  % observed data log un-normalized posterior, i.e., log [ P(obs|theta) P(theta) ]
  % This should be negative, and increase at every step.
  logpost = loglik + logprior_trans + logprior_obs;

  if verbose
    fprintf(1, 'iteration %d, loglik = %7.4f, logpost = %7.4f, Z=%5.3f\n', num_iter, loglik, logpost, Z);
  end
  num_iter = num_iter + 1;

  converged = em_converged(loglik, previous_loglik, thresh, check_score_increases);
  %converged = em_converged(logpost, previous_logpost, thresh, check_score_increases);
  if anneal_fully
    % must cool all the way and do at least 3 steps at Z=1 before finishing
    if num_iter <= length(temp_schedule)+3
      converged = 0;
    end
  end

  previous_loglik = loglik;
  previous_logpost = logpost;
  LL = [LL loglik];
end

hmm.startprob = startprob;
hmm.endprob = endprob;
hmm.transmat = transmat;
hmm.obsmat = obsmat;
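For context, here is a minimal usage sketch; it is not part of the downloaded file. It assumes the companion routines referenced in the code above (mk_stochastic, normalise, compute_ess_dhmm, entropic_map, em_converged) from the same HMM toolbox are on the MATLAB path, that observations are integer symbols in 1..O, and that the field shapes below match your toolbox version.

% Minimal usage sketch -- assumes the companion HMM-toolbox helpers are on the path.
Q = 3;  % number of hidden states
O = 5;  % number of discrete output symbols

% Random initial model; field names match those read by learn_dhmm_entropic.
hmm.nstates   = Q;
hmm.nobs      = O;
hmm.startprob = normalise(rand(1,Q));
hmm.endprob   = normalise(rand(1,Q));
hmm.transmat  = mk_stochastic(rand(Q,Q));
hmm.obsmat    = mk_stochastic(rand(Q,O));

% Training data: a cell array of sequences of symbols in 1..O
% (a plain matrix also works; each row becomes one sequence).
data = {ceil(O*rand(1,50)), ceil(O*rand(1,80))};

[hmm2, LL] = learn_dhmm_entropic(data, hmm, 'max_iter', 50, 'verbose', 1, ...
                                 'trimtrans', 1, 'trimobs', 1);
plot(LL)  % loglik trace; it may dip when parameters are trimmed,
          % which is why check_score_increases is 0 inside the function

Passing 'anneal', 1 (optionally with 'anneal_rate' and 'init_beta') switches to the deterministic-annealing schedule described in the comments, cooling from the high-temperature regime (Z << 0) down to the minimum-entropy setting (Z = 1).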
