kbest.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 605 行 · 第 1/2 页
C
605 行
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/kbest.c* Authors: Anyess von Bock, Alexander Riemer, Janne Grunau** Copyright (C) 1998-2004 Alexander Schliep* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,* Berlin** Contact: schliep@ghmm.org** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1713 $* from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $* last change by $Author: grunau $.********************************************************************************/#ifdef HAVE_CONFIG_H# include "../config.h"#endif#include <stdlib.h>#include <math.h>#include "mes.h"#include "mprintf.h"#include "model.h"#include "kbest.h"#include "ghmm_internals.h"/* threshold probability (logarithmized) */#define KBEST_THRESHOLD -3.50655789732 /* log(0.03) => threshold: 3% of most probable partial hypothesis */#define KBEST_EPS 1E-15/*============================================================================*//** Data type for linked list of hypotheses Stores the actual label, a link to the parent hypothesis, a counter of the links to this hypothesis and the gamma values */struct hypo_List { int hyp_c; int refcount; int chosen; int gamma_states; double *gamma_a; int *gamma_id; struct hypo_List *next; struct hypo_List *parent;};typedef struct hypo_List hypoList;/*============================================================================*//* inserts new hypothesis into list at position indicated by pointer plist */static void ighmm_hlist_insert (hypoList ** plist, int newhyp, hypoList * parlist);/*============================================================================*//* removes hypothesis at position indicated by pointer plist from the list */static void ighmm_hlist_remove (hypoList ** plist);/*============================================================================*//** Propagates list of hypotheses forward by extending each old hypothesis to the possible new hypotheses depending on the states in which the old hypothesis could end and the reachable labels @return number of old hypotheses @param mo: pointer to the ghmm_dmodel @param h: pointer to list of hypotheses @param hplus: address of a pointer to store the propagated hypotheses @param labels: number of labels @param nr_s: number states which have assigned the index aa label @param max_out: maximum number of out_states over all states with the index aa label */static int ighmm_hlist_prop_forward (ghmm_dmodel * mo, hypoList * h, hypoList ** hplus, int labels, int *nr_s, int *max_out);/*============================================================================*//** Calculates the logarithm of sum(exp(log_a[j,a_pos])+exp(log_gamma[j,g_pos])) which corresponds to the logarithm of the sum of a[j,a_pos]*gamma[j,g_pos] @return log. sum for products of a row from gamma and a row from matrix A @param log_a: transition matrix with logarithmic values (1.0 for log(0)) @param s: ghmm_dstate whose gamma-value is calculated @param parent: a pointer to the parent hypothesis*/static double ighmm_log_gamma_sum (double *log_a, ghmm_dstate * s, hypoList * parent);/*============================================================================*//** Builds logarithmic transition matrix from the states' in_a values the row for each state is the logarithmic version of the state's in_a @return transition matrix with logarithmic values, 1.0 if a[i,j] = 0 @param s: array of all states of the model @param N: number of states in the model */static double **kbest_buildLogMatrix (ghmm_dstate * s, int N){#define CUR_PROC "kbest_buildLogMatrix" int i, j; double **log_a; /* log(a(i,j)) => log_a[i*N+j] */ /* create & initialize matrix: */ ARRAY_MALLOC (log_a, N); for (i = 0; i < N; i++) { ARRAY_MALLOC (log_a[i], s[i].in_states); for (j = 0; j < s[i].in_states; j++) log_a[i][j] = log (s[i].in_a[j]); } return log_a;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ GHMM_LOG(LCONVERTED, "kbest_buildLogMatrix failed\n"); exit (1);#undef CUR_PROC}/*============================================================================*//** Calculates the most probable labeling for the given sequence in the given model using k-best decoding. Labels must be from interval [0:max_label] without gaps!!! (not checked) Model must not have silent states. (checked in Python wrapper) @return array of labels (internal representation) @param mo: pointer to a ghmm_dmodel @param o_seq: output sequence (array of internal representation chars) @param seq_len: length of output sequence @param k: number of hypotheses to keep for each state @param log_p: variable reference to store the log prob. of the labeling */int *ghmm_dmodel_label_kbest (ghmm_dmodel * mo, int *o_seq, int seq_len, int k, double *log_p){#define CUR_PROC "ghmm_dl_kbest" int i, t, c, l, m; /* counters */ int no_oldHyps; /* number of hypotheses until position t-1 */ int b_index, i_id; /* index for addressing states' b arrays */ int no_labels = 0; int exists, g_nr; int *states_wlabel; int *label_max_out; char *str; /* logarithmized transition matrix A, log(a(i,j)) => log_a[i*N+j], 1.0 for zero probability */ double **log_a; /* matrix of hypotheses, holds for every position in the sequence a list of hypotheses */ hypoList **h; hypoList *hP; /* vectors for rows in the matrices */ int *hypothesis; /* pointer & prob. of the k most probable hypotheses for each state - matrices of dimensions #states x k: argm(i,l) => argmaxs[i*k+l] */ double *maxima; hypoList **argmaxs; /* pointer to & probability of most probable hypothesis in a certain state */ hypoList *argmax; double sum; /* break if sequence empty or k<1: */ if (seq_len <= 0 || k <= 0) return NULL; ARRAY_CALLOC (h, seq_len); /** 1. Initialization (extend empty hypothesis to #labels hypotheses of length 1): */ /* get number of labels (= maximum label + 1) and number of states with those labels */ ARRAY_CALLOC (states_wlabel, mo->N); ARRAY_CALLOC (label_max_out, mo->N); for (i = 0; i < mo->N; i++) { c = mo->label[i]; states_wlabel[c]++; if (c > no_labels) no_labels = c; if (mo->s[i].out_states > label_max_out[c]) label_max_out[c] = mo->s[i].out_states; } /* add one to the maximum label to get the number of labels */ no_labels++; ARRAY_REALLOC (states_wlabel, no_labels); ARRAY_REALLOC (label_max_out, no_labels); /* initialize h: */ hP = h[0]; for (i = 0; i < mo->N; i++) { if (mo->s[i].pi > KBEST_EPS) { /* printf("Found State %d with initial probability %f\n", i, mo->s[i].pi); */ exists = 0; while (hP != NULL) { if (hP->hyp_c == mo->label[i]) { /* add entry to the gamma list */ g_nr = hP->gamma_states; hP->gamma_id[g_nr] = i; hP->gamma_a[g_nr] = log (mo->s[i].pi) + log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]); hP->gamma_states = g_nr + 1; exists = 1; break; } else hP = hP->next; } if (!exists) { ighmm_hlist_insert (&(h[0]), mo->label[i], NULL); /* initiallize gamma-array with safe size (number of states) and add the first entry */ ARRAY_MALLOC (h[0]->gamma_a, states_wlabel[mo->label[i]]); ARRAY_MALLOC (h[0]->gamma_id, states_wlabel[mo->label[i]]); h[0]->gamma_id[0] = i; h[0]->gamma_a[0] = log (mo->s[i].pi) + log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]); h[0]->gamma_states = 1; h[0]->chosen = 1; } hP = h[0]; } } /* reallocating the gamma list to the real size */ hP = h[0]; while (hP != NULL) { ARRAY_REALLOC (hP->gamma_a, hP->gamma_states); ARRAY_REALLOC (hP->gamma_id, hP->gamma_states); hP = hP->next; } /* calculate transition matrix with logarithmic values: */ log_a = kbest_buildLogMatrix (mo->s, mo->N); /* initialize temporary arrays: */ ARRAY_MALLOC (maxima, mo->N * k); /* for each state save k */ ARRAY_MALLOC (argmaxs, mo->N * k); /*------ Main loop: Cycle through the sequence: ------*/ for (t = 1; t < seq_len; t++) { /* put o_seq[t-1] in emission history: */ update_emission_history (mo, o_seq[t - 1]); /** 2. Propagate hypotheses forward and update gamma: */ no_oldHyps = ighmm_hlist_prop_forward (mo, h[t - 1], &(h[t]), no_labels, states_wlabel, label_max_out); /* printf("t = %d (%d), no of old hypotheses = %d\n", t, seq_len, no_oldHyps); */ /*-- calculate new gamma: --*/ hP = h[t]; /* cycle through list of hypotheses */ while (hP != NULL) { for (i = 0; i < hP->gamma_states; i++) { /* if hypothesis hP ends with label of state i: gamma(i,c):= log(sum(exp(a(j,i)*exp(oldgamma(j,old_c))))) + log(b[i](o_seq[t])) else: gamma(i,c):= -INF (represented by 1.0) */ i_id = hP->gamma_id[i]; hP->gamma_a[i] = ighmm_log_gamma_sum (log_a[i_id], &mo->s[i_id], hP->parent); b_index = get_emission_index (mo, i_id, o_seq[t], t); if (b_index < 0) { hP->gamma_a[i] = 1.0; if (mo->order[i_id] > t) continue; else { str = ighmm_mprintf (NULL, 0, "i_id: %d, o_seq[%d]=%d\ninvalid emission index!\n", i_id, t, o_seq[t]); GHMM_LOG(LCONVERTED, str); m_free (str); } } else hP->gamma_a[i] += log (mo->s[i_id].b[b_index]);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?