kbest.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 605 行 · 第 1/2 页
C
605 行
/*printf("%g = %g\n", log(mo->s[i_id].b[b_index]), hP->gamma_a[i]); */ if (hP->gamma_a[i] > 0.0) { GHMM_LOG(LCONVERTED, "gamma to large. ghmm_dl_kbest failed\n"); exit (1); } } hP = hP->next; } /** 3. Choose the k most probable hypotheses for each state and discard all hypotheses that were not chosen: */ /* initialize temporary arrays: */ for (i = 0; i < mo->N * k; i++) { maxima[i] = 1.0; argmaxs[i] = NULL; } /* cycle through hypotheses & calculate the k most probable hypotheses for each state: */ hP = h[t]; while (hP != NULL) { for (i = 0; i < hP->gamma_states; i++) { i_id = hP->gamma_id[i]; if (hP->gamma_a[i] > KBEST_EPS) continue; /* find first best hypothesis that is worse than current hypothesis: */ for (l = 0; l < k && maxima[i_id * k + l] < KBEST_EPS && maxima[i_id * k + l] > hP->gamma_a[i]; l++); if (l < k) { /* for each m>l: m'th best hypothesis becomes (m+1)'th best */ for (m = k - 1; m > l; m--) { argmaxs[i_id * k + m] = argmaxs[i_id * k + m - 1]; maxima[i_id * k + m] = maxima[i_id * k + m - 1]; } /* save new l'th best hypothesis: */ maxima[i_id * k + l] = hP->gamma_a[i]; argmaxs[i_id * k + l] = hP; } } hP = hP->next; } /* set 'chosen' for all hypotheses from argmaxs array: */ for (i = 0; i < mo->N * k; i++) /* only choose hypotheses whose prob. is at least threshold*max_prob */ if (maxima[i] != 1.0 && maxima[i] >= KBEST_THRESHOLD + maxima[(i % mo->N) * k]) argmaxs[i]->chosen = 1; /* remove hypotheses that were not chosen from the lists: */ /* remove all hypotheses till the first chosen one */ while (h[t] != NULL && 0 == h[t]->chosen) ighmm_hlist_remove (&(h[t])); /* remove all other not chosen hypotheses */ if (!h[t]) { GHMM_LOG(LCONVERTED, "No chosen hypothesis. ghmm_dl_kbest failed\n"); exit (1); } hP = h[t]; while (hP->next != NULL) { if (1 == hP->next->chosen) hP = hP->next; else ighmm_hlist_remove (&(hP->next)); } } /* dispose of temporary arrays: */ m_free(states_wlabel); m_free(label_max_out); m_free(argmaxs); m_free(maxima); /* transition matrix is no longer needed from here on */ for (i=0; i<mo->N; i++) m_free(log_a[i]); m_free(log_a); /** 4. Save the hypothesis with the highest probability over all states: */ hP = h[seq_len - 1]; argmax = NULL; *log_p = 1.0; /* log_p will store log of maximum summed probability */ while (hP != NULL) { /* sum probabilities for each hypothesis over all states: */ sum = ighmm_cvector_log_sum (hP->gamma_a, hP->gamma_states); /* and select maximum sum */ if (sum < KBEST_EPS && (*log_p == 1.0 || sum > *log_p)) { *log_p = sum; argmax = hP; } hP = hP->next; } /* found a valid path? */ if (*log_p < KBEST_EPS) { /* yes: extract chosen hypothesis: */ ARRAY_MALLOC (hypothesis, seq_len); for (i = seq_len - 1; i >= 0; i--) { hypothesis[i] = argmax->hyp_c; argmax = argmax->parent; } } else /* no: return 1.0 representing -INF and an empty hypothesis */ hypothesis = NULL; /* dispose of calculation matrices: */ hP = h[seq_len - 1]; while (hP != NULL) ighmm_hlist_remove (&hP); free (h); return hypothesis;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ GHMM_LOG(LCONVERTED, "ghmm_dl_kbest failed\n"); exit (1);#undef CUR_PROC}/*================ utility functions ========================================*//* inserts new hypothesis into list at position indicated by pointer plist */static void ighmm_hlist_insert (hypoList ** plist, int newhyp, hypoList * parlist){#define CUR_PROC "ighmm_hlist_insert" hypoList *newlist; ARRAY_CALLOC (newlist, 1); newlist->hyp_c = newhyp; if (parlist) parlist->refcount += 1; newlist->parent = parlist; newlist->next = *plist; *plist = newlist; return;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ GHMM_LOG(LCONVERTED, "ighmm_hlist_insert failed\n"); exit (1);#undef CUR_PROC}/*============================================================================*//* removes hypothesis at position indicated by pointer plist from the list removes recursively parent hypothesis with refcount==0 */static void ighmm_hlist_remove (hypoList ** plist) {#define CUR_PROC "ighmm_hlist_remove" hypoList *tempPtr = (*plist)->next; free ((*plist)->gamma_a); free ((*plist)->gamma_id); if ((*plist)->parent) { (*plist)->parent->refcount -= 1; if (0 == (*plist)->parent->refcount) ighmm_hlist_remove (&((*plist)->parent)); } free (*plist); *plist = tempPtr;#undef CUR_PROC}/*============================================================================*/static int ighmm_hlist_prop_forward (ghmm_dmodel * mo, hypoList * h, hypoList ** hplus, int labels, int *nr_s, int *max_out) {#define CUR_PROC "ighmm_hlist_prop_forward" int i, j, c, k; int i_id, j_id, g_nr; int no_oldHyps = 0, newHyps = 0; hypoList *hP = h; hypoList **created; ARRAY_MALLOC (created, labels); /* extend the all hypotheses with the labels of out_states of all states in the hypotesis */ while (hP != NULL) { /* lookup table for labels, created[i]!=0 iff the current hypotheses was propagated forward with label i */ for (c = 0; c < labels; c++) created[c] = NULL; /* extend the current hypothesis and add all states which may have probability greater null */ for (i = 0; i < hP->gamma_states; i++) { /* skip impossible states */ if (hP->gamma_a[i] == 1.0) continue; i_id = hP->gamma_id[i]; for (j = 0; j < mo->s[i_id].out_states; j++) { j_id = mo->s[i_id].out_id[j]; c = mo->label[j_id]; /* create a new hypothesis with label c */ if (!created[c]) { ighmm_hlist_insert (hplus, c, hP); created[c] = *hplus; /* initiallize gamma-array with safe size (number of states */ ARRAY_MALLOC ((*hplus)->gamma_id, m_min (nr_s[c], hP->gamma_states * max_out[hP->hyp_c])); (*hplus)->gamma_id[0] = j_id; (*hplus)->gamma_states = 1; newHyps++; } /* add a new gamma state to the existing hypothesis with c */ else { g_nr = created[c]->gamma_states; /* search for state j_id in the gamma list */ for (k = 0; k < g_nr; k++) if (j_id == created[c]->gamma_id[k]) break; /* add the state to the gamma list */ if (k == g_nr) { created[c]->gamma_id[g_nr] = j_id; created[c]->gamma_states = g_nr + 1; } } } } /* reallocating gamma-array to the correct size */ for (c = 0; c < labels; c++) { if (created[c]) { ARRAY_CALLOC (created[c]->gamma_a, created[c]->gamma_states); ARRAY_REALLOC (created[c]->gamma_id, created[c]->gamma_states); created[c] = NULL; } } hP = hP->next; no_oldHyps++; } /* printf("Created %d new Hypotheses.\n", newHyps); */ free (created); return (no_oldHyps);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ GHMM_LOG(LCONVERTED, "ighmm_hlist_prop_forward failed\n"); exit (1);#undef CUR_PROC}/*============================================================================*//** Calculates the logarithm of sum(exp(log_a[j,a_pos])+exp(log_gamma[j,g_pos])) which corresponds to the logarithm of the sum of a[j,a_pos]*gamma[j,g_pos] @return ighmm_log_sum for products of a row from gamma and a row from matrix A @param log_a: row of the transition matrix with logarithmic values (1.0 for log(0)) @param s: ghmm_dstate whose gamma-value is calculated @param parent: a pointer to the parent hypothesis*/static double ighmm_log_gamma_sum (double *log_a, ghmm_dstate * s, hypoList * parent) {#define CUR_PROC "ighmm_log_gamma_sum" double result; int j, j_id, k; double max = 1.0; int argmax = 0; double *logP; /* shortcut for the trivial case */ if (parent->gamma_states == 1) for (j = 0; j < s->in_states; j++) if (parent->gamma_id[0] == s->in_id[j]) return parent->gamma_a[0] + log_a[j]; ARRAY_MALLOC (logP, s->in_states); /* calculate logs of a[k,l]*gamma[k,hi] as sums of logs and find maximum: */ for (j = 0; j < s->in_states; j++) { j_id = s->in_id[j]; /* search for state j_id in the gamma list */ for (k = 0; k < parent->gamma_states; k++) if (parent->gamma_id[k] == j_id) break; if (k == parent->gamma_states) logP[j] = 1.0; else { logP[j] = log_a[j] + parent->gamma_a[k]; if (max == 1.0 || (logP[j] > max && logP[j] != 1.0)) { max = logP[j]; argmax = j; } } } /* calculate max+log(1+sum[j!=argmax; exp(logP[j]-max)]) */ result = 1.0; for (j = 0; j < s->in_states; j++) if (j != argmax && logP[j] != 1.0) result += exp (logP[j] - max); result = log (result); result += max; free (logP); return result;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ GHMM_LOG(LCONVERTED, "ighmm_log_gamma_sum failed\n"); exit (1);#undef CUR_PROC}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?