kbest.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 605 行 · 第 1/2 页

C
605
字号
        /*printf("%g = %g\n", log(mo->s[i_id].b[b_index]), hP->gamma_a[i]); */        if (hP->gamma_a[i] > 0.0) {          GHMM_LOG(LCONVERTED, "gamma to large. ghmm_dl_kbest failed\n");          exit (1);        }      }      hP = hP->next;    }    /** 3. Choose the k most probable hypotheses for each state and discard all	   hypotheses that were not chosen: */    /* initialize temporary arrays: */    for (i = 0; i < mo->N * k; i++) {      maxima[i] = 1.0;      argmaxs[i] = NULL;    }    /* cycle through hypotheses & calculate the k most probable hypotheses for       each state: */    hP = h[t];    while (hP != NULL) {      for (i = 0; i < hP->gamma_states; i++) {        i_id = hP->gamma_id[i];        if (hP->gamma_a[i] > KBEST_EPS)          continue;        /* find first best hypothesis that is worse than current hypothesis: */        for (l = 0;             l < k && maxima[i_id * k + l] < KBEST_EPS             && maxima[i_id * k + l] > hP->gamma_a[i]; l++);        if (l < k) {          /* for each m>l: m'th best hypothesis becomes (m+1)'th best */          for (m = k - 1; m > l; m--) {            argmaxs[i_id * k + m] = argmaxs[i_id * k + m - 1];            maxima[i_id * k + m] = maxima[i_id * k + m - 1];          }          /* save new l'th best hypothesis: */          maxima[i_id * k + l] = hP->gamma_a[i];          argmaxs[i_id * k + l] = hP;        }      }      hP = hP->next;    }    /* set 'chosen' for all hypotheses from argmaxs array: */    for (i = 0; i < mo->N * k; i++)      /* only choose hypotheses whose prob. is at least threshold*max_prob */      if (maxima[i] != 1.0          && maxima[i] >= KBEST_THRESHOLD + maxima[(i % mo->N) * k])        argmaxs[i]->chosen = 1;    /* remove hypotheses that were not chosen from the lists: */    /* remove all hypotheses till the first chosen one */    while (h[t] != NULL && 0 == h[t]->chosen)      ighmm_hlist_remove (&(h[t]));    /* remove all other not chosen hypotheses */    if (!h[t]) {      GHMM_LOG(LCONVERTED, "No chosen hypothesis. ghmm_dl_kbest failed\n");      exit (1);    }    hP = h[t];    while (hP->next != NULL) {      if (1 == hP->next->chosen)        hP = hP->next;      else        ighmm_hlist_remove (&(hP->next));    }  }  /* dispose of temporary arrays: */  m_free(states_wlabel);  m_free(label_max_out);  m_free(argmaxs);  m_free(maxima);  /* transition matrix is no longer needed from here on */  for (i=0; i<mo->N; i++)    m_free(log_a[i]);  m_free(log_a);  /** 4. Save the hypothesis with the highest probability over all states: */  hP = h[seq_len - 1];  argmax = NULL;  *log_p = 1.0;                 /* log_p will store log of maximum summed probability */  while (hP != NULL) {    /* sum probabilities for each hypothesis over all states: */    sum = ighmm_cvector_log_sum (hP->gamma_a, hP->gamma_states);    /* and select maximum sum */    if (sum < KBEST_EPS && (*log_p == 1.0 || sum > *log_p)) {      *log_p = sum;      argmax = hP;    }    hP = hP->next;  }  /* found a valid path? */  if (*log_p < KBEST_EPS) {    /* yes: extract chosen hypothesis: */    ARRAY_MALLOC (hypothesis, seq_len);    for (i = seq_len - 1; i >= 0; i--) {      hypothesis[i] = argmax->hyp_c;      argmax = argmax->parent;    }  }  else    /* no: return 1.0 representing -INF and an empty hypothesis */    hypothesis = NULL;  /* dispose of calculation matrices: */  hP = h[seq_len - 1];  while (hP != NULL)    ighmm_hlist_remove (&hP);  free (h);  return hypothesis;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  GHMM_LOG(LCONVERTED, "ghmm_dl_kbest failed\n");  exit (1);#undef CUR_PROC}/*================ utility functions ========================================*//* inserts new hypothesis into list at position indicated by pointer plist */static void ighmm_hlist_insert (hypoList ** plist, int newhyp,                              hypoList * parlist){#define CUR_PROC "ighmm_hlist_insert"  hypoList *newlist;  ARRAY_CALLOC (newlist, 1);  newlist->hyp_c = newhyp;  if (parlist)    parlist->refcount += 1;  newlist->parent = parlist;  newlist->next = *plist;  *plist = newlist;  return;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  GHMM_LOG(LCONVERTED, "ighmm_hlist_insert failed\n");  exit (1);#undef CUR_PROC}/*============================================================================*//* removes hypothesis at position indicated by pointer plist from the list   removes recursively parent hypothesis with refcount==0 */static void ighmm_hlist_remove (hypoList ** plist) {#define CUR_PROC "ighmm_hlist_remove"  hypoList *tempPtr = (*plist)->next;  free ((*plist)->gamma_a);  free ((*plist)->gamma_id);  if ((*plist)->parent) {    (*plist)->parent->refcount -= 1;    if (0 == (*plist)->parent->refcount)      ighmm_hlist_remove (&((*plist)->parent));  }  free (*plist);  *plist = tempPtr;#undef CUR_PROC}/*============================================================================*/static int ighmm_hlist_prop_forward (ghmm_dmodel * mo, hypoList * h, hypoList ** hplus,				     int labels, int *nr_s, int *max_out) {#define CUR_PROC "ighmm_hlist_prop_forward"  int i, j, c, k;  int i_id, j_id, g_nr;  int no_oldHyps = 0, newHyps = 0;  hypoList *hP = h;  hypoList **created;  ARRAY_MALLOC (created, labels);  /* extend the all hypotheses with the labels of out_states     of all states in the hypotesis */  while (hP != NULL) {    /* lookup table for labels, created[i]!=0 iff the current hypotheses       was propagated forward with label i */    for (c = 0; c < labels; c++)      created[c] = NULL;    /* extend the current hypothesis and add all states which may have       probability greater null */    for (i = 0; i < hP->gamma_states; i++) {      /* skip impossible states */      if (hP->gamma_a[i] == 1.0)        continue;      i_id = hP->gamma_id[i];      for (j = 0; j < mo->s[i_id].out_states; j++) {        j_id = mo->s[i_id].out_id[j];        c = mo->label[j_id];        /* create a new hypothesis with label c */        if (!created[c]) {          ighmm_hlist_insert (hplus, c, hP);          created[c] = *hplus;          /* initiallize gamma-array with safe size (number of states */          ARRAY_MALLOC ((*hplus)->gamma_id, m_min (nr_s[c], hP->gamma_states * max_out[hP->hyp_c]));          (*hplus)->gamma_id[0] = j_id;          (*hplus)->gamma_states = 1;          newHyps++;        }        /* add a new gamma state to the existing hypothesis with c */        else {          g_nr = created[c]->gamma_states;          /* search for state j_id in the gamma list */          for (k = 0; k < g_nr; k++)            if (j_id == created[c]->gamma_id[k])              break;          /* add the state to the gamma list */          if (k == g_nr) {            created[c]->gamma_id[g_nr] = j_id;            created[c]->gamma_states = g_nr + 1;          }        }      }    }    /* reallocating gamma-array to the correct size */    for (c = 0; c < labels; c++) {      if (created[c]) {        ARRAY_CALLOC (created[c]->gamma_a, created[c]->gamma_states);        ARRAY_REALLOC (created[c]->gamma_id, created[c]->gamma_states);        created[c] = NULL;      }    }    hP = hP->next;    no_oldHyps++;  }  /* printf("Created %d new Hypotheses.\n", newHyps); */  free (created);  return (no_oldHyps);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  GHMM_LOG(LCONVERTED, "ighmm_hlist_prop_forward failed\n");  exit (1);#undef CUR_PROC}/*============================================================================*//**   Calculates the logarithm of sum(exp(log_a[j,a_pos])+exp(log_gamma[j,g_pos]))   which corresponds to the logarithm of the sum of a[j,a_pos]*gamma[j,g_pos]   @return ighmm_log_sum for products of a row from gamma and a row from matrix A   @param log_a:      row of the transition matrix with logarithmic values (1.0 for log(0))   @param s:          ghmm_dstate whose gamma-value is calculated   @param parent:     a pointer to the parent hypothesis*/static double ighmm_log_gamma_sum (double *log_a, ghmm_dstate * s, hypoList * parent) {#define CUR_PROC "ighmm_log_gamma_sum"  double result;  int j, j_id, k;  double max = 1.0;  int argmax = 0;  double *logP;  /* shortcut for the trivial case */  if (parent->gamma_states == 1)    for (j = 0; j < s->in_states; j++)      if (parent->gamma_id[0] == s->in_id[j])        return parent->gamma_a[0] + log_a[j];  ARRAY_MALLOC (logP, s->in_states);  /* calculate logs of a[k,l]*gamma[k,hi] as sums of logs and find maximum: */  for (j = 0; j < s->in_states; j++) {    j_id = s->in_id[j];    /* search for state j_id in the gamma list */    for (k = 0; k < parent->gamma_states; k++)      if (parent->gamma_id[k] == j_id)        break;    if (k == parent->gamma_states)      logP[j] = 1.0;    else {      logP[j] = log_a[j] + parent->gamma_a[k];      if (max == 1.0 || (logP[j] > max && logP[j] != 1.0)) {        max = logP[j];        argmax = j;      }    }  }  /* calculate max+log(1+sum[j!=argmax; exp(logP[j]-max)])  */  result = 1.0;  for (j = 0; j < s->in_states; j++)    if (j != argmax && logP[j] != 1.0)      result += exp (logP[j] - max);  result = log (result);  result += max;  free (logP);  return result;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  GHMM_LOG(LCONVERTED, "ighmm_log_gamma_sum failed\n");  exit (1);#undef CUR_PROC}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?