📄 smixturehmm.c

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
  }                             /* for (smo_number) */  if (smofile)    fclose (smofile);  exitcode = 0;  /*------------------------------------------------------------------------*/STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ighmm_mes (MES_WIN, "\n(%2.2T): Program finished with exitcode %d.\n", exitcode);  ighmm_mes_exit ();  return (exitcode);# undef CUR_PROC}                               /* main */#endif /* nomain *//*============================================================================*/ /* Original version, without attempt to avoid lokal traps */int ghmm_smixturehmm_cluster (FILE * outfile, double **cp, ghmm_cseq * sqd,                         ghmm_cmodel ** smo, int smo_number){#define CUR_PROC "ghmm_smixturehmm_cluster"  int i, k, iter = 0;  double likelihood, old_likelihood, delta_likelihood = 1000000.0;  double total_train_w = 0.0;  char *str;  double *save_w;  double model_weight, log_p, sum = 0.0;  ghmm_cmodel_baum_welch_context *smo_sqd;            /* this structure is used by sreestimate() */  ARRAY_CALLOC (smo_sqd, 1);  /*  smo_sqd->max_iter = MAX_ITER_BW; */  smo_sqd->max_iter = 10;  smo_sqd->eps = GHMM_EPS_ITER_BW;  smo_sqd->logp = &log_p;  smo_sqd->sqd = sqd;  ARRAY_CALLOC (save_w, sqd->seq_number);  for (i = 0; i < sqd->seq_number; i++) {    save_w[i] = sqd->seq_w[i];    total_train_w += save_w[i];  }  for (k = 0; k < smo_number; k++) {    sum = 0.0;    for (i = 0; i < sqd->seq_number; i++)      sum += cp[i][k] * sqd->seq_w[i];    smo[k]->prior = sum / total_train_w;  }  ghmm_cseq_mix_like (smo, smo_number, sqd, &old_likelihood);  printf ("Initial Likelihood %.4f\n", old_likelihood);  fprintf (outfile, "Initial Likelihood %.4f\n", old_likelihood);  while (((-1) * delta_likelihood / old_likelihood) > 0.001 && iter < 75) {    iter++;    for (k = 0; k < smo_number; k++) {      printf ("Model %d\n", k);      smo_sqd->smo = smo[k];      model_weight = 0.0;      for (i = 0; i < sqd->seq_number; i++) {        /* combine seq_w with appropriate comp. prob. */        sqd->seq_w[i] = save_w[i] * cp[i][k];        model_weight += sqd->seq_w[i];      }      if (ghmm_cmodel_baum_welch (smo_sqd) == -1) {        str = ighmm_mprintf (NULL, 0, "Error iteration %d, model %d\n", iter, k);        GHMM_LOG(LCONVERTED, str);        m_free (str);        goto STOP;      }      smo[k]->prior = model_weight / total_train_w;    }                           /* for (k ...) */    /* reset weigths for smixturehmm_like */    for (i = 0; i < sqd->seq_number; i++)      sqd->seq_w[i] = save_w[i];    ghmm_cseq_mix_like (smo, smo_number, sqd, &likelihood);    if (ghmm_smixturehmm_calc_cp (cp, sqd, smo, smo_number, &total_train_w) == -1) {      str = ighmm_mprintf (NULL, 0, "Error iteration %d\n", iter);      GHMM_LOG(LCONVERTED, str);      m_free (str);      goto STOP;    }    printf ("Iter %d, likelihood: %.4f\n", iter, likelihood);    fprintf (outfile, "BIter %d, likelihood: %.4f\n", iter, likelihood);    delta_likelihood = likelihood - old_likelihood;    old_likelihood = likelihood;  }                             /* while delta_likelihood */  m_free (smo_sqd);  m_free (save_w);  return 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  m_free (smo_sqd);  m_free (save_w);  return -1;#undef CUR_PROC}                               /* ghmm_smixturehmm_cluster *//*============================================================================*//* Initial component probs. (cp) for each sequence;   dimension cp: cp[sqd->seq_number] [smo_number]    and (depending on cp): model priors for all models */int ghmm_smixturehmm_init (double **cp, ghmm_cseq * sqd, ghmm_cmodel ** smo,                      int smo_number, int mode){#define CUR_PROC "ghmm_smixturehmm_init"  int i, j;  double p;  double *result;  char *str;  int bm;  for (i = 0; i < sqd->seq_number; i++)    for (j = 0; j < smo_number; j++)      cp[i][j] = 0.0;  if (mode < 1 || mode > 5) {    GHMM_LOG(LCONVERTED, "Error: initial mode out of range\n");    goto STOP;  }  /* 1. strict random partition; cp = 0/1 */  if (mode == 1) {    for (i = 0; i < sqd->seq_number; i++) {      p = GHMM_RNG_UNIFORM (RNG);      j = (int) floor (smo_number * p); /* ??? */      if (j < 0 || j >= smo_number) {        GHMM_LOG(LCONVERTED, "Error: initial model out of range\n");        goto STOP;      }      cp[i][j] = 1.0;    }  }  /* 2. ghmm_smap_bayes from initial models */  else if (mode == 2) {    for (i = 0; i < sqd->seq_number; i++)      if (ghmm_smap_bayes (smo, cp[i], smo_number, sqd->seq[i], sqd->seq_len[i]) ==          -1) {        str =          ighmm_mprintf (NULL, 0, "Can't determine comp. prob for seq ID %.0f \n",                   sqd->seq_id[i]);        GHMM_LOG(LCONVERTED, str);        m_free (str);           /* goto STOP; */      }  }  /* another possibility: make partition with best model from smap_bayes... */  /* 3. cp = 1 for best model, cp = 0 for other models */  else if (mode == 3) {    ARRAY_CALLOC (result, smo_number);    for (i = 0; i < sqd->seq_number; i++) {      bm = ghmm_smap_bayes (smo, result, smo_number, sqd->seq[i], sqd->seq_len[i]);      if (bm == -1) {        str = ighmm_mprintf (NULL, 0,                       "Can't determine comp. prob for seq ID %.0f \n",                       sqd->seq_id[i]);        GHMM_LOG(LCONVERTED, str);        m_free (str);           /* goto STOP; */      }      cp[i][bm] = 1.0;    }    m_free (result);  }  /* mode == 4 used to be kmeans labels */  /* 5. no start partition == equal cp for each model */  else if (mode == 5) {    for (i = 0; i < sqd->seq_number; i++)      for (j = 0; j < smo_number; j++)        cp[i][j] = 1 / (double) smo_number;  }  else {    printf ("Unknown Init Mode %d \n", mode);    return -1;  }  return 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return -1;#undef CUR_PROC}                               /* smixturehmm_compprob_init *//*============================================================================*//* currently not activated */# if 0int smixturehmm_calc_priors (double **cp, ghmm_cseq * sqd, ghmm_cmodel ** smo,                             int smo_number){#define CUR_PROC "smixturehmm_calc_priors"  int i, k;  double sum;  if (total_train_w == 0) {    GHMM_LOG(LCONVERTED, "total_train_w == 0!\n");    goto STOP;  }  for (k = 0; k < smo_number; k++) {    sum = 0.0;    for (i = 0; i < sqd->seq_number; i++)      sum += cp[i][k] * sqd->seq_w[i];    smo[k]->prior = sum / total_train_w;    printf ("\nPriors[%d] is %.6f\n", k, smo[k]->prior);  }  return 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return -1;#undef CUR_PROC}                               /* smixturehmm_calc_priors */#endif/*============================================================================*//* calculate component probs for all sequences and all components *//* also recalculate total_train_w; if a seq has cp = 0 don't add its weigth to    total_train_w, otherwise errors in calculating model priors occur*/int ghmm_smixturehmm_calc_cp (double **cp, ghmm_cseq * sqd, ghmm_cmodel ** smo,                         int smo_number, double *total_train_w){#define CUR_PROC "ghmm_smixturehmm_calc_cp"  int i;  char *str;  double errorseqs = 0.0;  *total_train_w = 0.0;  for (i = 0; i < sqd->seq_number; i++)    if (ghmm_smap_bayes (smo, cp[i], smo_number, sqd->seq[i], sqd->seq_len[i]) ==        -1) {      /* all cp[i] [ . ] are set to zero; seq. will be ignored for reestimation!!! */      str = ighmm_mprintf (NULL, 0,                     "Warning[%d]: Can't determine comp. prob for seq ID %.0f\n",                     i, sqd->seq_id[i]);      GHMM_LOG(LCONVERTED, str);      m_free (str);      errorseqs++;      if (errorseqs > 0.1 * (double) sqd->seq_number) {        printf ("errorseqs %.1f, max false %.1f\n", errorseqs,                0.1 * (double) sqd->seq_number);        GHMM_LOG(LCONVERTED, "max. no of errors from ghmm_smap_bayes exceeded\n");        goto STOP;      }    }    else      *total_train_w += sqd->seq_w[i];  return 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return -1;#undef CUR_PROC}                               /* ghmm_smixturehmm_calc_cp *//*============================================================================*//* Is currently not used. Use this function in calc_cp and smixturehmm_like   later on (saves half of the ghmm_cmodel_logp() calls).   Danger: Is it neccessary to take seq_w into account? */void ghmm_smixture_calc_logp (double **logp, int **error, ghmm_cseq * sqd,                         ghmm_cmodel ** smo, int smo_number){  int i, k;  for (i = 0; i < sqd->seq_number; i++)    for (k = 0; k < smo_number; k++) {      if (ghmm_cmodel_logp (smo[k], sqd->seq[i], sqd->seq_len[i], &(logp[i][k])) ==          -1)        error[i][k] = 1;      else        error[i][k] = 0;    }}/*============================================================================*//* flag == 1 --> Header for .like-file, else --> other file */void ghmm_smixturehmm_print_header (FILE * file, char *argv[], int flag){  time_t zeit;  int mode = atoi (argv[9]);  time (&zeit);  fprintf (file,           "\n************************************************************************\n");  fprintf (file, "Date: %ssmixturehmm:\n", ctime ((const time_t *) &zeit));  fprintf (file, "Seq. File\t%s\nInit-model File\t%s\nInit-Mode\t", argv[1],           argv[2]);  switch (mode) {  case 1:    fprintf (file, "%d SP_ZUF (random start partition)\n", mode);    break;  case 2:    fprintf (file, "%d SP_VERT (distr. accord smap_bayes)\n", mode);    break;  case 3:    fprintf (file, "%d SP_BEST (best model)\n", mode);    break;  case 4:    fprintf (file, "%d SP_KM (partition from k-means)\n", mode);    break;  case 5:    fprintf (file, "%d NO_SP (no start partition)\n", mode);    break;  default:    fprintf (file, "???\n");    break;  }  fprintf (file, "Train Ratio\t %.4f\n\n", atof (argv[7]));  if (flag == 1)                /* only in .like-File */    fprintf (file,             "smo no.\tCV Iter\t SeqTrain\tSeqTest\tLikeTrain\tLikeTest\tavrgTrain\tavrgTest\tErrorTrain\tErrorTest\tBIC\n");}/*============================================================================*//* calculates for each model the average likelihood per seq. weight:   avg_like[j] = sum_i (cp[i][j] * seq_w[i] * log_p(i | j) ) / sum_i (cp[i][j] * seq_w[i]).   Usefull for identifying models with poor likelihood*/double *ghmm_smixturehmm_avg_like (double **cp, ghmm_cseq * sqd,                              ghmm_cmodel ** smo, int smo_number){#define CUR_PROC "ghmm_smixturehmm_avg_like"  double *avg_like = NULL;  int i, k;  double num = 0.0, denom = 0.0, log_p = 0.0;  ARRAY_CALLOC (avg_like, smo_number);  for (k = 0; k < smo_number; k++) {    num = denom = 0.0;    for (i = 0; i < sqd->seq_number; i++) {      if (ghmm_cmodel_logp (smo[k], sqd->seq[i], sqd->seq_len[i], &log_p) != -1) {        num += cp[i][k] * sqd->seq_w[i] * log_p;        denom += cp[i][k] * sqd->seq_w[i];      }    }    if (denom > 0)      avg_like[k] = num / denom;    else      avg_like[k] = -1;  }                             /* for models k ... */  return avg_like;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return NULL;#undef CUR_PROC}                               /* ghmm_smixturehmm_avg_like */#endif /* GHMM_OBSOLETE */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -