smodel.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 1,896 行 · 第 1/4 页

C
1,896
字号
/*============================================================================*/ghmm_cseq *ghmm_cmodel_generate_sequences (ghmm_cmodel * smo, int seed,                                         int global_len, long seq_number,                                         long label, int Tmax){# define CUR_PROC "ghmm_cmodel_generate_sequences"  /* An end state is characterized by not having an output probabiliy. */  ghmm_cseq *sq = NULL;  int pos, n, i, j, m, reject_os, reject_tmax, badseq, class;  double p, sum;  int len = global_len, up = 0, stillbadseq = 0, reject_os_tmp = 0;  sq = ghmm_cseq_calloc (seq_number);  if (!sq) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  /* A specific length of the sequences isn't given. As a model should have     an end state, the konstant MAX_SEQ_LEN is used. */  if (len <= 0)    len = (int) GHMM_MAX_SEQ_LEN;  /* Maximum length of a sequence not given */  if (Tmax <= 0)    Tmax = (int) GHMM_MAX_SEQ_LEN;  /* rng is also used by ighmm_rand_std_normal      seed == -1: Initialization, has already been done outside the function */  if (seed >= 0) {    ghmm_rng_init ();    if (seed > 0)      GHMM_RNG_SET (RNG, seed);    else                        /* Random initialization! */      ghmm_rng_timeseed (RNG);  }  n = 0;  reject_os = reject_tmax = 0;  while (n < seq_number) {    /* Test: A new seed for each sequence */    /*   ghmm_rng_timeseed(RNG); */    stillbadseq = badseq = 0;    ARRAY_CALLOC (sq->seq[n], len);    /* Get a random initial state i */    p = GHMM_RNG_UNIFORM (RNG);    sum = 0.0;    for (i = 0; i < smo->N; i++) {      sum += smo->s[i].pi;      if (sum >= p)        break;    }    if (i == smo->N) {          /* Can happen by a rounding error in the input */      i--;      while (i > 0 && smo->s[i].pi == 0.0)        i--;    }    /* Get a random initial output       -> get a random m and then respectively a pdf omega. */    p = GHMM_RNG_UNIFORM (RNG);    sum = 0.0;    for (m = 0; m < smo->s[i].M; m++) {      sum += smo->s[i].c[m];      if (sum >= p)        break;    }    if (m == smo->s[i].M)      m--;    /* Get random numbers according to the density function */    sq->seq[n][0] = ghmm_cmodel_get_random_var (smo, i, m);    pos = 1;    /* The first symbol chooses the start class */    if (smo->cos == 1) {      class = 0;    }    else {      /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,n,state);*/      if (!smo->class_change->get_class) {        printf ("ERROR: get_class not initialized\n");        return (NULL);      }      class = smo->class_change->get_class (smo, sq->seq[n], n, 0);      if (class >= smo->cos){        printf("ERROR: get_class returned index %d but model has only %d classes !\n",class,smo->cos);        goto STOP;      }    }    while (pos < len) {      /* Get a new state */      p = GHMM_RNG_UNIFORM (RNG);      sum = 0.0;      for (j = 0; j < smo->s[i].out_states; j++) {        sum += smo->s[i].out_a[class][j];        if (sum >= p)          break;      }      if (j == smo->s[i].out_states) {  /* Can happen by a rounding error */        j--;        while (j > 0 && smo->s[i].out_a[class][j] == 0.0)          j--;      }      if (sum == 0.0) {        if (smo->s[i].out_states > 0) {          /* Repudiate the sequence, if all smo->s[i].out_a[class][.] == 0,             that is, class "class" isn't used in the original data:             go out of the while-loop, n should not be counted. */          /* printf("Zustand %d, class %d, len %d out_states %d \n", i, class,             state, smo->s[i].out_states); */          badseq = 1;          /* break; */          /* Try: If the class is "empty", try the neighbour class;             first, sweep down to zero; if still no success, sweep up to             COS - 1. If still no success --> Repudiate the sequence. */          if (class > 0 && up == 0) {            class--;            continue;          }          else if (class < smo->cos - 1) {            class++;            up = 1;            continue;          }          else {            stillbadseq = 1;            break;          }        }        else {          /* Final state reached, out of while-loop */          break;        }      }      i = smo->s[i].out_id[j];      /* fprintf(stderr, "%d\n", i); */      /*      fprintf(stderr, "%d\n", i); */      /* Get output from state i */      p = GHMM_RNG_UNIFORM (RNG);      sum = 0.0;      for (m = 0; m < smo->s[i].M; m++) {        sum += smo->s[i].c[m];        if (sum >= p)          break;      }      if (m == smo->s[i].M) {        m--;        while (m > 0 && smo->s[i].c[m] == 0.0)          m--;      }      /* Get a random number from the corresponding density function */      sq->seq[n][pos] = ghmm_cmodel_get_random_var (smo, i, m);      /* Decide the class for the next step */      if (smo->cos == 1) {        class = 0;      }      else {        /*printf("2: cos = %d, k = %d, t = %d\n",smo->cos,n,state);*/        if (!smo->class_change->get_class) {          printf ("ERROR: get_class not initialized\n");          return (NULL);        }        class = smo->class_change->get_class (smo, sq->seq[n], n, pos);        printf ("class = %d\n", class);        if (class >= smo->cos){          printf("ERROR: get_class returned index %d but model has only %d classes !\n",class,smo->cos);          goto STOP;        }      }      up = 0;      pos++;    }                           /* while (state < len) */    if (badseq) {      reject_os_tmp++;    }    if (stillbadseq) {      reject_os++;      m_free (sq->seq[n]);      /*      printf("cl %d, s %d, %d\n", class, i, n); */    }    else if (pos > Tmax) {      reject_tmax++;      m_free (sq->seq[n]);    }    else {      if (pos < len)        ARRAY_REALLOC (sq->seq[n], pos);      sq->seq_len[n] = pos;#ifdef GHMM_OBSOLETE      sq->seq_label[n] = label;#endif /* GHMM_OBSOLETE */      /* ighmm_cvector_print(stdout, sq->seq[n], sq->seq_len[n]," "," ",""); */      n++;    }    /*    printf("reject_os %d, reject_tmax %d\n", reject_os, reject_tmax); */    if (reject_os > 10000) {      GHMM_LOG(LCONVERTED, "Reached max. no. of rejections\n");      break;    }    if (!(n % 1000))      printf ("%d Seqs. generated\n", n);  }                             /* n-loop */  if (reject_os > 0)    printf ("%d sequences rejected (os)!\n", reject_os);  if (reject_os_tmp > 0)    printf ("%d sequences changed class\n", reject_os_tmp - reject_os);  if (reject_tmax > 0)    printf ("%d sequences rejected (Tmax, %d)!\n", reject_tmax, Tmax);  /* printf ("End of smodel_generate_sequences.\n"); */  return (sq);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_cseq_free (&sq);  return (NULL);# undef CUR_PROC}                               /* ghmm_cmodel_generate_sequences *//*============================================================================*/int ghmm_cmodel_likelihood (ghmm_cmodel * smo, ghmm_cseq * sqd, double *log_p){# define CUR_PROC "ghmm_cmodel_likelihood"  int res = -1;  double log_p_i;  int matched, i;  matched = 0;  *log_p = 0.0;  for (i = 0; i < sqd->seq_number; i++) {    /* setting sequence number in class_change struct if necessary */    if (smo->cos > 1) {      if (!smo->class_change) {        printf ("cos = %d but class_change not initialized !\n", smo->cos);        goto STOP;      }      smo->class_change->k = i;    }    if (ghmm_cmodel_logp (smo, sqd->seq[i], sqd->seq_len[i], &log_p_i) != -1) {      *log_p += log_p_i * sqd->seq_w[i];      matched++;    }    else {      /* Test: high costs for each unmatched Seq. */      *log_p += GHMM_PENALTY_LOGP * sqd->seq_w[i];      matched++;      ighmm_mes (MES_WIN, "sequence[%d] can't be build.\n", i);    }  }  if (!matched) {    GHMM_LOG(LCONVERTED, "NO sequence can be build.\n");    goto STOP;  }  /* return number of "matched" sequences */  res = matched;  /* resetting sequence number to default value */  if (smo->cos > 1) {    smo->class_change->k = -1;  }STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return (res);# undef CUR_PROC}                               /* ghmm_cmodel_likelihood */int ghmm_cmodel_individual_likelihoods (ghmm_cmodel * smo, ghmm_cseq * sqd,                                   double *log_ps){  int matched, res;  double log_p_i;  int i;  res = -1;  matched = 0;  for (i = 0; i < sqd->seq_number; i++) {    /* setting sequence number in class_change struct if necessary */    if (smo->cos > 1) {      if (!smo->class_change) {        printf ("cos = %d but class_change not initialized !\n", smo->cos);        goto STOP;      }      smo->class_change->k = i;    }    if (ghmm_cmodel_logp (smo, sqd->seq[i], sqd->seq_len[i], &log_p_i) != -1) {      log_ps[i] = log_p_i;      matched++;    }    else {      /* Test: very small log score for sequence cannot be produced. */      log_ps[i] = -DBL_MAX;      /* fprintf(stderr,"sequence[%d] cannot be build.\n", i); */    }  }  res = matched;  if (matched == 0) {    fprintf (stderr, "smodel_likelihood: NO sequence can be build.\n");  }  /* resetting sequence number to default value */  if (smo->cos > 1) {    smo->class_change->k = -1;  }  /* return number of "matched" sequences */  return res;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return -1;}/*============================================================================*//* various print functions                                                    *//*============================================================================*//*============================================================================*/void ghmm_cmodel_Ak_print (FILE * file, ghmm_cmodel * smo, int k, char *tab,                      char *separator, char *ending){  int i, j, out_state;  for (i = 0; i < smo->N; i++) {    out_state = 0;    fprintf (file, "%s", tab);    if (smo->s[i].out_states > 0 && smo->s[i].out_id[out_state] == 0) {      fprintf (file, "%.4f", smo->s[i].out_a[k][out_state]);      out_state++;    }    else      fprintf (file, "0.0   ");    for (j = 1; j < smo->N; j++) {      if (smo->s[i].out_states > out_state &&          smo->s[i].out_id[out_state] == j) {        fprintf (file, "%s %.4f", separator, smo->s[i].out_a[k][out_state]);        out_state++;      }      else        fprintf (file, "%s 0.0   ", separator);    }    fprintf (file, "%s\n", ending);  }}                               /* ghmm_cmodel_Ak_print *//*============================================================================*/void ghmm_cmodel_C_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,                     char *ending){  int i, j;  for (i = 0; i < smo->N; i++) {    fprintf (file, "%s", tab);    fprintf (file, "%.4f", smo->s[i].c[0]);    for (j = 1; j < smo->s[i].M; j++)      fprintf (file, "%s %.4f", separator, smo->s[i].c[j]);    fprintf (file, "%s\n", ending);  }}                               /* ghmm_cmodel_C_print *//*============================================================================*/void ghmm_cmodel_Mue_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,                       char *ending){  int i, j;  for (i = 0; i < smo->N; i++) {    fprintf (file, "%s", tab);    fprintf (file, "%.4f", smo->s[i].mue[0]);    for (j = 1; j < smo->s[i].M; j++)      fprintf (file, "%s %.4f", separator, smo->s[i].mue[j]);    fprintf (file, "%s\n", ending);  }}                               /* ghmm_cmodel_Mue_print *//*============================================================================*/void ghmm_cmodel_U_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,                     char *ending){  /* attention: choose precision big enough to allow printing of       EPS_U in const.h */  int i, j;  for (i = 0; i < smo->N; i++) {    fprintf (file, "%s", tab);    fprintf (file, "%.4f", smo->s[i].u[0]);    for (j = 1; j < smo->s[i].M; j++)      fprintf (file, "%s %.4f", separator, smo->s[i].u[j]);    fprintf (file, "%s\n", ending);  }}                               /* ghmm_cmodel_U_print *//*============================================================================*/void ghmm_cmodel_Pi_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,                      char *ending){  int i;  fprintf (file, "%s%.4f", tab, smo->s[0].pi);  for (i = 1; i < smo->N; i++)    fprintf (file, "%s %.4f", separator, smo->s[i].pi);  fprintf (file, "%s\n", ending);}                               /* ghmm_cmodel_Pi_print *//*============================================================================*/void ghmm_cmodel_fix_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,                       char *ending){  int i;  fprintf (file, "%s%d", tab, smo->s[0].fix);  for (i = 1; i < smo->N; i++)    fprintf (file, "%s %d", separator, smo->s[i].fix);  fprintf (file, "%s\n", ending);}/*============================================================================*/void ghmm_cmodel_print (FILE * file, ghmm_cmodel * smo){/* old function - No support to heterogeneous densities */  int k;  fprintf (file,           "SHMM = {\n\tM = %d;\n\tN = %d;\n\tdensity = %d;\n\tcos = %d;\n",           smo->M, smo->N, (int) smo->s[0].density[0], smo->cos);  /* smo files support only models with a single density */  fprintf (file, "\tprior = %.5f;\n", smo->prior);  fprintf (file, "\tPi = vector {\n");  ghmm_cmodel_Pi_print (file, smo, "\t", ",", ";");  fprintf (file, "\t};\n");  fprintf (file, "\tfix_state = vector {\n");  ghmm_cmodel_fix_print (file, smo, "\t", ",", ";");  fprintf (file, "\t};\n");  for (k = 0; k < smo->cos; k++) {    fprintf (file, "\tAk_%d = matrix {\n", k);    ghmm_cmodel_Ak_print (file, smo, k, "\t", ",", ";");    fprintf (file, "\t};\n");  }  fprintf (file, "\tC = matrix {\n");  ghmm_cmodel_C_print (file, smo, "\t", ",", ";");  fprintf (file, "\t};\n\tMue = matrix {\n");  ghmm_cmodel_Mue_print (file, smo, "\t", ",", ";");  fprintf (file, "\t};\n\tU = matrix {\n");  ghmm_cmodel_U_print (file, smo, "\t", ",", ";");  fprintf (file, "\t};\n");  fprintf (file, "};\n\n");}                               /* ghmm_cmodel_print *//*============================================================================*//* needed for hmm_input: only one A (=Ak_1 = Ak_2...) is written */void ghmm_cmodel_print_oneA (FILE * file, ghmm_cmodel * smo){/* old function - No support to heterogeneous densities */  fprintf (file,           "SHMM = {\n\tM = %d;\n\tN = %d;\n\tdensity = %d;\n\tcos = %d;\n",

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?