model.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 2,232 行 · 第 1/5 页

C
2,232
字号
      if (fix_read) {        ighmm_scanner_error (s, "identifier fix_state twice");        goto STOP;      }      ighmm_scanner_get_name (s);      if (!strcmp (s->id, "vector")) {        ighmm_scanner_consume (s, '{');        if (s->err)          goto STOP;        fix_vector = scanner_get_int_array (s, &len);        if (len != mo->N) {          ighmm_scanner_error (s, "wrong number of elements in fix_state");          goto STOP;        }        ighmm_scanner_consume (s, ';');        if (s->err)          goto STOP;        ighmm_scanner_consume (s, '}');        if (s->err)          goto STOP;        fix_read = 1;      }      else {        ighmm_scanner_error (s, "unknown identifier");        goto STOP;      }    }    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }                             /* while(..s->c-'}') */  ighmm_scanner_consume (s, '}');  if (s->err)    goto STOP;  /* No prior read --> give it the value -1 */  if (prior_read == 0)    mo->prior = -1;  /* Allocate memory for the model */  for (i = 0; i < mo->N; i++) {    mo->s[i].out_states = ighmm_cmatrix_notzero_columns (a_matrix, i, mo->N);    mo->s[i].in_states = ighmm_cmatrix_notzero_rows (a_matrix, i, mo->N);    if (model_state_alloc (mo->s + i, mo->M, mo->s[i].in_states,                           mo->s[i].out_states)) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }    /* Assign the parameters to the model */    if (!a_matrix) {      fprintf (stderr, "no A matrix specified in file!\n");      exit (1);    }    if (!b_matrix) {      fprintf (stderr, "no B matrix specified in file!\n");      exit (1);    }    if (!fix_vector) {      fprintf (stderr, "no fix_state vector specified in file!\n");      exit (1);    }    if (!pi_vector) {      fprintf (stderr, "no Pi vector specified in file!\n");      exit (1);    }    if (model_copy_vectors (mo, i, a_matrix, b_matrix, pi_vector, fix_vector)) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }  }  ighmm_cmatrix_free (&a_matrix, mo->N);  ighmm_cmatrix_free (&b_matrix, mo->N);  m_free (pi_vector);  return (mo);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ighmm_cmatrix_free (&a_matrix, mo->N);  ighmm_cmatrix_free (&b_matrix, mo->N);  m_free (pi_vector);  ghmm_dmodel_free(&mo);  return NULL;#undef CUR_PROC}                               /* ghmm_dmodel_direct_read */#endif /* GHMM_OBSOLETE *//*============================================================================*//* Produces models from given sequences */ghmm_dmodel **ghmm_dmodel_from_sequence (ghmm_dseq * sq, long *mo_number){#define CUR_PROC "ghmm_dmodel_from_sequence"  long i;  int max_symb;  ghmm_dmodel **mo = NULL;  ARRAY_CALLOC (mo, sq->seq_number);  max_symb = ghmm_dseq_max_symbol (sq);  for (i = 0; i < sq->seq_number; i++)    mo[i] = ghmm_dmodel_generate_from_sequence (sq->seq[i], sq->seq_len[i],                                          max_symb + 1);  *mo_number = sq->seq_number;  return mo;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  for (i = 0; i < *mo_number; i++)    ghmm_dmodel_free(&(mo[i]));  return NULL;#undef CUR_PROC}                               /* ghmm_dmodel_from_sequence */#ifdef GHMM_OBSOLETE/*============================================================================*//* Produces models form given sequences */ghmm_dmodel **ghmm_dmodel_from_sequence_ascii (scanner_t * s, long *mo_number){#define CUR_PROC "ghmm_dmodel_from_sequence_ascii"  long i;  int max_symb;  ghmm_dmodel **mo = NULL;  ghmm_dseq *sq = NULL;  ighmm_scanner_consume (s, '{');  if (s->err)    goto STOP;  while (!s->err && !s->eof && s->c - '}') {    ighmm_scanner_get_name (s);    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    /* Reads sequences on normal format */    if (!strcmp (s->id, "SEQ")) {      sq = ghmm_dseq_read_alloc (s);      if (!sq) {        GHMM_LOG_QUEUED(LCONVERTED);        goto STOP;      }    }    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }                             /* while(..s->c-'}') */  ighmm_scanner_consume (s, '}');  if (s->err)    goto STOP;  ARRAY_CALLOC (mo, sq->seq_number);  /* The biggest symbol that occurs */  max_symb = ghmm_dseq_max_symbol (sq);  for (i = 0; i < sq->seq_number; i++)    mo[i] = ghmm_dmodel_generate_from_sequence (sq->seq[i], sq->seq_len[i],                                          max_symb + 1);  *mo_number = sq->seq_number;  ghmm_dseq_free (&sq);  return mo;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dseq_free (&sq);  for (i = 0; i < *mo_number; i++)    ghmm_dmodel_free(&(mo[i]));  return NULL;#undef CUR_PROC}                               /* ghmm_dmodel_from_sequence_ascii */#endif /* GHMM_OBSOLETE *//*============================================================================*/int ghmm_dmodel_free(ghmm_dmodel ** mo) {#define CUR_PROC "ghmm_dmodel_free"  int i;  mes_check_ptr (mo, return (-1));  mes_check_ptr (*mo, return (-1));  for (i=0; i < (*mo)->N && (*mo)->s; i++)    ghmm_dstate_clean(&(*mo)->s[i]);  if ((*mo)->s)    m_free((*mo)->s);  if ((*mo)->name)    m_free((*mo)->name);  if ((*mo)->model_type & GHMM_kSilentStates) {    if ((*mo)->topo_order)      m_free((*mo)->topo_order);    m_free((*mo)->silent);  }  if ((*mo)->model_type & GHMM_kTiedEmissions)    m_free((*mo)->tied_to);  if ((*mo)->pow_lookup)    m_free((*mo)->pow_lookup);  if ((*mo)->model_type & GHMM_kBackgroundDistributions)    m_free((*mo)->background_id);  if ((*mo)->model_type & GHMM_kHigherOrderEmissions)    m_free((*mo)->order);  if ((*mo)->model_type & GHMM_kLabeledStates)    m_free((*mo)->label);   m_free(*mo);  return (0);#undef CUR_PROC}  /* ghmm_dmodel_free */int ghmm_dbackground_free (ghmm_dbackground * bg){#define CUR_PROC "ghmm_dbackground_free"  if (bg->order)    m_free(bg->order);  if (bg->b)    ighmm_cmatrix_free(&(bg->b), bg->n);  m_free (bg);  return (0);#undef CUR_PROC}/*============================================================================*/ghmm_dmodel *ghmm_dmodel_copy (const ghmm_dmodel * mo){# define CUR_PROC "ghmm_dmodel_copy"  int i, j, nachf, vorg, m, size;  ghmm_dmodel *m2 = NULL;  ARRAY_CALLOC (m2, 1);  ARRAY_CALLOC (m2->s, mo->N);  if (mo->model_type & GHMM_kSilentStates)    ARRAY_CALLOC (m2->silent, mo->N);  if (mo->model_type & GHMM_kTiedEmissions)    ARRAY_CALLOC (m2->tied_to, mo->N);  if (mo->model_type & GHMM_kBackgroundDistributions) {    ARRAY_CALLOC (m2->background_id, mo->N);    m2->bp = mo->bp;  }  if (mo->model_type & GHMM_kHigherOrderEmissions)    ARRAY_CALLOC (m2->order, mo->N);  if (mo->model_type & GHMM_kLabeledStates)    ARRAY_CALLOC (m2->label, mo->N);  ARRAY_MALLOC(m2->pow_lookup, mo->maxorder+2);    for (i = 0; i < mo->N; i++) {    if (mo->model_type & GHMM_kHigherOrderEmissions)      size = ghmm_ipow(mo, mo->M, mo->order[i]+1);    else      size = mo->M;    nachf = mo->s[i].out_states;    vorg = mo->s[i].in_states;        ARRAY_CALLOC (m2->s[i].out_id, nachf);    ARRAY_CALLOC (m2->s[i].out_a, nachf);    ARRAY_CALLOC (m2->s[i].in_id, vorg);    ARRAY_CALLOC (m2->s[i].in_a, vorg);    ARRAY_CALLOC (m2->s[i].b, size);    /* copy the values */    for (j = 0; j < nachf; j++) {      m2->s[i].out_a[j] = mo->s[i].out_a[j];      m2->s[i].out_id[j] = mo->s[i].out_id[j];    }    for (j = 0; j < vorg; j++) {      m2->s[i].in_a[j] = mo->s[i].in_a[j];      m2->s[i].in_id[j] = mo->s[i].in_id[j];    }    /* copy all b values for higher order states */    for (m = 0; m < size; m++)      m2->s[i].b[m] = mo->s[i].b[m];    m2->s[i].pi = mo->s[i].pi;    m2->s[i].fix = mo->s[i].fix;    if (mo->model_type & GHMM_kSilentStates)      m2->silent[i] = mo->silent[i];    if (mo->model_type & GHMM_kTiedEmissions)      m2->tied_to[i] = mo->tied_to[i];    if (mo->model_type & GHMM_kLabeledStates)      m2->label[i] = mo->label[i];    if (mo->model_type & GHMM_kHigherOrderEmissions)      m2->order[i] = mo->order[i];    if (mo->model_type & GHMM_kBackgroundDistributions)      m2->background_id[i] = mo->background_id[i];    m2->s[i].out_states = nachf;    m2->s[i].in_states = vorg;  }  m2->N = mo->N;  m2->M = mo->M;  m2->prior = mo->prior;  if (mo->model_type & GHMM_kHigherOrderEmissions) {    m2->maxorder = mo->maxorder;    for (i=0; i < mo->maxorder+2; i++)      m2->pow_lookup[i] = mo->pow_lookup[i];  }    m2->model_type = mo->model_type;  /* not necessary but the history is at least initialised */  m2->emission_history = mo->emission_history;  return (m2);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dmodel_free (&m2);  return (NULL);# undef CUR_PROC}                               /* ghmm_dmodel_copy *//*============================================================================*/int ghmm_dmodel_check(const ghmm_dmodel * mo) {# define CUR_PROC "ghmm_dmodel_check"  int res = -1;  double sum;  int i, j, imag=0;  char *str;  /* The sum of the Pi[i]'s is 1 */  sum = 0.0;  for (i = 0; i < mo->N; i++)    sum += mo->s[i].pi;  if (fabs(sum-1.0) >= GHMM_EPS_PREC) {    GHMM_LOG(LERROR, "sum Pi[i] != 1.0\n");    goto STOP;  }  /* check each state */  for (i=0; i<mo->N; i++) {    sum = 0.0;    if (mo->s[i].out_states == 0) {      str = ighmm_mprintf(NULL, 0, "out_states = 0 (state %d -> final state!)", i);      GHMM_LOG(LDEBUG, str);      m_free(str)    }    /* Sum the a[i][j]'s : normalized out transitions */    for (j=0; j < mo->s[i].out_states; j++)      sum += mo->s[i].out_a[j];    if (j>0 && fabs(sum-1.0) >= GHMM_EPS_PREC) {      str = ighmm_mprintf(NULL, 0, "sum of s[%d].out_a[*] (%g) not equal 1.0", i, sum);      GHMM_LOG(LWARN, str);      m_free(str);      goto STOP;    }    /* Sum the a[i][j]'s : normalized in transitions */    sum = mo->s[i].pi;    for (j=0; j<mo->s[i].in_states; j++)      sum += mo->s[i].in_a[j];    if (fabs(sum) <= GHMM_EPS_PREC) {      imag = 1;      str = ighmm_mprintf(NULL, 0, "state %d can't be reached", i);      GHMM_LOG(LINFO, str);      m_free(str);    }    /* Sum the b[j]'s: normalized emission probs */    sum = 0.0;    for (j=0; j<mo->M; j++)      sum += mo->s[i].b[j];    if (imag) {      /* not reachable states */      if ((fabs(sum + mo->M ) >= GHMM_EPS_PREC)) {        str = ighmm_mprintf(NULL, 0, "state %d can't be reached but is not set"                            " as non-reachale state", i);        GHMM_LOG(LWARN, str);        m_free(str);        goto STOP;      }    } else if ((mo->model_type & GHMM_kSilentStates) && mo->silent[i]) {      /* silent states */      if (sum != 0.0) {        str = ighmm_mprintf(NULL, 0, "state %d is silent but has a non-zero"                            " emission probability", i);        GHMM_LOG(LWARN, str);        m_free(str);        goto STOP;      }    }     else {      /* normal states */      if (fabs(sum-1.0) >= GHMM_EPS_PREC) {        str = ighmm_mprintf(NULL, 0, "sum s[%d].b[*] = %f != 1.0", i, sum);        GHMM_LOG(LCONVERTED, str);        m_free(str);        goto STOP;      }    }  }  res = 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return (res);# undef CUR_PROC}                               /* ghmm_dmodel_check *//*============================================================================*/int ghmm_dmodel_check_compatibility (ghmm_dmodel ** mo, int model_number){#define CUR_PROC "ghmm_dmodel_check_compatibility"  int i;  for (i = 1; i < model_number; i++)    if (-1 == ghmm_dmodel_check_compatibel_models (mo[0], mo[i]))      return -1;  return 0;#undef CUR_PROC}/*============================================================================*/int ghmm_dmodel_check_compatibel_models (const ghmm_dmodel * mo, const ghmm_dmodel * m2){#define CUR_PROC "ghmm_dmodel_check_compatibel_models"  int i, j;  char *str;  if (mo->N != m2->N) {    str = ighmm_mprintf(NULL, 0, "ERROR: different number of states (%d != %d)\n",                   mo->N, m2->N);    goto STOP;  }  if (mo->M != m2->M) {    str = ighmm_mprintf(NULL, 0, "ERROR: different number of possible outputs (%d != %d)\n",                   mo->M, m2->M);    goto STOP;  }  for (i=0; i<mo->N; ++i) {    if (mo->s[i].out_states != m2->s[i].out_states) {      str = ighmm_mprintf(NULL, 0, "ERROR: different number of outstates (%d != %d) in state %d.\n",                   mo->s[i].out_states, m2->s[i].out_states, i);      goto STOP;    }    for (j=0; j<mo->s[i].out_states; ++j) {      if (mo->s[i].out_id[j] != m2->s[i].out_id[j]) {	str = ighmm_mprintf(NULL, 0, "ERROR: different out_ids (%d != %d) in entry %d of state %d.\n",		      mo->s[i].out_id[j], m2->s[i].out_id[j], j, i);	goto STOP;      }    }  }  return 0;STOP:  GHMM_LOG(LCONVERTED, str);  m_free (str);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?