sequence.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 1,678 行 · 第 1/4 页

C
1,678
字号
        sqd->seq[sqd->seq_number] =          scanner_get_double_earray (s, sqd->seq_len + sqd->seq_number);        if (sqd->seq_len[sqd->seq_number] > GHMM_MAX_SEQ_LEN) {          ighmm_scanner_error (s, "sequence too long");          goto STOP;        }        ighmm_scanner_consume (s, ';');        if (s->err)          goto STOP;        sqd->total_w += sqd->seq_w[sqd->seq_number];        sqd->seq_number++;      }                         /* while( !s->eof...) */      if ((sqd->seq_number == 0) || (sqd->seq_number > GHMM_MAX_SEQ_NUMBER)) {        str = ighmm_mprintf (NULL, 0, "Number of sequences %ld exceeds possible range",                             sqd->seq_number);        GHMM_LOG(LCONVERTED, str);        m_free (str);        goto STOP;      }      ighmm_scanner_consume (s, '}');      if (s->err)        goto STOP;    }    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }                             /* while(!s->err && !s->eof && s->c - '}') */  ighmm_scanner_consume (s, '}');  if (s->err)    goto STOP;  return (sqd);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_cseq_free (&sqd);  return (NULL);#undef CUR_PROC}                               /* ghmm_cseq_read_alloc */#endif /* GHMM_OBSOLETE *//*============================================================================*//* Truncate Sequences in a given sqd_field; useful for Testing;   returns truncated sqd_field;    trunc_ratio 0: no truncation   trunc_ratio 1: truncation (mean truncation faktor = 0.5)   trunc_ratio -1: 100 % truncation*/ghmm_cseq **ghmm_cseq_truncate (ghmm_cseq ** sqd_in, int sqd_fields,                                    double trunc_ratio, int seed){#define CUR_PROC "ghmm_cseq_truncate"  ghmm_cseq **sq;  int i, j, trunc_len;  /* Hack, use -1 for complete truncation */  if ((0 > trunc_ratio || 1 < trunc_ratio) && trunc_ratio != -1) {    GHMM_LOG(LCONVERTED, "Error: trunc_ratio not valid\n");    goto STOP;  }  ARRAY_CALLOC (sq, sqd_fields);  ghmm_rng_init ();  GHMM_RNG_SET (RNG, seed);  for (i = 0; i < sqd_fields; i++) {    sq[i] = ghmm_cseq_calloc (sqd_in[i]->seq_number);    sq[i]->total_w = sqd_in[i]->total_w;    for (j = 0; j < sqd_in[i]->seq_number; j++) {      ARRAY_CALLOC (sq[i]->seq[j], sqd_in[i]->seq_len[j]);      /* length of truncated seq. */      if (trunc_ratio == -1)        trunc_len = 0;      else        trunc_len = (int) ceil ((sqd_in[i]->seq_len[j] *                                 (1 - trunc_ratio * GHMM_RNG_UNIFORM (RNG))));      ghmm_cseq_copy (sq[i]->seq[j], sqd_in[i]->seq[j], trunc_len);      ARRAY_REALLOC (sq[i]->seq[j], trunc_len);      sq[i]->seq_len[j] = trunc_len;#ifdef GHMM_OBSOLETE      sq[i]->seq_label[j] = sqd_in[i]->seq_label[j];#endif /* GHMM_OBSOLETE */      sq[i]->seq_id[j] = sqd_in[i]->seq_id[j];      sq[i]->seq_w[j] = sqd_in[i]->seq_w[j];    }  }                             /* for all sqd_fields */  return sq;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return NULL;#undef CUR_PROC}/*============================================================================*/ghmm_cseq *ghmm_cseq_calloc (long seq_number){#define CUR_PROC "ghmm_cseq_calloc"  int i;  ghmm_cseq *sqd = NULL;  if (seq_number > GHMM_MAX_SEQ_NUMBER) {    GHMM_LOG_PRINTF(LERROR, LOC, "Number of sequences %ld exceeds possible range %d",                    seq_number, GHMM_MAX_SEQ_NUMBER);    goto STOP;  }  ARRAY_CALLOC (sqd, 1);  ARRAY_CALLOC (sqd->seq, seq_number);  ARRAY_CALLOC (sqd->seq_len, seq_number);#ifdef GHMM_OBSOLETE  ARRAY_CALLOC (sqd->seq_label, seq_number);#endif /* GHMM_OBSOLETE */  ARRAY_CALLOC (sqd->seq_id, seq_number);  ARRAY_CALLOC (sqd->seq_w, seq_number);  sqd->seq_number = seq_number;  sqd->capacity = seq_number;  sqd->total_w = 0.0;  for (i = 0; i < seq_number; i++) {#ifdef GHMM_OBSOLETE    sqd->seq_label[i] = -1;#endif /* GHMM_OBSOLETE */    sqd->seq_id[i] = -1.0;    sqd->seq_w[i] = 1;  }  return sqd;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_cseq_free (&sqd);  return NULL;#undef CUR_PROC}                               /* ghmm_cseq_calloc *//*============================================================================*/ghmm_dseq *ghmm_dseq_calloc (long seq_number){#define CUR_PROC "ghmm_dseq_calloc"  int i;  ghmm_dseq *sq = NULL;  if (seq_number > GHMM_MAX_SEQ_NUMBER) {    GHMM_LOG_PRINTF(LERROR, LOC, "Number of sequences %ld exceeds possible range %d",                    seq_number, GHMM_MAX_SEQ_NUMBER);    goto STOP;  }  ARRAY_CALLOC(sq, 1);  ARRAY_CALLOC(sq->seq, seq_number);  /*ARRAY_CALLOC (sq->states, seq_number);*/  ARRAY_CALLOC(sq->seq_len, seq_number);#ifdef GHMM_OBSOLETE  ARRAY_CALLOC(sq->seq_label, seq_number);#endif /* GHMM_OBSOLETE */  ARRAY_CALLOC(sq->seq_id, seq_number);  ARRAY_CALLOC(sq->seq_w, seq_number);  sq->seq_number = seq_number;  sq->capacity = seq_number;  for (i=0; i < seq_number; i++) {#ifdef GHMM_OBSOLETE    sq->seq_label[i] = -1;#endif /* GHMM_OBSOLETE */    sq->seq_id[i] = -1.0;    sq->seq_w[i] = 1;  }  sq->state_labels = NULL;  sq->state_labels_len = NULL;  return sq;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dseq_free (&sq);  return NULL;#undef CUR_PROC}                               /* ghmm_dseq_calloc *//*============================================================================*/static int ghmm_dseq_realloc(ghmm_dseq *sq, int seq_number) {#define CUR_PROC "ghmm_dseq_realloc"  int i;  if (seq_number > GHMM_MAX_SEQ_NUMBER) {      GHMM_LOG_PRINTF(LERROR, LOC, "Number of sequences %ld exceeds possible range", seq_number);      goto STOP;  }  ARRAY_REALLOC(sq->seq, seq_number);  if (sq->flags & kHasLabels && sq->states)      ARRAY_REALLOC(sq->states, seq_number);  ARRAY_REALLOC(sq->seq_len, seq_number);#ifdef GHMM_OBSOLETE  ARRAY_REALLOC(sq->seq_label, seq_number);#endif /* GHMM_OBSOLETE */  ARRAY_REALLOC(sq->seq_id, seq_number);  ARRAY_REALLOC(sq->seq_w, seq_number);  sq->capacity = seq_number;  return 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return -1;#undef CUR_PROC}                               /* ghmm_dseq_realloc *//*============================================================================*/int ghmm_dseq_calloc_state_labels (ghmm_dseq *sq){#define CUR_PROC "ghmm_dseq_calloc_state_labels"  ARRAY_CALLOC(sq->state_labels, sq->seq_number);  ARRAY_CALLOC(sq->state_labels_len, sq->seq_number);  return 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return -1;#undef CUR_PROC}                               /* ghmm_dseq_calloc_state_labels *//*============================================================================*/ghmm_cseq *ghmm_cseq_get_singlesequence(ghmm_cseq *sq, int index){  ghmm_cseq *res;  res = ghmm_cseq_calloc(1);    res->seq[0] = sq->seq[index];  res->seq_len[0] = sq->seq_len[index];#ifdef GHMM_OBSOLETE  res->seq_label[0] = sq->seq_label[index];#endif /* GHMM_OBSOLETE */  res->seq_id[0] = sq->seq_id[index];  res->seq_w[0] = sq->seq_w[index];  res->total_w = res->seq_w[0];  return res;  }ghmm_dseq *ghmm_dseq_get_singlesequence(ghmm_dseq *sq, int index){#define CUR_PROC "ghmm_dseq_get_singlesequence"  ghmm_dseq *res;  res = ghmm_dseq_calloc(1);  if (!res) goto STOP;    res->seq[0] = sq->seq[index];  res->seq_len[0] = sq->seq_len[index];#ifdef GHMM_OBSOLETE  res->seq_label[0] = sq->seq_label[index];#endif /* GHMM_OBSOLETE */  res->seq_id[0] = sq->seq_id[index];  res->seq_w[0] = sq->seq_w[index];  res->total_w = res->seq_w[0];  if (sq->state_labels){      ARRAY_CALLOC (res->state_labels, 1);      ARRAY_CALLOC (res->state_labels_len, 1);      res->state_labels[0] = sq->state_labels[index];      res->state_labels_len[0] = sq->state_labels_len[index];  }    return res;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return NULL;#undef CUR_PROC}/*XXX TEST: frees everything but the seq field */int ghmm_dseq_subseq_free (ghmm_dseq *sq){# define CUR_PROC "ghmm_dseq_subseq_free"  /*int i,j;*/  mes_check_ptr (sq, return (-1));  m_free (sq->seq_len);#ifdef GHMM_OBSOLETE  m_free (sq->seq_label);#endif /* GHMM_OBSOLETE */  m_free (sq->seq_id);  m_free (sq->seq_w);  if (sq->states) {    ighmm_dmatrix_free (&(sq->states), sq->seq_number);    m_free(sq->states_len);  }  if (sq->state_labels) {    ighmm_dmatrix_free (&(sq->state_labels), sq->seq_number);    m_free (sq->state_labels_len);  }  m_free (sq->seq);  m_free (sq);  return 0;# undef CUR_PROC}                               /* ghmm_dseq_subseq_free */int ghmm_cseq_subseq_free (ghmm_cseq * sqd){# define CUR_PROC "ghmm_cseq_subseq_free"  mes_check_ptr (sqd, return (-1));  /* ghmm_cseq_print(stdout,*sqd,0);*/  m_free (sqd->seq);  m_free (sqd->seq_len);#ifdef GHMM_OBSOLETE  m_free (sqd->seq_label);#endif /* GHMM_OBSOLETE */  m_free (sqd->seq_id);  m_free (sqd->seq_w);  m_free (sqd);  return 0;# undef CUR_PROC}   /* ghmm_cseq_subseq_free *//*============================================================================*/ghmm_dseq *ghmm_dseq_lexWords (int n, int M){# define CUR_PROC "ghmm_dseq_lexWords"  ghmm_dseq *sq = NULL;  long seq_number, cnt = 0;  int j = n - 1;  int i;  int *seq;  if ((n < 0) || (M <= 0)) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  seq_number = (long) pow ((double) M, (double) n);  sq = ghmm_dseq_calloc (seq_number);  if (!sq) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  for (i = 0; i < seq_number; i++) {    ARRAY_CALLOC (sq->seq[i], n);    sq->seq_len[i] = n;    sq->seq_id[i] = i;  }  ARRAY_CALLOC (seq, n);  while (!(j < 0)) {    ghmm_dseq_copy (sq->seq[cnt], seq, n);    j = n - 1;    while (seq[j] == M - 1) {      seq[j] = 0;      j--;    }    seq[j]++;    cnt++;  }  m_free (seq);  return sq;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dseq_free (&sq);  return NULL;# undef CUR_PROC}                               /* sequence_lewWords *//*============================================================================*/int ghmm_dseq_max_symbol (ghmm_dseq * sq){  long i, j;  int max_symb = -1;  for (i = 0; i < sq->seq_number; i++)    for (j = 0; j < sq->seq_len[i]; j++) {      if (sq->seq[i][j] > max_symb)        max_symb = sq->seq[i][j];    }  return max_symb;}                               /* ghmm_dseq_max_symbol *//*============================================================================*/void ghmm_dseq_copy (int *target, int *source, int len){  int i;  for (i = 0; i < len; i++)    target[i] = source[i];}                               /* ghmm_dseq_copy *//*============================================================================*/void ghmm_cseq_copy (double *target, double *source, int len){  int i;  for (i = 0; i < len; i++)    target[i] = source[i];}                               /* ghmm_dseq_copy *//*============================================================================*/int ghmm_dseq_add (ghmm_dseq * target, ghmm_dseq * source){#define CUR_PROC "ghmm_dseq_add"  int res = -1;  int **old_seq = target->seq;  /*int **old_seq_st    = target->states;*/  int *old_seq_len = target->seq_len;#ifdef GHMM_OBSOLETE  long *old_seq_label = target->seq_label;#endif /* GHMM_OBSOLETE */  double *old_seq_id = target->seq_id;  double *old_seq_w = target->seq_w;  long old_seq_number = target->seq_number;  long i;  target->seq_number = old_seq_number + source->seq_number;  target->total_w += source->total_w;  ARRAY_CALLOC (target->seq, target->seq_number);  /*ARRAY_CALLOC (target->states, target->seq_number);*/  ARRAY_CALLOC (target->seq_len, target->seq_number);#ifdef GHMM_OBSOLETE  ARRAY_CALLOC (target->seq_label, target->seq_number);#endif /* GHMM_OBSOLETE */  ARRAY_CALLOC (target->seq_id, target->seq_number);  ARRAY_CALLOC (target->seq_w, target->seq_number);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?