sequence.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 1,678 行 · 第 1/4 页
C
1,678 行
sqd->seq[sqd->seq_number] = scanner_get_double_earray (s, sqd->seq_len + sqd->seq_number); if (sqd->seq_len[sqd->seq_number] > GHMM_MAX_SEQ_LEN) { ighmm_scanner_error (s, "sequence too long"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; sqd->total_w += sqd->seq_w[sqd->seq_number]; sqd->seq_number++; } /* while( !s->eof...) */ if ((sqd->seq_number == 0) || (sqd->seq_number > GHMM_MAX_SEQ_NUMBER)) { str = ighmm_mprintf (NULL, 0, "Number of sequences %ld exceeds possible range", sqd->seq_number); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } /* while(!s->err && !s->eof && s->c - '}') */ ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; return (sqd);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_cseq_free (&sqd); return (NULL);#undef CUR_PROC} /* ghmm_cseq_read_alloc */#endif /* GHMM_OBSOLETE *//*============================================================================*//* Truncate Sequences in a given sqd_field; useful for Testing; returns truncated sqd_field; trunc_ratio 0: no truncation trunc_ratio 1: truncation (mean truncation faktor = 0.5) trunc_ratio -1: 100 % truncation*/ghmm_cseq **ghmm_cseq_truncate (ghmm_cseq ** sqd_in, int sqd_fields, double trunc_ratio, int seed){#define CUR_PROC "ghmm_cseq_truncate" ghmm_cseq **sq; int i, j, trunc_len; /* Hack, use -1 for complete truncation */ if ((0 > trunc_ratio || 1 < trunc_ratio) && trunc_ratio != -1) { GHMM_LOG(LCONVERTED, "Error: trunc_ratio not valid\n"); goto STOP; } ARRAY_CALLOC (sq, sqd_fields); ghmm_rng_init (); GHMM_RNG_SET (RNG, seed); for (i = 0; i < sqd_fields; i++) { sq[i] = ghmm_cseq_calloc (sqd_in[i]->seq_number); sq[i]->total_w = sqd_in[i]->total_w; for (j = 0; j < sqd_in[i]->seq_number; j++) { ARRAY_CALLOC (sq[i]->seq[j], sqd_in[i]->seq_len[j]); /* length of truncated seq. */ if (trunc_ratio == -1) trunc_len = 0; else trunc_len = (int) ceil ((sqd_in[i]->seq_len[j] * (1 - trunc_ratio * GHMM_RNG_UNIFORM (RNG)))); ghmm_cseq_copy (sq[i]->seq[j], sqd_in[i]->seq[j], trunc_len); ARRAY_REALLOC (sq[i]->seq[j], trunc_len); sq[i]->seq_len[j] = trunc_len;#ifdef GHMM_OBSOLETE sq[i]->seq_label[j] = sqd_in[i]->seq_label[j];#endif /* GHMM_OBSOLETE */ sq[i]->seq_id[j] = sqd_in[i]->seq_id[j]; sq[i]->seq_w[j] = sqd_in[i]->seq_w[j]; } } /* for all sqd_fields */ return sq;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return NULL;#undef CUR_PROC}/*============================================================================*/ghmm_cseq *ghmm_cseq_calloc (long seq_number){#define CUR_PROC "ghmm_cseq_calloc" int i; ghmm_cseq *sqd = NULL; if (seq_number > GHMM_MAX_SEQ_NUMBER) { GHMM_LOG_PRINTF(LERROR, LOC, "Number of sequences %ld exceeds possible range %d", seq_number, GHMM_MAX_SEQ_NUMBER); goto STOP; } ARRAY_CALLOC (sqd, 1); ARRAY_CALLOC (sqd->seq, seq_number); ARRAY_CALLOC (sqd->seq_len, seq_number);#ifdef GHMM_OBSOLETE ARRAY_CALLOC (sqd->seq_label, seq_number);#endif /* GHMM_OBSOLETE */ ARRAY_CALLOC (sqd->seq_id, seq_number); ARRAY_CALLOC (sqd->seq_w, seq_number); sqd->seq_number = seq_number; sqd->capacity = seq_number; sqd->total_w = 0.0; for (i = 0; i < seq_number; i++) {#ifdef GHMM_OBSOLETE sqd->seq_label[i] = -1;#endif /* GHMM_OBSOLETE */ sqd->seq_id[i] = -1.0; sqd->seq_w[i] = 1; } return sqd;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_cseq_free (&sqd); return NULL;#undef CUR_PROC} /* ghmm_cseq_calloc *//*============================================================================*/ghmm_dseq *ghmm_dseq_calloc (long seq_number){#define CUR_PROC "ghmm_dseq_calloc" int i; ghmm_dseq *sq = NULL; if (seq_number > GHMM_MAX_SEQ_NUMBER) { GHMM_LOG_PRINTF(LERROR, LOC, "Number of sequences %ld exceeds possible range %d", seq_number, GHMM_MAX_SEQ_NUMBER); goto STOP; } ARRAY_CALLOC(sq, 1); ARRAY_CALLOC(sq->seq, seq_number); /*ARRAY_CALLOC (sq->states, seq_number);*/ ARRAY_CALLOC(sq->seq_len, seq_number);#ifdef GHMM_OBSOLETE ARRAY_CALLOC(sq->seq_label, seq_number);#endif /* GHMM_OBSOLETE */ ARRAY_CALLOC(sq->seq_id, seq_number); ARRAY_CALLOC(sq->seq_w, seq_number); sq->seq_number = seq_number; sq->capacity = seq_number; for (i=0; i < seq_number; i++) {#ifdef GHMM_OBSOLETE sq->seq_label[i] = -1;#endif /* GHMM_OBSOLETE */ sq->seq_id[i] = -1.0; sq->seq_w[i] = 1; } sq->state_labels = NULL; sq->state_labels_len = NULL; return sq;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dseq_free (&sq); return NULL;#undef CUR_PROC} /* ghmm_dseq_calloc *//*============================================================================*/static int ghmm_dseq_realloc(ghmm_dseq *sq, int seq_number) {#define CUR_PROC "ghmm_dseq_realloc" int i; if (seq_number > GHMM_MAX_SEQ_NUMBER) { GHMM_LOG_PRINTF(LERROR, LOC, "Number of sequences %ld exceeds possible range", seq_number); goto STOP; } ARRAY_REALLOC(sq->seq, seq_number); if (sq->flags & kHasLabels && sq->states) ARRAY_REALLOC(sq->states, seq_number); ARRAY_REALLOC(sq->seq_len, seq_number);#ifdef GHMM_OBSOLETE ARRAY_REALLOC(sq->seq_label, seq_number);#endif /* GHMM_OBSOLETE */ ARRAY_REALLOC(sq->seq_id, seq_number); ARRAY_REALLOC(sq->seq_w, seq_number); sq->capacity = seq_number; return 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return -1;#undef CUR_PROC} /* ghmm_dseq_realloc *//*============================================================================*/int ghmm_dseq_calloc_state_labels (ghmm_dseq *sq){#define CUR_PROC "ghmm_dseq_calloc_state_labels" ARRAY_CALLOC(sq->state_labels, sq->seq_number); ARRAY_CALLOC(sq->state_labels_len, sq->seq_number); return 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return -1;#undef CUR_PROC} /* ghmm_dseq_calloc_state_labels *//*============================================================================*/ghmm_cseq *ghmm_cseq_get_singlesequence(ghmm_cseq *sq, int index){ ghmm_cseq *res; res = ghmm_cseq_calloc(1); res->seq[0] = sq->seq[index]; res->seq_len[0] = sq->seq_len[index];#ifdef GHMM_OBSOLETE res->seq_label[0] = sq->seq_label[index];#endif /* GHMM_OBSOLETE */ res->seq_id[0] = sq->seq_id[index]; res->seq_w[0] = sq->seq_w[index]; res->total_w = res->seq_w[0]; return res; }ghmm_dseq *ghmm_dseq_get_singlesequence(ghmm_dseq *sq, int index){#define CUR_PROC "ghmm_dseq_get_singlesequence" ghmm_dseq *res; res = ghmm_dseq_calloc(1); if (!res) goto STOP; res->seq[0] = sq->seq[index]; res->seq_len[0] = sq->seq_len[index];#ifdef GHMM_OBSOLETE res->seq_label[0] = sq->seq_label[index];#endif /* GHMM_OBSOLETE */ res->seq_id[0] = sq->seq_id[index]; res->seq_w[0] = sq->seq_w[index]; res->total_w = res->seq_w[0]; if (sq->state_labels){ ARRAY_CALLOC (res->state_labels, 1); ARRAY_CALLOC (res->state_labels_len, 1); res->state_labels[0] = sq->state_labels[index]; res->state_labels_len[0] = sq->state_labels_len[index]; } return res;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return NULL;#undef CUR_PROC}/*XXX TEST: frees everything but the seq field */int ghmm_dseq_subseq_free (ghmm_dseq *sq){# define CUR_PROC "ghmm_dseq_subseq_free" /*int i,j;*/ mes_check_ptr (sq, return (-1)); m_free (sq->seq_len);#ifdef GHMM_OBSOLETE m_free (sq->seq_label);#endif /* GHMM_OBSOLETE */ m_free (sq->seq_id); m_free (sq->seq_w); if (sq->states) { ighmm_dmatrix_free (&(sq->states), sq->seq_number); m_free(sq->states_len); } if (sq->state_labels) { ighmm_dmatrix_free (&(sq->state_labels), sq->seq_number); m_free (sq->state_labels_len); } m_free (sq->seq); m_free (sq); return 0;# undef CUR_PROC} /* ghmm_dseq_subseq_free */int ghmm_cseq_subseq_free (ghmm_cseq * sqd){# define CUR_PROC "ghmm_cseq_subseq_free" mes_check_ptr (sqd, return (-1)); /* ghmm_cseq_print(stdout,*sqd,0);*/ m_free (sqd->seq); m_free (sqd->seq_len);#ifdef GHMM_OBSOLETE m_free (sqd->seq_label);#endif /* GHMM_OBSOLETE */ m_free (sqd->seq_id); m_free (sqd->seq_w); m_free (sqd); return 0;# undef CUR_PROC} /* ghmm_cseq_subseq_free *//*============================================================================*/ghmm_dseq *ghmm_dseq_lexWords (int n, int M){# define CUR_PROC "ghmm_dseq_lexWords" ghmm_dseq *sq = NULL; long seq_number, cnt = 0; int j = n - 1; int i; int *seq; if ((n < 0) || (M <= 0)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } seq_number = (long) pow ((double) M, (double) n); sq = ghmm_dseq_calloc (seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } for (i = 0; i < seq_number; i++) { ARRAY_CALLOC (sq->seq[i], n); sq->seq_len[i] = n; sq->seq_id[i] = i; } ARRAY_CALLOC (seq, n); while (!(j < 0)) { ghmm_dseq_copy (sq->seq[cnt], seq, n); j = n - 1; while (seq[j] == M - 1) { seq[j] = 0; j--; } seq[j]++; cnt++; } m_free (seq); return sq;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dseq_free (&sq); return NULL;# undef CUR_PROC} /* sequence_lewWords *//*============================================================================*/int ghmm_dseq_max_symbol (ghmm_dseq * sq){ long i, j; int max_symb = -1; for (i = 0; i < sq->seq_number; i++) for (j = 0; j < sq->seq_len[i]; j++) { if (sq->seq[i][j] > max_symb) max_symb = sq->seq[i][j]; } return max_symb;} /* ghmm_dseq_max_symbol *//*============================================================================*/void ghmm_dseq_copy (int *target, int *source, int len){ int i; for (i = 0; i < len; i++) target[i] = source[i];} /* ghmm_dseq_copy *//*============================================================================*/void ghmm_cseq_copy (double *target, double *source, int len){ int i; for (i = 0; i < len; i++) target[i] = source[i];} /* ghmm_dseq_copy *//*============================================================================*/int ghmm_dseq_add (ghmm_dseq * target, ghmm_dseq * source){#define CUR_PROC "ghmm_dseq_add" int res = -1; int **old_seq = target->seq; /*int **old_seq_st = target->states;*/ int *old_seq_len = target->seq_len;#ifdef GHMM_OBSOLETE long *old_seq_label = target->seq_label;#endif /* GHMM_OBSOLETE */ double *old_seq_id = target->seq_id; double *old_seq_w = target->seq_w; long old_seq_number = target->seq_number; long i; target->seq_number = old_seq_number + source->seq_number; target->total_w += source->total_w; ARRAY_CALLOC (target->seq, target->seq_number); /*ARRAY_CALLOC (target->states, target->seq_number);*/ ARRAY_CALLOC (target->seq_len, target->seq_number);#ifdef GHMM_OBSOLETE ARRAY_CALLOC (target->seq_label, target->seq_number);#endif /* GHMM_OBSOLETE */ ARRAY_CALLOC (target->seq_id, target->seq_number); ARRAY_CALLOC (target->seq_w, target->seq_number);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?