sequence.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 1,678 行 · 第 1/4 页

C
1,678
字号
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/sequence.c*       Authors:  Bernd Wichern, Andrea Weisse, Utz J. Pape, Benjamin Georgi**       Copyright (C) 1998-2004 Alexander Schliep*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,*                               Berlin**       Contact: schliep@ghmm.org**       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1931 $*                       from $Date: 2007-10-30 15:54:41 +0100 (Tue, 30 Oct 2007) $*             last change by $Author: grunau $.********************************************************************************/#ifdef WIN32#  include "win_config.h"#endif#ifdef HAVE_CONFIG_H#  include "../config.h"#endif#include <math.h>#include <float.h>#include <string.h>#include <stdio.h>#include <sys/types.h>#include <sys/stat.h>#include <unistd.h>#include "ghmm.h"#include "mprintf.h"#include "mes.h"#include "sequence.h"#include "matrix.h"#include "vector.h"#include "model.h"#include "foba.h"#include "sfoba.h"#include "vector.h"#include "rng.h"#include "ghmm_internals.h"#include "obsolete.h"enum sequence_flags{    kBlockAllocation = 1<<0,    kHasLabels       = 1<<1,};#ifdef GHMM_OBSOLETE/*============================================================================*/ghmm_dseq **ghmm_dseq_read (const char *filename, int *sq_number){#define CUR_PROC "ghmm_dseq_read"  int i;  ghmm_dseq **sequence = NULL;  scanner_t *s = NULL;  *sq_number = 0;  s = ighmm_scanner_alloc (filename);  if (!s) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  while (!s->err && !s->eof && s->c - '}') {    ighmm_scanner_get_name (s);    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    /* sequence file */    if (!strcmp (s->id, "SEQ")) {      (*sq_number)++;      /* more mem */      ARRAY_REALLOC (sequence, *sq_number);      sequence[*sq_number - 1] = ghmm_dseq_read_alloc (s);      if (!sequence[*sq_number - 1]) {        GHMM_LOG_QUEUED(LCONVERTED);        goto STOP;      }    }    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }  ighmm_scanner_free (&s);  return sequence;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ighmm_scanner_free (&s);  for (i = 0; i < *sq_number; i++)    ghmm_dseq_free (&(sequence[i]));  m_free (sequence);  *sq_number = 0;  return NULL;#undef CUR_PROC}/*============================================================================*/ghmm_dseq *ghmm_dseq_read_alloc (scanner_t * s){#define CUR_PROC "ghmm_dseq_read_alloc"  int symbols = 0, lexWord = 0;  ghmm_dseq *sq = NULL;  int seq_len_lex = 0;  char * str;  ARRAY_CALLOC (sq, 1);  ighmm_scanner_consume (s, '{');  if (s->err)    goto STOP;  while (!s->err && !s->eof && s->c - '}') {    ighmm_scanner_get_name (s);    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    /* array of sequences to read */    if (!strcmp (s->id, "O")) {      ighmm_scanner_consume (s, '{');      if (s->err)        goto STOP;      sq->seq_number = 0;      sq->total_w = 0.0;      while (!s->eof && !s->err && s->c - '}') {        /* another sequence --> realloc */        ARRAY_REALLOC (sq->seq, sq->seq_number + 1);        ARRAY_REALLOC (sq->seq_len, sq->seq_number + 1);        ARRAY_REALLOC (sq->seq_label, sq->seq_number + 1);        ARRAY_REALLOC (sq->seq_id, sq->seq_number + 1);        ARRAY_REALLOC (sq->seq_w, sq->seq_number + 1);        /* Label and ID */        /* default */        sq->seq_label[sq->seq_number] = -1;        sq->seq_id[sq->seq_number] = -1.0;        sq->seq_w[sq->seq_number] = 1;        while (s->c == '<' || s->c == '(' || s->c == '|') {          if (s->c == '<') {            ighmm_scanner_consume (s, '<');            if (s->err)              goto STOP;            sq->seq_label[sq->seq_number] = ighmm_scanner_get_int (s);            if (s->err)              goto STOP;            ighmm_scanner_consume (s, '>');            if (s->err)              goto STOP;          }          if (s->c == '(') {            ighmm_scanner_consume (s, '(');            if (s->err)              goto STOP;            sq->seq_id[sq->seq_number] = ighmm_scanner_get_edouble (s);            if (s->err)              goto STOP;            ighmm_scanner_consume (s, ')');            if (s->err)              goto STOP;          }          if (s->c == '|') {            ighmm_scanner_consume (s, '|');            if (s->err)              goto STOP;            sq->seq_w[sq->seq_number] = (double) ighmm_scanner_get_int (s);            if (sq->seq_w[sq->seq_number] <= 0) {              ighmm_scanner_error (s, "sequence weight not positiv\n");              goto STOP;            }            if (s->err)              goto STOP;            ighmm_scanner_consume (s, '|');            if (s->err)              goto STOP;          }        }        sq->seq[sq->seq_number] =          scanner_get_int_array (s, sq->seq_len + sq->seq_number);        if (sq->seq_len[sq->seq_number] > GHMM_MAX_SEQ_LEN) {          ighmm_scanner_error (s, "sequence too long");          goto STOP;        }        ighmm_scanner_consume (s, ';');        if (s->err)          goto STOP;        sq->total_w += sq->seq_w[sq->seq_number];        sq->seq_number++;      }                         /* while( !s->eof...) */      if ((sq->seq_number == 0) || (sq->seq_number > GHMM_MAX_SEQ_NUMBER)) {        str = ighmm_mprintf (NULL, 0, "Number of sequences %ld exceeds possible range",                             sq->seq_number);        GHMM_LOG(LCONVERTED, str);        m_free (str);        goto STOP;      }      ighmm_scanner_consume (s, '}');      if (s->err)        goto STOP;    }    /* all possible seqs., sorted lexicographical */    else if (!strcmp (s->id, "L")) {      lexWord = 1;      ighmm_scanner_consume (s, '{');      if (s->err)        goto STOP;      while (!s->err && !s->eof && s->c - '}') {        ighmm_scanner_get_name (s);        ighmm_scanner_consume (s, '=');        if (s->err)          goto STOP;        if (!strcmp (s->id, "seq_len")) {          seq_len_lex = ighmm_scanner_get_int (s);          if (s->err)            goto STOP;          if (seq_len_lex <= 0) {            GHMM_LOG(LCONVERTED, "Value for sequence length not allowed");            goto STOP;          }        }        else if (!strcmp (s->id, "symb")) {          if (symbols < 0) {            GHMM_LOG(LCONVERTED, "Value for number of symbols not allowed");            goto STOP;          }          symbols = ighmm_scanner_get_int (s);          if (s->err)            goto STOP;        }        else {          ighmm_scanner_error (s, "unknown identifier");          goto STOP;        }        ighmm_scanner_consume (s, ';');        if (s->err)          goto STOP;      }      ighmm_scanner_consume (s, '}');      if ((seq_len_lex <= 0) || (symbols < 0)) {        GHMM_LOG(LCONVERTED,          "Values for seq. length or number of symbols not spezified");        goto STOP;      }      sq = ghmm_dseq_lexWords (seq_len_lex, symbols);      if (!sq)        goto STOP;    }                           /*if (!strcmp(s->id, "L")) */    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }                             /* while(!s->err && !s->eof && s->c - '}') */  ighmm_scanner_consume (s, '}');  if (s->err)    goto STOP;  return (sq);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dseq_free (&sq);  return (NULL);#undef CUR_PROC}                               /* ghmm_dseq_read_alloc *//*============================================================================*/ghmm_cseq **ghmm_cseq_read (const char *filename, int *sqd_number){#define CUR_PROC "ghmm_cseq_read"  int i;  scanner_t *s = NULL;  ghmm_cseq **sequence = NULL;  *sqd_number = 0;  s = ighmm_scanner_alloc (filename);  if (!s) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  while (!s->err && !s->eof && s->c - '}') {    ighmm_scanner_get_name (s);    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    /* sequence file */    if (!strcmp (s->id, "SEQD")) {      (*sqd_number)++;      /* more mem */      ARRAY_REALLOC (sequence, *sqd_number);      sequence[*sqd_number - 1] = ghmm_cseq_read_alloc (s);      if (!sequence[*sqd_number - 1]) {        GHMM_LOG_QUEUED(LCONVERTED);        goto STOP;      }    }    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }  ighmm_scanner_free (&s);  return sequence;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ighmm_scanner_free (&s);  for (i = 0; i < *sqd_number; i++)    ghmm_cseq_free (&(sequence[i]));  m_free (sequence);  *sqd_number = 0;      return NULL;#undef CUR_PROC}                               /* ghmm_cseq_read *//*============================================================================*/ghmm_cseq *ghmm_cseq_read_alloc (scanner_t * s){#define CUR_PROC "ghmm_cseq_read_alloc"  char * str;  ghmm_cseq *sqd = NULL;  ARRAY_CALLOC (sqd, 1);  ighmm_scanner_consume (s, '{');  if (s->err)    goto STOP;  while (!s->err && !s->eof && s->c - '}') {    ighmm_scanner_get_name (s);    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    /* array of sequences to read */    if (!strcmp (s->id, "O")) {      ighmm_scanner_consume (s, '{');      if (s->err)        goto STOP;      sqd->seq_number = 0;      sqd->total_w = 0.0;      while (!s->eof && !s->err && s->c - '}') {        /* another sequence --> realloc */        ARRAY_REALLOC (sqd->seq, sqd->seq_number + 1);        ARRAY_REALLOC (sqd->seq_len, sqd->seq_number + 1);        ARRAY_REALLOC (sqd->seq_label, sqd->seq_number + 1);        ARRAY_REALLOC (sqd->seq_id, sqd->seq_number + 1);        ARRAY_REALLOC (sqd->seq_w, sqd->seq_number + 1);        /* Label and ID and weight */        /* default */        sqd->seq_label[sqd->seq_number] = -1;        sqd->seq_id[sqd->seq_number] = -1.0;        sqd->seq_w[sqd->seq_number] = 1;        while (s->c == '<' || s->c == '(' || s->c == '|') {          if (s->c == '<') {            ighmm_scanner_consume (s, '<');            if (s->err)              goto STOP;            sqd->seq_label[sqd->seq_number] = ighmm_scanner_get_int (s);            if (s->err)              goto STOP;            ighmm_scanner_consume (s, '>');            if (s->err)              goto STOP;          }          if (s->c == '(') {            ighmm_scanner_consume (s, '(');            if (s->err)              goto STOP;            sqd->seq_id[sqd->seq_number] = ighmm_scanner_get_edouble (s);            if (s->err)              goto STOP;            ighmm_scanner_consume (s, ')');            if (s->err)              goto STOP;          }          if (s->c == '|') {            ighmm_scanner_consume (s, '|');            if (s->err)              goto STOP;            sqd->seq_w[sqd->seq_number] = (double) ighmm_scanner_get_int (s);            if (sqd->seq_w[sqd->seq_number] < 0) {              ighmm_scanner_error (s, "negativ sequence weight\n");              goto STOP;            }            if (s->err)              goto STOP;            ighmm_scanner_consume (s, '|');            if (s->err)              goto STOP;          }        }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?