sequence.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 1,678 行 · 第 1/4 页
C
1,678 行
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/sequence.c* Authors: Bernd Wichern, Andrea Weisse, Utz J. Pape, Benjamin Georgi** Copyright (C) 1998-2004 Alexander Schliep* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,* Berlin** Contact: schliep@ghmm.org** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1931 $* from $Date: 2007-10-30 15:54:41 +0100 (Tue, 30 Oct 2007) $* last change by $Author: grunau $.********************************************************************************/#ifdef WIN32# include "win_config.h"#endif#ifdef HAVE_CONFIG_H# include "../config.h"#endif#include <math.h>#include <float.h>#include <string.h>#include <stdio.h>#include <sys/types.h>#include <sys/stat.h>#include <unistd.h>#include "ghmm.h"#include "mprintf.h"#include "mes.h"#include "sequence.h"#include "matrix.h"#include "vector.h"#include "model.h"#include "foba.h"#include "sfoba.h"#include "vector.h"#include "rng.h"#include "ghmm_internals.h"#include "obsolete.h"enum sequence_flags{ kBlockAllocation = 1<<0, kHasLabels = 1<<1,};#ifdef GHMM_OBSOLETE/*============================================================================*/ghmm_dseq **ghmm_dseq_read (const char *filename, int *sq_number){#define CUR_PROC "ghmm_dseq_read" int i; ghmm_dseq **sequence = NULL; scanner_t *s = NULL; *sq_number = 0; s = ighmm_scanner_alloc (filename); if (!s) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } while (!s->err && !s->eof && s->c - '}') { ighmm_scanner_get_name (s); ighmm_scanner_consume (s, '='); if (s->err) goto STOP; /* sequence file */ if (!strcmp (s->id, "SEQ")) { (*sq_number)++; /* more mem */ ARRAY_REALLOC (sequence, *sq_number); sequence[*sq_number - 1] = ghmm_dseq_read_alloc (s); if (!sequence[*sq_number - 1]) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } ighmm_scanner_free (&s); return sequence;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ighmm_scanner_free (&s); for (i = 0; i < *sq_number; i++) ghmm_dseq_free (&(sequence[i])); m_free (sequence); *sq_number = 0; return NULL;#undef CUR_PROC}/*============================================================================*/ghmm_dseq *ghmm_dseq_read_alloc (scanner_t * s){#define CUR_PROC "ghmm_dseq_read_alloc" int symbols = 0, lexWord = 0; ghmm_dseq *sq = NULL; int seq_len_lex = 0; char * str; ARRAY_CALLOC (sq, 1); ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; while (!s->err && !s->eof && s->c - '}') { ighmm_scanner_get_name (s); ighmm_scanner_consume (s, '='); if (s->err) goto STOP; /* array of sequences to read */ if (!strcmp (s->id, "O")) { ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; sq->seq_number = 0; sq->total_w = 0.0; while (!s->eof && !s->err && s->c - '}') { /* another sequence --> realloc */ ARRAY_REALLOC (sq->seq, sq->seq_number + 1); ARRAY_REALLOC (sq->seq_len, sq->seq_number + 1); ARRAY_REALLOC (sq->seq_label, sq->seq_number + 1); ARRAY_REALLOC (sq->seq_id, sq->seq_number + 1); ARRAY_REALLOC (sq->seq_w, sq->seq_number + 1); /* Label and ID */ /* default */ sq->seq_label[sq->seq_number] = -1; sq->seq_id[sq->seq_number] = -1.0; sq->seq_w[sq->seq_number] = 1; while (s->c == '<' || s->c == '(' || s->c == '|') { if (s->c == '<') { ighmm_scanner_consume (s, '<'); if (s->err) goto STOP; sq->seq_label[sq->seq_number] = ighmm_scanner_get_int (s); if (s->err) goto STOP; ighmm_scanner_consume (s, '>'); if (s->err) goto STOP; } if (s->c == '(') { ighmm_scanner_consume (s, '('); if (s->err) goto STOP; sq->seq_id[sq->seq_number] = ighmm_scanner_get_edouble (s); if (s->err) goto STOP; ighmm_scanner_consume (s, ')'); if (s->err) goto STOP; } if (s->c == '|') { ighmm_scanner_consume (s, '|'); if (s->err) goto STOP; sq->seq_w[sq->seq_number] = (double) ighmm_scanner_get_int (s); if (sq->seq_w[sq->seq_number] <= 0) { ighmm_scanner_error (s, "sequence weight not positiv\n"); goto STOP; } if (s->err) goto STOP; ighmm_scanner_consume (s, '|'); if (s->err) goto STOP; } } sq->seq[sq->seq_number] = scanner_get_int_array (s, sq->seq_len + sq->seq_number); if (sq->seq_len[sq->seq_number] > GHMM_MAX_SEQ_LEN) { ighmm_scanner_error (s, "sequence too long"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; sq->total_w += sq->seq_w[sq->seq_number]; sq->seq_number++; } /* while( !s->eof...) */ if ((sq->seq_number == 0) || (sq->seq_number > GHMM_MAX_SEQ_NUMBER)) { str = ighmm_mprintf (NULL, 0, "Number of sequences %ld exceeds possible range", sq->seq_number); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; } /* all possible seqs., sorted lexicographical */ else if (!strcmp (s->id, "L")) { lexWord = 1; ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; while (!s->err && !s->eof && s->c - '}') { ighmm_scanner_get_name (s); ighmm_scanner_consume (s, '='); if (s->err) goto STOP; if (!strcmp (s->id, "seq_len")) { seq_len_lex = ighmm_scanner_get_int (s); if (s->err) goto STOP; if (seq_len_lex <= 0) { GHMM_LOG(LCONVERTED, "Value for sequence length not allowed"); goto STOP; } } else if (!strcmp (s->id, "symb")) { if (symbols < 0) { GHMM_LOG(LCONVERTED, "Value for number of symbols not allowed"); goto STOP; } symbols = ighmm_scanner_get_int (s); if (s->err) goto STOP; } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } ighmm_scanner_consume (s, '}'); if ((seq_len_lex <= 0) || (symbols < 0)) { GHMM_LOG(LCONVERTED, "Values for seq. length or number of symbols not spezified"); goto STOP; } sq = ghmm_dseq_lexWords (seq_len_lex, symbols); if (!sq) goto STOP; } /*if (!strcmp(s->id, "L")) */ else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } /* while(!s->err && !s->eof && s->c - '}') */ ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; return (sq);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dseq_free (&sq); return (NULL);#undef CUR_PROC} /* ghmm_dseq_read_alloc *//*============================================================================*/ghmm_cseq **ghmm_cseq_read (const char *filename, int *sqd_number){#define CUR_PROC "ghmm_cseq_read" int i; scanner_t *s = NULL; ghmm_cseq **sequence = NULL; *sqd_number = 0; s = ighmm_scanner_alloc (filename); if (!s) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } while (!s->err && !s->eof && s->c - '}') { ighmm_scanner_get_name (s); ighmm_scanner_consume (s, '='); if (s->err) goto STOP; /* sequence file */ if (!strcmp (s->id, "SEQD")) { (*sqd_number)++; /* more mem */ ARRAY_REALLOC (sequence, *sqd_number); sequence[*sqd_number - 1] = ghmm_cseq_read_alloc (s); if (!sequence[*sqd_number - 1]) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } ighmm_scanner_free (&s); return sequence;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ighmm_scanner_free (&s); for (i = 0; i < *sqd_number; i++) ghmm_cseq_free (&(sequence[i])); m_free (sequence); *sqd_number = 0; return NULL;#undef CUR_PROC} /* ghmm_cseq_read *//*============================================================================*/ghmm_cseq *ghmm_cseq_read_alloc (scanner_t * s){#define CUR_PROC "ghmm_cseq_read_alloc" char * str; ghmm_cseq *sqd = NULL; ARRAY_CALLOC (sqd, 1); ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; while (!s->err && !s->eof && s->c - '}') { ighmm_scanner_get_name (s); ighmm_scanner_consume (s, '='); if (s->err) goto STOP; /* array of sequences to read */ if (!strcmp (s->id, "O")) { ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; sqd->seq_number = 0; sqd->total_w = 0.0; while (!s->eof && !s->err && s->c - '}') { /* another sequence --> realloc */ ARRAY_REALLOC (sqd->seq, sqd->seq_number + 1); ARRAY_REALLOC (sqd->seq_len, sqd->seq_number + 1); ARRAY_REALLOC (sqd->seq_label, sqd->seq_number + 1); ARRAY_REALLOC (sqd->seq_id, sqd->seq_number + 1); ARRAY_REALLOC (sqd->seq_w, sqd->seq_number + 1); /* Label and ID and weight */ /* default */ sqd->seq_label[sqd->seq_number] = -1; sqd->seq_id[sqd->seq_number] = -1.0; sqd->seq_w[sqd->seq_number] = 1; while (s->c == '<' || s->c == '(' || s->c == '|') { if (s->c == '<') { ighmm_scanner_consume (s, '<'); if (s->err) goto STOP; sqd->seq_label[sqd->seq_number] = ighmm_scanner_get_int (s); if (s->err) goto STOP; ighmm_scanner_consume (s, '>'); if (s->err) goto STOP; } if (s->c == '(') { ighmm_scanner_consume (s, '('); if (s->err) goto STOP; sqd->seq_id[sqd->seq_number] = ighmm_scanner_get_edouble (s); if (s->err) goto STOP; ighmm_scanner_consume (s, ')'); if (s->err) goto STOP; } if (s->c == '|') { ighmm_scanner_consume (s, '|'); if (s->err) goto STOP; sqd->seq_w[sqd->seq_number] = (double) ighmm_scanner_get_int (s); if (sqd->seq_w[sqd->seq_number] < 0) { ighmm_scanner_error (s, "negativ sequence weight\n"); goto STOP; } if (s->err) goto STOP; ighmm_scanner_consume (s, '|'); if (s->err) goto STOP; } }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?