⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sequence.c

📁 一个通用的隐性马尔可夫C代码库 开发环境:C语言 简要说明:这是一个通用的隐性马尔可夫C代码库
💻 C
📖 第 1 页 / 共 3 页
字号:
/*******************************************************************************  author       : Bernd Wichern and extended by Andrea Weisse and Utz J. Pape  filename     : /zpr/bspk/src/hmm/ghmm/ghmm/sequence.c  created      : TIME: 11:29:02     DATE: Thu 12. February 1998  $Id: sequence.c,v 1.17 2004/03/01 14:59:08 wasinee Exp $Copyright (C) 1998-2001, ZAIK/ZPR, Universit鋞 zu K鰈nThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*****************************************************************************/#include "mprintf.h"#include "mes.h"#include "sequence.h"#include <math.h>#include <float.h>#include "mprintf.h"#include "mes.h"#include "matrix.h"#include "vector.h"#include "const.h"#include "model.h"#include "foba.h"#include "sfoba.h"#include "vector.h"#include "rng.h"#include "string.h"/*============================================================================*/sequence_t** sequence_read(const char *filename, int *sq_number) {#define CUR_PROC "sequence_read"  int i;  sequence_t **sequence = NULL;  scanner_t *s = NULL;  *sq_number = 0;  s = scanner_alloc(filename);  if(!s) {mes_proc(); goto STOP;}  while(!s->err && !s->eof && s->c - '}') {    scanner_get_name(s);    scanner_consume(s, '='); if(s->err) goto STOP;     /* sequence file */    if (!strcmp(s->id, "SEQ")) {      (*sq_number)++;      /* more mem */	       if (m_realloc(sequence, *sq_number)) { mes_proc(); goto STOP; }      sequence[*sq_number-1] = sequence_read_alloc(s);      if (!sequence[*sq_number-1]) { mes_proc(); goto STOP; }    }    else {      scanner_error(s, "unknown identifier"); goto STOP;    }    scanner_consume( s, ';' ); if(s->err) goto STOP;  }  scanner_free(&s);  return sequence;STOP:  for (i = 0; i < *sq_number; i++)     sequence_free(&(sequence[i]));  m_free(sequence);  *sq_number = 0;  return NULL;#undef CUR_PROC}/*============================================================================*/sequence_t* sequence_read_alloc(scanner_t *s){#define CUR_PROC "sequence_read_alloc"  int symbols = 0, lexWord = 0;  sequence_t *sq = NULL;  int seq_len_lex = 0;  if( !m_calloc( sq, 1 ) ) {mes_proc();goto STOP;}   scanner_consume( s, '{' ); if(s->err) goto STOP;   while(!s->err && !s->eof && s->c - '}') {    scanner_get_name(s);    scanner_consume(s, '='); if(s->err) goto STOP;    /* array of sequences to read */    if (!strcmp(s->id, "O")) {      scanner_consume(s, '{'); if (s->err) goto STOP;      sq->seq_number = 0;      sq->total_w = 0.0;      while( !s->eof && !s->err && s->c - '}') {	/* another sequence --> realloc */	if (m_realloc(sq->seq, sq->seq_number + 1)) 	  { mes_proc(); goto STOP; }	if (m_realloc(sq->seq_len, sq->seq_number + 1)) 	  { mes_proc(); goto STOP; }	if (m_realloc(sq->seq_label, sq->seq_number + 1)) 	  { mes_proc(); goto STOP; }	if (m_realloc(sq->seq_id, sq->seq_number + 1))	  { mes_proc(); goto STOP; }        if (m_realloc(sq->seq_w, sq->seq_number + 1))          { mes_proc(); goto STOP; }	/* Label and ID */	/* default */	sq->seq_label[sq->seq_number] = -1;	sq->seq_id[sq->seq_number] = -1.0;	sq->seq_w[sq->seq_number] = 1;	while (s->c == '<' || s->c == '(' || s->c == '|') {	  if (s->c == '<') {	    scanner_consume(s, '<'); if (s->err) goto STOP;	  	    sq->seq_label[sq->seq_number] = scanner_get_int(s); 	    if(s->err) goto STOP; 	    scanner_consume(s, '>'); if (s->err) goto STOP;	  }	  if (s->c == '(') {	    scanner_consume(s, '('); if (s->err) goto STOP;	  	    sq->seq_id[sq->seq_number] = scanner_get_edouble(s); 	    if(s->err) goto STOP; 	    scanner_consume(s, ')'); if (s->err) goto STOP;	  }	  if (s->c == '|') {	    scanner_consume(s, '|'); if (s->err) goto STOP;	  	    sq->seq_w[sq->seq_number] = (double) scanner_get_int(s); 	    if (sq->seq_w[sq->seq_number] <= 0) {	      scanner_error(s, "sequence weight not positiv\n");	      goto STOP;	    }	    if(s->err) goto STOP; 	    scanner_consume(s, '|'); if (s->err) goto STOP;	  }	}	sq->seq[sq->seq_number] = 	  scanner_get_int_array(s, sq->seq_len+sq->seq_number);	if (sq->seq_len[sq->seq_number] > MAX_SEQ_LEN) {	  scanner_error(s, "sequence too long"); goto STOP;	}	scanner_consume(s, ';'); if (s->err) goto STOP;	sq->total_w += sq->seq_w[sq->seq_number];	sq->seq_number++;      } /* while( !s->eof...) */      if ((sq->seq_number == 0) || (sq->seq_number > MAX_SEQ_NUMBER)) {	char *str = mprintf(NULL, 0,			    "Number of sequences %ld exceeds possible range", 			    sq->seq_number); 	mes_prot(str);	m_free(str);	goto STOP;      }      scanner_consume(s, '}'); if (s->err) goto STOP;	        }      /* all possible seqs., sorted lexicographical */    else if (!strcmp(s->id, "L")) {      lexWord = 1;      scanner_consume( s, '{' ); if(s->err) goto STOP;       while(!s->err && !s->eof && s->c - '}') {	scanner_get_name(s);	scanner_consume(s, '='); if(s->err) goto STOP; 	if (!strcmp(s->id, "seq_len")) {   	  seq_len_lex = scanner_get_int(s); if(s->err) goto STOP; 	  if (seq_len_lex <= 0) {	    mes_prot("Value for sequence length not allowed"); goto STOP;	  }	}	else if (!strcmp(s->id, "symb")) {   	  if (symbols < 0) {	    mes_prot("Value for number of symbols not allowed"); goto STOP;	  }	  symbols = scanner_get_int(s); if(s->err) goto STOP; 	}	else {	  scanner_error(s, "unknown identifier"); goto STOP;	}		scanner_consume(s, ';'); if(s->err) goto STOP;      }      scanner_consume(s, '}');      if ((seq_len_lex <= 0) || (symbols < 0)) {	mes_prot("Values for seq. length or number of symbols not spezified");	goto STOP;      }      sq = sequence_lexWords(seq_len_lex, symbols);       if (!sq) goto STOP;    }  /*if (!strcmp(s->id, "L"))*/    else {      scanner_error(s, "unknown identifier"); goto STOP;    }    scanner_consume( s, ';' ); if(s->err) goto STOP;  } /* while(!s->err && !s->eof && s->c - '}') */  scanner_consume( s, '}'); if(s->err) goto STOP;   return(sq);  STOP:  sequence_free(&sq);  return(NULL);#undef CUR_PROC} /* sequence_read_alloc */ /*============================================================================*/sequence_d_t** sequence_d_read(const char *filename, int *sqd_number) {#define CUR_PROC "sequence_d_read"  int i;  scanner_t *s = NULL;  sequence_d_t **sequence = NULL;  *sqd_number = 0;  s = scanner_alloc(filename);  if(!s) {mes_proc(); goto STOP;}    while(!s->err && !s->eof && s->c - '}') {    scanner_get_name(s);    scanner_consume(s, '='); if(s->err) goto STOP;     /* sequence file */	if (!strcmp(s->id, "SEQD")) {      (*sqd_number)++;      /* more mem */	       if (m_realloc(sequence, *sqd_number)) { mes_proc(); goto STOP; }	  sequence[*sqd_number-1] = sequence_d_read_alloc(s);      if (!sequence[*sqd_number-1]) { mes_proc(); goto STOP; }    }    else {      scanner_error(s, "unknown identifier"); goto STOP;    }    scanner_consume( s, ';' ); if(s->err) goto STOP;  }  scanner_free(&s);  return sequence;STOP:  scanner_free(&s);  for (i = 0; i < *sqd_number; i++)     sequence_d_free(&(sequence[i]));  m_free(sequence);  *sqd_number = 0;  return NULL;#undef CUR_PROC} /* sequence_d_read */ /*============================================================================*/sequence_d_t* sequence_d_read_alloc(scanner_t *s){#define CUR_PROC "sequence_d_read_alloc"  sequence_d_t *sqd = NULL;  if( !m_calloc( sqd, 1 ) ) {mes_proc();goto STOP;}   scanner_consume( s, '{' ); if(s->err) goto STOP;   while(!s->err && !s->eof && s->c - '}') {    scanner_get_name(s);    scanner_consume(s, '='); if(s->err) goto STOP;    /* array of sequences to read */    if (!strcmp(s->id, "O")) {      scanner_consume(s, '{'); if (s->err) goto STOP;      sqd->seq_number = 0;      sqd->total_w = 0.0;      while( !s->eof && !s->err && s->c - '}') {	/* another sequence --> realloc */	if (m_realloc(sqd->seq, sqd->seq_number+1)) 	  { mes_proc(); goto STOP; }	if (m_realloc(sqd->seq_len, sqd->seq_number+1))	  { mes_proc(); goto STOP; }	if (m_realloc(sqd->seq_label, sqd->seq_number + 1)) 	  { mes_proc(); goto STOP; }	if (m_realloc(sqd->seq_id, sqd->seq_number + 1)) 	  { mes_proc(); goto STOP; }	if (m_realloc(sqd->seq_w, sqd->seq_number + 1)) 	  { mes_proc(); goto STOP; }	/* Label and ID and weight */	/* default */	sqd->seq_label[sqd->seq_number] = -1;	sqd->seq_id[sqd->seq_number] = -1.0;	sqd->seq_w[sqd->seq_number] = 1;	while (s->c == '<' || s->c == '(' || s->c == '|') {	  if (s->c == '<') {	    scanner_consume(s, '<'); if (s->err) goto STOP;	  	    sqd->seq_label[sqd->seq_number] = scanner_get_int(s); 	    if(s->err) goto STOP; 	    scanner_consume(s, '>'); if (s->err) goto STOP;	  }	  if (s->c == '(') {	    scanner_consume(s, '('); if (s->err) goto STOP;	  	    sqd->seq_id[sqd->seq_number] = scanner_get_edouble(s); 	    if(s->err) goto STOP; 	    scanner_consume(s, ')'); if (s->err) goto STOP;	  }	  if (s->c == '|') {	    scanner_consume(s, '|'); if (s->err) goto STOP;	  	    sqd->seq_w[sqd->seq_number] = (double) scanner_get_int(s);	    if (sqd->seq_w[sqd->seq_number] < 0) {	      scanner_error(s, "negativ sequence weight\n");	      goto STOP;	    }	    if(s->err) goto STOP; 	    scanner_consume(s, '|'); if (s->err) goto STOP;	  }	}	sqd->seq[sqd->seq_number] = 	  scanner_get_double_earray(s, sqd->seq_len + sqd->seq_number);	if (sqd->seq_len[sqd->seq_number] > MAX_SEQ_LEN) {	  scanner_error(s, "sequence too long"); goto STOP;	}	scanner_consume(s, ';'); if (s->err) goto STOP;	sqd->total_w += sqd->seq_w[sqd->seq_number];	sqd->seq_number++;      } /* while( !s->eof...) */      if ((sqd->seq_number == 0) || (sqd->seq_number > MAX_SEQ_NUMBER)) {	char *str = mprintf(NULL, 0,			    "Number of sequences %ld exceeds possible range", 			    sqd->seq_number); 	mes_prot(str);	m_free(str);	goto STOP;      }      scanner_consume(s, '}'); if (s->err) goto STOP;	        }      else {      scanner_error(s, "unknown identifier"); goto STOP;    }    scanner_consume( s, ';' ); if(s->err) goto STOP;  } /* while(!s->err && !s->eof && s->c - '}') */  scanner_consume( s, '}'); if(s->err) goto STOP;   return(sqd);  STOP:  sequence_d_free(&sqd);  return(NULL);#undef CUR_PROC} /* sequence_d_read_alloc */ /*============================================================================*//* Truncate Sequences in a given sqd_field; useful for Testing;   returns truncated sqd_field;    trunc_ratio 0: no truncation   trunc_ratio 1: truncation (mean truncation faktor = 0.5)   trunc_ratio -1: 100 % truncation*/sequence_d_t **sequence_d_truncate(sequence_d_t **sqd_in, int sqd_fields, 				  double trunc_ratio, int seed) {  #define CUR_PROC "sequence_d_truncate"  sequence_d_t **sq;  int i, j, trunc_len;  /* Hack, use -1 for complete truncation */  if ((0 > trunc_ratio || 1 < trunc_ratio) && trunc_ratio != -1) {    mes_prot("Error: trunc_ratio not valid\n"); goto STOP;  }  if( !m_calloc(sq, sqd_fields) ) {mes_proc();goto STOP;}  gsl_rng_init();  gsl_rng_set(RNG,seed);  for (i = 0; i < sqd_fields; i++) {    sq[i] = sequence_d_calloc(sqd_in[i]->seq_number);    sq[i]->total_w = sqd_in[i]->total_w;    for (j = 0; j < sqd_in[i]->seq_number; j++) {      if(!m_calloc(sq[i]->seq[j], sqd_in[i]->seq_len[j])) {	mes_proc(); goto STOP;      }      /* length of truncated seq. */      if (trunc_ratio == -1) 	trunc_len = 0;      else	trunc_len = (int) ceil(( sqd_in[i]->seq_len[j] * 				 (1 - trunc_ratio * gsl_rng_uniform(RNG)) ));      sequence_d_copy(sq[i]->seq[j], sqd_in[i]->seq[j], trunc_len);      if(m_realloc(sq[i]->seq[j], trunc_len)) {mes_proc(); goto STOP;}      sq[i]->seq_len[j] = trunc_len;      sq[i]->seq_label[j] = sqd_in[i]->seq_label[j];      sq[i]->seq_id[j] = sqd_in[i]->seq_id[j];      sq[i]->seq_w[j] = sqd_in[i]->seq_w[j];    }  } /* for all sqd_fields */    return sq;STOP:  return NULL;#undef CUR_PROC}/*============================================================================*/sequence_d_t *sequence_d_calloc(long seq_number) {#define CUR_PROC "sequence_dcalloc"  int i;  //printf("*** sequence_d_t *sequence_d_calloc, nr: %d\n",seq_number);  sequence_d_t *sqd = NULL;  if (seq_number > MAX_SEQ_NUMBER) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -