sequence.h

来自「General Hidden Markov Model Library 一个通用」· C头文件 代码 · 共 470 行

H
470
字号
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/sequence.h*       Authors:  Bernd Wichern, Benjamin Georgi**       Copyright (C) 1998-2004 Alexander Schliep *       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*	Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik, *                               Berlin*                                   *       Contact: schliep@ghmm.org             **       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1931 $ *                       from $Date: 2007-10-30 15:54:41 +0100 (Tue, 30 Oct 2007) $*             last change by $Author: grunau $.********************************************************************************/#ifndef GHMM_OBSOLETE#warning "GHMM_OBSOLETE is not defined!"#endif#ifndef GHMM_SEQUENCE_H#define GHMM_SEQUENCE_H#ifdef __cplusplusextern "C" {#endif/**@name sequences  (double and int) *//*@{ (Doc++-Group: sequence) *//** @name struct ghmm_dseq    Sequence structure for integer sequences.     Contains an array of sequences and corresponding    data like sequence label, sequence weight, etc. Sequences may have different    length.     */  typedef struct ghmm_dseq {  /** sequence array. sequence[i] [j] = j-th symbol of i-th seq.   */    int **seq;  /** matrix of state ids, can be used to save the viterbi path during sequence generation.   ATTENTION: is NOT allocated by ghmm_dseq_calloc  */    int **states;  /** array of sequence length */    int *seq_len;  /** array of state path lengths */    int *states_len;#ifdef GHMM_OBSOLETE  /**  array of sequence labels */    long *seq_label;#endif /* GHMM_OBSOLETE */  /**  array of sequence IDs*/    double *seq_id;  /** positiv! sequence weights.  default is 1 = no weight */    double *seq_w;  /** total number of sequences */    long seq_number;  /** reserved space for sequences is always >= seq_number */    long capacity;  /** sum of sequence weights */    double total_w;  /** matrix of state labels corresponding to seq */    int **state_labels;  /** number of labels for each sequence */    int *state_labels_len;  /** flags (internal) */    unsigned int flags;  } ghmm_dseq;/** @name struct ghmm_cseq    Sequence structure for double sequences.     Contains an array of sequences and corresponding    data like sequnce label, sequence weight, etc. Sequences may have different    length.     */  typedef struct ghmm_cseq {  /** sequence array. sequence[i][j] = j-th symbol of i-th seq. */    double **seq;  /** array of sequence length */    int *seq_len;#ifdef GHMM_OBSOLETE  /**  array of sequence labels */    long *seq_label;#endif /* GHMM_OBSOLETE */  /**  array of sequence IDs*/    double *seq_id;  /** positive! sequence weights.  default is 1 = no weight */    double *seq_w;  /** total number of sequences */    long seq_number;  /** reserved space for sequences is always >= seq_number */    long capacity;  /** sum of sequence weights */    double total_w;  /** flags (internal) */    unsigned int flags;  } ghmm_cseq;#ifdef __cplusplus}#endif/* don't include model.h at the beginning of this file. struct ghmm_dseq has   to be known in model.h */#include "model.h"#include "smodel.h"#ifdef __cplusplusextern "C" {#endif/** Truncate double sequences in a given sequence array.     Useful for Testing;   @return truncated sqd_field;    @param sqd\_in sequence arrays for truncation   @param sqd\_arrays number of sequence arrays   @param  trunc\_ratio 0 means  no truncation, 1 max. truncation   @param seed rng seed*/  ghmm_cseq **ghmm_cseq_truncate (ghmm_cseq ** sqd_in, int sqd_arrays,                                      double trunc_ratio, int seed);/**  Extract a single sequence from a larger ghmm_dseq into a new struct.    @return ghmm_dseq struct containing a single sequence  @param sq   source ghmm_dseq  @param index   index of sequence to extract*/ghmm_dseq *ghmm_dseq_get_singlesequence(ghmm_dseq *sq, int index);/**  Extract a single sequence_d from a larger ghmm_cseq into a new struct.    @return ghmm_cseq struct containing a single sequence  @param sq   source ghmm_cseq  @param index   index of sequence to extract*/ghmm_cseq *ghmm_cseq_get_singlesequence(ghmm_cseq *sq, int index);/*XXX TEST: frees everything but the seq field *//**  Free a ghmm_dseq struct which holds as sequence a reference to a sequence in a different  sequence_t. The function deallocates everything but the reference.*/int ghmm_dseq_subseq_free (ghmm_dseq *sq);/**  Free a ghmm_cseq struct which holds as sequence a reference to a sequence in a different  sequence_d_t. The function deallocates everything but the reference.*/int ghmm_cseq_subseq_free (ghmm_cseq *sqd);/**   Reads a FastA file and returns a ghmm_dseq object   @param filename filemane of the fasta file   @param alfabet  alfabet   @return  ghmm_dseq of the fasta file*/ghmm_dseq *ghmm_dseq_open_fasta(const char *filename, ghmm_alphabet *alphabet);/** Generates all possible integer sequence of lenght n from an alphabet with    M letters. Use lexicographical ordering. Memory allocation here.    @param n      length of sequences    @param M     size of alphabet    @return array of generated integer sequences*/  ghmm_dseq *ghmm_dseq_lexWords (int n, int M);/**   Determine best model for a given integer sequence.    Choose from the set of models the    one with the highest likelihood for the given sequence.   @param mo            array of models   @param model\_number  number of models   @param sequence      sequence   @param seq\_len      length of sequence   @param log\_p         log likelihood of the sequence given the best model   @return index of best\_model (between 0 and model\_number - 1)*/  int ghmm_dseq_best_model (ghmm_dmodel ** mo, int model_number, int *sequence,                           int seq_len, double *log_p);/**   Make sure that the sequences only contain allowed symbols.    (between 0 and max\_symbol - 1)   @param sq          sequences   @param max_symb    number of different symbols   @return            -1 for error, 0 for no errors*/  int ghmm_dseq_check (ghmm_dseq * sq, int max_symb);/**  copy one integer sequence. Memory for target has to be allocated outside.  @param target  target sequence  @param source source sequence  @param len     length of source sequence  */  void ghmm_dseq_copy (int *target, int *source, int len);/**  copy one double sequence. Memory for target has to be allocated outside.  @param target  target sequence  @param source source sequence  @param len     length of source sequence  */  void ghmm_cseq_copy (double *target, double *source, int len);/**  Adds all integer sequences, sequence lengths etc   from source to target. Memory allocation is done here.  @param target target sequence structure  @param source  source sequence structure  @return -1 for error, 0 for success  */  int ghmm_dseq_add (ghmm_dseq * target, ghmm_dseq * source);/**  Adds all double sequences, sequence lengths etc   from source to target. Memory allocation is done here.  @param target target sequence structure  @param source  source sequence structure  @return -1 for error, 0 for success  */  int ghmm_cseq_add (ghmm_cseq * target, ghmm_cseq * source);/**  Prints one array of integer sequences in a file.  @param file       output file  @param sequence    array of sequences  */  void ghmm_dseq_print (ghmm_dseq * sequence, FILE * file);/**  Prints one array of integer sequences in a xml file  @param file       output file  @param sequence   array of sequences  */  void ghmm_dseq_print_xml (ghmm_dseq * sequence, FILE * file);/**   Prints one array of integer sequences in Mathematica format.   (List of lists)   @param file       output file   @param sq    array of sequences   @param name arbitrary sequence name for usage in Mathematica. */  void ghmm_dseq_mathematica_print (ghmm_dseq * sq, FILE * file, char *name);/**  Prints one array of double sequences in a file.  @param file       output file  @param sqd    array of sequences  @param discrete   switch: 0 means double output for symbols,       1 means truncate symbols to integer  */  void ghmm_cseq_print (ghmm_cseq * sqd, FILE * file, int discrete);/**   Prints one array of double sequences in Mathematica format.   (List of lists)   @param file       output file   @param sqd    array of sequences   @param name arbitrary sequence name for usage in Mathematica. */  void ghmm_cseq_mathematica_print (ghmm_cseq * sqd, FILE * file,                                     char *name);/** Output of double sequences suitable for gnuplot. One symbol per line,    sequences seperated by double newline.    @param file output file    @param sqd array of double sequences*/  void ghmm_cseq_gnu_print (ghmm_cseq * sqd, FILE * file);/**   Cleans integer sequence pointers in sequence struct. sets    seq\_number to zero.   Differs from sequence\_free since memory is not freed here.    @param sq sequence structure  */  void ghmm_dseq_clean (ghmm_dseq * sq);/**   Cleans double sequence pointers in sequence struct. sets    seq\_number to zero.   Differs from sequence\_free since memory is not freed here.    @param sqd sequence structure  */  void ghmm_cseq_clean (ghmm_cseq * sqd);/**  Frees all memory in a given array of integer sequences.  @param sq sequence  structure  @return 0 for succes, -1 for error  */  int ghmm_dseq_free (ghmm_dseq ** sq);/**  Frees all memory in a given array of double sequences.  @param sq sequence  structure  @return 0 for succes, -1 for error  */  int ghmm_cseq_free (ghmm_cseq ** sq);/**   Return biggest symbol in an interger sequence.   @param sq sequence structure   @return max value */  int ghmm_dseq_max_symbol (ghmm_dseq * sq);/**   Memory allocation for an integer sequence struct. Allocates arrays of lenght   seq\_number. NO allocation for the actual sequence, since its length is    unknown.   @param seq\_number:  number of sequences   @return:     pointer of sequence struct*/  ghmm_dseq *ghmm_dseq_calloc (long seq_number);/**   Completes Memory allocation for an integer sequence struct.   NO allocation for the actual sequence, since its length is    unknown.*/  int ghmm_dseq_calloc_state_labels (ghmm_dseq *sq);/**   Memory allocation for a double  sequence struct. Allocates arrays of lenght   seq\_number. NO allocation for the actual sequence, since its length is    unknown.   @param seq\_number:  number of sequences   @return:     pointer of sequence struct*/  ghmm_cseq *ghmm_cseq_calloc (long seq_number);/**   Copies array of integer sequences to double sequences.   @return       double sequence struct (target)   @param sq    integer sequence struct (source)   */  ghmm_cseq *ghmm_cseq_create_from_dseq (const ghmm_dseq * sq);/**   Copies array of double sequences into an array of integer   sequences. Truncates positions after decimal point.   @return       integer sequence struct (target)   @param sq    double sequence struct (source)   */  ghmm_dseq *ghmm_dseq_create_from_cseq (const ghmm_cseq * sqd);/**     Determines max sequence length in a given int sequence struct.    @author Peter Pipenbacher    @param sqd sequence struct    @return max sequence length */  int ghmm_dseq_max_len (const ghmm_dseq * sqd);/**     Determines max sequence length in a given double sequence struct.    @param sqd sequence struct    @return max sequence length */  int ghmm_cseq_max_len (const ghmm_cseq * sqd);/**  Calculates a mean sequence of a given array of double sequences.  Missing values of shorter sequences a assumed to be zero.  @param sqd sequence struct  @return pointer of sequence struct containing the mean sequence  */  ghmm_cseq *ghmm_cseq_mean (const ghmm_cseq * sqd);/**   Calculates the scatter matrix of an array of double sequences.    Missing parts of short sequences are NOT taken into account.   @return        scatter matrix   @param sqd     sequence struct   @param sqd     (calculated) dimension of scatter matrix  */  double **ghmm_cseq_scatter_matrix (const ghmm_cseq * sqd, int *dim);/**   Calculates transition class for a given double sequence   at a specified position. Very application specific!!! Currently    implemented only dummy function: allways returns 0 which   means no usage of multiple transition classes.   @param O double sequence   @param index position for class calculation   @param osum sum of symbols upto index   @return currently always 0 */  int ghmm_cseq_class (const double *O, int index, double *osum);  /*int ghmm_cseq_class(const ghmm_cseq *sqd, const int seq_number, int index, double *osum, );*//** Divides randomly a given array of double sequences into two sets.     Useful if a training and test set is needed. Memory allocation is done     here.    @param sqd input sequence array    @param sqd\_train training sequences    @param sqd\_test test sequences    @param train\_ratio ratio of number of train vs number of test sequences    @return 0 for success, -1 for error*/  int ghmm_cseq_partition (ghmm_cseq * sqd, ghmm_cseq * sqd_train,                            ghmm_cseq * sqd_test, double train_ratio);/**     Copies all entries from one sequence in a source array to a target array.    No memory allocation here.    @param target double sequence target    @param source double sequence source    @param t_num position in target array    @param s_num position in source array*/  void ghmm_cseq_copy_all (ghmm_cseq * target, long t_num,                            ghmm_cseq * source, long s_num);/** Log-Likelihood function in a mixture model:    (mathe mode?)    $\sum_k w^k \log( \sum_c (\alpha_c p(O^k | \lambda_c)))$    @param smo pointer to array of smodels    @param smo\_number number of models    @param sqd sequence struct    @param like log likelihood*/  int ghmm_cseq_mix_like (ghmm_cmodel ** smo, int smo_number, ghmm_cseq * sqd,                           double *like);#ifdef __cplusplus}#endif#endif/*@} (Doc++-Group: sequence) */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?