📄 smixturehmm.c

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/smixturehmm.c*       Authors:  Bernd Wichern**       Copyright (C) 1998-2004 Alexander Schliep*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,*                               Berlin**       Contact: schliep@ghmm.org**       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1713 $*                       from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $*             last change by $Author: grunau $.********************************************************************************/#ifdef WIN32#  include "win_config.h"#endif#ifdef HAVE_CONFIG_H#  include "../config.h"#endif#ifdef GHMM_OBSOLETE#include <stdio.h>#include <float.h>#include <math.h>#include <time.h>#include "ghmm.h"#include "mprintf.h"#include "mes.h"#include "matrix.h"#include "smixturehmm.h"#include "smodel.h"#include "sreestimate.h"#include "sfoba.h"#include "rng.h"#include "sequence.h"#include "smap_classify.h"#include "ghmm_internals.h"/* used in main and deactivated other functions */#if 0double total_train_w = 0.0;double total_test_w = 0.0;#endif#if 0                           /* no main *//*============================================================================*/int main (int argc, char *argv[]){#define CUR_PROC "smixturehmm_main"  int exitcode = -1;  ghmm_cseq *sqd = NULL, *sqd_train = NULL, *sqd_test = NULL;  ghmm_cseq **sqd_dummy = NULL;  ghmm_cmodel **smo_initial = NULL, **smo = NULL;  int i, k, iter, iterations, field_number, min_smo, max_smo, smo_number,    total_smo_number, print_models, mode, free_parameter;  long errors_train = 0, errors_test = 0;  char *str;  double train_likelihood, test_likelihood, train_ratio, bic;  double *avg_comp_like = NULL;  double **cp = NULL;           /* component probabilities for all train-seqs. */  FILE *outfile = NULL, *likefile = NULL, *smofile = NULL;  char filename[256], likefilename[256];  ghmm_rng_init ();  if (argc == 10 || argc == 11) {    sprintf (filename, "%s.smo", argv[3]);    if (!(smofile = ighmm_mes_fopen (filename, "wt"))) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }    sprintf (likefilename, "%s.like", argv[3]);    if (!(likefile = ighmm_mes_fopen (likefilename, "at"))) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }    sprintf (filename, "%s", argv[3]);    if (!(outfile = ighmm_mes_fopen (filename, "at"))) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }    iterations = atoi (argv[4]);    min_smo = atoi (argv[5]);    max_smo = atoi (argv[6]);    printf ("Iter %d, min model no %d, max model no %d\n", iterations,            min_smo, max_smo);    if (max_smo < min_smo) {      printf (" Max. Model No. < Min. Model No.! \n");      goto STOP;    }    train_ratio = atof (argv[7]);    if (train_ratio > 1 || train_ratio < 0) {      printf ("Fehler Train_Ratio (argv[7]): %f\n", train_ratio);      goto STOP;    }    print_models = atoi (argv[8]);    mode = atoi (argv[9]);    if (mode < 1 || mode > 5) {      printf ("Error: initial mode out of range\n");      goto STOP;    }    printf ("TRAIN RATIO %.2f\n", train_ratio);    if (argc == 11)      GHMM_RNG_SET (RNG, atoi (argv[10]));    else {      ghmm_rng_timeseed (RNG);    }  }  else {    printf ("Insufficient arguments. Usage: \n");    printf      ("smixturehmm [Seq.File] [Model File] [Out File] [No. of runs] \n");    printf      ("[Min Model No.] [Max Model No.] [train ratio] [print models] [mode] <seed>\n");    goto STOP;  }  ghmm_smixturehmm_print_header (likefile, argv, 1);  ghmm_smixturehmm_print_header (outfile, argv, 0);  /* Read Seqs. and Initial Models only once */  sqd_dummy = ghmm_cseq_read (argv[1], &field_number);  printf ("Length first Seq: %d\n", sqd_dummy[0]->seq_len[0]);  if (!sqd_dummy) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  sqd = sqd_dummy[0];  if (field_number > 1)    GHMM_LOG(LCONVERTED,      "Warning: Seq. File contains multiple Seq. Fields; use only the first one\n");  smo_initial = ghmm_cmodel_read (argv[2], &total_smo_number);  if (!smo_initial) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  ARRAY_CALLOC (smo, total_smo_number);  if (total_smo_number < max_smo) {    str = ighmm_mprintf (NULL, 0,                   "Need %d initial models, read only %d from model file\n",                   max_smo, total_smo_number);    GHMM_LOG(LCONVERTED, str);    m_free (str);    goto STOP;  }  if (smo_initial[0]->density == 0) {    printf ("normal\n");    fprintf (outfile, "normal\n");  }  else {    printf ("normal_pos\n");    fprintf (outfile, "normal_pos\n");  }  /*-------------- Main loop over different no. of models-------------------------*/  for (smo_number = min_smo; smo_number <= max_smo; smo_number++) {    /* different partitions of test und train seqs. */    for (iter = 0; iter < iterations; iter++) {      if (!outfile)        if (!(outfile = ighmm_mes_fopen (filename, "at"))) {          GHMM_LOG_QUEUED(LCONVERTED);          goto STOP;        }      if (!likefile)        if (!(likefile = ighmm_mes_fopen (likefilename, "at"))) {          GHMM_LOG_QUEUED(LCONVERTED);          goto STOP;        }      fprintf (outfile,               "*********** Model No. %d, CV Iter %d ****************\n",               smo_number, iter + 1);      /*==================INIT========================================*/      /* divide into train and test seqs. */      ARRAY_CALLOC (sqd_train, 1);      ARRAY_CALLOC (sqd_test, 1);      if (ghmm_cseq_partition (sqd, sqd_train, sqd_test, train_ratio) == -1) {        str =          ighmm_mprintf (NULL, 0, "Error partitioning seqs, (model %d, iter %d)\n",                   smo_number, iter);        GHMM_LOG(LCONVERTED, str);        m_free (str);        if (sqd_train->seq == NULL) {          str =            ighmm_mprintf (NULL, 0,                     "Error: empty train seqs, (model %d, iter %d)\n",                     smo_number, iter);          GHMM_LOG(LCONVERTED, str);          m_free (str);        }        goto STOP;      }      printf ("%ld seqs divided into %ld train seqs and %ld test seqs\n",              sqd->seq_number, sqd_train->seq_number, sqd_test->seq_number);      /* copy appropriate no. of initial models */      for (k = 0; k < smo_number; k++) {        smo[k] = ghmm_cmodel_copy (smo_initial[k]);        if (!smo[k]) {          str =            ighmm_mprintf (NULL, 0, "Error copying models, (model %d, iter %d)\n",                     smo_number, iter);          GHMM_LOG(LCONVERTED, str);          m_free (str);          goto STOP;        }      }      /* find out no. of free parameters;  smo_number - 1 = component weights */      free_parameter = ghmm_cmodel_count_free_parameter (smo, smo_number)        + smo_number - 1;      cp = ighmm_cmatrix_alloc (sqd_train->seq_number, smo_number);      if (!cp) {        GHMM_LOG_QUEUED(LCONVERTED);        goto STOP;      }      /* Initial values for component probs for all seqs. and model priors         before the actual  clustering starts */      if (ghmm_smixturehmm_init (cp, sqd_train, smo, smo_number, mode) == -1) {        str =          ighmm_mprintf (NULL, 0,                   "Error in initialization comp. prob (model %d, iter %d)\n",                   smo_number, iter);        GHMM_LOG(LCONVERTED, str);        m_free (str);        goto STOP;      }      /*==================END INIT=====================================*/      /* clustering */      if (ghmm_smixturehmm_cluster (outfile, cp, sqd_train, smo, smo_number) == -1) {        str = ighmm_mprintf (NULL, 0, "Error in clustering, (model %d, iter %d)\n",                       smo_number, iter);        GHMM_LOG(LCONVERTED, str);        m_free (str);        goto STOP;      }      /* TEST */      /*         if (ghmm_smixturehmm_calc_cp(cp, sqd_train, smo, smo_number)  == -1) {         str = ighmm_mprintf(NULL, 0, "Error after clustering, (model %d, CV Iter %d)\n",         smo_number, iter + 1);         GHMM_LOG(LCONVERTED, str); m_free(str); goto STOP;         }         fprintf(outfile, "Component Probs. after Clustering:\n");         ighmm_cmatrix_print(outfile, cp, sqd_train->seq_number, smo_number, " ", ",", ";");       */      /* End TEST */      /*================ OUTPUT=====================================*/      /* print trained models */      if (print_models) {        fprintf (smofile,                 "# ******************  Models: %d, CV-Iter %d *****************\n",                 smo_number, iter);        for (k = 0; k < smo_number; k++)          ghmm_cmodel_print (smofile, smo[k]);      }      avg_comp_like = ghmm_smixturehmm_avg_like (cp, sqd_train, smo, smo_number);      if (avg_comp_like == NULL) {        GHMM_LOG(LCONVERTED, "Error calculating avg_like \n");        goto STOP;      }      fprintf (outfile, "\nTrain Set:\nModel Priors \t  Likel. per Seq\n");      for (k = 0; k < smo_number; k++)        fprintf (outfile, "%.4f\t%.4f\t%.4f\n", smo[k]->prior,                 avg_comp_like[k]);      errors_train = ghmm_cseq_mix_like        (smo, smo_number, sqd_train, &train_likelihood);      fprintf (outfile,               "Likelihood Train Set %.4f (%ld seqs); (per seq %.4f)\n",               train_likelihood, sqd_train->seq_number,               train_likelihood / total_train_w);      /* likelihood on test set */      if (sqd_test != NULL) {        errors_test = ghmm_cseq_mix_like          (smo, smo_number, sqd_test, &test_likelihood);        fprintf (outfile,                 "Likelihood Test Set %.4f (%ld seqs); (per normseq ",                 test_likelihood, sqd_test->seq_number);        if (total_test_w > 0)          fprintf (outfile, " %.4f)\n", test_likelihood / total_test_w);        else          fprintf (outfile, " ---)\n");      }      /* Bayes Information Criterion, see: Kass, Raftery: Bayes Faktors. */      bic =        2 * train_likelihood - free_parameter * log (sqd_train->seq_number);      fprintf (likefile, "%d\t%d\t%ld\t%ld\t%.4f\t%.4f\t%.4f\t", smo_number,               iter, sqd_train->seq_number, sqd_test->seq_number,               train_likelihood, test_likelihood,               train_likelihood / total_train_w);      if (total_test_w > 0)        fprintf (likefile, " %.4f\t", test_likelihood / total_test_w);      else        fprintf (likefile, " ---\t");      fprintf (likefile, "%ld\t%ld\t%.4f\n", errors_train, errors_test, bic);      /*================ FREE=====================================*/      for (k = 0; k < smo_number; k++)        ghmm_cmodel_free (&(smo[k]));      ighmm_cmatrix_free (&cp, sqd_train->seq_number);      ghmm_cseq_free (&sqd_train);      ghmm_cseq_free (&sqd_test);      m_free (avg_comp_like);      if (outfile)        fclose (outfile);       /* save results, open to append at the beginning of the loop */      if (likefile)        fclose (likefile);      likefile = NULL;      outfile = NULL;    }                           /* for (iterations) */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -