📄 smixturehmm.c

📁 一个通用的隐性马尔可夫C代码库开发环境:C语言简要说明:这是一个通用的隐性马尔可夫C代码库
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*******************************************************************************  author       : Bernd Wichern  filename     : /zpr/bspk/src/hmm/ghmm/ghmm/smixturehmm.c  created      : TIME: 14:35:48     DATE: Thu 20. July 2000  $Id: smixturehmm.c,v 1.9 2002/02/22 23:47:12 achim Exp $Copyright (C) 1998-2001, ZAIK/ZPR, Universit鋞 zu K鰈nThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*******************************************************************************/#include <stdio.h>#include <float.h>#include <math.h>#include <time.h>#include "mprintf.h"#include "mes.h"#include "matrix.h"#include "smixturehmm.h"#include "smodel.h"#include "sreestimate.h"#include "sfoba.h"#include "rng.h"#include "sequence.h"#include "const.h"#include "smap_classify.h"/* used in main and deactivated other functions */#if 0double total_train_w = 0.0;double total_test_w = 0.0;#endif#if 0 /* no main *//*============================================================================*/int main(int argc, char* argv[]) {#define CUR_PROC "smixturehmm_main"  int exitcode = -1;  sequence_d_t *sqd = NULL, *sqd_train = NULL, *sqd_test =NULL;  sequence_d_t **sqd_dummy = NULL;  smodel **smo_initial = NULL, **smo = NULL;  int i, k, iter, iterations, field_number, min_smo, max_smo, smo_number,    total_smo_number, print_models, mode, free_parameter;  long errors_train= 0, errors_test = 0;  char  *str;  double train_likelihood, test_likelihood, train_ratio, bic;  double *avg_comp_like = NULL;  double **cp = NULL; /* component probabilities for all train-seqs. */  FILE *outfile = NULL, *likefile = NULL, *smofile = NULL;  char filename[256], likefilename[256];  gsl_rng_init();    if (argc == 10 || argc == 11) {    sprintf(filename, "%s.smo", argv[3]);    if(!(smofile = mes_fopen(filename, "wt"))) {mes_proc(); goto STOP;}    sprintf(likefilename, "%s.like", argv[3]);    if(!(likefile = mes_fopen(likefilename, "at"))) {mes_proc(); goto STOP;}    sprintf(filename, "%s", argv[3]);     if(!(outfile = mes_fopen(filename, "at"))) {mes_proc(); goto STOP;}    iterations = atoi(argv[4]);    min_smo = atoi(argv[5]);    max_smo = atoi(argv[6]);    printf("Iter %d, min model no %d, max model no %d\n", iterations, min_smo, 	   max_smo);    if (max_smo < min_smo) {      printf(" Max. Model No. < Min. Model No.! \n");      goto STOP;    }    train_ratio = atof(argv[7]);    if (train_ratio > 1 || train_ratio < 0) {      printf("Fehler Train_Ratio (argv[7]): %f\n", train_ratio);      goto STOP;    }    print_models = atoi(argv[8]);    mode = atoi(argv[9]);    if (mode < 1 || mode > 5) {      printf("Error: initial mode out of range\n"); goto STOP;    }    printf("TRAIN RATIO %.2f\n", train_ratio);    if (argc == 11)      gsl_rng_set(RNG,atoi(argv[10]));    else {            gsl_rng_timeseed(RNG); /* Before: gsl_rng_set(RNG,0); */    }  }  else {    printf("Insufficient arguments. Usage: \n");    printf("smixturehmm [Seq.File] [Model File] [Out File] [No. of runs] \n");    printf("[Min Model No.] [Max Model No.] [train ratio] [print models] [mode] <seed>\n");     goto STOP;  }  smixturehmm_print_header(likefile, argv, 1);  smixturehmm_print_header(outfile, argv, 0);  /* Read Seqs. and Initial Models only once */  sqd_dummy = sequence_d_read(argv[1], &field_number);  printf("Length first Seq: %d\n", sqd_dummy[0]->seq_len[0]);  if (!sqd_dummy) {mes_proc(); goto STOP;}  sqd = sqd_dummy[0];  if (field_number > 1)    mes_prot("Warning: Seq. File contains multiple Seq. Fields; use only the first one\n");    smo_initial = smodel_read(argv[2], &total_smo_number);  if (!smo_initial) {mes_proc(); goto STOP;}  if (!m_calloc(smo, total_smo_number)) {mes_proc(); goto STOP;}  if (total_smo_number < max_smo) {    str = mprintf(NULL, 0, 		  "Need %d initial models, read only %d from model file\n", 		  max_smo, total_smo_number);    mes_prot(str); m_free(str); goto STOP;  }  if (smo_initial[0]->density == 0) {    printf("normal\n");    fprintf(outfile, "normal\n");  }  else {    printf("normal_pos\n");    fprintf(outfile, "normal_pos\n");  }  /*-------------- Main loop over different no. of models-------------------------*/  for (smo_number = min_smo; smo_number <= max_smo; smo_number++) {    /* different partitions of test und train seqs. */    for (iter = 0; iter < iterations; iter++) {          if (!outfile)	if(!(outfile = mes_fopen(filename, "at"))) {mes_proc(); goto STOP;}      if (!likefile)	if(!(likefile = mes_fopen(likefilename, "at"))) {mes_proc(); goto STOP;}      fprintf(outfile, "*********** Model No. %d, CV Iter %d ****************\n",	      smo_number, iter + 1);      /*==================INIT========================================*/      /* divide into train and test seqs. */      if(!m_calloc(sqd_train, 1)) {mes_proc(); goto STOP;}      if(!m_calloc(sqd_test, 1)) {mes_proc(); goto STOP;}      if (sequence_d_partition(sqd, sqd_train, sqd_test, train_ratio) == -1) {	str = mprintf(NULL, 0, "Error partitioning seqs, (model %d, iter %d)\n",		      smo_number, iter);	mes_prot(str); m_free(str);	if (sqd_train->seq == NULL) {	  str = mprintf(NULL, 0, "Error: empty train seqs, (model %d, iter %d)\n",			smo_number, iter);	  mes_prot(str); m_free(str);	}	goto STOP;      }      printf("%ld seqs divided into %ld train seqs and %ld test seqs\n", sqd->seq_number,	     sqd_train->seq_number, sqd_test->seq_number);      /* copy appropriate no. of initial models */      for (k = 0; k < smo_number; k++) {	smo[k] = smodel_copy(smo_initial[k]);	if (!smo[k]) {	  str = mprintf(NULL, 0, "Error copying models, (model %d, iter %d)\n",			smo_number, iter);	  mes_prot(str); m_free(str); goto STOP;	  	}      }      /* find out no. of free parameters;  smo_number - 1 = component weights */      free_parameter = smodel_count_free_parameter(smo, smo_number)	+ smo_number - 1;      cp = matrix_d_alloc(sqd_train->seq_number, smo_number);      if (!cp) { mes_proc(); goto STOP;}      /* Initial values for component probs for all seqs. and model priors	 before the actual  clustering starts */      if (smixturehmm_init(cp, sqd_train, smo, smo_number, mode) == -1) {	str = mprintf(NULL, 0, "Error in initialization comp. prob (model %d, iter %d)\n",		      smo_number, iter);	mes_prot(str); m_free(str); goto STOP;      }      /*==================END INIT=====================================*/      /* clustering */      if (smixturehmm_cluster(outfile, cp, sqd_train, smo, smo_number) == -1) {	str = mprintf(NULL, 0, "Error in clustering, (model %d, iter %d)\n",		      smo_number, iter);	mes_prot(str); m_free(str); goto STOP;      }      /* TEST */            /*		if (smixturehmm_calc_cp(cp, sqd_train, smo, smo_number)  == -1) {	str = mprintf(NULL, 0, "Error after clustering, (model %d, CV Iter %d)\n",	smo_number, iter + 1);	mes_prot(str); m_free(str); goto STOP;	}	fprintf(outfile, "Component Probs. after Clustering:\n");	matrix_d_print(outfile, cp, sqd_train->seq_number, smo_number, " ", ",", ";");      */      /* End TEST */      /*================ OUTPUT=====================================*/      /* print trained models */            if (print_models) {	fprintf(smofile, "# ******************  Models: %d, CV-Iter %d *****************\n",		smo_number, iter);	for (k = 0; k < smo_number; k++)	  smodel_print(smofile, smo[k]);        }          avg_comp_like = smixturehmm_avg_like(cp, sqd_train, smo, smo_number);      if (avg_comp_like == NULL) {	mes_prot("Error calculating avg_like \n"); goto STOP;      }      fprintf(outfile, "\nTrain Set:\nModel Priors \t  Likel. per Seq\n");      for (k = 0; k < smo_number; k++)	fprintf(outfile, "%.4f\t%.4f\t%.4f\n", smo[k]->prior,  avg_comp_like[k]);      errors_train = sequence_d_mix_like	(smo, smo_number, sqd_train, &train_likelihood);      fprintf(outfile, "Likelihood Train Set %.4f (%ld seqs); (per seq %.4f)\n", 	      train_likelihood, sqd_train->seq_number, 	      train_likelihood/total_train_w);         /* likelihood on test set */            if (sqd_test != NULL){	errors_test = sequence_d_mix_like	  (smo, smo_number, sqd_test, &test_likelihood);		fprintf(outfile, "Likelihood Test Set %.4f (%ld seqs); (per normseq ", 		test_likelihood, sqd_test->seq_number);		if (total_test_w > 0)	  fprintf(outfile, " %.4f)\n", test_likelihood/total_test_w);	else	  fprintf(outfile, " ---)\n");      }                  /* Bayes Information Criterion, see: Kass, Raftery: Bayes Faktors. */      bic = 2 * train_likelihood - free_parameter * log(sqd_train->seq_number);      fprintf(likefile, "%d\t%d\t%ld\t%ld\t%.4f\t%.4f\t%.4f\t",	      smo_number, iter, sqd_train->seq_number, sqd_test->seq_number,	      train_likelihood, test_likelihood, 	      train_likelihood/total_train_w);	      	            if (total_test_w > 0)	fprintf(likefile," %.4f\t", test_likelihood/total_test_w);      else	fprintf(likefile," ---\t");      fprintf(likefile,"%ld\t%ld\t%.4f\n", errors_train, errors_test, bic);           /*================ FREE=====================================*/           for (k = 0; k < smo_number; k++)	smodel_free(&(smo[k]));           matrix_d_free(&cp, sqd_train->seq_number);      sequence_d_free(&sqd_train);      sequence_d_free(&sqd_test);      m_free(avg_comp_like);      if (outfile) 	fclose(outfile);  /* save results, open to append at the beginning of the loop */       if (likefile)	fclose(likefile);      likefile = NULL; outfile = NULL;    } /* for (iterations) */      } /* for (smo_number) */  if (smofile)    fclose(smofile);  exitcode = 0;  /*------------------------------------------------------------------------*/ STOP:  mes(MES_WIN, "\n(%2.2T): Program finished with exitcode %d.\n", exitcode );  mes_exit();  return(exitcode);# undef CUR_PROC} /* main */#endif /* nomain *//*============================================================================*/ /* Original version, without attempt to avoid lokal traps */int smixturehmm_cluster(FILE *outfile, double **cp, sequence_d_t *sqd,
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -