📄 smixturehmm.c
字号:
/******************************************************************************* author : Bernd Wichern filename : /zpr/bspk/src/hmm/ghmm/ghmm/smixturehmm.c created : TIME: 14:35:48 DATE: Thu 20. July 2000 $Id: smixturehmm.c,v 1.9 2002/02/22 23:47:12 achim Exp $Copyright (C) 1998-2001, ZAIK/ZPR, Universit鋞 zu K鰈nThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*******************************************************************************/#include <stdio.h>#include <float.h>#include <math.h>#include <time.h>#include "mprintf.h"#include "mes.h"#include "matrix.h"#include "smixturehmm.h"#include "smodel.h"#include "sreestimate.h"#include "sfoba.h"#include "rng.h"#include "sequence.h"#include "const.h"#include "smap_classify.h"/* used in main and deactivated other functions */#if 0double total_train_w = 0.0;double total_test_w = 0.0;#endif#if 0 /* no main *//*============================================================================*/int main(int argc, char* argv[]) {#define CUR_PROC "smixturehmm_main" int exitcode = -1; sequence_d_t *sqd = NULL, *sqd_train = NULL, *sqd_test =NULL; sequence_d_t **sqd_dummy = NULL; smodel **smo_initial = NULL, **smo = NULL; int i, k, iter, iterations, field_number, min_smo, max_smo, smo_number, total_smo_number, print_models, mode, free_parameter; long errors_train= 0, errors_test = 0; char *str; double train_likelihood, test_likelihood, train_ratio, bic; double *avg_comp_like = NULL; double **cp = NULL; /* component probabilities for all train-seqs. */ FILE *outfile = NULL, *likefile = NULL, *smofile = NULL; char filename[256], likefilename[256]; gsl_rng_init(); if (argc == 10 || argc == 11) { sprintf(filename, "%s.smo", argv[3]); if(!(smofile = mes_fopen(filename, "wt"))) {mes_proc(); goto STOP;} sprintf(likefilename, "%s.like", argv[3]); if(!(likefile = mes_fopen(likefilename, "at"))) {mes_proc(); goto STOP;} sprintf(filename, "%s", argv[3]); if(!(outfile = mes_fopen(filename, "at"))) {mes_proc(); goto STOP;} iterations = atoi(argv[4]); min_smo = atoi(argv[5]); max_smo = atoi(argv[6]); printf("Iter %d, min model no %d, max model no %d\n", iterations, min_smo, max_smo); if (max_smo < min_smo) { printf(" Max. Model No. < Min. Model No.! \n"); goto STOP; } train_ratio = atof(argv[7]); if (train_ratio > 1 || train_ratio < 0) { printf("Fehler Train_Ratio (argv[7]): %f\n", train_ratio); goto STOP; } print_models = atoi(argv[8]); mode = atoi(argv[9]); if (mode < 1 || mode > 5) { printf("Error: initial mode out of range\n"); goto STOP; } printf("TRAIN RATIO %.2f\n", train_ratio); if (argc == 11) gsl_rng_set(RNG,atoi(argv[10])); else { gsl_rng_timeseed(RNG); /* Before: gsl_rng_set(RNG,0); */ } } else { printf("Insufficient arguments. Usage: \n"); printf("smixturehmm [Seq.File] [Model File] [Out File] [No. of runs] \n"); printf("[Min Model No.] [Max Model No.] [train ratio] [print models] [mode] <seed>\n"); goto STOP; } smixturehmm_print_header(likefile, argv, 1); smixturehmm_print_header(outfile, argv, 0); /* Read Seqs. and Initial Models only once */ sqd_dummy = sequence_d_read(argv[1], &field_number); printf("Length first Seq: %d\n", sqd_dummy[0]->seq_len[0]); if (!sqd_dummy) {mes_proc(); goto STOP;} sqd = sqd_dummy[0]; if (field_number > 1) mes_prot("Warning: Seq. File contains multiple Seq. Fields; use only the first one\n"); smo_initial = smodel_read(argv[2], &total_smo_number); if (!smo_initial) {mes_proc(); goto STOP;} if (!m_calloc(smo, total_smo_number)) {mes_proc(); goto STOP;} if (total_smo_number < max_smo) { str = mprintf(NULL, 0, "Need %d initial models, read only %d from model file\n", max_smo, total_smo_number); mes_prot(str); m_free(str); goto STOP; } if (smo_initial[0]->density == 0) { printf("normal\n"); fprintf(outfile, "normal\n"); } else { printf("normal_pos\n"); fprintf(outfile, "normal_pos\n"); } /*-------------- Main loop over different no. of models-------------------------*/ for (smo_number = min_smo; smo_number <= max_smo; smo_number++) { /* different partitions of test und train seqs. */ for (iter = 0; iter < iterations; iter++) { if (!outfile) if(!(outfile = mes_fopen(filename, "at"))) {mes_proc(); goto STOP;} if (!likefile) if(!(likefile = mes_fopen(likefilename, "at"))) {mes_proc(); goto STOP;} fprintf(outfile, "*********** Model No. %d, CV Iter %d ****************\n", smo_number, iter + 1); /*==================INIT========================================*/ /* divide into train and test seqs. */ if(!m_calloc(sqd_train, 1)) {mes_proc(); goto STOP;} if(!m_calloc(sqd_test, 1)) {mes_proc(); goto STOP;} if (sequence_d_partition(sqd, sqd_train, sqd_test, train_ratio) == -1) { str = mprintf(NULL, 0, "Error partitioning seqs, (model %d, iter %d)\n", smo_number, iter); mes_prot(str); m_free(str); if (sqd_train->seq == NULL) { str = mprintf(NULL, 0, "Error: empty train seqs, (model %d, iter %d)\n", smo_number, iter); mes_prot(str); m_free(str); } goto STOP; } printf("%ld seqs divided into %ld train seqs and %ld test seqs\n", sqd->seq_number, sqd_train->seq_number, sqd_test->seq_number); /* copy appropriate no. of initial models */ for (k = 0; k < smo_number; k++) { smo[k] = smodel_copy(smo_initial[k]); if (!smo[k]) { str = mprintf(NULL, 0, "Error copying models, (model %d, iter %d)\n", smo_number, iter); mes_prot(str); m_free(str); goto STOP; } } /* find out no. of free parameters; smo_number - 1 = component weights */ free_parameter = smodel_count_free_parameter(smo, smo_number) + smo_number - 1; cp = matrix_d_alloc(sqd_train->seq_number, smo_number); if (!cp) { mes_proc(); goto STOP;} /* Initial values for component probs for all seqs. and model priors before the actual clustering starts */ if (smixturehmm_init(cp, sqd_train, smo, smo_number, mode) == -1) { str = mprintf(NULL, 0, "Error in initialization comp. prob (model %d, iter %d)\n", smo_number, iter); mes_prot(str); m_free(str); goto STOP; } /*==================END INIT=====================================*/ /* clustering */ if (smixturehmm_cluster(outfile, cp, sqd_train, smo, smo_number) == -1) { str = mprintf(NULL, 0, "Error in clustering, (model %d, iter %d)\n", smo_number, iter); mes_prot(str); m_free(str); goto STOP; } /* TEST */ /* if (smixturehmm_calc_cp(cp, sqd_train, smo, smo_number) == -1) { str = mprintf(NULL, 0, "Error after clustering, (model %d, CV Iter %d)\n", smo_number, iter + 1); mes_prot(str); m_free(str); goto STOP; } fprintf(outfile, "Component Probs. after Clustering:\n"); matrix_d_print(outfile, cp, sqd_train->seq_number, smo_number, " ", ",", ";"); */ /* End TEST */ /*================ OUTPUT=====================================*/ /* print trained models */ if (print_models) { fprintf(smofile, "# ****************** Models: %d, CV-Iter %d *****************\n", smo_number, iter); for (k = 0; k < smo_number; k++) smodel_print(smofile, smo[k]); } avg_comp_like = smixturehmm_avg_like(cp, sqd_train, smo, smo_number); if (avg_comp_like == NULL) { mes_prot("Error calculating avg_like \n"); goto STOP; } fprintf(outfile, "\nTrain Set:\nModel Priors \t Likel. per Seq\n"); for (k = 0; k < smo_number; k++) fprintf(outfile, "%.4f\t%.4f\t%.4f\n", smo[k]->prior, avg_comp_like[k]); errors_train = sequence_d_mix_like (smo, smo_number, sqd_train, &train_likelihood); fprintf(outfile, "Likelihood Train Set %.4f (%ld seqs); (per seq %.4f)\n", train_likelihood, sqd_train->seq_number, train_likelihood/total_train_w); /* likelihood on test set */ if (sqd_test != NULL){ errors_test = sequence_d_mix_like (smo, smo_number, sqd_test, &test_likelihood); fprintf(outfile, "Likelihood Test Set %.4f (%ld seqs); (per normseq ", test_likelihood, sqd_test->seq_number); if (total_test_w > 0) fprintf(outfile, " %.4f)\n", test_likelihood/total_test_w); else fprintf(outfile, " ---)\n"); } /* Bayes Information Criterion, see: Kass, Raftery: Bayes Faktors. */ bic = 2 * train_likelihood - free_parameter * log(sqd_train->seq_number); fprintf(likefile, "%d\t%d\t%ld\t%ld\t%.4f\t%.4f\t%.4f\t", smo_number, iter, sqd_train->seq_number, sqd_test->seq_number, train_likelihood, test_likelihood, train_likelihood/total_train_w); if (total_test_w > 0) fprintf(likefile," %.4f\t", test_likelihood/total_test_w); else fprintf(likefile," ---\t"); fprintf(likefile,"%ld\t%ld\t%.4f\n", errors_train, errors_test, bic); /*================ FREE=====================================*/ for (k = 0; k < smo_number; k++) smodel_free(&(smo[k])); matrix_d_free(&cp, sqd_train->seq_number); sequence_d_free(&sqd_train); sequence_d_free(&sqd_test); m_free(avg_comp_like); if (outfile) fclose(outfile); /* save results, open to append at the beginning of the loop */ if (likefile) fclose(likefile); likefile = NULL; outfile = NULL; } /* for (iterations) */ } /* for (smo_number) */ if (smofile) fclose(smofile); exitcode = 0; /*------------------------------------------------------------------------*/ STOP: mes(MES_WIN, "\n(%2.2T): Program finished with exitcode %d.\n", exitcode ); mes_exit(); return(exitcode);# undef CUR_PROC} /* main */#endif /* nomain *//*============================================================================*/ /* Original version, without attempt to avoid lokal traps */int smixturehmm_cluster(FILE *outfile, double **cp, sequence_d_t *sqd,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -