📄 sgenerate.c
字号:
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/sgenerate.c* Authors: Bernhard Knab, Benjamin Georgi** Copyright (C) 1998-2004 Alexander Schliep* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,* Berlin** Contact: schliep@ghmm.org** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1713 $* from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $* last change by $Author: grunau $.********************************************************************************/#ifdef WIN32# include "win_config.h"#endif#ifdef HAVE_CONFIG_H# include "../config.h"#endif#include <stdio.h>#include <stdlib.h>#include <float.h>#include "ghmm.h"#include "mprintf.h"#include "mes.h"#include "sequence.h"#include "smodel.h"#include "sgenerate.h"#include "sfoba.h"#include "matrix.h"#include "rng.h"#include "ghmm_internals.h"/*============================================================================*//* Extend given sequences on the basis of the model. Following modes are possible: mode = 0: Initial state: Viterbi, Extension: Viterbi-Path = 1: Initial state: Viterbi, Extension: all paths possible = 2: Initial state: probability(i) ~= alpha_t(i), Extension: Viterbi-Path = 3: Initial state: probability(i) ~= alpha_t(i), Extension: all paths possible FRAGE: macht Extension Viterbi ueberhaupt Sinn???*/ghmm_cseq *ghmm_sgenerate_extensions (ghmm_cmodel * smo, ghmm_cseq * sqd_short, int seed, int global_len, sgeneration_mode_t mode){#define CUR_PROC "ghmm_sgenerate_extensions" ghmm_cseq *sq = NULL; int i, j, t, n, m, len = global_len, short_len, max_short_len = 0, up = 0;#ifdef bausparkasse int tilgphase = 0;#endif /* int *v_path = NULL; */ double log_p, *initial_distribution, **alpha, *scale, p, sum; /* aicj */ int class = -1; /* TEMP */ if (mode == all_viterbi || mode == viterbi_viterbi || mode == viterbi_all) { GHMM_LOG(LCONVERTED, "Error: mode not implemented yet\n"); goto STOP; } if (len <= 0) /* no global length; model should have a final state */ len = (int) GHMM_MAX_SEQ_LEN; max_short_len = ghmm_cseq_max_len (sqd_short); /*---------------alloc-------------------------------------------------*/ sq = ghmm_cseq_calloc (sqd_short->seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (initial_distribution, smo->N); /* is needed in cfoba_forward() */ alpha = ighmm_cmatrix_alloc (max_short_len, smo->N); if (!alpha) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (scale, max_short_len); ghmm_rng_init (); GHMM_RNG_SET (RNG, seed); /*---------------main loop over all seqs-------------------------------*/ for (n = 0; n < sqd_short->seq_number; n++) { ARRAY_CALLOC (sq->seq[n], len); short_len = sqd_short->seq_len[n]; if (len < short_len) { GHMM_LOG(LCONVERTED, "Error: given sequence is too long\n"); goto STOP; } ghmm_cseq_copy (sq->seq[n], sqd_short->seq[n], short_len);#ifdef GHMM_OBSOLETE sq->seq_label[n] = sqd_short->seq_label[n];#endif /* GHMM_OBSOLETE */ /* Initial distribution */ /* 1. Viterbi-state */#if 0 /* wieder aktivieren, wenn ghmm_cmodel_viterbi realisiert */ if (mode == viterbi_all || mode == viterbi_viterbi) { v_path = cviterbi (smo, sqd_short->seq[n], short_len, &log_p); if (v_path[short_len - 1] < 0 || v_path[short_len - 1] >= smo->N) { GHMM_LOG(LCONVERTED, "Warning:Error: from viterbi()\n"); sq->seq_len[n] = short_len; m_realloc (sq->seq[n], short_len); continue; } m_memset (initial_distribution, 0, smo->N); initial_distribution[v_path[short_len - 1]] = 1.0; /* all other 0 */ m_free (v_path); }#endif /* 2. Initial Distribution ??? Pi(i) = alpha_t(i)/P(O|lambda) */ if (mode == all_all || mode == all_viterbi) { if (short_len > 0) { if (ghmm_cmodel_forward (smo, sqd_short->seq[n], short_len, NULL /* ?? */ , alpha, scale, &log_p)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } sum = 0.0; for (i = 0; i < smo->N; i++) { /* alpha ist skaliert! */ initial_distribution[i] = alpha[short_len - 1][i]; sum += initial_distribution[i]; } /* nicht ok.? auf eins skalieren? */ for (i = 0; i < smo->N; i++) initial_distribution[i] /= sum; } else { for (i = 0; i < smo->N; i++) initial_distribution[i] = smo->s[i].pi; } } /* if short_len > 0: Initial state == final state from sqd_short; no output here else choose inittial state according to pi and do output */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (i = 0; i < smo->N; i++) { sum += initial_distribution[i]; if (sum >= p) break; } /* error due to incorrect normalization ?? */ if (i == smo->N) { i--; while (i > 0 && initial_distribution[i] == 0.0) i--; } t = 0; if (short_len == 0) { /* Output in state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < smo->M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } /* error due to incorrect normalization ?? */ if (m == smo->M) { m--; while (m > 0 && smo->s[i].c[m] == 0.0) m--; } sq->seq[n][t] = ghmm_cmodel_get_random_var (smo, i, m); if (smo->cos == 1) { class = 0; } else { if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); goto STOP; } /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/ class = smo->class_change->get_class (smo, sq->seq[n], n, t); } t++; } /* generate completion for sequence */ else { for (t = 0; t < short_len; t++) if (smo->cos == 1) { class = 0; } else { if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); goto STOP; } /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/ class = smo->class_change->get_class (smo, sq->seq[n], n, t); } t = short_len; } while (t < len) { if (smo->s[i].out_states == 0) /* reached final state, exit while loop */ break; sum = 0.0; for (j = 0; j < smo->s[i].out_states; j++) { sum += smo->s[i].out_a[class][j]; if (sum >= p) break; } /* error due to incorrect normalization ?? */ if (j == smo->s[i].out_states) { j--; while (j > 0 && smo->s[i].out_a[class][j] == 0.0) j--; } if (sum == 0.0) { /* Test: If an "empty" class, try the neighbour class; first, sweep down to zero, if still no success sweep up to COs - 1. If still no success --> discard the sequence. */ if (class > 0 && up == 0) { class--; continue; } else if (class < smo->cos - 1) { class++; up = 1; continue; } else { break; } } /* new state */ i = smo->s[i].out_id[j]; /* Output in state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < smo->M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } if (m == smo->M) { m--; while (m > 0 && smo->s[i].c[m] == 0.0) m--; } /* random variable from density function */ sq->seq[n][t] = ghmm_cmodel_get_random_var (smo, i, m); if (smo->cos == 1) { class = 0; } else { if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); goto STOP; } /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/ class = smo->class_change->get_class (smo, sq->seq[n], n, t); } up = 0; t++; } /* while (t < len) */ if (t < len)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -