📄 xmlreader.c
字号:
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/xmlreader.c* Authors: Janne Grunau** Copyright (C) 1998-2006 Alexander Schliep * Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2006 Max-Planck-Institut fuer Molekulare Genetik, * Berlin* * Contact: schliep@ghmm.org ** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1882 $ * from $Date: 2007-08-20 16:48:09 +0200 (Mon, 20 Aug 2007) $* last change by $Author: grunau $.********************************************************************************/#ifdef HAVE_CONFIG_H# include "../config.h"#endif#include <stdio.h>#include <string.h>#include <stdio.h>#include <stdlib.h>#include <math.h>#include <assert.h>#include <limits.h>#include <libxml/xmlmemory.h>#include <libxml/tree.h>#include <libxml/parser.h>#include "ghmm.h"#include "ghmm_internals.h"#include "mes.h"#include "mprintf.h"#include "xmlreader.h"/* we should not need more than two alphabets, no plan to implement triple HMMs */#define MAX_ALPHABETS 2/* Bitmask to test the modeltype against to choose the type of the model pointer we use in the union */#define PTR_TYPE_MASK (GHMM_kDiscreteHMM + GHMM_kTransitionClasses + GHMM_kPairHMM + GHMM_kContinuousHMM)/* holds all valid modeltypes sorted */static int validModelTypes[35] = { (GHMM_kDiscreteHMM), (GHMM_kDiscreteHMM + GHMM_kLeftRight), (GHMM_kDiscreteHMM + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions), (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions), (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates), (GHMM_kDiscreteHMM + GHMM_kTransitionClasses), (GHMM_kContinuousHMM), (GHMM_kContinuousHMM + GHMM_kTransitionClasses), (GHMM_kPairHMM + GHMM_kDiscreteHMM), (GHMM_kPairHMM + GHMM_kDiscreteHMM + GHMM_kTransitionClasses)};/*===========================================================================*/static int getIntAttribute(xmlNodePtr node, const char *name, int *error) { xmlChar *attr; int value = -3894; if ((attr = xmlGetProp(node, BAD_CAST name)) != NULL) { value = atoi((char *)attr); xmlFree(attr); *error = 0; } else { *error = 1; } return value;}/*===========================================================================*/static double getDoubleAttribute(xmlNodePtr node, const char *name, int *error) { xmlChar *attr; double value = 0.0; if ((attr = xmlGetProp(node, BAD_CAST name)) != NULL) { value = atof((char *)attr); xmlFree(attr); *error = 0; } else { *error = 1; } return value;}/*===========================================================================*//* Caller owns return value */static char * getXMLCharAttribute(xmlNodePtr node, const char *name, int *error) { xmlChar *attr; if ((attr = xmlGetProp(node, BAD_CAST name)) != NULL) { *error = 0; return (char *)attr; } else { *error = 1; return NULL; }}/*===========================================================================*/static int parseCSVList(const char * data, unsigned int size, double * array, int reverse) {#define CUR_PROC "parseCSVList" int retval=0; int i; char * * next, * estr; double tmp; ARRAY_CALLOC(next, 1); for (i=0; i<size; i++) { array[i] = strtod(data, next); if (data == *next) { estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. entry %d of %d. (%s)", i, size, *next); GHMM_LOG(LERROR, estr); m_free(estr); retval=-1; break; } if (next) data = *next+1; else break; } if (i != size) { retval=-1; estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. sizes do not match (%d != %d)", i, size); GHMM_LOG(LERROR, estr); m_free(estr); } if (reverse) { for (i=0; i<size/2; i++) { tmp = array[i]; array[i] = array[size-i-1]; array[size-i-1] = tmp; } }STOP: m_free(next); return retval;#undef CUR_PROC}/*===========================================================================*/static int matchModelType(const char * data, unsigned int size) {#define CUR_PROC "matchModelType" if (!strncmp(data, "left-right", size)) return GHMM_kLeftRight; if (!strncmp(data, "silent", size)) return GHMM_kSilentStates; if (!strncmp(data, "tied", size)) return GHMM_kTiedEmissions; if (!strncmp(data, "higher-order", size)) return GHMM_kHigherOrderEmissions; if (!strncmp(data, "background", size)) return GHMM_kBackgroundDistributions; if (!strncmp(data, "labeled", size)) return GHMM_kLabeledStates; if (!strncmp(data, "transition-classes", size)) return GHMM_kTransitionClasses; if (!strncmp(data, "discrete", size)) return GHMM_kDiscreteHMM; if (!strncmp(data, "continuous", size)) return GHMM_kContinuousHMM; if (!strncmp(data, "pair", size)) return GHMM_kPairHMM; return INT_MIN;#undef CUR_PROC}/*===========================================================================*/static int parseModelType(const char * data, unsigned int size) {#define CUR_PROC "parseModelType" int i, noValidMo, modelType=0; const char * end = data; char * str; while ((end = strchr(data, ' '))) { modelType += matchModelType(data, end-data); size -= (end-data)+1; data = end+1; } modelType += matchModelType(data, size); noValidMo = sizeof(validModelTypes)/sizeof(validModelTypes[0]); for (i=0; i<noValidMo; i++) { if (modelType == validModelTypes[i]) break; } if (i == noValidMo) { str = ighmm_mprintf(NULL, 0, "%d is no known valid model type", modelType); GHMM_LOG(LERROR, str); m_free(str); return -1; } return modelType;#undef CUR_PROC}/*===========================================================================*/static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {#define CUR_PROC "parseAlphabet" char * str; int M, code, error; xmlNodePtr symbol; ghmm_alphabet * alfa; ARRAY_CALLOC(alfa, 1); symbol = cur->children; M=0; while (symbol!=NULL) { if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) { code = getIntAttribute(symbol, "code", &error); if (error || code!=M) { str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M); GHMM_LOG(LERROR, str); m_free(str); goto STOP; } else M++; } symbol=symbol->next; } alfa->size = M; /*printf("Parsing alphabet with %d symbols\n", alfa->size);*/ ARRAY_MALLOC(alfa->symbols, M); symbol = cur->children; M=0; while (symbol!=NULL) { if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) { alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol); /*printf("%d. symbol: %s\n", M, alfa->symbols[M-1]);*/ } symbol=symbol->next; } return alfa;STOP: m_free(alfa->symbols); m_free(alfa) return NULL;#undef CUR_PROC}/*===========================================================================*/static int parseBackground(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int modelNo) {#define CUR_PROC "parseBackground" int error, order; int bgNr, rev; double *b = NULL; char *s = NULL; assert(f->modelType & GHMM_kDiscreteHMM); bgNr = f->model.d[modelNo]->bp->n++; /* get order */ order = getIntAttribute(cur, "order", &error); if (error) order=0; else if (order && !(f->modelType & GHMM_kHigherOrderEmissions)) { GHMM_LOG(LERROR, "background distribution has order > 0, but model is not higher order"); goto STOP; } f->model.d[modelNo]->bp->order[bgNr] = order; /* get name */ s = (char *)getXMLCharAttribute(cur, "key", &error); f->model.d[modelNo]->bp->name[bgNr] = s; rev = getIntAttribute(cur, "rev", &error); if (error) rev = 0; /* get distribution */ s = (char *)xmlNodeGetContent(cur); ARRAY_MALLOC(b, pow(f->model.d[modelNo]->bp->m, order+1)); if (-1 != parseCSVList(s, pow(f->model.d[modelNo]->bp->m, order+1), b, rev)) f->model.d[modelNo]->bp->b[bgNr] = b; else { GHMM_LOG(LERROR, "Can not parse background CSV list."); goto STOP; } return 0;STOP: m_free(b); return -1;#undef CUR_PROC}/*===========================================================================*/static int parseState(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int * inDegree, int * outDegree, int modelNo) {#define CUR_PROC "parseState" int i, error, order=0, state=-1442, fixed=-985, tied=-9354, M, aprox, label; int curX=0, curY=0; double pi, prior; double *emissions = NULL; unsigned char *desc = NULL; char *s = NULL, *estr; int rev, stateFixed=1; xmlNodePtr elem, child; state = getIntAttribute(cur, "id", &error); pi = getDoubleAttribute(cur, "initial", &error); if (error) { estr = ighmm_mprintf(NULL, 0, "can't read required intial probability for" "state %d", state); GHMM_LOG(LERROR, estr); goto STOP; } else desc = xmlGetProp(cur, BAD_CAST "desc"); elem = cur->children; while (elem!=NULL) { /* ======== silent state ============================================== */ if ((!xmlStrcmp(elem->name, BAD_CAST "silent"))) { switch (f->modelType & PTR_TYPE_MASK) { case (GHMM_kDiscreteHMM): f->model.d[modelNo]->silent[state] = 1; break; case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses): f->model.ds[modelNo]->silent[state] = 1; break; case (GHMM_kDiscreteHMM+GHMM_kPairHMM): case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses): f->model.dp[modelNo]->silent[state] = 1; break; default: GHMM_LOG(LERROR, "invalid modelType"); goto STOP; } } /* ======== discrete state (possible higher order) ==================== */ if ((!xmlStrcmp(elem->name, BAD_CAST "discrete"))) { assert((f->modelType & GHMM_kDiscreteHMM) && ((f->modelType & GHMM_kPairHMM) == 0)); /* fixed is a propety of the distribution and optional */ fixed = getIntAttribute(elem, "fixed", &error); if (error) fixed = 0; /* order is optional for discrete */ if (f->modelType & GHMM_kHigherOrderEmissions) { order = getIntAttribute(elem, "order", &error); if (error) order = 0; } rev = getIntAttribute(cur, "rev", &error); if (error) rev = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -