⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlreader.c

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 C
📖 第 1 页 / 共 3 页
字号:
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/xmlreader.c*       Authors:  Janne Grunau**       Copyright (C) 1998-2006 Alexander Schliep *       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*	Copyright (C) 2002-2006 Max-Planck-Institut fuer Molekulare Genetik, *                               Berlin*                                   *       Contact: schliep@ghmm.org             **       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1882 $ *                       from $Date: 2007-08-20 16:48:09 +0200 (Mon, 20 Aug 2007) $*             last change by $Author: grunau $.********************************************************************************/#ifdef HAVE_CONFIG_H#  include "../config.h"#endif#include <stdio.h>#include <string.h>#include <stdio.h>#include <stdlib.h>#include <math.h>#include <assert.h>#include <limits.h>#include <libxml/xmlmemory.h>#include <libxml/tree.h>#include <libxml/parser.h>#include "ghmm.h"#include "ghmm_internals.h"#include "mes.h"#include "mprintf.h"#include "xmlreader.h"/* we should not need more than two alphabets, no plan to implement triple HMMs */#define MAX_ALPHABETS 2/* Bitmask to test the modeltype against to choose the type of the model pointer   we use in the union */#define PTR_TYPE_MASK (GHMM_kDiscreteHMM + GHMM_kTransitionClasses + GHMM_kPairHMM + GHMM_kContinuousHMM)/* holds all valid modeltypes sorted */static int validModelTypes[35] = {  (GHMM_kDiscreteHMM),  (GHMM_kDiscreteHMM + GHMM_kLeftRight),  (GHMM_kDiscreteHMM + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions),  (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions	+ GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions),  (GHMM_kDiscreteHMM + GHMM_kLabeledStates + GHMM_kBackgroundDistributions + GHMM_kHigherOrderEmissions + GHMM_kTiedEmissions + GHMM_kSilentStates),  (GHMM_kDiscreteHMM + GHMM_kTransitionClasses),  (GHMM_kContinuousHMM),  (GHMM_kContinuousHMM + GHMM_kTransitionClasses),  (GHMM_kPairHMM + GHMM_kDiscreteHMM),  (GHMM_kPairHMM + GHMM_kDiscreteHMM + GHMM_kTransitionClasses)};/*===========================================================================*/static int getIntAttribute(xmlNodePtr node, const char *name, int *error) {  xmlChar *attr;  int value = -3894;  if ((attr = xmlGetProp(node, BAD_CAST name)) != NULL) {    value = atoi((char *)attr);    xmlFree(attr);    *error = 0;  } else {    *error = 1;  }  return value;}/*===========================================================================*/static double getDoubleAttribute(xmlNodePtr node, const char *name,				 int *error) {  xmlChar *attr;  double value = 0.0;  if ((attr = xmlGetProp(node, BAD_CAST name)) != NULL) {    value = atof((char *)attr);    xmlFree(attr);    *error = 0;  } else {    *error = 1;  }  return value;}/*===========================================================================*//* Caller owns return value */static char * getXMLCharAttribute(xmlNodePtr node, const char *name,				    int *error) {  xmlChar *attr;  if ((attr = xmlGetProp(node, BAD_CAST name)) != NULL) {    *error = 0;    return (char *)attr;  } else {    *error = 1;    return NULL;  }}/*===========================================================================*/static int parseCSVList(const char * data, unsigned int size, double * array, int reverse) {#define CUR_PROC "parseCSVList"  int retval=0;  int i;  char * * next, * estr;  double tmp;  ARRAY_CALLOC(next, 1);  for (i=0; i<size; i++) {    array[i] = strtod(data, next);    if (data == *next) {      estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. entry %d of %d. (%s)", i, size, *next);      GHMM_LOG(LERROR, estr);      m_free(estr);      retval=-1;      break;    }    if (next)      data = *next+1;    else      break;  }  if (i != size) {    retval=-1;    estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. sizes do not match (%d != %d)", i, size);    GHMM_LOG(LERROR, estr);    m_free(estr);  }  if (reverse) {    for (i=0; i<size/2; i++) {      tmp = array[i];      array[i] = array[size-i-1];      array[size-i-1] = tmp;    }  }STOP:  m_free(next);  return retval;#undef CUR_PROC}/*===========================================================================*/static int matchModelType(const char * data, unsigned int size) {#define CUR_PROC "matchModelType"  if (!strncmp(data, "left-right", size))    return GHMM_kLeftRight;  if (!strncmp(data, "silent", size))    return GHMM_kSilentStates;  if (!strncmp(data, "tied", size))    return GHMM_kTiedEmissions;  if (!strncmp(data, "higher-order", size))    return GHMM_kHigherOrderEmissions;  if (!strncmp(data, "background", size))    return GHMM_kBackgroundDistributions;  if (!strncmp(data, "labeled", size))    return GHMM_kLabeledStates;  if (!strncmp(data, "transition-classes", size))    return GHMM_kTransitionClasses;  if (!strncmp(data, "discrete", size))    return GHMM_kDiscreteHMM;  if (!strncmp(data, "continuous", size))    return GHMM_kContinuousHMM;  if (!strncmp(data, "pair", size))    return GHMM_kPairHMM;  return INT_MIN;#undef CUR_PROC}/*===========================================================================*/static int parseModelType(const char * data, unsigned int size) {#define CUR_PROC "parseModelType"  int i, noValidMo, modelType=0;  const char * end = data;  char * str;  while ((end = strchr(data, ' '))) {    modelType += matchModelType(data, end-data);    size -= (end-data)+1;    data = end+1;  }  modelType += matchModelType(data, size);  noValidMo = sizeof(validModelTypes)/sizeof(validModelTypes[0]);  for (i=0; i<noValidMo; i++) {    if (modelType == validModelTypes[i])      break;  }  if (i == noValidMo) {    str = ighmm_mprintf(NULL, 0, "%d is no known valid model type", modelType);    GHMM_LOG(LERROR, str);    m_free(str);    return -1;  }  return modelType;#undef CUR_PROC}/*===========================================================================*/static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {#define CUR_PROC "parseAlphabet"    char * str;  int M, code, error;  xmlNodePtr symbol;  ghmm_alphabet * alfa;  ARRAY_CALLOC(alfa, 1);  symbol = cur->children;  M=0;  while (symbol!=NULL) {    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {      code = getIntAttribute(symbol, "code", &error);      if (error || code!=M) {	str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M);	GHMM_LOG(LERROR, str);	m_free(str);	goto STOP;      } else	M++;    }    symbol=symbol->next;  }  alfa->size = M;  /*printf("Parsing alphabet with %d symbols\n", alfa->size);*/  ARRAY_MALLOC(alfa->symbols, M);  symbol = cur->children;  M=0;  while (symbol!=NULL) {    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {      alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol);      /*printf("%d. symbol: %s\n", M, alfa->symbols[M-1]);*/    }    symbol=symbol->next;  }    return alfa;STOP:  m_free(alfa->symbols);  m_free(alfa)  return NULL;#undef CUR_PROC}/*===========================================================================*/static int parseBackground(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int modelNo) {#define CUR_PROC "parseBackground"  int error, order;  int bgNr, rev;  double *b = NULL;  char   *s = NULL;  assert(f->modelType & GHMM_kDiscreteHMM);  bgNr = f->model.d[modelNo]->bp->n++;  /* get order */  order = getIntAttribute(cur, "order", &error);  if (error)    order=0;  else if (order && !(f->modelType & GHMM_kHigherOrderEmissions)) {    GHMM_LOG(LERROR, "background distribution has order > 0, but model is not higher order");    goto STOP;  }  f->model.d[modelNo]->bp->order[bgNr] = order;  /* get name */  s = (char *)getXMLCharAttribute(cur, "key", &error);  f->model.d[modelNo]->bp->name[bgNr] = s;  rev = getIntAttribute(cur, "rev", &error);  if (error)    rev = 0;  /* get distribution */  s = (char *)xmlNodeGetContent(cur);  ARRAY_MALLOC(b, pow(f->model.d[modelNo]->bp->m, order+1));  if (-1 !=  parseCSVList(s, pow(f->model.d[modelNo]->bp->m, order+1), b, rev))    f->model.d[modelNo]->bp->b[bgNr] = b;  else {    GHMM_LOG(LERROR, "Can not parse background CSV list.");    goto STOP;  }  return 0;STOP:  m_free(b);  return -1;#undef CUR_PROC}/*===========================================================================*/static int parseState(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int * inDegree, int * outDegree, int modelNo) {#define CUR_PROC "parseState"  int i, error, order=0, state=-1442, fixed=-985, tied=-9354, M, aprox, label;  int curX=0, curY=0;  double pi, prior;  double *emissions = NULL;  unsigned char *desc = NULL;  char *s = NULL, *estr;  int rev, stateFixed=1;  xmlNodePtr elem, child;  state = getIntAttribute(cur, "id", &error);  pi = getDoubleAttribute(cur, "initial", &error);  if (error) {    estr = ighmm_mprintf(NULL, 0, "can't read required intial probability for"			 "state %d", state);    GHMM_LOG(LERROR, estr);    goto STOP;  } else  desc = xmlGetProp(cur, BAD_CAST "desc");  elem = cur->children;  while (elem!=NULL) {    /* ======== silent state ============================================== */    if ((!xmlStrcmp(elem->name, BAD_CAST "silent"))) {      switch (f->modelType & PTR_TYPE_MASK) {      case (GHMM_kDiscreteHMM):	f->model.d[modelNo]->silent[state] = 1;	break;      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):	f->model.ds[modelNo]->silent[state] = 1;	break;      case (GHMM_kDiscreteHMM+GHMM_kPairHMM):      case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):	f->model.dp[modelNo]->silent[state] = 1;	break;      default:	GHMM_LOG(LERROR, "invalid modelType");	goto STOP;      }    }    /* ======== discrete state (possible higher order) ==================== */    if ((!xmlStrcmp(elem->name, BAD_CAST "discrete"))) {      assert((f->modelType & GHMM_kDiscreteHMM) && ((f->modelType & GHMM_kPairHMM) == 0));      /* fixed is a propety of the distribution and optional */      fixed = getIntAttribute(elem, "fixed", &error);      if (error)	fixed = 0;      /* order is optional for discrete */      if (f->modelType & GHMM_kHigherOrderEmissions) {	order = getIntAttribute(elem, "order", &error);	if (error)	  order = 0;      }      rev = getIntAttribute(cur, "rev", &error);      if (error)	rev = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -