📄 xmlreader.c
字号:
/* ========== ALPHABETS ================================================ */ if ((!xmlStrcmp(child->name, BAD_CAST "alphabet"))) { if (!alphabets) ARRAY_MALLOC(alphabets, MAX_ALPHABETS); alfa = parseAlphabet(doc, child, f); if (alfa && nrAlphabets<MAX_ALPHABETS) { alphabets[nrAlphabets++] = alfa; } else { GHMM_LOG(LERROR, "Error in parsing alphabets."); goto STOP; } } /* ========== NODES ================================================== */ if ((!xmlStrcmp(child->name, BAD_CAST "state"))) { id = getIntAttribute(child, "id", &error); if (error || id!=N) { GHMM_LOG(LERROR, "non consecutive node ids"); goto STOP; } N++; } /* ========== EDGES ================================================== */ if ((!xmlStrcmp(child->name, BAD_CAST "transition"))) { if (inDegree == NULL) { ARRAY_CALLOC(inDegree, N); ARRAY_CALLOC(outDegree, N); } source = getIntAttribute(child, "source", &error); if (error || source<0 || source>N) { estr = ighmm_mprintf(NULL, 0, "source (%d) node not existing (%d)", source, error); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } target = getIntAttribute(child, "target", &error); if (error || target<0 || target>N) { estr = ighmm_mprintf(NULL, 0, "target (%d) node not existing (%d)", target, error); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } inDegree[target]++; outDegree[source]++; } /* ========== BACKGROUND DISTRIBUTIONS ================================ */ if ((!xmlStrcmp(child->name, BAD_CAST "background"))) nrBackgrounds++; child = child->next; } /* allocate zero degree count in the case of a HMM without transitions */ if (inDegree == NULL) { ARRAY_CALLOC(inDegree, N); ARRAY_CALLOC(outDegree, N); } estr = ighmm_mprintf(NULL, 0, "Found HMM with %d states\n", N); GHMM_LOG(LDEBUG, estr); m_free(estr); for (i=0; i<N; i++) { estr = ighmm_mprintf(NULL, 0, " %d\t%d\n", inDegree[i], outDegree[i]); GHMM_LOG(LDEBUG, estr); m_free(estr); } /* starting real parsing */ modelname = xmlGetProp(cur, BAD_CAST "name"); mt = getXMLCharAttribute(cur, "type", &error); modeltype = parseModelType(mt, strlen(mt)); f->modelType = modeltype; /* reading common optional atribute prior, 1.0 if not defined */ prior = getDoubleAttribute(cur, "prior", &error); if (error) prior = 1.0; /* reading common optional atribute cos, 1 if not defined */ cos = getIntAttribute(cur, "transitionClasses", &error); if (error) cos = 1; /* if first model, initialize model structures */ if ( modelNo == 0){ switch (f->modelType & PTR_TYPE_MASK) { case GHMM_kDiscreteHMM: ARRAY_CALLOC(f->model.d, f->noModels); break; case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses): ARRAY_CALLOC(f->model.ds, f->noModels); break; case (GHMM_kDiscreteHMM+GHMM_kPairHMM): case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses): ARRAY_CALLOC(f->model.dp, f->noModels); break; case GHMM_kContinuousHMM: case (GHMM_kContinuousHMM+GHMM_kTransitionClasses): ARRAY_CALLOC(f->model.c, f->noModels); break; break; default: GHMM_LOG(LERROR, "invalid modelType"); goto STOP; } } /* allocating the different models */ switch (f->modelType & PTR_TYPE_MASK) { case GHMM_kDiscreteHMM: assert(nrAlphabets == 1); M = alphabets[0]->size; f->model.d[modelNo] = ghmm_dmodel_calloc(M, N, modeltype, inDegree, outDegree); f->model.d[modelNo]->alphabet = alphabets[0]; f->model.d[modelNo]->prior = prior; f->model.d[modelNo]->name = modelname; break; case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses): assert(nrAlphabets == 1); M = alphabets[0]->size; f->model.ds[modelNo] = ghmm_dsmodel_calloc(M, N, modeltype, cos, inDegree, outDegree); f->model.ds[modelNo]->alphabet = alphabets[0]; f->model.ds[modelNo]->prior = prior; f->model.ds[modelNo]->name = modelname; f->model.ds[modelNo]->cos = cos; break; /* XXX case (GHMM_kDiscreteHMM+GHMM_kPairHMM): case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses): f->model.dp[modelNo] = NULL; break; */ case GHMM_kContinuousHMM: case (GHMM_kContinuousHMM+GHMM_kTransitionClasses): f->model.c[modelNo] = ghmm_cmodel_calloc(N,modeltype); f->model.c[modelNo]->prior = prior; f->model.c[modelNo]->name = modelname; f->model.c[modelNo]->cos = cos; break; default: GHMM_LOG(LERROR, "invalid or unimplemented model type"); goto STOP; } /* allocating background distributions for approtiate models */ if (modeltype & GHMM_kBackgroundDistributions) { switch (f->modelType & PTR_TYPE_MASK) { case GHMM_kDiscreteHMM: ARRAY_CALLOC(bg_orders, N); ARRAY_CALLOC(bg_ptr, N); f->model.d[modelNo]->bp = ghmm_dbackground_alloc(nrBackgrounds, M, bg_orders, bg_ptr); ARRAY_CALLOC(f->model.d[modelNo]->bp->name, N); f->model.d[modelNo]->bp->n = 0; break; default: GHMM_LOG(LERROR, "invalid modelType"); goto STOP; } } child = cur->xmlChildrenNode; /* parse HMM for real */ while (child != NULL) { /* ========== LABEL ALPHABETS ========================================== */ if ((!xmlStrcmp(child->name, BAD_CAST "classAlphabet"))) { alfa = parseAlphabet(doc, child, f); if (alfa) { f->model.d[modelNo]->label_alphabet = alfa; } else { GHMM_LOG(LERROR, "Error in parsing alphabets."); goto STOP; } } if ((!xmlStrcmp(child->name, BAD_CAST "background"))) { if (modeltype & GHMM_kBackgroundDistributions) { parseBackground(doc, child, f, modelNo); } else { GHMM_LOG(LWARN, "Ignoring background distribution."); } } if ((!xmlStrcmp(child->name, BAD_CAST "state"))) { parseState(doc, child, f, inDegree, outDegree, modelNo); } if ((!xmlStrcmp(child->name, BAD_CAST "transition"))) { if (modeltype & GHMM_kTransitionClasses) parseMultipleTransition(doc, child, f, modelNo, cos); else parseSingleTransition(doc, child, f, modelNo); } child = child->next; } if (modeltype & GHMM_kHigherOrderEmissions) { ARRAY_MALLOC(f->model.d[modelNo]->pow_lookup, f->model.d[modelNo]->maxorder+2); f->model.d[modelNo]->pow_lookup[0] = 1; for (i=1; i < f->model.d[modelNo]->maxorder+2; ++i) f->model.d[modelNo]->pow_lookup[i] = f->model.d[modelNo]->M * f->model.d[modelNo]->pow_lookup[i-1]; } /* freeing temporary data */ m_free(inDegree); m_free(outDegree); return 0; STOP: free(inDegree); free(outDegree); free(bg_orders); free(bg_ptr); free(alphabets); free(f); return -1;#undef CUR_PROC}/*===========================================================================*/ghmm_xmlfile* ghmm_xmlfile_parse(const char *filename) {#define CUR_PROC "ghmm_xmlfile_parse" xmlParserCtxtPtr ctxt; /* the parser context */ xmlDocPtr doc; /* the resulting document tree */ xmlNodePtr cur, child; int modelNo = 0; int error; char * estr; ghmm_xmlfile* filedata = NULL; /* validate the document */ if (!ghmm_xmlfile_validate(filename)) { estr = ighmm_mprintf(NULL, 0, "Failed to validate document %s", filename); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } /* create a parser context */ ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { GHMM_LOG(LERROR, "Failed to allocate parser context"); goto STOP; } /* parse the file, activating the DTD validation option */ doc = xmlCtxtReadFile(ctxt, filename, NULL, 0); /* check if parsing suceeded */ if (doc == NULL) { estr = ighmm_mprintf(NULL, 0, "Failed to parse %s", filename); GHMM_LOG(LERROR, estr); m_free(estr); } else { /* checking the root node, creating the file structure and iteration over all HMMs */ cur = xmlDocGetRootElement(doc); /* file contains a mixture of HMMs */ if ((!xmlStrcmp(cur->name, BAD_CAST "mixture"))) { ARRAY_CALLOC(filedata, 1); filedata->noModels = getIntAttribute(cur, "noComponents", &error); child = cur->children; while (child!=NULL) { if ((!xmlStrcmp(child->name, BAD_CAST "HMM"))) { if (modelNo >= filedata->noModels) { estr = ighmm_mprintf(NULL, 0, "The mixture has more models than" " defined, ignoring all following HMMs (%d/%d)", modelNo, filedata->noModels); GHMM_LOG(LWARN, estr); m_free(estr); break; } else { if (parseHMM(filedata, doc, child, modelNo)) { estr = ighmm_mprintf(NULL, 0, "could not parse model no. %d", modelNo); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } modelNo++; } } child=child->next; } if (modelNo < filedata->noModels){ GHMM_LOG(LERROR, "The mixture has less models than defined"); goto STOP; } /* only single hmm in file */ } else if (!xmlStrcmp(cur->name, BAD_CAST "HMM")) { ARRAY_CALLOC(filedata, 1); filedata->noModels = 1; if (parseHMM(filedata, doc, cur, 0)) { GHMM_LOG(LERROR, "could not parse the hidden markov model"); goto STOP; } /* invalid root entry */ } else { estr = ighmm_mprintf(NULL, 0, "The file does not contains the appropriate root %s", filename); GHMM_LOG(LERROR, estr); m_free(estr); } } /* free up the resulting document */ xmlFreeDoc(doc); /* free up the parser context */ xmlFreeParserCtxt(ctxt); return filedata;STOP: return NULL; #undef CUR_PROC}/*===========================================================================*/static void silence(void* x, const char* y, ...) {return;}/*===========================================================================*/static int validateFixedDTD(const char* filename) {#define CUR_PROC "validateFixedDTD"#ifndef DTD_LOC#define DTD_LOC "/usr/share/ghmm/ghmm.dtd.1.0"#endif const char fileDTD[] = DTD_LOC; char * estr; int retval = 0; xmlDtdPtr dtd = NULL; xmlDocPtr doc = NULL; xmlValidCtxtPtr cvp = NULL; if (filename != NULL && fileDTD != NULL) { dtd = xmlParseDTD(NULL, (const xmlChar *)fileDTD); if (dtd == NULL) { estr = ighmm_mprintf(NULL, 0, "Could not parse DTD %s.", fileDTD); GHMM_LOG(LDEBUG, estr); m_free(estr); goto STOP; } doc = xmlReadFile(filename, NULL, 0); if (doc == NULL) { estr = ighmm_mprintf(NULL, 0, "Could not parse document %s.", filename); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } if ((cvp = xmlNewValidCtxt()) == NULL) { GHMM_LOG(LERROR, "Couldn't allocate validation context\n"); goto STOP; } /* set error and warning functions to NULL to make validation silent */ cvp->error = (xmlValidityErrorFunc) silence; cvp->warning = (xmlValidityWarningFunc) silence; /* check if validation suceeded */ if (xmlValidateDtd(cvp, doc, dtd)) { retval = 1; } else { estr = ighmm_mprintf(NULL, 0, "Failed to validate document %s against %s", filename, fileDTD); GHMM_LOG(LDEBUG, estr); m_free(estr); } }STOP: if (cvp != NULL) xmlFreeValidCtxt(cvp); if (doc != NULL) xmlFreeDoc(doc); if (dtd != NULL) xmlFreeDtd(dtd); return retval;#undef CUR_PROC}/*===========================================================================*/static int validateDynamicDTD(const char* filename) {#define CUR_PROC "validateDynamicDTD" int retval = 0; char *estr; xmlParserCtxtPtr ctxt = NULL; xmlDocPtr doc = NULL; ctxt = xmlNewParserCtxt(); /* silencing the validation errors */ ctxt->vctxt.error = (xmlValidityErrorFunc) silence; ctxt->vctxt.warning = (xmlValidityWarningFunc) silence; doc = xmlCtxtReadFile(ctxt, filename, NULL, XML_PARSE_DTDVALID); /* check if parsing suceeded */ if (doc == NULL) { estr = ighmm_mprintf(NULL, 0, "Failed to parse %s", filename); GHMM_LOG(LDEBUG, estr); m_free(estr); } else { if (ctxt->valid == 0) { estr = ighmm_mprintf(NULL, 0, "Failed to validate %s", filename); GHMM_LOG(LDEBUG, estr); m_free(estr); } else retval = 1; } xmlFreeDoc(doc); xmlFreeParserCtxt(ctxt); return retval;#undef CUR_PROC}/*===========================================================================*/int ghmm_xmlfile_validate(const char *filename) {#define CUR_PROC "ghmm_xmlfile_validate" return (validateFixedDTD(filename) || validateDynamicDTD(filename));#undef CUR_PROC}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -