📄 xmlutil.py

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 PY
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	del self.id2index[self.state[index].id]	del self.state[index]    def fromDOM(self, XMLNode):                # self.hmmClass.fromDOM(XMLNode.getElementsByTagName("hmm:class")[0])         for tag in XMLNode.getElementsByTagName("hmm:class"):            self.hmmClass.fromDOM(tag)        nameNodes = XMLNode.getElementsByTagName("hmm:name")        if (len(nameNodes) > 0):            self.modelType = nameNodes[0].firstChild.nodeValue        # model type node        modelTypeNodes = XMLNode.getElementsByTagName("hmm:modeltype")        if (len(modelTypeNodes) > 0):            self.modelType = modelTypeNodes[0].firstChild.nodeValue        if (self.modelType == "pairHMM"):            alphabetNodes = XMLNode.getElementsByTagName("hmm:alphabet")            for alphabetNode in alphabetNodes:                alphabet = DiscreteHMMAlphabet()                alphabet.fromDOM(alphabetNode)                self.hmmAlphabets[alphabet.id] = alphabet            transitionFunctionNodes = XMLNode.getElementsByTagName("hmm:transitionfunction")            for transitionFunctionNode in transitionFunctionNodes:                transitionFunction = TransitionFunction()                transitionFunction.fromDom(transitionFunctionNode)                self.transitionFunctions[transitionFunction.id] = transitionFunction        else:            # If it is no pair hmm One "hmm:alphabet" XML element            self.hmmAlphabets[0] = DiscreteHMMAlphabet()            self.hmmAlphabets[0].fromDOM(XMLNode.getElementsByTagName("hmm:alphabet")[0])        self.backgroundDistributions.fromDOM(XMLNode)        nodes = XMLNode.getElementsByTagName("node")        for n in nodes:            state = HMMState(-1, self)            state.fromDOM(n)            self.state[state.index] = state # key must be string            self.id2index[state.id] = state.index            self.G.embedding[state.index] = state.pos            self.G.labeling[state.index] = "%s\n%s" % (state.id, state.label) # XXX Hack Aaaargh!        edges = XMLNode.getElementsByTagName("edge")        # nr_classes = int(self.hmmClass.high()-self.hmmClass.low())+1        nr_classes = 1        # search in all states for the maximal kclasses        for s in self.state.values():            if (s.kclasses > nr_classes):                nr_classes = s.kclasses                        for i in range(nr_classes):            self.G.edgeWeights[i] = EdgeWeight(self.G)        for edge in edges:            i = self.id2index[int(edge.attributes['source'].nodeValue)]            j = self.id2index[int(edge.attributes['target'].nodeValue)]            source = self.state[i]            datas = edge.getElementsByTagName("data")            for data in datas:                dataKey = data.attributes['key'].nodeValue                # dataValue = data.firstChild.nodeValue            if dataKey == 'prob':                #p = float(dataValue)                # collect all strings from childnodes                dataValue = ""                for child in data.childNodes:                    dataValue += child.nodeValue                p = listFromCSV(dataValue, types.FloatType)                self.G.AddEdge(i, j)                if len(p) == 1: # only one class                    for cl in range(source.kclasses - 1):                        p.append(0.0)                                        for cl in range(source.kclasses):                    self.G.edgeWeights[cl][(i,j)] = p[cl]    def modelCheck(self):	        # Compute sums of initial probabilities for renormalization         initial_sum = 0.0        for s in self.state:            initial_sum = initial_sum + self.state[s].initial	if initial_sum == 0.0:	    raise NotValidHMMType("Initial state is not specified.")	    	if (len(self.hmmAlphabets) == 0):	    raise AlphabetErrorType("Alphabet object is empty. You must create alphabet before saving.")	    def toDOM(self, XMLDoc, XMLNode):        graphml = XMLDoc.createElement("graphml")        # define namespaces (proper XML and new expat needs it)        graphml.setAttribute('xmlns', 'http://graphml.graphdrawing.org/xmlns')        graphml.setAttribute('xmlns:gd', 'gdnamespace') # find the correct URI        graphml.setAttribute('xmlns:hmm', 'http://www.ghmm.org/xml/')#arbitrary        XMLNode.appendChild(graphml)        # Create key elements        hmmtype = XMLDoc.createElement("key")        hmmtype.setAttribute('id', 'emissions')        hmmtype.setAttribute('gd:type', 'HigherDiscreteProbDist') # what's your type?        hmmtype.setAttribute('for', 'node')        graphml.appendChild(hmmtype)                self.hmmClass.toDOM(XMLDoc, graphml)        if (self.modelType == "pairHMM"):            modelType = XMLDoc.createElement("hmm:modeltype")            modelType.appendChild(XMLDoc.createTextNode("pairHMM"))            graphml.appendChild(modelType)                for alphabet in self.hmmAlphabets.values():            alphabet.toDOM(XMLDoc, graphml)        self.backgroundDistributions.toDOM(XMLDoc, graphml)         if len(self.transitionFunctions.keys()) != 0:            transitionFunctionsNode = XMLDoc.createElement("hmm:transitionfunctions")            for transitionFunction in self.transitionFunctions.values():                transitionFunction.toDom(XMLDoc, transitionFunctionsNode)            graphml.appendChild(transitionFunctionsNode)        graph = XMLDoc.createElement("graph")        # Compute sums of initial probabilities for renormalization         initial_sum = 0.0        for s in self.state.keys():            initial_sum = initial_sum + self.state[s].initial                for s in self.state.keys():            self.state[s].toDOM(XMLDoc, graph, initial_sum)                # Compute sums of outgoing probabilities for renormalization of transition probabilities        # NOTE: need dictionaries here        out_sum = {}        nr_classes = int(self.hmmClass.high())-int(self.hmmClass.low())+1        for v in self.G.vertices:            out_sum[v] = [0.0]*nr_classes        for cl in range(1): # XXX Assuming one transition class            for e in self.G.Edges():                if self.G.edgeWeights[cl].has_key(e):                    out_sum[e[0]][cl] = out_sum[e[0]][cl] + self.G.edgeWeights[cl][e]                        for e in self.G.Edges():            transitions = []            edge_elem = XMLDoc.createElement("edge")            edge_elem.setAttribute('source', "%s" % self.state[e[0]].id)            edge_elem.setAttribute('target', "%s" % self.state[e[1]].id)            # writeData(XMLDoc, edge_elem, 'prob', self.G.edgeWeights[cl][e] / out_sum[e[0]])            # XXX Assuming one transition class for cl in range(nr_classes):            for cl in range(1):                if self.G.edgeWeights[cl].has_key(e) and out_sum[e[0]][cl]:                    transitions.append(self.G.edgeWeights[cl][e]/ out_sum[e[0]][cl])                else:                    transitions.append(0.0)                            writeData(XMLDoc, edge_elem, 'prob', csvFromList( transitions ))            graph.appendChild(edge_elem)                      graphml.appendChild(graph)    def AlphabetType(self):	""" return the type of emission domain 	    XXX should call the method in HMMAlphabet	"""	return int        def ClassType(self):	pass        def DistributionType(self):	pass    def getBackgroundDist(self):        """ Return a pair of two dictionaries: (distribution, its orders):            a distribution is a list of real values of length N^(order+1).           """                return (self.backgroundDistributions.dist, self.backgroundDistributions.order, self.backgroundDistributions.code2name)            def buildMatrices(self):    	""" return [alphabets_code, A, B, pi, state_orders] """	pi = []	B  = []	A  = []	nstates = len(self.state.keys())	orders = {}	k = 0 # C style index	for s in self.state.values(): # ordering from XML	    orders[s.index] = k	    k = k + 1        state_orders = []	for s in self.state.values(): # a list of indices	    pi.append(s.initial)	    state_orders.append(s.order) # state order            size = self.hmmAlphabets[s.alphabet_id].size()            if (self.modelType != "pairHMM" and                size**(s.order+1) != len(s.emissions)): 		raise ValueError # exception: inconsistency between ordering and emission	                B.append(s.emissions) # emission	    	    # transition probability	    v = s.index	    outprobs = [0.0] * nstates	    for outid in self.G.OutNeighbors(v)[:]:		myorder = orders[outid]		outprobs[myorder] = self.G.edgeWeights[0][(v,outid)]	    A.append(outprobs)        alphabets = self.hmmAlphabets[0].name.values() # list of alphabets	return [alphabets, A, B, pi, state_orders]    def getStateAlphabets(self):        alphabets = []        for s in self.state.values():            alphabets.append(self.hmmAlphabets[s.alphabet_id])        return alphabets    def getAlphabets(self):        return self.hmmAlphabets        def getLabels(self):        """ returns list of state labels and unique labels """        label_list = []        labels = {}        for s in self.state.values(): # a list of indices           label_list.append(self.hmmClass.code2name[s.state_class])           labels[label_list[-1]] = 0        return (label_list, labels.keys())    def getTiedStates(self):            """ returns list of tied states, entry is None if a state isn't to            any other state, returns an empty list, if no state is tied """        tiedstates = []        isTied = 0                orders = {}        k = 0 # C style index        for s in self.state.values(): # ordering from XML            orders[s.id] = k            k = k + 1        for s in self.state.values(): # a list of indices            if s.tiedto == '':                tiedstates.append(-1)            else:                tiedstates.append(orders[int(s.tiedto)])                isTied = 1        if not isTied:            tiedstates = []        return tiedstates    def getStateDurations(self):        """ returns a list of the minimal number of times a state is evaluated            before the HMM changes to another state."""        durations = []        hasduration = 0                for s in self.state.values(): # a list of indices            if s.duration==0:                durations.append(1)            else:                durations.append(s.duration)                hasduration = 1        if not hasduration:            durations = []        return durations        def OpenXML(self, fileName_file_or_dom):        if (not isinstance(fileName_file_or_dom, xml.dom.minidom.Document)):            dom = xml.dom.minidom.parse(fileName_file_or_dom)        else:            dom = fileName_file_or_dom        if dom.documentElement.tagName == "ghmm":            sys.stderr.write("Do not support ghmm format")            raise FormatError            dom.unlink()            #self.DocumentName = "ghmm"            #ghmmdom  = dom            #ghmml = GHMMXML()            #dom   = ghmml.GraphMLDOM(ghmmdom)            #ghmmdom.unlink()        else:            assert dom.documentElement.tagName == "graphml"   	    self.fromDOM(dom)	    # dom.unlink()    def WriteXML(self, fileName):        try:            self.modelCheck()   # raise exceptions here            doc = xml.dom.minidom.Document()            self.toDOM(doc, doc)            file = open(fileName, 'w')            # xml.dom.ext.PrettyPrint(doc, file)                    file.write(toprettyxml(doc)) # problem with white spaces            file.close()            doc.unlink()        except HMMEdError:            print "HMMEdError: No file was written due to errors in the model."                def WriteGHMM(self, fileName):	self.modelCheck()   # raise exceptions here        doc = xml.dom.minidom.Document()        ghmm = doc.createElement("ghmm")        doc.appendChild(ghmm)        self.toGHMM(doc, ghmm)        file = open(fileName, 'w')        # xml.dom.ext.PrettyPrint(doc, file)                file.write(toprettyxml(doc)) # problem with white spaces        file.close()        doc.unlink()            def SaveAs(self, fileName):        if ( self.DocumentName == "graphml" ):            self.WriteXML(fileName)        else:            self.WriteGHMM(fileName)                def SaveAsGHMM(self, fileName):        self.WriteGHMM(fileName)class TransitionFunction:    """ this class holds information on the function which determines the        transition class for the state and holds the necesary parameters """    def __init__(self, type=None, paramDict=None):        self.type = type        self.paramDict = paramDict    def fromDom(self, XMLNode):        self.name = XMLNode.getAttribute("hmm:name")        self.id = int(XMLNode.getAttribute("hmm:id"))        typeNode = XMLNode.getElementsByTagName("hmm:transitiontype")[0]        self.type = typeNode.firstChild.nodeValue        parameterNodes = XMLNode.getElementsByTagName("hmm:transitionparameter")        self.paramDict = {}        for parameter in parameterNodes:            self.paramDict[parameter.getAttribute("hmm:name")] = parameter.firstChild.nodeValue    def toDom(self, XMLDoc, XMLNode):        transitionFunctionNode = XMLDoc.createElement("hmm:transitionfunction")        transitionFunctionNode.setAttribute("hmm:name", self.name)        transitionFunctionNode.setAttribute("hmm:id", self.id)        XMLNode.appendChild(transitionFunctionNode)        typeNode = XMLDoc.createElement("hmm:transitiontype")        type = XMLDoc.createTextNode(self.type)        typeNode.appendChild(type)        transitionFunctionNode.appendChild(typeNode)        for paramName in self.paramDict.keys():            paramNode = XMLDoc.createElement("hmm:transitionparameter")            paramNode.setAttribute("hmm:name", paramName)            param = XMLDoc.createTextNode(self.paramDict[paramName])            paramNode.appendChild(param)            transitionFunctionNode.appendChild(paramNode)        ################################################################################if __name__ == '__main__':    hmmobj = HMM()    hmmobj.OpenXML(sys.argv[1])    hmmobj.WriteXML("utz.xml")
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -