📄 xmlutil.py
字号:
# NOTE(review): toGHMM takes `self` and reads self.id / self.initial /
# self.emissions, so it is presumably a method of a state class whose header
# lies above this excerpt -- re-indent it under that class when merging.
def toGHMM(self, XMLDoc, XMLNode, initial_sum):
    """Append a <state> element describing this state to XMLNode.

    XMLDoc      -- DOM document used to create the new element
    XMLNode     -- parent DOM node that receives the <state> element
    initial_sum -- divisor applied to self.initial before it is written
                   (a zero value raises ZeroDivisionError)
    """
    node = XMLDoc.createElement("state")
    node.setAttribute('id', "%s" % self.id)
    writeXMLTextNode(XMLDoc, node, 'initial', self.initial / initial_sum)
    # ignore order
    writeXMLTextNode(XMLDoc, node, 'emission',
                     '\n'.join(map(str, self.emissions)))
    XMLNode.appendChild(node)


class HMM:
    """A hidden Markov model stored as states plus a directed Graph.

    The model can be populated from a GraphML DOM (fromDOM / OpenXML) and
    serialised back to one (toDOM / WriteXML).
    """

    def __init__(self, XMLFileName = None, G = None):
        """Create an empty model, optionally loading it from XMLFileName.

        XMLFileName -- path of a GraphML file to load, or None
        G           -- Graph instance to use; a new Graph() is created
                       when None
        """
        # self.itsEditor = itsEditor
        if G is None:
            self.G = Graph()
        else:
            self.G = G
        self.G.directed = 1
        self.G.euclidian = 0
        self.G.simple = 0

        self.Pi = {}
        self.id2index = {}    # state id -> state index

        self.hmmAlphabet = DiscreteHMMAlphabet()
        self.hmmClass = HMMClass()

        self.editableAttr = {}
        self.editableAttr['HMM'] = ['desc']
        self.desc = ValidatingString()

        self.state = {}       # state index -> state object

        self.backgroundDistributions = NamedDistributions(self)

        self.DocumentName = "graphml"
        if XMLFileName is not None:
            self.OpenXML(XMLFileName)

    def AddState(self, id, label='None'):
        """Create a state with the given id and label; return its index.

        The state's index is set equal to its id.
        """
        state = HMMState(-1, self)
        state.id = id
        state.index = state.id
        self.id2index[state.id] = state.index
        self.state[state.index] = state  # XXX Use canvas id
        state.label = typed_assign(state.label, label)
        self.G.labeling[state.index] = "%s" % (state.label)
        return state.index

    def DeleteState(self, index):
        """ The method only deletes a map between index and its state object.
            The caller must delete the corresponding vertex in the owner
            Graph self.G.
        """
        del self.id2index[self.state[index].id]
        del self.state[index]

    def fromDOM(self, XMLNode):
        """Populate the model from a GraphML DOM node.

        Reads the "hmm:class" elements, the first "hmm:alphabet" element,
        the background distributions, every "node" (state) and every
        "edge" (transition) found below XMLNode.
        """
        # self.hmmClass.fromDOM(XMLNode.getElementsByTagName("hmm:class")[0])
        for tag in XMLNode.getElementsByTagName("hmm:class"):
            self.hmmClass.fromDOM(tag)

        # One "hmm:alphabet" XML element
        self.hmmAlphabet.fromDOM(
            XMLNode.getElementsByTagName("hmm:alphabet")[0])
        self.backgroundDistributions.fromDOM(XMLNode)

        nodes = XMLNode.getElementsByTagName("node")
        for n in nodes:
            state = HMMState(-1, self)
            state.fromDOM(n)
            self.state[state.index] = state  # key must be string
            self.id2index[state.id] = state.index
            self.G.embedding[state.index] = state.pos
            # XXX Hack Aaaargh!
            self.G.labeling[state.index] = "%s\n%s" % (state.id, state.label)

        edges = XMLNode.getElementsByTagName("edge")
        # NOTE(review): toDOM computes this as int(high) - int(low) + 1;
        # the two forms can differ for non-integral bounds -- confirm.
        nr_classes = int(self.hmmClass.high()-self.hmmClass.low())+1
        for i in range(nr_classes):
            self.G.edgeWeights[i] = EdgeWeight(self.G)

        for edge in edges:
            i = self.id2index[edge.attributes['source'].nodeValue]
            j = self.id2index[edge.attributes['target'].nodeValue]

            datas = edge.getElementsByTagName("data")
            for data in datas:
                dataKey = data.attributes['key'].nodeValue
                # dataValue = data.firstChild.nodeValue

                if dataKey == 'prob':
                    # p = float(dataValue)
                    # collect all strings from childnodes
                    dataValue = ""
                    for child in data.childNodes:
                        dataValue += child.nodeValue
                    p = listFromCSV(dataValue, types.FloatType)
                    self.G.AddEdge(i, j)
                    if len(p) == 1:  # only one class
                        # pad with zeros so p[cl] exists for every class
                        for cl in range(nr_classes):
                            p.append(0.0)

                    for cl in range(nr_classes):
                        self.G.edgeWeights[cl][(i,j)] = p[cl]

    def modelCheck(self):
        """Validate the model before it is saved.

        Raises NotValidHMMType when the initial probabilities sum to zero
        and AlphabetErrorType when the alphabet is empty.
        """
        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state:
            initial_sum = initial_sum + self.state[s].initial

        if initial_sum == 0.0:
            raise NotValidHMMType("Initial state is not specified.")

        if self.hmmAlphabet.size() == 0.0:
            raise AlphabetErrorType("Alphabet object is empty. You must create alphabet before saving.")

    def toDOM(self, XMLDoc, XMLNode):
        """Serialise the model as a <graphml> element appended to XMLNode.

        Initial probabilities are passed to each state together with their
        sum, and transition weights are divided by the outgoing sum of
        their source vertex before being written.
        """
        graphml = XMLDoc.createElement("graphml")
        XMLNode.appendChild(graphml)

        # Create key elements
        hmmtype = XMLDoc.createElement("key")
        hmmtype.setAttribute('id', 'emissions')
        hmmtype.setAttribute('gd:type', 'HigherDiscreteProbDist')  # what's your type?
        hmmtype.setAttribute('for', 'node')
        graphml.appendChild(hmmtype)

        self.hmmClass.toDOM(XMLDoc, graphml)
        self.hmmAlphabet.toDOM(XMLDoc, graphml)
        self.backgroundDistributions.toDOM(XMLDoc, graphml)

        graph = XMLDoc.createElement("graph")

        # Compute sums of initial probabilities for renormalization
        initial_sum = 0.0
        for s in self.state.keys():
            initial_sum = initial_sum + self.state[s].initial

        for s in self.state.keys():
            self.state[s].toDOM(XMLDoc, graph, initial_sum)

        # Compute sums of outgoing probabilities for renormalization of
        # transition probabilities
        # NOTE: need dictionaries here
        out_sum = {}
        nr_classes = int(self.hmmClass.high())-int(self.hmmClass.low())+1
        for v in self.G.vertices:
            out_sum[v] = [0.0]*nr_classes

        for cl in range(1):  # XXX Assuming one transition class
            for e in self.G.Edges():
                if self.G.edgeWeights[cl].has_key(e):
                    out_sum[e[0]][cl] = out_sum[e[0]][cl] + self.G.edgeWeights[cl][e]

        for e in self.G.Edges():
            transitions = []
            edge_elem = XMLDoc.createElement("edge")
            edge_elem.setAttribute('source', "%s" % self.state[e[0]].id)
            edge_elem.setAttribute('target', "%s" % self.state[e[1]].id)
            # writeData(XMLDoc, edge_elem, 'prob', self.G.edgeWeights[cl][e] / out_sum[e[0]])
            # XXX Assuming one transition class
            # for cl in range(nr_classes):
            for cl in range(1):
                # a zero outgoing sum is written as 0.0 instead of dividing
                if self.G.edgeWeights[cl].has_key(e) and out_sum[e[0]][cl]:
                    transitions.append(self.G.edgeWeights[cl][e]/ out_sum[e[0]][cl])
                else:
                    transitions.append(0.0)

            writeData(XMLDoc, edge_elem, 'prob', csvFromList( transitions ))

            graph.appendChild(edge_elem)

        graphml.appendChild(graph)

    def AlphabetType(self):
        """ return the type of emission domain
            XXX should call the method in HMMAlphabet
        """
        return int

    def ClassType(self):
        """Placeholder; currently returns None."""
        pass

    def DistributionType(self):
        """Placeholder; currently returns None."""
        pass

    def getBackgroundDist(self):
        """ Return a pair of two dictionaries: (distribution, its orders):
            a distribution is a list of real values of length N^(order+1).
        """
        return (self.backgroundDistributions.dist,
                self.backgroundDistributions.order)

    def buildMatrices(self):
        """ return [alphabets_code, A, B, pi, state_orders]

        A is a list of rows of transition weights (class 0 only), B the
        list of per-state emissions, pi the initial values and
        state_orders the per-state order, all in the iteration order of
        self.state.values().  Raises ValueError when a state's order is
        not 1 while its emission list has exactly alphabet-size entries.
        """
        pi = []
        B = []
        A = []
        nstates = len(self.state)

        # position of every state index in the output rows (C style index)
        orders = {}
        for k, s in enumerate(self.state.values()):  # ordering from XML
            orders[s.index] = k

        state_orders = []
        for s in self.state.values():  # a list of indices
            pi.append(s.initial)
            state_orders.append(s.order)  # state order

            size = self.hmmAlphabet.size()
            if s.order != 1 and size == len(s.emissions):
                # exception: inconsistency between ordering and emission
                raise ValueError
            else:
                B.append(s.emissions)  # emission

            # transition probability
            v = s.index
            outprobs = [0.0] * nstates
            for outid in self.G.OutNeighbors(v)[:]:
                myorder = orders[outid]
                outprobs[myorder] = self.G.edgeWeights[0][(v,outid)]
            A.append(outprobs)

        alphabets = self.hmmAlphabet.name.values()  # list of alphabets
        return [alphabets, A, B, pi, state_orders]

    def OpenXML(self, fileName):
        """Parse the GraphML file fileName and load it into this model.

        Raises FormatError when the document element is "ghmm"; that
        format is not supported.
        """
        dom = xml.dom.minidom.parse(fileName)
        if dom.documentElement.tagName == "ghmm":
            # Release the DOM before raising; a call placed after the
            # raise statement would never run.
            dom.unlink()
            sys.stderr.write("Do not support ghmm format")
            raise FormatError
        assert dom.documentElement.tagName == "graphml"
        self.fromDOM(dom)
        # dom.unlink()  -- left disabled, as in the original; reason not
        # visible here.  NOTE(review): confirm whether it is safe to enable.

    def _writeDocument(self, doc, fileName):
        """Pretty-print doc into fileName, closing the file even on error."""
        out = open(fileName, 'w')
        try:
            # xml.dom.ext.PrettyPrint(doc, out)
            out.write(toprettyxml(doc))  # problem with white spaces
        finally:
            out.close()

    def WriteXML(self, fileName):
        """Validate the model and write it to fileName as GraphML."""
        self.modelCheck()   # raise exceptions here
        doc = xml.dom.minidom.Document()
        try:
            self.toDOM(doc, doc)
            self._writeDocument(doc, fileName)
        finally:
            doc.unlink()

    def WriteGHMM(self, fileName):
        """Validate the model and write it to fileName inside a <ghmm> root."""
        self.modelCheck()   # raise exceptions here
        doc = xml.dom.minidom.Document()
        try:
            ghmm = doc.createElement("ghmm")
            doc.appendChild(ghmm)
            # NOTE(review): no two-argument toGHMM method is defined on HMM
            # in the visible source; verify this call resolves at runtime.
            self.toGHMM(doc, ghmm)
            self._writeDocument(doc, fileName)
        finally:
            doc.unlink()

    def SaveAs(self, fileName):
        """Write as GraphML when DocumentName is "graphml", else as GHMM."""
        if self.DocumentName == "graphml":
            self.WriteXML(fileName)
        else:
            self.WriteGHMM(fileName)

    def SaveAsGHMM(self, fileName):
        """Write the model to fileName in GHMM form."""
        self.WriteGHMM(fileName)


################################################################################
if __name__ == '__main__':
    hmmobj = HMM()
    hmmobj.OpenXML(sys.argv[1])
    hmmobj.WriteXML("utz.xml")
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -