📄 objecthmm.py
字号:
self.name2id[vname].append(v.id) return v.id def DeleteVertex(self, v): vname = self.vertices[v].name self.name2id[vname] = [x for x in self.name2id[vname] if x != v] if len(self.name2id[vname]) == 0: del self.name2id[vname] del self.vertices_ids[v] ObjectGraph.DeleteVertex(self, v) def AddEdge(self,tail,head): ObjectGraph.AddEdge(self,tail,head) edge = self.edges[tail,head] edge.SetWeight(1.0) def DeleteEdge(self,tail,head): ObjectGraph.DeleteEdge(self,tail,head) self.vertices[tail].normalize() def SetLabeling(self,v, value): self.vertices[v].labeling = ValidatingString(value) def edit(self, parent, attributes = None): if attributes == None: editBox = EditObjectAttributesDialog(parent, self, self.editableAttr) else: editableAttr = {} for attr in attributes: editableAttr[attr] = self.editableAttr[attr] editBox = EditObjectAttributesDialog(parent, self, editableAttr) mt = self.computeModelType() if mt > 0: self.initHMM(mt) else: print "invalid model type:", mt def computeModelType(self): modelType = 0 if self.etype == 0: modelType += ghmmwrapper.kDiscreteHMM if self.maxOrder > 0: modelType += ghmmwrapper.kHigherOrderEmissions elif self.etype == 1: modelType += ghmmwrapper.kContinuousHMM elif self.etype == 2: modelType += ghmmwrapper.kDiscreteHMM modelType += ghmmwrapper.kPairHMM else: print "invalid type:", self.etype if self.switching > 1: modelType += ghmmwrapper.kTransitionClasses if self.tied: modelType += ghmmwrapper.kTiedEmissions if self.silent: modelType += ghmmwrapper.kSilentStates if self.background: modelType += ghmmwrapper.kBackgroundDistributions if self.labels: modelType += ghmmwrapper.kLabeledStates return modelType def initHMM(self, modelType): # set the right emission type if modelType & ghmmwrapper.kDiscreteHMM: if modelType & ghmmwrapper.kPairHMM: emissionClass = DiscretePairEmission # alphabet missing else: if modelType & ghmmwrapper.kHigherOrderEmissions: emissionClass = DiscreteHigherOrderEmission else: emissionClass = DiscreteEmission alphabet = self.initAlphabet() elif modelType & ghmmwrapper.kContinuousHMM: if self.emissionClass == Emission: emissionClass = ContinuousEmission else: emissionClass = self.emissionClass alphabet = None else: print "not a valid model type" # set the right transition type if modelType & ghmmwrapper.kTransitionClasses: edgeClass = SwitchedTransition else: edgeClass = Transition # masking unnecessary model type flags out mt = modelType if modelType & ghmmwrapper.kDiscreteHMM: mt -= ghmmwrapper.kDiscreteHMM if modelType & ghmmwrapper.kContinuousHMM: mt -= ghmmwrapper.kContinuousHMM if modelType & ghmmwrapper.kPairHMM: mt -= ghmmwrapper.kPairHMM if modelType & (ghmmwrapper.kHigherOrderEmissions): mt -= ghmmwrapper.kHigherOrderEmissions # setting the right vertex type if mt == (ghmmwrapper.kSilentStates): vertexClass = SilentState elif mt == (ghmmwrapper.kTiedEmissions): vertexClass = TiedState elif mt == (ghmmwrapper.kBackgroundDistributions): vertexClass = BackgroundState elif mt == (ghmmwrapper.kLabeledStates): vertexClass = LabeledState # 2 elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions): vertexClass = SilentTiedState elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kLabeledStates): vertexClass = SilentLabeledState elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kBackgroundDistributions): vertexClass = SilentBackgroundState elif mt == (ghmmwrapper.kTiedEmissions + ghmmwrapper.kBackgroundDistributions): vertexClass = TiedBackgroundState elif mt == (ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates): vertexClass = LabeledTiedState elif mt == (ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions): vertexClass = LabeledBackgroundState # 3 elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions + ghmmwrapper.kBackgroundDistributions): vertexClass = SilentTiedBackgroundState elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates): vertexClass = SilentLabeledTiedState elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions): vertexClass = SilentLabeledBackgroundState elif mt == (ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions): vertexClass = LabeledTiedBackgroundState # 4 elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions): vertexClass = SilentLabeledTiedBackgroundState else: vertexClass = (modelType & ghmmwrapper.kContinuousHMM) and ContinuousState or State # initialize state labels if mt & ghmmwrapper.kLabeledStates: self.label_alphabet = DiscreteHMMAlphabet(description = "state labels") # initialize background distributions if mt & ghmmwrapper.kBackgroundDistributions: self.backgroundDistributions = DiscreteHMMBackground(emissionClass) # initialize background distributions if mt & ghmmwrapper.kTiedEmissions: self.tie_groups = TieGroups(emissionClass) self.__init__(vertexClass, edgeClass, emissionClass, alphabet) self.modelType = modelType def initAlphabet(self): if isinstance(self.alphabet, ghmm.Alphabet): return DiscreteHMMAlphabet(self.alphabet.listOfCharacters) if self.alphatype == 0: return DiscreteHMMAlphabet(["0", "1"]) elif self.alphatype == 1: return DiscreteHMMAlphabet(["1", "2", "3", "4", "5", "6"]) elif self.alphatype == 2: return DiscreteHMMAlphabet(["A", "C", "G", "T"]) elif self.alphatype == 3: return DiscreteHMMAlphabet(["ala", "arg", "asn", "asp", "asx", "cys", "glu", "gln", "glx", "gly", "his", "ile", "leu", "lys", "met", "phe", "pro", "ser", "thr", "try", "tyr", "val"]) elif self.alphatype == 4: return DiscreteHMMAlphabet() else: print "invalid alphabet type" return None def openXML(self, filename="test.xml"): # simple check of file filedata = ghmmwrapper.ghmm_xmlfile_parse(filename) if filedata == None: raise UnknownFileTypeException(filename) if filedata.noModels > 1: raise UnsupportedFileException(filename + "more than one HMM per file currently not supported") # initialize model and set auxiliary data accordingly to the model type self.initHMM(filedata.modelType) #cmodel = filedata.getModel(0) if self.modelType & ghmmwrapper.kContinuousHMM: self.buildFromCModel(filedata.get_cmodel(0)) elif self.modelType & ghmmwrapper.kDiscreteHMM: if self.modelType & ghmmwrapper.kPairHMM: self.buildFromCModel(filedata.get_dpmodel(0)) elif self.modelType & ghmmwrapper.kTransitionClasses: self.buildFromCModel(filedata.get_dsmodel(0)) else: self.buildFromCModel(filedata.get_dmodel(0)) def buildFromCModel(self, cmodel): cos = 1 # Add alphabet if appropiate first if self.modelType & ghmmwrapper.kDiscreteHMM: self.alphabet = DiscreteHMMAlphabet() self.alphabet.ReadCAlphabet(cmodel.alphabet) # Add all states vdict = {} for i in xrange(cmodel.N): vdict[i] = self.AddVertex() # Add all transitions for i in xrange(cmodel.N): state = cmodel.getState(i) for j in xrange(state.out_states): outID = state.getOutState(j) tail = vdict[i] head = vdict[outID] self.AddEdge(tail, head) # Add label alphabet if self.modelType & ghmmwrapper.kLabeledStates: self.label_alphabet = DiscreteHMMAlphabet() self.label_alphabet.ReadCAlphabet(cmodel.label_alphabet) # Add background distributions if appropiate if self.modelType & ghmmwrapper.kBackgroundDistributions: self.backgroundDistributions = DiscreteHMMBackground(self.emissionClass) self.backgroundDistributions.ReadCBackground(self.alphabet, cmodel.bp) # Add switching functions if appropiate if self.modelType & ghmmwrapper.kTransitionClasses: cos = cmodel.cos print "TODO: transition classes???" if self.modelType & ghmmwrapper.kContinuousHMM: cos = cmodel.cos # Set all states' values and set transition weights for i in xrange(cmodel.N): state = cmodel.getState(i) self.vertices[vdict[i]].ReadCState(cmodel, state, i) for j in xrange(state.out_states): outID = state.getOutState(j) tail = vdict[i] head = vdict[outID] self.edges[tail, head].ReadCTransition(state, cos, j) def normalize(self): # normalize initial probablilities initials = [v.initial for v in self.vertices.values() if v.initial >= 0.0] isum = sum(initials) if len(initials) == self.Order(): if isum == 0.0: for vertex in self.vertices.values(): vertex.initial = 1.0 / self.Order() else: factor = 1.0 / isum for vertex in self.vertices.values(): if vertex.initial >= 0.0: vertex.initial *= factor else: if isum > 1.0: factor = 1.0 / isum for vertex in self.vertices.values(): if vertex.initial >= 0.0: vertex.initial *= factor elif isum < 1.0: mean = (1.0-isum) / (self.Order()-len(initials)) for vertex in self.vertices.values(): if vertex.initial < 0.0: vertex.initial = mean # normalize state's transition probablilities for vertex in self.vertices.values(): vertex.normalize() def finalize(self): # ensure that all entities are properly normalized and initialized self.normalize() # build cmodel if self.modelType & ghmmwrapper.kContinuousHMM: cmodel = ghmmwrapper.ghmm_cmodel() cmodel.s = ghmmwrapper.cstate_array_alloc(self.Order()) elif self.modelType & ghmmwrapper.kDiscreteHMM: if self.modelType & ghmmwrapper.kPairHMM: cmodel = None elif self.modelType & ghmmwrapper.kTransitionClasses: cmodel = None else: cmodel = ghmmwrapper.ghmm_dmodel() cmodel.s = ghmmwrapper.dstate_array_alloc(self.Order()) cmodel.M = self.alphabet.size() cmodel.alphabet = self.alphabet.WriteCAlphabet() if self.modelType & ghmmwrapper.kTransitionClasses: cmodel.cos = maxcos() else: cmodel.cos = 1 # sort state IDs sortedIDs = self.vertices.keys() sortedIDs.sort() # fill state property arrays according to the model type with default values cmodel.N = self.Order() # fill silent array if self.modelType & ghmmwrapper.kSilentStates: cmodel.silent = ghmmhelper.list2int_array([self.vertices[id].silent for id in sortedIDs]) # fill tied to array if self.modelType & ghmmwrapper.kTiedEmissions: tied_list = [ghmmwrapper.kUntied] * self.Order() tieddict = {} # map python id to continious C array indeces for i, id in enumerate(sortedIDs): if self.vertices[id].tiedto > 0: tiedto = self.vertices[id].tiedto-1 if tieddict.has_key(tiedto): tieddict[tiedto].append(i) else: tieddict[tiedto] = [i] # tiedto has to be sorted, the first entry points to it self for k in tieddict.keys(): temp = tieddict[k] temp.sort() first = temp[0] for index in temp: tied_list[index] = first cmodel.tied_to = ghmmhelper.list2int_array(tied_list) # fill background id arrary if self.modelType & ghmmwrapper.kBackgroundDistributions: N = self.backgroundDistributions.size() M = self.alphabet.size() orders = ghmmhelper.list2int_array(self.backgroundDistributions.getOrders()) (weights,lengths) = ghmmhelper.list2double_matrix(self.backgroundDistributions.getWeights()) cmodel.bp = ghmmwrapper.ghmm_dbackground(N, M, orders, weights) for i,name in enumerate(self.backgroundDistributions.getNames()): cmodel.bp.setName(i, name) cmodel.background_id = ghmmhelper.list2int_array([(self.vertices[id].background-1) for id in sortedIDs]) # fill higher order array if self.modelType & ghmmwrapper.kHigherOrderEmissions: cmodel.order = ghmmhelper.list2int_array([self.vertices[id].emission.order for id in sortedIDs]) # fil label id array if self.modelType & ghmmwrapper.kLabeledStates: cmodel.label_alphabet = self.label_alphabet.WriteCAlphabet() cmodel.label = ghmmhelper.list2int_array([self.vertices[id].label for id in sortedIDs]) cmodel.model_type = self.modelType # create each state initial_sum = 0.0 for i, id in enumerate(sortedIDs): self.vertices[id].num = i initial_sum += self.vertices[id].initial if initial_sum < 1E-14: for id in sortedIDs: self.vertices[id].initial = 1.0 initial_sum = float(self.Order()) for i, id in enumerate(sortedIDs): cstate = cmodel.getState(i) self.vertices[id].initial /= initial_sum self.vertices[id].WriteCState(cstate) return cmodel def writeXML(self, filename="test.xml"): cmodel = self.finalize() # write to file cmodel.write_xml(filename)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -