⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 objecthmm.py

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 PY
📖 第 1 页 / 共 4 页
字号:
        self.name2id[vname].append(v.id)        return v.id    def DeleteVertex(self, v):        vname = self.vertices[v].name        self.name2id[vname] = [x for x in self.name2id[vname] if x != v]        if len(self.name2id[vname]) == 0:            del self.name2id[vname]        del self.vertices_ids[v]        ObjectGraph.DeleteVertex(self, v)    def AddEdge(self,tail,head):        ObjectGraph.AddEdge(self,tail,head)        edge = self.edges[tail,head]        edge.SetWeight(1.0)    def DeleteEdge(self,tail,head):        ObjectGraph.DeleteEdge(self,tail,head)        self.vertices[tail].normalize()    def SetLabeling(self,v, value):        self.vertices[v].labeling = ValidatingString(value)    def edit(self, parent, attributes = None):        if attributes == None:            editBox = EditObjectAttributesDialog(parent, self, self.editableAttr)        else:            editableAttr = {}            for attr in attributes:                editableAttr[attr] = self.editableAttr[attr]            editBox = EditObjectAttributesDialog(parent, self, editableAttr)        mt = self.computeModelType()        if mt > 0:            self.initHMM(mt)        else:            print "invalid model type:", mt    def computeModelType(self):        modelType = 0        if self.etype == 0:            modelType += ghmmwrapper.kDiscreteHMM            if self.maxOrder > 0:                modelType += ghmmwrapper.kHigherOrderEmissions        elif self.etype == 1:            modelType += ghmmwrapper.kContinuousHMM        elif self.etype == 2:            modelType += ghmmwrapper.kDiscreteHMM            modelType += ghmmwrapper.kPairHMM        else:            print "invalid type:", self.etype        if self.switching > 1:            modelType += ghmmwrapper.kTransitionClasses                    if self.tied:            modelType += ghmmwrapper.kTiedEmissions        if self.silent:            modelType += ghmmwrapper.kSilentStates        if self.background:            modelType += ghmmwrapper.kBackgroundDistributions        if self.labels:            modelType += ghmmwrapper.kLabeledStates        return modelType    def initHMM(self, modelType):        # set the right emission type        if modelType & ghmmwrapper.kDiscreteHMM:            if modelType & ghmmwrapper.kPairHMM:                emissionClass = DiscretePairEmission                # alphabet missing            else:                if modelType & ghmmwrapper.kHigherOrderEmissions:                    emissionClass = DiscreteHigherOrderEmission                else:                    emissionClass = DiscreteEmission                alphabet = self.initAlphabet()        elif modelType & ghmmwrapper.kContinuousHMM:            if self.emissionClass == Emission:                emissionClass = ContinuousEmission            else:                emissionClass = self.emissionClass            alphabet = None        else:            print "not a valid model type"        # set the right transition type        if modelType & ghmmwrapper.kTransitionClasses:            edgeClass = SwitchedTransition        else:            edgeClass = Transition                    # masking unnecessary model type flags out        mt = modelType        if modelType & ghmmwrapper.kDiscreteHMM:            mt -= ghmmwrapper.kDiscreteHMM        if modelType & ghmmwrapper.kContinuousHMM:            mt -= ghmmwrapper.kContinuousHMM        if modelType & ghmmwrapper.kPairHMM:            mt -= ghmmwrapper.kPairHMM        if modelType & (ghmmwrapper.kHigherOrderEmissions):            mt -= ghmmwrapper.kHigherOrderEmissions        # setting the right vertex type        if mt == (ghmmwrapper.kSilentStates):            vertexClass = SilentState        elif mt == (ghmmwrapper.kTiedEmissions):            vertexClass = TiedState        elif mt == (ghmmwrapper.kBackgroundDistributions):            vertexClass = BackgroundState        elif mt == (ghmmwrapper.kLabeledStates):            vertexClass = LabeledState        # 2        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions):            vertexClass = SilentTiedState        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kLabeledStates):            vertexClass = SilentLabeledState        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kBackgroundDistributions):            vertexClass = SilentBackgroundState        elif mt == (ghmmwrapper.kTiedEmissions + ghmmwrapper.kBackgroundDistributions):            vertexClass = TiedBackgroundState        elif mt == (ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates):            vertexClass = LabeledTiedState        elif mt == (ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions):            vertexClass = LabeledBackgroundState        # 3        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions + ghmmwrapper.kBackgroundDistributions):            vertexClass = SilentTiedBackgroundState        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates):            vertexClass = SilentLabeledTiedState        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions):            vertexClass = SilentLabeledBackgroundState        elif mt == (ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions):            vertexClass = LabeledTiedBackgroundState        # 4        elif mt == (ghmmwrapper.kSilentStates + ghmmwrapper.kTiedEmissions + ghmmwrapper.kLabeledStates + ghmmwrapper.kBackgroundDistributions):            vertexClass = SilentLabeledTiedBackgroundState        else:            vertexClass = (modelType & ghmmwrapper.kContinuousHMM) and ContinuousState or State        # initialize state labels        if mt & ghmmwrapper.kLabeledStates:            self.label_alphabet = DiscreteHMMAlphabet(description = "state labels")        # initialize background distributions        if mt & ghmmwrapper.kBackgroundDistributions:            self.backgroundDistributions = DiscreteHMMBackground(emissionClass)                    # initialize background distributions        if mt & ghmmwrapper.kTiedEmissions:            self.tie_groups = TieGroups(emissionClass)        self.__init__(vertexClass, edgeClass, emissionClass, alphabet)        self.modelType = modelType    def initAlphabet(self):        if isinstance(self.alphabet, ghmm.Alphabet):            return DiscreteHMMAlphabet(self.alphabet.listOfCharacters)                    if self.alphatype == 0:            return DiscreteHMMAlphabet(["0", "1"])        elif self.alphatype == 1:            return DiscreteHMMAlphabet(["1", "2", "3", "4", "5", "6"])        elif self.alphatype == 2:            return DiscreteHMMAlphabet(["A", "C", "G", "T"])        elif self.alphatype == 3:            return DiscreteHMMAlphabet(["ala", "arg", "asn", "asp", "asx",                                        "cys", "glu", "gln", "glx", "gly",                                        "his", "ile", "leu", "lys", "met",                                        "phe", "pro", "ser", "thr", "try",                                        "tyr", "val"])        elif self.alphatype == 4:            return DiscreteHMMAlphabet()        else:            print "invalid alphabet type"            return None    def openXML(self, filename="test.xml"):        # simple check of file         filedata = ghmmwrapper.ghmm_xmlfile_parse(filename)        if filedata == None:            raise UnknownFileTypeException(filename)        if filedata.noModels > 1:            raise UnsupportedFileException(filename + "more than one HMM per file currently not supported")        # initialize model and set auxiliary data accordingly to the model type        self.initHMM(filedata.modelType)        #cmodel = filedata.getModel(0)        if self.modelType & ghmmwrapper.kContinuousHMM:            self.buildFromCModel(filedata.get_cmodel(0))        elif self.modelType & ghmmwrapper.kDiscreteHMM:            if self.modelType & ghmmwrapper.kPairHMM:                self.buildFromCModel(filedata.get_dpmodel(0))            elif self.modelType & ghmmwrapper.kTransitionClasses:                self.buildFromCModel(filedata.get_dsmodel(0))            else:                self.buildFromCModel(filedata.get_dmodel(0))    def buildFromCModel(self, cmodel):        cos = 1        # Add alphabet if appropiate first        if self.modelType & ghmmwrapper.kDiscreteHMM:            self.alphabet = DiscreteHMMAlphabet()            self.alphabet.ReadCAlphabet(cmodel.alphabet)        # Add all states        vdict = {}        for i in xrange(cmodel.N):            vdict[i] = self.AddVertex()        # Add all transitions        for i in xrange(cmodel.N):            state = cmodel.getState(i)            for j in xrange(state.out_states):                outID = state.getOutState(j)                tail = vdict[i]                head = vdict[outID]                self.AddEdge(tail, head)                # Add label alphabet        if self.modelType & ghmmwrapper.kLabeledStates:            self.label_alphabet = DiscreteHMMAlphabet()            self.label_alphabet.ReadCAlphabet(cmodel.label_alphabet)        # Add background distributions if appropiate        if self.modelType & ghmmwrapper.kBackgroundDistributions:            self.backgroundDistributions = DiscreteHMMBackground(self.emissionClass)            self.backgroundDistributions.ReadCBackground(self.alphabet, cmodel.bp)        # Add switching functions if appropiate        if self.modelType & ghmmwrapper.kTransitionClasses:            cos = cmodel.cos            print "TODO: transition classes???"        if self.modelType & ghmmwrapper.kContinuousHMM:            cos = cmodel.cos        # Set all states' values and set transition weights        for i in xrange(cmodel.N):            state = cmodel.getState(i)            self.vertices[vdict[i]].ReadCState(cmodel, state, i)            for j in xrange(state.out_states):                outID = state.getOutState(j)                tail = vdict[i]                head = vdict[outID]                self.edges[tail, head].ReadCTransition(state, cos, j)    def normalize(self):        # normalize initial probablilities        initials = [v.initial for v in self.vertices.values() if v.initial >= 0.0]        isum = sum(initials)        if len(initials) == self.Order():            if isum == 0.0:                for vertex in self.vertices.values():                    vertex.initial = 1.0 / self.Order()            else:                factor = 1.0 / isum                for vertex in self.vertices.values():                    if vertex.initial >= 0.0:                        vertex.initial *= factor        else:            if isum > 1.0:                factor = 1.0 / isum                for vertex in self.vertices.values():                    if vertex.initial >= 0.0:                        vertex.initial *= factor            elif isum < 1.0:                mean = (1.0-isum) / (self.Order()-len(initials))                for vertex in self.vertices.values():                    if vertex.initial < 0.0:                        vertex.initial = mean        # normalize state's transition probablilities        for vertex in self.vertices.values():            vertex.normalize()            def finalize(self):        # ensure that all entities are properly normalized and initialized        self.normalize()        # build cmodel        if self.modelType & ghmmwrapper.kContinuousHMM:            cmodel = ghmmwrapper.ghmm_cmodel()            cmodel.s = ghmmwrapper.cstate_array_alloc(self.Order())        elif self.modelType & ghmmwrapper.kDiscreteHMM:            if self.modelType & ghmmwrapper.kPairHMM:                cmodel = None            elif self.modelType & ghmmwrapper.kTransitionClasses:                cmodel = None            else:                cmodel   = ghmmwrapper.ghmm_dmodel()                cmodel.s = ghmmwrapper.dstate_array_alloc(self.Order())                cmodel.M = self.alphabet.size()                cmodel.alphabet = self.alphabet.WriteCAlphabet()                        if self.modelType & ghmmwrapper.kTransitionClasses:            cmodel.cos = maxcos()        else:            cmodel.cos = 1        # sort state IDs        sortedIDs = self.vertices.keys()        sortedIDs.sort()        # fill state property arrays according to the model type with default values        cmodel.N = self.Order()        # fill silent array        if self.modelType & ghmmwrapper.kSilentStates:            cmodel.silent = ghmmhelper.list2int_array([self.vertices[id].silent for id in sortedIDs])        # fill tied to array        if self.modelType & ghmmwrapper.kTiedEmissions:            tied_list = [ghmmwrapper.kUntied] * self.Order()            tieddict = {}            # map python id to continious C array indeces            for i, id in enumerate(sortedIDs):                if self.vertices[id].tiedto > 0:                    tiedto = self.vertices[id].tiedto-1                    if tieddict.has_key(tiedto):                        tieddict[tiedto].append(i)                    else:                        tieddict[tiedto] = [i]            # tiedto has to be sorted, the first entry points to it self            for k in tieddict.keys():                temp = tieddict[k]                temp.sort()                first = temp[0]                for index in temp:                    tied_list[index] = first            cmodel.tied_to = ghmmhelper.list2int_array(tied_list)        # fill background id arrary        if self.modelType & ghmmwrapper.kBackgroundDistributions:            N = self.backgroundDistributions.size()            M = self.alphabet.size()            orders = ghmmhelper.list2int_array(self.backgroundDistributions.getOrders())            (weights,lengths) = ghmmhelper.list2double_matrix(self.backgroundDistributions.getWeights())            cmodel.bp = ghmmwrapper.ghmm_dbackground(N, M, orders, weights)            for i,name in enumerate(self.backgroundDistributions.getNames()):                cmodel.bp.setName(i, name)            cmodel.background_id = ghmmhelper.list2int_array([(self.vertices[id].background-1) for id in sortedIDs])        # fill higher order array        if self.modelType & ghmmwrapper.kHigherOrderEmissions:            cmodel.order = ghmmhelper.list2int_array([self.vertices[id].emission.order for id in sortedIDs])        # fil label id array        if self.modelType & ghmmwrapper.kLabeledStates:            cmodel.label_alphabet = self.label_alphabet.WriteCAlphabet()            cmodel.label = ghmmhelper.list2int_array([self.vertices[id].label for id in sortedIDs])        cmodel.model_type = self.modelType        # create each state        initial_sum = 0.0        for i, id in enumerate(sortedIDs):            self.vertices[id].num = i            initial_sum += self.vertices[id].initial        if initial_sum < 1E-14:            for id in sortedIDs:                self.vertices[id].initial = 1.0            initial_sum = float(self.Order())        for i, id in enumerate(sortedIDs):            cstate = cmodel.getState(i)            self.vertices[id].initial /= initial_sum            self.vertices[id].WriteCState(cstate)        return cmodel    def writeXML(self, filename="test.xml"):        cmodel = self.finalize()        # write to file        cmodel.write_xml(filename)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -