⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ghmm.py

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 PY
📖 第 1 页 / 共 5 页
字号:
                      ids[s.index-1] = s.background # s.index ranges from [1, m.N]                   m.setBackground(bg, ids)                 log.debug( "model_type %x" % m.cmodel.model_type)                 log.debug("background_id" + str( ghmmhelper.int_array2list(m.cmodel.background_id, m.N)))            else:                 m.cmodel.bp = None                 m.cmodel.background_id = None            # check for tied states            tied = hmm_dom.getTiedStates()            if len(tied) > 0:                m.setFlags(kTiedEmissions)                m.cmodel.tied_to = ghmmhelper.list2int_array(tied)            durations = hmm_dom.getStateDurations()            if len(durations) == m.N:                 log.debug("durations: " + str(durations))                m.extendDurations(durations)            return m    def openSMO(self, fileName, modelIndex):        # MO & SMO Files, format is deprecated        # check if ghmm is build with smo support        if not ghmmwrapper.SMO_FILE_SUPPORT:            raise UnsupportedFeature ("smo files are deprecated. Please convert your files"                                      "to the new xml-format or rebuild the GHMM with the"                                      "conditional \"GHMM_OBSOLETE\".")                    (hmmClass, emission_domain, distribution) = self.determineHMMClass(fileName)        log.debug("determineHMMClass = "+ str(  (hmmClass, emission_domain, distribution)))                nrModelPtr = ghmmwrapper.int_array_alloc(1)        # XXX broken since silent states are not supported by .smo file format         if hmmClass == DiscreteEmissionHMM:            models = ghmmwrapper.ghmm_dmodel_read(fileName, nrModelPtr)            getPtr = ghmmwrapper.dmodel_ptr_array_getitem            base_model_type = ghmmwrapper.KDiscreteHMM        else:            models = ghmmwrapper.ghmm_cmodel_read(fileName, nrModelPtr)            getPtr = ghmmwrapper.cmodel_ptr_array_getitem            base_model_type = ghmmwrapper.kContinuousHMM        nrModels = ghmmwrapper.int_array_getitem(nrModelPtr, 0)        if modelIndex == None:            result = []            for i in range(nrModels):                cmodel = getPtr(models, i)                cmodel.setModelTypeFlag(base_model_type)                m = hmmClass(emission_domain, distribution(emission_domain), cmodel)                result.append(m)        else:            if modelIndex < nrModels:                cmodel = getPtr(models, modelIndex)                cmodel.setModelTypeFlag(base_model_type)                result = hmmClass(emission_domain, distribution(emission_domain), cmodel)            else:                result = None        return result    def openHMMER(self, fileName):        # HMMER format models        h = modhmmer.hmmer(fileName)                if h.m == 4:  # DNA model            emission_domain = DNA        elif h.m == 20:   # Peptide model            emission_domain = AminoAcids        else:   # some other model            emission_domain = IntegerRange(0,h.m)        distribution = DiscreteDistribution(emission_domain)                # XXX TODO: Probably slow for large matrices (Rewrite for 0.9)        [A,B,pi,modelName] = h.getGHMMmatrices()        return  HMMFromMatrices(emission_domain, distribution, A, B, pi, hmmName=modelName)            def determineHMMClass(self, fileName):        #        # smo files. Obsolete         #        file = open(fileName,'r')                hmmRe = re.compile("^HMM\s*=")        shmmRe = re.compile("^SHMM\s*=")        mvalueRe = re.compile("M\s*=\s*([0-9]+)")        densityvalueRe = re.compile("density\s*=\s*([0-9]+)")        cosvalueRe = re.compile("cos\s*=\s*([0-9]+)")        emission_domain = None        while 1:            l = file.readline()            if not l:                break            l = l.strip()            if len(l) > 0 and l[0] != '#': # Not a comment line                hmm = hmmRe.search(l)                shmm = shmmRe.search(l)                mvalue = mvalueRe.search(l)                densityvalue = densityvalueRe.search(l)                cosvalue = cosvalueRe.search(l)                                if hmm != None:                    if emission_domain != None and emission_domain != 'int':                        log.error( "HMMOpenFactory:determineHMMClass: both HMM and SHMM? " + str(emission_domain))                    else:                        emission_domain = 'int'                                    if shmm != None:                    if emission_domain != None and emission_domain != 'double':                        log.error( "HMMOpenFactory:determineHMMClass: both HMM and SHMM? " + str(emission_domain))                    else:                        emission_domain = 'double'                if mvalue != None:                    M = int(mvalue.group(1))                if densityvalue != None:                    density = int(densityvalue.group(1))                if cosvalue != None:                    cos = int(cosvalue.group(1))        file.close()        if emission_domain == 'int':            # only integer alphabet            emission_domain = IntegerRange(0,M)            distribution = DiscreteDistribution            hmm_class = DiscreteEmissionHMM            return (hmm_class, emission_domain, distribution)                elif emission_domain == 'double':            # M        number of mixture components            # density  component type            # cos      number of state transition classes            if M == 1 and density == 0:                emission_domain = Float()                distribution = GaussianDistribution                hmm_class = GaussianEmissionHMM                            return (hmm_class, emission_domain, distribution)            elif  M > 1 and density == 0:                emission_domain = Float()                distribution = GaussianMixtureDistribution                hmm_class = GaussianMixtureHMM                return (hmm_class, emission_domain, distribution)            else:                raise TypeError, "Model type can not be determined."        return (None, None, None)# the following three methods are deprecatedHMMOpenHMMER = HMMOpenFactory(GHMM_FILETYPE_HMMER) # read single HMMER model from fileHMMOpenSMO   = HMMOpenFactory(GHMM_FILETYPE_SMO)HMMOpenXML   = HMMOpenFactory(GHMM_FILETYPE_XML)# use only HMMOpen and specify the filetype if it can't guessed from the extensionHMMOpen      = HMMOpenFactory()def readMultipleHMMERModels(fileName):    """        Reads a file containing multiple HMMs in HMMER format, returns list of        HMM objects.    """    # XXX Integrate into HMMOpen, check for single hmm files    if not os.path.exists(fileName):        raise IOError, 'File ' + str(fileName) + ' not found.'        modelList = []    string = ""    f = open(fileName,"r")        res = re.compile("^//")    stat = re.compile("^ACC\s+(\w+)")    for line in f.readlines():        string = string + line        m = stat.match(line)        if m:            name = m.group(1)            log.info( "Reading model " + str(name) + ".")                    match = res.match(line)        if match:            fileLike = StringIO.StringIO(string)            modelList.append(HMMOpenHMMER(fileLike))            string = ""            match = None    return modelList    class HMMFromMatricesFactory(HMMFactory):    """ XXX Document matrix formats """    # XXX TODO: this should use the editing context    def __call__(self, emissionDomain, distribution, A, B, pi, hmmName = None, labelDomain= None, labelList = None, densities = None):        if isinstance(emissionDomain,Alphabet):                        if not emissionDomain == distribution.alphabet:                raise TypeError, "emissionDomain and distribution must be compatible"                        # checking matrix dimensions and argument validation, only some obvious errors are checked            if not len(A) == len(A[0]):                raise InvalidModelParameters, "A is not quadratic."            if not len(pi) == len(A):                raise InvalidModelParameters,  "Length of pi does not match length of A."            if not len(A) == len(B):                raise InvalidModelParameters, " Different number of entries in A and B."            if (labelDomain is None and labelList is not None) or (labelList is None and labelList is not None):                raise InvalidModelParameters, "Specify either both labelDomain and labelInput or neither."                        if isinstance(distribution,DiscreteDistribution):                # HMM has discrete emissions over finite alphabet: DiscreteEmissionHMM                cmodel = ghmmwrapper.ghmm_dmodel(len(A), len(emissionDomain))                                # assign model identifier (if specified)                if hmmName != None:                    cmodel.name = hmmName                else:                    cmodel.name = ''                states = ghmmwrapper.dstate_array_alloc(cmodel.N)                silent_states = []                tmpOrder = []                #initialize states                for i in range(cmodel.N):                    state = ghmmwrapper.dstate_array_getRef(states, i)                    # compute state order                    if cmodel.M > 1:                        order = math.log(len(B[i]), cmodel.M)-1                    else:                        order = len(B[i]) - 1                                            log.debug( "order in state " + str(i) + " = " + str(order) )                    # check or valid number of emission parameters                    order = int(order)                    if  cmodel.M**(order+1) == len(B[i]):                        tmpOrder.append(order)                    else:                        raise InvalidModelParameters("The number of " + str(len(B[i])) +                                                     " emission parameters for state " +                                                     str(i) + " is invalid. State order can not be determined.")                                        state.b = ghmmhelper.list2double_array(B[i])                    state.pi = pi[i]                                        if sum(B[i]) == 0.0:                         silent_states.append(1)                    else:                        silent_states.append(0)                    #set out probabilities                    state.out_states, state.out_id, state.out_a = ghmmhelper.extract_out(A[i])                    #set "in" probabilities                    A_col_i = map(lambda x: x[i], A)                    # Numarray use A[,:i]                    state.in_states, state.in_id, state.in_a = ghmmhelper.extract_out(A_col_i)                    #fix probabilities in reestimation, else 0                    state.fix = 0                cmodel.s = states                if sum(silent_states) > 0:                    cmodel.model_type |= kSilentStates                    cmodel.silent = ghmmhelper.list2int_array(silent_states)                cmodel.maxorder = max(tmpOrder)                if cmodel.maxorder > 0:                    log.debug( "Set kHigherOrderEmissions.")                    cmodel.model_type |= kHigherOrderEmissions                    cmodel.order = ghmmhelper.list2int_array(tmpOrder)                # initialize lookup table for powers of the alphabet size,                # speeds up models with higher order states                powLookUp = [1] * (cmodel.maxorder+2)                for i in range(1,len(powLookUp)):                    powLookUp[i] = powLookUp[i-1] * cmodel.M                cmodel.pow_lookup = ghmmhelper.list2int_array(powLookUp)                                # check for state labels                if labelDomain is not None and labelList is not None:                    if not isinstance(labelDomain,LabelDomain):                        raise TypeError, "LabelDomain object required."                                        cmodel.model_type |= kLabeledStates                    m = StateLabelHMM(emissionDomain, distribution, labelDomain, cmodel)                    m.setLabels(labelList)                    return m                else:                    return DiscreteEmissionHMM(emissionDomain, distribution, cmodel)            else:                raise GHMMError(type(distribution), "Not a valid distribution for Alphabet")         else:            if isinstance(distribution,GaussianDistribution):                # determining number of transition classes                cos = ghmmhelper.classNumber(A)                if cos == 1:                    A = [A]                cmodel = ghmmwrapper.ghmm_cmodel(len(A[0]), 1, cos)                log.debug("cmodel.cos = " + str(cmodel.cos))                states = ghmmwrapper.cstate_array_alloc(cmodel.N)                # Switching functions and transition classes are handled                # elswhere                #initialize states                for i in range(cmodel.N):                    state = ghmmwrapper.cstate_array_getRef(states, i)                    state.pi = pi[i]                    state.M = 1                    # allocate arrays of emmission parameters                    state.c = ghmmhelper.list2double_array([1.0]) # Mixture weights. Unused                    (mu, sigma) = B[i]                    state.mue = ghmmhelper.list2double_array([mu]) #mu = mue in GHMM C-lib.                    state.u = ghmmhelper.list2double_array([sigma])                    state.c = ghmmhelper.list2double_array([1.0])                    state.a = ghmmhelper.list2double_array([0.0])                                        # mixture fixing deactivated by default                    state.mixture_fix = ghmmhelper.list2int_array([0])                    # setting densities types (all normal by default)                                        densities = ghmmwrapper.density_array_alloc(1)                    state.density = densities                    state.setDensity(0,0)                                                             

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -