📄 modhmmer.py
字号:
#print "index1: " +str(i) #print self.maems[i] norm_mat(self.maems[i]) #print "trans:" norm_mat(self.matrans) #print "66666" self.matrans[self.lisHead.index("T")][self.lisHead.index("T")] = 0.0 #print "parsen fertig" def __str__(self): print "oben" hmm_str = "N= " + str(self.n) +", M= " + str(self.m) + "\n" hmm_str += "Transitions: \n" for row in self.matrans: hmm_str += str(row) + "\n" hmm_str += "Emissions: \n" for row in self.maems: hmm_str += str(row) + "\n" print hmm_str return hmm_str def set_matrix(self,type,lis,offset=0): "fills matrix with values from line <type> and adds optional offset" for k in range(len(lis)): if lis[k]!="*": x_c = self.dicTEntries[type][k][0] y_c = self.dicTEntries[type][k][1] x = self.dicTE[type][k][0]+offset*(x_c in self.lisMID) y = self.dicTE[type][k][1] if y_c in self.lisMID: y = y+offset if ((x_c in ["D","I"]) or (x_c==y_c=="M") or (x_c=="M" and y_c=="D")) and not (x_c==y_c=="I"): y=y+3 self.matrans[x][y] = self.H2P(lis[k],1.0) # print (x_c + str(offset),y_c + str(offset)),self.matrans[x][y] def H2P(self,score,null_prob): "returns the probability" if score == "*": return 0 else: return null_prob * 2**(float(score)/self.intscale) def getGHMMmatrices(self): """ Converts the MODHMMER matrix format into matrices to be used in ghmm.HMMFromMatrices. """ emiss_mat = [] # intitializing pi vector, always starting in B state (index 0) pi = [1] + [0] * ((self.n * 3) + 4) silent = [0] * (self.m) # emissions for silent states equal = [1.0/(self.m)] * self.m # uniform distribution over the number of symbols # conversion of the HMMER emission matrices into ghmm format # emmission probs in HMMER: [match=0/insert=1][state][emission-letter] # order of states in HMMER transition matrix: N B E J C T M1 I1 D1 M2 I2 D2 ... Mn In Dn emiss_mat.append(equal) # N state (equal) emiss_mat.append(silent) # B state (silent) emiss_mat.append(silent) # E state (silent) emiss_mat.append(equal) # J state (equal) emiss_mat.append(equal) # C state (equal) emiss_mat.append(silent) # T (silent) for ind1 in range(self.n): # n = number of M/I/D blocks emiss_mat.append(self.maems[0][ind1]) # M state if (ind1 != self.n-1): emiss_mat.append(self.maems[1][ind1]) # I state emiss_mat.append(silent) #(silent) # D state (silent) return [self.matrans,emiss_mat,pi,self.acc] def get_dom(self): "returns DOM object" doc = minidom.Document() nodgraphml = doc.createElement("graphml") nodkey = doc.createElement("key") nodkey.setAttribute("for","node") nodkey.setAttribute("gd:type","HigherDiscreteProbDist") nodkey.setAttribute("id","emissions") nodgraphml.appendChild(nodkey) #declare dummy class nodclass = doc.createElement("hmm:class") nodclass.setAttribute("hmm:high","0") nodclass.setAttribute("hmm:low","0") nodmap = doc.createElement("map") nodsym = doc.createElement("symbol") nodsym.setAttribute("code","0") nodsym.setAttribute("desc","Simple") nodsym.appendChild(doc.createTextNode("N")) nodmap.appendChild(nodsym) nodclass.appendChild(nodmap) nodgraphml.appendChild(nodclass) #declare alphabet nodalpha = doc.createElement("hmm:alphabet") nodalpha.setAttribute("hmm:high",str(self.m-1)) nodalpha.setAttribute("hmm:low","0") nodalpha.setAttribute("hmm:type","discrete") nodmap = doc.createElement("map") for k in self.dicLetters[self.m]: nodmap.appendChild(xml_newdatanode(doc,"symbol","code",str(self.dicLetters[self.m].index(k)),str(k))) nodalpha.appendChild(nodmap) nodgraphml.appendChild(nodalpha) dicHash = {} #nodes/states nodgraph = doc.createElement("graph") #add match/insert/delete states for k in self.lisHead: if k in self.lisMID: n = self.n if k == "I": n=n-1 else: n = 1 for i in range(n): pos = self.lisHead.index(k) nodnode = doc.createElement("node") #node:data:label strlabel = k if (n>1) or (k in self.lisMID): strlabel = strlabel + str(i) nodnode.setAttribute("id",strlabel) h = pos + (i*3) if (k=="D") and (i==n-1): h=h-1 #because last I column is deleted and therefore delete column index is old one minus one dicHash[h] = strlabel nodnode.appendChild(xml_newdatanode(doc,"data","key","label",strlabel)) #node:data:class nodnode.appendChild(xml_newdatanode(doc,"data","key","class","0")) #node:data:initial nodnode.appendChild(xml_newdatanode(doc,"data","key","initial",str(k=="N"))) #node:data:ngeom noddata = doc.createElement("data") noddata.setAttribute("key","ngeom") nodpos = doc.createElement("pos") if k == "N": strx = self.off_distx stry = 4*self.off_disty elif k=="B": strx = 2*self.off_distx stry = 4*self.off_disty elif k=="J": strx = (self.n+4)/2*self.off_distx stry = 7*self.off_disty elif k=="E": strx = (self.n + 3)*self.off_distx stry = 2*self.off_disty elif k=="C": strx = (self.n + 4)*self.off_distx stry = 2*self.off_disty elif k=="T": strx = (self.n + 5)*self.off_distx stry = 2*self.off_disty elif k=="M": strx = (3+i)*self.off_distx stry = 3*self.off_disty elif k=="I": strx = (3+i)*self.off_distx stry = 1*self.off_disty elif k=="D": strx = (3+i)*self.off_distx stry = 5*self.off_disty nodpos.setAttribute("x",str(strx)) nodpos.setAttribute("y",str(stry)) noddata.appendChild(nodpos) nodnode.appendChild(noddata) #node:data:emissions if k in ["M","I"]: strem = string.join(map(str,self.maems[self.lisMID.index(k)][i]),", ") elif k in ["N","C"]: strem = string.join(map(str,self.maems[2][0]),", ") else: strem = (self.m-1)*"0.0, " + "0.0" nodnode.appendChild(xml_newdatanode(doc,"data","key","emissions",strem)) nodgraph.appendChild(nodnode) #edges/transitions for i in range(len(self.matrans)): for j in range(len(self.matrans[i])): if self.matrans[i][j]!=0: nodedge = doc.createElement("edge") nodedge.setAttribute("source",dicHash[i]) nodedge.setAttribute("target",dicHash[j]) nodedge.appendChild(xml_newdatanode(doc,"data","key","prob",str(self.matrans[i][j]))) nodgraph.appendChild(nodedge) nodgraphml.appendChild(nodgraph) doc.appendChild(nodgraphml) return doc def write_prettyxml(self,strf): "writes hmm in pretty xml format to file" write_file(strf,self.get_dom().toprettyxml()) def write_xml(self,strf): "writes hmm in xml format to file" write_file(strf,self.get_dom().toxml())
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -