📄 ghmmunittests.py
字号:
self.assertEqual(map(g, alpha),map(g, talpha)) tscale = [0.69999999999999996, 0.57285714285714284, 0.56965087281795512, 0.38953070962658143, 0.027857142857142858, 0.56461538461538452, 0.57168937329700276, 0.39770983270578136, 0.57285714285714284, 0.56965087281795512, 0.57043689532898478, 0.39269141068371188, 0.57285714285714284, 0.56965087281795501, 0.57043689532898478, 0.39269141068371188, 0.34999999999999998, 0.34999999999999998, 0.57285714285714284, 0.40236907730673316, 0.57285714285714284, 0.56965087281795512, 0.38953070962658143, 0.34999999999999998, 0.57285714285714284, 0.40236907730673316, 0.57285714285714284, 0.40236907730673316, 0.34999999999999998, 0.34999999999999998, 0.57285714285714284, 0.40236907730673316, 0.57285714285714284, 0.40236907730673316, 0.57285714285714284, 0.56965087281795512, 0.38953070962658143, 0.027857142857142858, 0.48461538461538456, 0.34999999999999998] self.assertEqual(map(f, scale), map(f,tscale)) beta = self.model.backward(seq,scale) tbeta = [[0.99999999999999944, 0.93777288282264037, 0.90665932423396078], [1.0387725400509094, 0.87202814099718418, 0.78865594147032159], [0.89851709082326969, 1.1552362596299182, 1.2835958440332425], [0.99999999999999956, 0.3333333333333332, 0.0], [0.99018797150167415, 0.92857142857142794, 0.89776315710630483], [1.0137817804861966, 0.8510489133365684, 0.76968247976175441], [0.88003858898536058, 1.1314781858383207, 1.2571979842648009], [0.999999999999999, 0.91297745742272951, 0.86946618613409465], [0.99615983039934863, 0.93417167590571015, 0.90317759865889091], [1.028991814771324, 0.86381742368004855, 0.7812302281344109], [0.89128509174829584, 1.1459379751049517, 1.2732644167832796], [0.99999999999999944, 0.91297745742272962, 0.86946618613409488], [0.99615983039934886, 0.93417167590571037, 0.90317759865889113], [1.0289918147713242, 0.86381742368004866, 0.78123022813441101], [0.89128509174829595, 1.1459379751049521, 1.2732644167832801], [0.99999999999999967, 1.2857142857142854, 1.4285714285714282], [1.0, 1.2857142857142856, 1.4285714285714286], [1.0, 0.83947939262472882, 0.75921908893709322], [0.86984815618221256, 1.1183762008057019, 1.2426402231174465], [1.0, 0.93777288282264093, 0.90665932423396134], [1.03877254005091, 0.87202814099718462, 0.78865594147032214], [0.89851709082327025, 1.1552362596299188, 1.2835958440332431], [1.0000000000000002, 1.285714285714286, 1.428571428571429], [1.0000000000000002, 0.83947939262472893, 0.75921908893709344], [0.86984815618221256, 1.1183762008057019, 1.2426402231174465], [1.0, 0.83947939262472882, 0.75921908893709322], [0.86984815618221256, 1.1183762008057019, 1.2426402231174465], [1.0, 1.2857142857142856, 1.4285714285714286], [1.0, 1.2857142857142856, 1.4285714285714286], [1.0, 0.83947939262472882, 0.75921908893709322], [0.86984815618221256, 1.1183762008057019, 1.2426402231174465], [1.0, 0.83947939262472882, 0.75921908893709322], [0.86984815618221256, 1.1183762008057019, 1.2426402231174465], [1.0, 0.93777288282264093, 0.90665932423396134], [1.03877254005091, 0.87202814099718462, 0.78865594147032214], [0.89851709082327025, 1.1552362596299188, 1.2835958440332431], [1.0000000000000002, 0.33333333333333343, 0.0], [0.72222222222222221, 0.92857142857142849, 1.0317460317460319], [1.0, 1.2857142857142856, 1.4285714285714286], [1.0, 1.0, 1.0]] self.assertEqual (map(g, beta),map(g, tbeta)) #testing forward and backward log probabilities self.assertEqual (f(self.model.loglikelihood(seq)), f(self.model.backwardTermination (seq, beta, scale))) log.debug("testFoBa -- end") def testTiedStates(self): log.debug( "testTiedStates -- begin") f = lambda x: round(x,15) t = (-1,1,1) self.model.setTieGroups(t) self.model.updateTiedEmissions() em2 = map(f,self.model.getEmission(2)) self.assertEqual(em2, [0.0, 0.0, 0.0, 0.0]) self.model.setEmission(2,[0.2,0.2,0.2,0.4]) self.model.updateTiedEmissions() em0 = map(f,self.model.getEmission(0)) self.assertEqual(em0, [0.0,0.5,0.5,0.0]) em2 = map(f,self.model.getEmission(2)) self.assertEqual(em2, [0.15, 0.1, 0.5, 0.25]) log.debug("testTiedStates -- end") def testNormalization(self): log.debug("testNormalization") self.model.setInitial(0, 2.0) self.model.normalize() self.assertEqual(1.0, self.model.getInitial(0)) class BackgroundDistributionTests(unittest.TestCase): " Tests for background distributions " def setUp(self): self.sigma = ghmm.Alphabet(['rot','blau','gruen','gelb']) self.model = ghmm.HMMFromMatrices(self.sigma,ghmm.DiscreteDistribution(self.sigma), [[0.3,0.3,0.4],[0.6,0.1,0.3],[1.0,0.0,0.0]], [[0.0,0.5,0.5,0.0],[0.1,0.0,0.8,0.1], [0.25,0.25,0.25,0.25, 0.0,0.5,0.5,0.0, 0.1,0.0,0.8,0.1, 0.1,0.35,0.3,0.25 ]], [1.0,0,0]) self.bg = ghmm.BackgroundDistribution(self.sigma, [[0.2,0.3,0.1,0.4], [0.1,0.2,0.4,0.3, 0.2,0.3,0.1,0.4, 0.25,0.25,0.25,0.25, 0.0,0.5,0.5,0.0 ]] ) def test__str__(self): # we aren't interested in the output but the function should run fine str(self.model) def testprint(self): #print "*** testprint" s = self.bg.verboseStr() ts = "BackgroundDistribution instance:\nNumber of distributions: 2\n\n<Alphabet:['rot', 'blau', 'gruen', 'gelb']>\nDistributions:\n Order: 0\n 1: [0.20000000000000001, 0.29999999999999999, 0.10000000000000001, 0.40000000000000002]\n Order: 1\n 2: [0.10000000000000001, 0.20000000000000001, 0.40000000000000002, 0.29999999999999999]\n" self.assertEqual(s,ts) def testmodelbackgroundaccessfunctions(self): #print "*** testmodelbackgroundaccessfunctions" self.model.setBackgrounds(self.bg, [0,-1,1]) # deleting background del(self.bg) s = self.model.background.verboseStr() ts = "BackgroundDistribution instance:\nNumber of distributions: 2\n\n<Alphabet:['rot', 'blau', 'gruen', 'gelb']>\nDistributions:\n Order: 0\n 1: [0.20000000000000001, 0.29999999999999999, 0.10000000000000001, 0.40000000000000002]\n Order: 1\n 2: [0.10000000000000001, 0.20000000000000001, 0.40000000000000002, 0.29999999999999999]\n" self.assertEqual(s,ts) def testapplybackground(self): self.model.setBackgrounds(self.bg,[0, -1, 1]) self.model.applyBackgrounds([0.1, 0.2, .3]) #print self.model f = lambda x: round(x,15) e1 = map(f, self.model.getEmission(0)) e2 = map(f, self.model.getEmission(1)) e3 = map(f, self.model.getEmission(2)) self.assertEqual(e1, [0.02, 0.48, 0.46, 0.04]) self.assertEqual(e2, [0.1, 0.0, 0.8, 0.1]) self.assertEqual(e3, [0.205, 0.235, 0.295, 0.265, 0.06, 0.44, 0.38, 0.12, 0.145, 0.075, 0.635, 0.145, 0.07, 0.395, 0.36, 0.175]) def testbackgroundtraining(self): # XXX test for background distributions self.model.setEmission(2,[0.25,0.25,0.25,0.25]) # XXX ... class StateLabelHMMTests(unittest.TestCase): def setUp(self): random.seed(0) slength = 45 self.labels = ['One']*slength self.allLabels = ['a','b','c','d','e','f','g'] self.l_domain= ghmm.LabelDomain(['One','a','b','c','d','e','f','g']) self.A = [[0.0,0.5,0.5],[0.4,0.2,0.4],[0.3,0.3,0.4]] self.B = [[0.2,0.1,0.1,0.6],[0.3,0.1,0.1,0.5], [0.25,0.25,0.25,0.25, 0.0, 0.0, 1.0, 0.0, 0.25,0.25,0.25,0.25, 0.25,0.25,0.25,0.25]] self.pi = [1.0,0,0.0] self.l_domain2 = ghmm.LabelDomain(['fst','scd','thr']) self.model = ghmm.HMMFromMatrices(ghmm.DNA,ghmm.DiscreteDistribution(ghmm.DNA), self.A, self.B, self.pi,labelDomain=self.l_domain2,labelList=['fst','scd','thr']) sequence = [] for i in range(slength): sequence.append(random.choice(ghmm.DNA.listOfCharacters)) self.tSeq = ghmm.EmissionSequence(ghmm.DNA, sequence, labelDomain=self.l_domain,labelInput=self.labels) def test__str__(self): # we aren't interested in the output but the function should run fine str(self.model) #create a random model with len(LabelList) states and def oneModel(self, LabelList): no_states = len(LabelList) A = [] B = [] pi = [] pisum = 0 for i in range(no_states): asum = 0 A_e = [] #get a random A-row for j in range(no_states): A_e.append(random.random()) asum += A_e[-1] #normalize this A-row for j in range(no_states): A_e[j] /= asum A.append(A_e) bsum = 0 B_e = [] #get a random B-row for j in range(4): B_e.append(random.random()) bsum += B_e[-1] #normalize this B-row for j in range(4): B_e[j] /= bsum B.append(B_e) #get random pi pi.append(random.random()) pisum += pi[-1] #normalize pi for i in range(no_states): pi[i] /= pisum return ghmm.HMMFromMatrices(ghmm.DNA, ghmm.DiscreteDistribution(ghmm.DNA), A, B, pi, None, self.l_domain, LabelList) def testsample(self): # print"\ntestsample ", seq = self.model.sampleSingle(100,seed=3586662) seq2 = self.model.sample(10,100,seed=3586662) def testaccessfunctions(self): # print"\ntestaccessfunctions", self.assertEqual(self.model.N,3) self.assertEqual(self.model.M,4) pi = self.model.getInitial(2) self.assertEqual(pi,0) self.model.setInitial(2,0.5,fixProb=1) pi = self.model.getInitial(2) self.assertEqual(pi,0.5) trans = self.model.getTransition(0,1) self.assertEqual(trans, 0.5) self.model.setTransition(0,1,0.6) trans = self.model.getTransition(0,1) self.assertEqual(trans, 0.6) emission = self.model.getEmission(1) self.assertEqual(emission, [0.3,0.1,0.1,0.5] ) # introducing silent state self.model.setEmission(1,[0.0,0.0,0.0,0.0]) emission = self.model.getEmission(1) self.assertEqual(emission,[0.0,0.0,0.0,0.0] ) self.assertEqual(self.model.cmodel.model_type & 4, 4) self.assertEqual(ghmmwrapper.int_array_getitem(self.model.cmodel.silent,1),1) # removing silent state self.model.setEmission(1,[0.2,0.2,0.2,0.4]) emission = self.model.getEmission(1) self.assertEqual(emission,[0.2,0.2,0.2,0.4] ) #print "model_type = ",self.model.cmodel.model_type self.assertEqual(self.model.cmodel.model_type & 4,0) self.assertEqual(self.model.isSilent(1), False) # inserting silent state self.model.setEmission(0,[0.0,0.0,0.0,0.0]) emission = self.model.getEmission(0) self.assertEqual(emission,[0.0,0.0,0.0,0.0]) self.assertEqual(self.model.cmodel.model_type & 4,4) self.assertEqual(ghmmwrapper.int_array_getitem(self.model.cmodel.silent,0),1) # label access labels = self.model.getLabels() self.assertEqual(labels,['fst','scd','thr']) self.model.setLabels(['fst','thr','fst']) labels = self.model.getLabels() self.assertEqual(labels, ['fst','thr','fst']) def testonelabelcomparebackward(self): model = self.oneModel(['One']*11) # backward and labeled backward use numerical different algorithms # to be changed f = lambda x: round (x, 12) g = lambda x: map (f, x) labelSequence = self.labels (alpha, scale) = model.forward( self.tSeq) (b_beta) = model.backward( self.tSeq, scale) (bl_logp, bl_beta) = model.backwardLabels( self.tSeq, labelSequence, scale) #compare beta matrizes from backward and backwardLabels (all states share one label) self.assertEqual (map(g, b_beta), map(g, bl_beta)) def testalldifferentlabelsbackward(self): model2 = self.oneModel(self.allLabels) labelSequence = self.allLabels*4 sequence = [] for i in range(len(labelSequence)): sequence.append(random.choice(ghmm.DNA.listOfCharacters)) Seq = ghmm.EmissionSequence(ghmm.DNA, sequence, self.l_domain,labelSequence) (fl_logp, alpha, scale) = model2.forwardLabels( Seq, labelSequence) (bl_logp, bl_beta) = model2.backwardLabels( Seq, labelSequence, scale) #check if the beta matrix is at the appropriated entries 0 or different from 0 for i in range(len(bl_beta)): i = len(bl_beta)-i-1 for j in range(len(bl_beta[i])): if model2.labelDomain.internal(labelSequence[i]) == ghmmwrapper.int_array_getitem(model2.cmodel.label, j): self.assertNotEqual(bl_beta[i][j], 0.0, "Zeichen: " + str(i) + ", State: " + str(j) + ", value: " + str(bl_beta[i][j]) ) else: self.assertEqual(bl_beta[i][j], 0.0, "Zeichen: " + str(i) + ", State: " + str(j) + ", value: " + str(bl_beta[i][j])) def testonelabelcompareforward(self): model = self.oneModel(['One']*11) labelSequence = self.labels (alpha, scale) = model.forward(self.tSeq) (logp, lalpha, lscale) = model.forwardLabels(self.tSeq, labelSequence ) # compare beta matrizes from backward and backwardLabels (all states share one label) # XXX due to rounding errors in the Python floating point representation # we have to round for 15 decimal positions f = lambda x: round(x,12) #XXX for i in range(len(alpha)): alpha[i] = map(f, alpha[i]) lalpha[i] = map(f, lalpha[i]) self.assertEqual(alpha, lalpha) scale = map(f, scale) lscale = map(f, lscale) self.assertEqual(scale, lscale) def testalldifferentlabelsforward(self): model2 = self.oneModel(self.allLabels) labelSequence = self.allLabels*4 sequence = [] for i in range(len(labelSequence)): sequence.append(random.choice(ghmm.DNA.listOfCharacters)) Seq = ghmm.EmissionSequence(ghmm.DNA, sequence, self.l_domain, labelSequence) (logp, alpha, scale) = model2.forwardLabels(Seq, labelSequence) #check if the beta matrix is 0 or different from 0 at the appropriate entries for i in range(len(alpha)): i = len(alpha)-i-1 for j in range(len(alpha[i])): if model2.labelDomain.internal(labelSequence[i]) == ghmmwrapper.int_array_getitem(model2.cmodel.label, j): self.assertNotEqual(alpha[i][j], 0.0, "Zeichen: " + str(i) + ", State: " + str(j)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -