📄 ghmmunittests.py
字号:
#!/usr/bin/env python
################################################################################
#
# This file is part of the General Hidden Markov Model Library,
# GHMM version 0.8_beta1, see http://ghmm.org
#
# file: ghmmunittests.py
# authors: Benjamin Georgi, Wasinee Rungsarityotin, Alexander Schliep
#
# Copyright (C) 1998-2004 Alexander Schliep
# Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
# Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
# Berlin
#
# Contact: schliep@ghmm.org
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#
################################################################################
"""Testing GHMM

- EmissionDomainTests
- AlphabetTests
- FloatTestsTests
- AbstractDistributionTests
- DiscreteDistributionTests
- GaussianDistributionTests
- GaussianMixtureDistributionTests
- EmissionSequenceTests
- SequenceSet...
"""

import unittest
import ghmm
import ghmmwrapper
import random

# adjust verbosity level
import logging
import sys

log = logging.getLogger("GHMM unit tests")

# creating StreamHandler to stderr
hdlr = logging.StreamHandler(sys.stderr)

# setting message format
fmt = logging.Formatter("%(name)s %(filename)s:%(lineno)d - %(message)s")
hdlr.setFormatter(fmt)

# adding handler to logger object
log.addHandler(hdlr)

# set unittests log level
log.setLevel(logging.ERROR)

# set GHMM log level
ghmm.log.setLevel(logging.ERROR)


class AlphabetTests(unittest.TestCase):
    """Unittests for Emissiondomains subclasses"""

    def setUp(self):
        """Build a two-symbol alphabet and a four-symbol DNA alphabet."""
        self.binaryAlphabet = ghmm.Alphabet(['zero', 'one'])
        self.dna = ['a', 'c', 'g', 't']
        self.dnaAlphabet = ghmm.Alphabet(self.dna)

    def testinternalexternal(self):
        """ Check that internal -> external is a bijection """
        for symbol in self.dna:
            code = self.dnaAlphabet.internal(symbol)
            self.assertEqual(symbol, self.dnaAlphabet.external(code))

        # Symbols outside the alphabet must be rejected.
        self.assertRaises(KeyError, self.dnaAlphabet.internal, '')
        self.assertRaises(KeyError, self.dnaAlphabet.internal, 'x')

        # remove this assertion because -1 now represents a gap '-'
        # self.assertRaises(KeyError, self.dnaAlphabet.external, -1)
        self.assertRaises(KeyError, self.dnaAlphabet.external,
                          len(self.dna) + 1)

    def testinternalexternalSequence(self):
        """ Check internal -> external applied to a sequence """
        extseq = ['a', 'c', 'g', 't', 'a', 'g', 't']
        intseq = self.dnaAlphabet.internalSequence(extseq)
        self.assertEqual(min(intseq), 0)
        self.assertEqual(max(intseq), len(self.dna) - 1)

        result = self.dnaAlphabet.externalSequence(intseq)
        # Make the length requirement explicit so that the pairwise walk
        # below cannot silently skip trailing elements.
        self.assertEqual(len(result), len(extseq))
        for ext, code, back in zip(extseq, intseq, result):
            self.assertEqual(ext, back)
            self.assertEqual(code, self.dnaAlphabet.internal(ext))
            self.assertEqual(back, self.dnaAlphabet.external(code))

        # A symbol that is not in the alphabet must be rejected ...
        extseq = ['a', 'c', 'g', 'x', 'a', 'g', 't']
        self.assertRaises(KeyError, self.dnaAlphabet.internalSequence, extseq)

        # ... and so must an internal code that is out of range.
        intseq[3] = 5
        self.assertRaises(KeyError, self.dnaAlphabet.externalSequence, intseq)

    def testlen(self):
        """Check that len() of an alphabet equals its number of symbols."""
        self.assertEqual(len(self.binaryAlphabet), 2)
        self.assertEqual(len(self.dnaAlphabet), len(self.dna))


class EmissionSequenceTests(unittest.TestCase):
    """Unittests for ghmm.EmissionSequence.

    The fixtures are one integer-valued sequence, one float-valued sequence
    and one DNA sequence that carries state labels.
    """

    def setUp(self):
        """Create the integer, float and labeled DNA fixture sequences."""
        self.i_dom = ghmm.IntegerRange(0, 5)
        self.d_dom = ghmm.Float()
        l_domain = ghmm.LabelDomain(['E', 'R', 'T'])

        self.i_seq = ghmm.EmissionSequence(self.i_dom, [1, 2, 0, 0, 0, 3, 4])
        self.d_seq = ghmm.EmissionSequence(
            self.d_dom, [1.3, 2.1, 0.8, 0.1, 0.03, 3.6, 43.3])
        self.labeled = ghmm.EmissionSequence(
            ghmm.DNA, list('acgttgatgga'),
            labelDomain=l_domain,
            labelInput=['E', 'R', 'T', 'T', 'T', 'E', 'R', 'T', 'T', 'T', 'R'])

    def testprint(self):
        """Compare verboseStr() of both sequences with the expected text."""
        s = "\nEmissionSequence Instance:\nlength 7, weight 1.0:\n1200034"
        self.assertEqual(self.i_seq.verboseStr(), s)

        s2 = "\nEmissionSequence Instance:\nlength 7, weight 1.0:\n1.3 2.1 0.8 0.1 0.03 3.6 43.3 "
        self.assertEqual(self.d_seq.verboseStr(), s2)

    def testattributes(self):
        """Check label fields, sequence count and length of the fixtures."""
        self.assertEqual(self.i_seq.cseq.state_labels, None)
        self.assertEqual(self.i_seq.cseq.state_labels_len, None)
        self.assertEqual(self.i_seq.cseq.seq_number, 1)
        self.assertEqual(len(self.i_seq), 7)

        self.assertEqual(self.d_seq.cseq.seq_number, 1)
        self.assertEqual(len(self.d_seq), 7)

    def testitemaccess(self):
        """Check reading and writing single emissions by index."""
        b = self.i_seq[5]
        self.assertEqual(b, 3)
        self.i_seq[5] = 1
        self.assertEqual(self.i_seq[5], 1)

        b2 = self.d_seq[1]
        self.assertEqual(b2, 2.1)
        self.d_seq[1] = 8.34
        self.assertEqual(self.d_seq[1], 8.34)

    def testFileIO(self):
        """Write both sequences to disk and construct sequences from the files.

        No assertion is made on the content that is read back; the test
        fails only if writing or constructing raises.
        """
        self.i_seq.write("testdata/es_discrete_testwrite.seq")
        self.d_seq.write("testdata/es_continuous_testwrite.seq")

        ghmm.EmissionSequence(self.i_dom, "testdata/es_discrete_testwrite.seq")
        ghmm.EmissionSequence(self.d_dom,
                              "testdata/es_continuous_testwrite.seq")

    def testweightaccess(self):
        """Check the initial weight of 1.0 and that setWeight() is read back."""
        w = self.i_seq.getWeight()
        self.assertEqual(w, 1.0)
        self.i_seq.setWeight(4.0)
        w = self.i_seq.getWeight()
        self.assertEqual(w, 4.0)

        w2 = self.d_seq.getWeight()
        self.assertEqual(w2, 1.0)
        self.d_seq.setWeight(2.0)
        w2 = self.d_seq.getWeight()
        self.assertEqual(w2, 2.0)

    def testlabelaccess(self):
        """Check that sequence labels can be set and read back."""
        self.i_seq.setSeqLabel(8)
        label = self.i_seq.getSeqLabel()
        self.assertEqual(label, 8)

        # The float sequence has had no label set yet; -1 is expected here.
        label = self.d_seq.getSeqLabel()
        self.assertEqual(label, -1)
        self.d_seq.setSeqLabel(5)
        label = self.d_seq.getSeqLabel()
        self.assertEqual(label, 5)

    def testerrors(self):
        """Placeholder; no error cases are tested yet."""
        pass

    def testlabeled(self):
        """Check length, emissions and state labels of the labeled sequence."""
        # testing length
        self.assertEqual(len(self.labeled), 11)

        # testing sequence
        external = self.labeled.emissionDomain.external
        sequence = "".join(
            [str(external(self.labeled[i])) for i in range(len(self.labeled))])
        self.assertEqual(sequence, 'acgttgatgga')

        label = self.labeled.getStateLabel()
        self.assertEqual(
            label, ['E', 'R', 'T', 'T', 'T', 'E', 'R', 'T', 'T', 'T', 'R'])


class SequenceSetTests(unittest.TestCase):

    def setUp(self):
        """Create integer, float and labeled DNA sequence-set fixtures."""
        self.i_alph = ghmm.IntegerRange(0, 7)
        self.d_alph = ghmm.Float()
        self.l_domain = ghmm.LabelDomain(['E', 'R', 'T'])

        self.i_seq = ghmm.SequenceSet(self.i_alph, [
            [1, 2, 3, 4, 5],
            [0, 3, 0],
            [4, 3, 2, 2, 1, 1, 1, 1],
            [0, 0, 0, 2, 1],
            [1, 1, 1, 1, 1, 1],
        ])
        self.d_seq = ghmm.SequenceSet(self.d_alph, [
            [1.5, 2.3, 3.7, 4.1, 5.1],
            [0.0, 3.1, 0.7],
            [4.4, 3.05, 2.0, 2.4, 1.2, 1.8, 1.0, 1.0],
            [0.4, 0.1, 0.33, 2.7, 1.345],
            [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ])

        # Each entry of labelList has one label per symbol of the sequence
        # at the same position in seqList.
        self.seqList = [
            list('aaaa'),
            list('acctttg'),
            list('ttgggaaaaaa'),
            list('ggggggggggggggtaaatttaa'),
            list('gggttccgcggaagggggggggctttta'),
        ]
        self.labelList = [
            ['E','R','T','T'],
            ['E','R','T','T','E','R','T'],
            ['E','R','T','T','R','T','T','R','T','E','T'],
            ['E','R','T','T','R','T','T','R','T','T','R','T','E','T','R','T','T','R','T','T','R','E','T'],
            ['E','R','T','T','R','T','T','R','T','T','R','T','E','T','R','T','T','R','T','T','R','T','E','T','R','T','T','R'],
        ]

        self.l_seq = ghmm.SequenceSet(ghmm.DNA, self.seqList,
                                      labelDomain=self.l_domain,
                                      labelInput=self.labelList)

    def testlabelseqset(self):
        """Check lengths, emissions and labels of the labeled sequence set."""
        self.assertEqual(len(self.l_seq), 5)

        external = self.l_seq.emissionDomain.external
        expected = zip(self.seqList, self.labelList)
        for i, (expected_seq, expected_labels) in enumerate(expected):
            internal_seq = self.l_seq.getSequence(i)

            # testing length
            self.assertEqual(len(internal_seq), len(expected_seq))

            # testing sequence
            # A list is built explicitly because map() returns an iterator
            # on Python 3, which supports neither len() nor indexing.
            seq = [external(symbol) for symbol in internal_seq]
            self.assertEqual(seq, expected_seq)

            # testing labels
            label = self.l_seq.getStateLabel(i)
            self.assertEqual(label, expected_labels)
# XXX check different input types def testseqerror(self): # self.assertRaises(ghmm.UnknownInputType,ghmm.SequenceSet,) pass def testprint(self): #print"\n----------------- testprint " s = "\nNumber of sequences: 5\nSeq 0, length 5, weight 1.0:\n12345\nSeq 1, length 3, weight 1.0:\n030\nSeq 2, length 8, weight 1.0:\n43221111\nSeq 3, length 5, weight 1.0:\n00021\nSeq 4, length 6, weight 1.0:\n111111" self.assertEqual(self.i_seq.verboseStr(), s) s2 = "\nNumber of sequences: 5\nSeq 0, length 5, weight 1.0:\n1.5 2.3 3.7 4.1 5.1 \nSeq 1, length 3, weight 1.0:\n0.0 3.1 0.7 \nSeq 2, length 8, weight 1.0:\n4.4 3.05 2.0 2.4 1.2 1.8 1.0 1.0 \nSeq 3, length 5, weight 1.0:\n0.4 0.1 0.33 2.7 1.345 \nSeq 4, length 6, weight 1.0:\n1.0 1.0 1.0 1.0 1.0 1.0 " self.assertEqual(self.d_seq.verboseStr(), s2) # XXX str(self.l_seq) def testattributes(self): #print"\n----------------- testattributes " self.assertEqual(len(self.i_seq),5) self.assertEqual(self.i_seq.sequenceLength(1),3) self.assertEqual(len(self.d_seq),5) self.assertEqual(self.d_seq.sequenceLength(4),6) def testgetitem(self): #print"\n----------------- testgetitem ", s = self.i_seq[2] self.assertEqual(len(s),8) s2 = self.d_seq[4] self.assertEqual(len(s2),6) def testweightaccess(self): #print"\n----------------- testweightaccess "
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -