test_unicode.py

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 445 行 · 第 1/2 页

PY
445
字号
""" Test script for the Unicode implementation.Written by Marc-Andre Lemburg (mal@lemburg.com).(c) Copyright CNRI, All Rights Reserved. NO WARRANTY."""#"from test_support import verify, verbose, TestFailedimport sys, stringif not sys.platform.startswith('java'):    # Test basic sanity of repr()    verify(repr(u'abc') == "u'abc'")    verify(repr(u'ab\\c') == "u'ab\\\\c'")    verify(repr(u'ab\\') == "u'ab\\\\'")    verify(repr(u'\\c') == "u'\\\\c'")    verify(repr(u'\\') == "u'\\\\'")    verify(repr(u'\n') == "u'\\n'")    verify(repr(u'\r') == "u'\\r'")    verify(repr(u'\t') == "u'\\t'")    verify(repr(u'\b') == "u'\\x08'")    verify(repr(u"'\"") == """u'\\'"'""")    verify(repr(u"'\"") == """u'\\'"'""")    verify(repr(u"'") == '''u"'"''')    verify(repr(u'"') == """u'"'""")    verify(repr(u''.join(map(unichr, range(256)))) ==       "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"       "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"       "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"       "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"       "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"       "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"       "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"       "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"       "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"       "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"       "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"       "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"       "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"       "\\xfe\\xff'")def test(method, input, output, *args):    if verbose:        print '%s.%s%s =? %s... ' % (repr(input), method, args, repr(output)),    try:        f = getattr(input, method)        value = apply(f, args)    except:        value = sys.exc_type        exc = sys.exc_info()[:2]    else:        exc = None    if value == output and type(value) is type(output):        # if the original is returned make sure that        # this doesn't happen with subclasses        if value is input:            class usub(unicode):                def __repr__(self):                    return 'usub(%r)' % unicode.__repr__(self)            input = usub(input)            try:                f = getattr(input, method)                value = apply(f, args)            except:                value = sys.exc_type                exc = sys.exc_info()[:2]            if value is input:                if verbose:                    print 'no'                print '*',f, `input`, `output`, `value`                return    if value != output or type(value) is not type(output):        if verbose:            print 'no'        print '*',f, `input`, `output`, `value`        if exc:            print '  value == %s: %s' % (exc)    else:        if verbose:            print 'yes'test('capitalize', u' hello ', u' hello ')test('capitalize', u'hello ', u'Hello ')test('capitalize', u'aaaa', u'Aaaa')test('capitalize', u'AaAa', u'Aaaa')test('count', u'aaa', 3, u'a')test('count', u'aaa', 0, u'b')test('count', 'aaa', 3, u'a')test('count', 'aaa', 0, u'b')test('count', u'aaa', 3, 'a')test('count', u'aaa', 0, 'b')test('title', u' hello ', u' Hello ')test('title', u'hello ', u'Hello ')test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')test('title', u"getInt", u'Getint')test('find', u'abcdefghiabc', 0, u'abc')test('find', u'abcdefghiabc', 9, u'abc', 1)test('find', u'abcdefghiabc', -1, u'def', 4)test('rfind', u'abcdefghiabc', 9, u'abc')test('rfind', 'abcdefghiabc', 9, u'abc')test('rfind', 'abcdefghiabc', 12, u'')test('rfind', u'abcdefghiabc', 12, '')test('rfind', u'abcdefghiabc', 12, u'')test('lower', u'HeLLo', u'hello')test('lower', u'hello', u'hello')test('upper', u'HeLLo', u'HELLO')test('upper', u'HELLO', u'HELLO')if 0:    transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'    test('maketrans', u'abc', transtable, u'xyz')    test('maketrans', u'abc', ValueError, u'xyzq')test('split', u'this is the split function',     [u'this', u'is', u'the', u'split', u'function'])test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)test('split', u'a b c d', [u'a', u'b c d'], None, 1)test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)test('split', u'a b c d', [u'a b c d'], None, 0)test('split', u'a  b  c  d', [u'a', u'b', u'c  d'], None, 2)test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')test('split', u'a//b//c//d', [u'a', u'b', u'c', u'd'], '//')test('split', 'a//b//c//d', [u'a', u'b', u'c', u'd'], u'//')test('split', u'endcase test', [u'endcase ', u''], u'test')test('split', u'endcase test', [u'endcase ', u''], 'test')test('split', 'endcase test', [u'endcase ', u''], u'test')# join now works with any sequence typeclass Sequence:    def __init__(self, seq): self.seq = seq    def __len__(self): return len(self.seq)    def __getitem__(self, i): return self.seq[i]test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])test('join', u' ', u'a b c d', ['a', 'b', u'c', u'd'])test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))test('join', u' ', u'w x y z', Sequence('wxyz'))test('join', u' ', TypeError, 7)test('join', u' ', TypeError, Sequence([7, u'hello', 123L]))test('join', ' ', u'a b c d', [u'a', u'b', u'c', u'd'])test('join', ' ', u'a b c d', ['a', 'b', u'c', u'd'])test('join', '', u'abcd', (u'a', u'b', u'c', u'd'))test('join', ' ', u'w x y z', Sequence(u'wxyz'))test('join', ' ', TypeError, 7)result = u''for i in range(10):    if i > 0:        result = result + u':'    result = result + u'x'*10test('join', u':', result, [u'x' * 10] * 10)test('join', u':', result, (u'x' * 10,) * 10)test('strip', u'   hello   ', u'hello')test('lstrip', u'   hello   ', u'hello   ')test('rstrip', u'   hello   ', u'   hello')test('strip', u'hello', u'hello')# strip/lstrip/rstrip with None argtest('strip', u'   hello   ', u'hello', None)test('lstrip', u'   hello   ', u'hello   ', None)test('rstrip', u'   hello   ', u'   hello', None)test('strip', u'hello', u'hello', None)# strip/lstrip/rstrip with unicode argtest('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')test('strip', u'hello', u'hello', u'xyz')# strip/lstrip/rstrip with str argtest('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')test('strip', u'hello', u'hello', 'xyz')test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')if 0:    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')    table = string.maketrans('a', u'A')    test('translate', u'abc', u'Abc', table)    test('translate', u'xyz', u'xyz', table)test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)test('replace', u'one!two!three!', u'onetwothree', '!', '')test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)test('replace', u'abc', u'abc', u'ab', u'--', 0)test('replace', u'abc', u'abc', u'xy', u'--')test('startswith', u'hello', 1, u'he')test('startswith', u'hello', 1, u'hello')test('startswith', u'hello', 0, u'hello world')test('startswith', u'hello', 1, u'')test('startswith', u'hello', 0, u'ello')test('startswith', u'hello', 1, u'ello', 1)test('startswith', u'hello', 1, u'o', 4)test('startswith', u'hello', 0, u'o', 5)test('startswith', u'hello', 1, u'', 5)test('startswith', u'hello', 0, u'lo', 6)test('startswith', u'helloworld', 1, u'lowo', 3)test('startswith', u'helloworld', 1, u'lowo', 3, 7)test('startswith', u'helloworld', 0, u'lowo', 3, 6)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?