📄 splitn.py
字号:
#! /usr/bin/env python"""Split an mbox into N random mboxes.Usage: %(program)s [-h] [-s seed] [-v] -n N sourcembox outfilebaseOptions: -h / --help Print this help message and exit -s seed Seed the random number generator with seed (an integer). By default, use system time at startup to seed. -v Verbose. Displays a period for each 100 messages parsed. May display other stuff. -n N The number of output mboxes desired. This is required.Arguments: sourcembox The mbox to split. outfilebase The base path + name prefix for each of the N output files. Output mboxes have names of the form outfilebase + ("%%d.mbox" %% i)Example: %(program)s -s 123 -n5 spam.mbox rspamproduces 5 mboxes, named rspam1.mbox through rspam5.mbox. Each containsa random selection of the messages in spam.mbox, and together they containevery message in spam.mbox exactly once. Each has approximately the samenumber of messages. spam.mbox is not altered. In addition, the seed forthe random number generator is forced to 123, so that while the split israndom, it's reproducible."""import sysimport randomimport mailboximport emailimport getoptfrom spambayes import mboxutilstry: True, Falseexcept NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0program = sys.argv[0]def usage(code, msg=''): print >> sys.stderr, __doc__ % globals() if msg: print >> sys.stderr, msg sys.exit(code)def main(): try: opts, args = getopt.getopt(sys.argv[1:], 'hn:s:v', ['help']) except getopt.error, msg: usage(1, msg) n = None verbose = False for opt, arg in opts: if opt in ('-h', '--help'): usage(0) elif opt == '-s': random.seed(int(arg)) elif opt == '-n': n = int(arg) elif opt == '-v': verbose = True if n is None or n <= 1: usage(1, "an -n value > 1 is required") if len(args) != 2: usage(1, "input mbox name and output base path are required") inputpath, outputbasepath = args infile = file(inputpath, 'rb') outfiles = [file(outputbasepath + ("%d.mbox" % i), 'wb') for i in range(1, n+1)] mbox = mailbox.PortableUnixMailbox(infile, mboxutils.get_message) counter = 0 for msg in mbox: i = random.randrange(n) astext = str(msg) outfiles[i].write(astext) counter += 1 if verbose: if counter % 100 == 0: print '.', if verbose: print print counter, "messages split into", n, "files" infile.close() for f in outfiles: f.close()if __name__ == '__main__': main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -