📄 split.py
字号:
#! /usr/bin/env python

"""Split an mbox into two files based on a given percentage.

This script will troll through a Unix mbox file randomly assigning each
message to one of two bins.  The split is based on a given float percentage.
E.g.

    % split.py sourcembox 20 mbox20 mbox80

yields two mbox files, where mbox20 contains approximately 20% of the messages
and mbox80 contains 80% of the messages.  Messages are assigned to each bin
randomly.

Usage: %(program)s [options] sourcembox percent file1 file2
Options:

    -h / --help
        Print this help message and exit

file1 and file2 are where the output goes.  Approximately percent % of
messages will go to file1 and (100 - percent) % of messages will go to file2.

percent is a floating point number between 1 and 99.  sourcembox is a Unix
mailbox file.  All arguments except -h/--help are required.
"""

import sys
import random
import mailbox
import email
import getopt

from spambayes import mboxutils

program = sys.argv[0]


def usage(code, msg=''):
    """Print the help text (and an optional message) to stderr, then exit.

    code is used as the process exit status.  msg, when non-empty, is
    printed on its own line after the help text.
    """
    # Substitute the program name with str.replace rather than the %
    # operator: the help text contains literal '%' characters that are not
    # format directives.
    sys.stderr.write(__doc__.replace('%(program)s', program) + '\n')
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)


def _distribute(mbox, bin1out, bin2out, percent):
    """Write each message from mbox to bin1out or bin2out at random.

    percent is a fraction in (0, 1): the probability that a message is
    written to bin1out; otherwise it goes to bin2out.
    """
    for msg in mbox:
        if random.random() < percent:
            outfp = bin1out
        else:
            outfp = bin2out
        astext = str(msg)
        # Each message must end with a newline, otherwise the next message
        # would be glued onto its last line in the output file.
        assert astext.endswith('\n')
        outfp.write(astext)


def main():
    """Parse the command line and split the source mbox into two files.

    Exits through usage() with status 1 on a malformed command line and
    with status 0 when -h/--help is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'h', ['help'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, _arg in opts:
        if opt in ('-h', '--help'):
            usage(0)

    try:
        mboxfile = args[0]
        percent = float(args[1])
        # Any value strictly between 0 and 100 is accepted.
        if not (0 < percent < 100):
            raise ValueError
        # Convert to a fraction for comparison with random.random().
        percent /= 100.0
        bin1 = args[2]
        bin2 = args[3]
    except IndexError:
        usage(1, 'Not enough arguments')
    except ValueError:
        usage(1, 'Percent argument must be a float between 1.0 and 99.0')

    # Cruise
    # The source is opened first so that an unreadable input does not leave
    # freshly truncated output files behind.  The with-blocks close all three
    # files even when the mailbox is empty or an error interrupts the split.
    with open(mboxfile, 'rb') as infp:
        with open(bin1, 'wb') as bin1out:
            with open(bin2, 'wb') as bin2out:
                mbox = mailbox.PortableUnixMailbox(infp,
                                                   mboxutils.get_message)
                _distribute(mbox, bin1out, bin2out, percent)


if __name__ == '__main__':
    main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -