⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 splitndirs.py

📁 用python实现的邮件过滤器
💻 PY
字号:
#! /usr/bin/env python"""Split an mbox into N random directories of files.Usage: %(program)s [-h] [-g] [-s seed] [-v] -n N sourcembox ... outdirbaseOptions:    -h / --help        Print this help message and exit    -g        Do globbing on each sourcepath.  This is helpful on Windows, where        the native shells don't glob, or when you have more mboxes than        your shell allows you to specify on the commandline.    -s seed        Seed the random number generator with seed (an integer).        By default, use system time at startup to seed.    -v        Verbose.  Displays a period for each 100 messages parsed.        May display other stuff.    -n N        The number of output mboxes desired.  This is required.Arguments:    sourcembox        The mbox or path to an mbox to split.    outdirbase        The base path + name prefix for each of the N output dirs.        Output files have names of the form            outdirbase + ("Set%%d/%%d" %% (i, n))Example:    %(program)s -s 123 -n5 Data/spam.mbox Data/Spam/Setproduces 5 directories, named Data/Spam/Set1 through Data/Spam/Set5.  Eachcontains a random selection of the messages in spam.mbox, and togetherthey contain every message in spam.mbox exactly once.  Each hasapproximately the same number of messages.  spam.mbox is not altered.  Inaddition, the seed for the random number generator is forced to 123, sothat while the split is random, it's reproducible."""import sysimport osimport randomimport mailboximport emailimport getoptimport globfrom spambayes import mboxutilstry:    True, Falseexcept NameError:    # Maintain compatibility with Python 2.2    True, False = 1, 0program = sys.argv[0]def usage(code, msg=''):    print >> sys.stderr, __doc__ % globals()    if msg:        print >> sys.stderr, msg    sys.exit(code)def main():    try:        opts, args = getopt.getopt(sys.argv[1:], 'hgn:s:v', ['help'])    except getopt.error, msg:        usage(1, msg)    doglob = False    n = None    verbose = False    for opt, arg in opts:        if opt in ('-h', '--help'):            usage(0)        elif opt == '-g':            doglob = True        elif opt == '-s':            random.seed(int(arg))        elif opt == '-n':            n = int(arg)        elif opt == '-v':            verbose = True    if n is None or n <= 1:        usage(1, "an -n value > 1 is required")    if len(args) < 2:        usage(1, "input mbox name and output base path are required")    inputpaths, outputbasepath = args[:-1], args[-1]    outdirs = [outputbasepath + ("%d" % i) for i in range(1, n+1)]    for dir in outdirs:        if not os.path.isdir(dir):            os.makedirs(dir)    counter = 0    for inputpath in inputpaths:        if doglob:            inpaths = glob.glob(inputpath)        else:            inpaths = [inputpath]        for inpath in inpaths:            mbox = mboxutils.getmbox(inpath)            for msg in mbox:                i = random.randrange(n)                astext = str(msg)                #assert astext.endswith('\n')                counter += 1                msgfile = open('%s/%d' % (outdirs[i], counter), 'wb')                msgfile.write(astext)                msgfile.close()                if verbose:                    if counter % 100 == 0:                        sys.stdout.write('.')                        sys.stdout.flush()    if verbose:        print        print counter, "messages split into", n, "directories"if __name__ == '__main__':    main()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -