⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rebal.py

📁 用python实现的邮件过滤器
💻 PY
字号:
#!/usr/bin/env python"""rebal.py - rebalance a ham or spam test directoryusage: rebal.py [ options ]options:   -d     - dry run; display what would be moved, but don't do it [%(DRYRUN)s]   -n num - specify number of files per Set dir desired [%(NPERDIR)s]   -t     - top directory, holding Set and reservoir subdirs [%(TOPDIR)s]   -v     - tell user what's happening; opposite of -q [%(VERBOSE)s]   -q     - be quiet about what's happening; opposite of -v [not %(VERBOSE)s]   -c     - confirm file moves into Set directory; opposite of -Q [%(CONFIRM)s]   -Q     - don't confirm moves; opposite of -c; independent of -v/-q   -h     - display this message and quitIf you have a non-standard test setup, you can use -r/-s instead of -t:   -r res - specify an alternate reservoir [%(RESDIR)s]   -s set - specify an alternate Set prefix [%(SETPREFIX)s]Moves files randomly among the Set subdirectories and a reservoir directory toleave -n files in each Set directory.  By default, the Set1, Set2, ..., andreservoir subdirectories under (relative path) Data/Ham/ are rebalanced; thiscan be changed with the -t option.  The script will work with a variablenumber of Set directories, but they must already exist, and the reservoirdirectory must also exist.It's recommended that you run with the -d (dry run) option first, to see whatthe script would do without actually moving any files.  If, e.g., youaccidentally mix up spam Sets with your Ham reservoir, it could be verydifficult to recover from that mistake.See the module comments for examples."""# Examples:##    rebal.py -n 300## Moves files among the Set1, Set2, ..., and reservoir directories under# Data/Ham/, leaving 300 files in each Set directory.##    rebal.py -t Data/Spam -n 300## The same, but under Data/Spam/.##    rebal.py -r reservoir -s Set -n 300## The same, but under the Set1, Set2, ..., and reservoir directories# in the current directory.## Supposing you want to shuffle your Set files around randomly, winding up# with 300 files in each one, you can execute:##    rebal.py -n 0#    rebal.py -n 300 -Q## The first moves all files from the various Data/Ham/Set directories to the# Data/Ham/reservoir directory.  The second run randomly parcels out 300 files# to each of the Data/Ham/Set directories.import osimport sysimport randomimport globimport getopttry:    True, Falseexcept NameError:    # Maintain compatibility with Python 2.2    True, False = 1, 0# defaultsNPERDIR = 4000TOPDIR = os.path.join('Data', 'Ham')RESDIR = os.path.join(TOPDIR, 'reservoir')SETPREFIX = os.path.join(TOPDIR, 'Set')VERBOSE = TrueCONFIRM = TrueDRYRUN = Falsedef usage(msg=None):    if msg:        print >> sys.stderr, str(msg)        print >> sys.stderr    print >> sys.stderr, __doc__ % globals()def migrate(f, targetdir, verbose):    """Move f into targetdir, renaming if needed to avoid name clashes.       The basename of the moved file is returned; this may not be the       same as the basename of f, if the file had to be renamed because       a file with f's basename already existed in targetdir.    """    base = os.path.basename(f)    out = os.path.join(targetdir, base)    while os.path.exists(out):        basename, ext = os.path.splitext(base)        digits = random.randrange(100000000)        out = os.path.join(targetdir, str(digits) + ext)    if verbose:        print "moving", f, "to", out    os.rename(f, out)    return os.path.basename(out)def main(args):    nperdir = NPERDIR    verbose = VERBOSE    confirm = CONFIRM    dryrun = DRYRUN    topdir = resdir = setprefix = None    try:        opts, args = getopt.getopt(args, "dr:s:t:n:vqcQh")    except getopt.GetoptError, msg:        usage(msg)        return 1    for opt, arg in opts:        if opt == "-n":            nperdir = int(arg)        elif opt == "-t":            topdir = arg        elif opt == "-r":            resdir = arg        elif opt == "-s":            setprefix = arg        elif opt == "-v":            verbose = True        elif opt == "-c":            confirm = True        elif opt == "-q":            verbose = False        elif opt == "-Q":            confirm = False        elif opt == "-d":            dryrun = True        elif opt == "-h":            usage()            return 0        else:            raise SystemError("internal error on option '%s'" % opt)    # Derive setprefix and resdir from topdir, if the latter was given.    if topdir is not None:        if resdir is not None or setprefix is not None:            usage("-t can't be specified with -r or -s")            return -1        setprefix = os.path.join(topdir, "Set")        resdir = os.path.join(topdir, "reservoir")    else:        if setprefix is None:            setprefix = SETPREFIX        if resdir is None:            resdir = RESDIR    if not os.path.exists(resdir):        print >> sys.stderr, "reservoir directory %s doesn't exist" % resdir        return 1    res = os.listdir(resdir)    dirs = glob.glob(setprefix + "*")    if not dirs:        print >> sys.stderr, "no directories starting with", setprefix, "exist."        return 1    # stuff <- list of (directory, files) pairs, where directory is the    # name of a Set subdirectory, and files is a list of files in that dir.    stuff = []    n = len(res)    # total number of all files    for d in dirs:        fs = os.listdir(d)        n += len(fs)        stuff.append((d, fs))    if nperdir * len(dirs) > n:        print >> sys.stderr, "not enough files to go around - use lower -n."        return 1    # weak check against mixing ham and spam    if ((setprefix.find("Ham") >= 0 and resdir.find("Spam") >= 0) or        (setprefix.find("Spam") >= 0 and resdir.find("Ham") >= 0)):        yn = raw_input("Reservoir and Set dirs appear not to match. "                       "Continue? (y/n) ")        if yn.lower()[0:1] != 'y':            return 1    # If necessary, migrate random files to the reservoir.    for (d, fs) in stuff:        if len(fs) <= nperdir:            continue        # Retain only nperdir files, moving the rest to reservoir.        random.shuffle(fs)        movethese = fs[nperdir:]        del fs[nperdir:]        if dryrun:            print "would move", len(movethese), "files from", d, \                  "to reservoir", resdir            res.extend(movethese)        else:            for f in movethese:                newname = migrate(os.path.join(d, f), resdir, verbose)                res.append(newname)    # Randomize reservoir once so we can just bite chunks from the end.    random.shuffle(res)    # Grow Set* directories from the reservoir as needed.    for (d, fs) in stuff:        assert len(fs) <= nperdir        if nperdir == len(fs):            continue        numtomove = nperdir - len(fs)        assert 0 < numtomove <= len(res)        movethese = res[-numtomove:]        del res[-numtomove:]        if dryrun:            print "would move", len(movethese), "files from reservoir", \                  resdir, "to", d        else:            for f in movethese:                if confirm:                    print file(os.path.join(resdir, f)).read()                    ok = raw_input('good enough? ').lower()                    if not ok.startswith('y'):                        continue                migrate(os.path.join(resdir, f), d, verbose)    return 0if __name__ == "__main__":    sys.exit(main(sys.argv[1:]))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -