📄 rebal.py
字号:
#!/usr/bin/env python"""rebal.py - rebalance a ham or spam test directoryusage: rebal.py [ options ]options: -d - dry run; display what would be moved, but don't do it [%(DRYRUN)s] -n num - specify number of files per Set dir desired [%(NPERDIR)s] -t - top directory, holding Set and reservoir subdirs [%(TOPDIR)s] -v - tell user what's happening; opposite of -q [%(VERBOSE)s] -q - be quiet about what's happening; opposite of -v [not %(VERBOSE)s] -c - confirm file moves into Set directory; opposite of -Q [%(CONFIRM)s] -Q - don't confirm moves; opposite of -c; independent of -v/-q -h - display this message and quitIf you have a non-standard test setup, you can use -r/-s instead of -t: -r res - specify an alternate reservoir [%(RESDIR)s] -s set - specify an alternate Set prefix [%(SETPREFIX)s]Moves files randomly among the Set subdirectories and a reservoir directory toleave -n files in each Set directory. By default, the Set1, Set2, ..., andreservoir subdirectories under (relative path) Data/Ham/ are rebalanced; thiscan be changed with the -t option. The script will work with a variablenumber of Set directories, but they must already exist, and the reservoirdirectory must also exist.It's recommended that you run with the -d (dry run) option first, to see whatthe script would do without actually moving any files. If, e.g., youaccidentally mix up spam Sets with your Ham reservoir, it could be verydifficult to recover from that mistake.See the module comments for examples."""# Examples:## rebal.py -n 300## Moves files among the Set1, Set2, ..., and reservoir directories under# Data/Ham/, leaving 300 files in each Set directory.## rebal.py -t Data/Spam -n 300## The same, but under Data/Spam/.## rebal.py -r reservoir -s Set -n 300## The same, but under the Set1, Set2, ..., and reservoir directories# in the current directory.## Supposing you want to shuffle your Set files around randomly, winding up# with 300 files in each one, you can execute:## rebal.py -n 0# rebal.py -n 300 -Q## The first moves all files from the various Data/Ham/Set directories to the# Data/Ham/reservoir directory. The second run randomly parcels out 300 files# to each of the Data/Ham/Set directories.import osimport sysimport randomimport globimport getopttry: True, Falseexcept NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0# defaultsNPERDIR = 4000TOPDIR = os.path.join('Data', 'Ham')RESDIR = os.path.join(TOPDIR, 'reservoir')SETPREFIX = os.path.join(TOPDIR, 'Set')VERBOSE = TrueCONFIRM = TrueDRYRUN = Falsedef usage(msg=None): if msg: print >> sys.stderr, str(msg) print >> sys.stderr print >> sys.stderr, __doc__ % globals()def migrate(f, targetdir, verbose): """Move f into targetdir, renaming if needed to avoid name clashes. The basename of the moved file is returned; this may not be the same as the basename of f, if the file had to be renamed because a file with f's basename already existed in targetdir. """ base = os.path.basename(f) out = os.path.join(targetdir, base) while os.path.exists(out): basename, ext = os.path.splitext(base) digits = random.randrange(100000000) out = os.path.join(targetdir, str(digits) + ext) if verbose: print "moving", f, "to", out os.rename(f, out) return os.path.basename(out)def main(args): nperdir = NPERDIR verbose = VERBOSE confirm = CONFIRM dryrun = DRYRUN topdir = resdir = setprefix = None try: opts, args = getopt.getopt(args, "dr:s:t:n:vqcQh") except getopt.GetoptError, msg: usage(msg) return 1 for opt, arg in opts: if opt == "-n": nperdir = int(arg) elif opt == "-t": topdir = arg elif opt == "-r": resdir = arg elif opt == "-s": setprefix = arg elif opt == "-v": verbose = True elif opt == "-c": confirm = True elif opt == "-q": verbose = False elif opt == "-Q": confirm = False elif opt == "-d": dryrun = True elif opt == "-h": usage() return 0 else: raise SystemError("internal error on option '%s'" % opt) # Derive setprefix and resdir from topdir, if the latter was given. if topdir is not None: if resdir is not None or setprefix is not None: usage("-t can't be specified with -r or -s") return -1 setprefix = os.path.join(topdir, "Set") resdir = os.path.join(topdir, "reservoir") else: if setprefix is None: setprefix = SETPREFIX if resdir is None: resdir = RESDIR if not os.path.exists(resdir): print >> sys.stderr, "reservoir directory %s doesn't exist" % resdir return 1 res = os.listdir(resdir) dirs = glob.glob(setprefix + "*") if not dirs: print >> sys.stderr, "no directories starting with", setprefix, "exist." return 1 # stuff <- list of (directory, files) pairs, where directory is the # name of a Set subdirectory, and files is a list of files in that dir. stuff = [] n = len(res) # total number of all files for d in dirs: fs = os.listdir(d) n += len(fs) stuff.append((d, fs)) if nperdir * len(dirs) > n: print >> sys.stderr, "not enough files to go around - use lower -n." return 1 # weak check against mixing ham and spam if ((setprefix.find("Ham") >= 0 and resdir.find("Spam") >= 0) or (setprefix.find("Spam") >= 0 and resdir.find("Ham") >= 0)): yn = raw_input("Reservoir and Set dirs appear not to match. " "Continue? (y/n) ") if yn.lower()[0:1] != 'y': return 1 # If necessary, migrate random files to the reservoir. for (d, fs) in stuff: if len(fs) <= nperdir: continue # Retain only nperdir files, moving the rest to reservoir. random.shuffle(fs) movethese = fs[nperdir:] del fs[nperdir:] if dryrun: print "would move", len(movethese), "files from", d, \ "to reservoir", resdir res.extend(movethese) else: for f in movethese: newname = migrate(os.path.join(d, f), resdir, verbose) res.append(newname) # Randomize reservoir once so we can just bite chunks from the end. random.shuffle(res) # Grow Set* directories from the reservoir as needed. for (d, fs) in stuff: assert len(fs) <= nperdir if nperdir == len(fs): continue numtomove = nperdir - len(fs) assert 0 < numtomove <= len(res) movethese = res[-numtomove:] del res[-numtomove:] if dryrun: print "would move", len(movethese), "files from reservoir", \ resdir, "to", d else: for f in movethese: if confirm: print file(os.path.join(resdir, f)).read() ok = raw_input('good enough? ').lower() if not ok.startswith('y'): continue migrate(os.path.join(resdir, f), d, verbose) return 0if __name__ == "__main__": sys.exit(main(sys.argv[1:]))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -