📄 logcondense.py
#!/bin/sh

#Licensed to the Apache Software Foundation (ASF) under one
#or more contributor license agreements.  See the NOTICE file
#distributed with this work for additional information
#regarding copyright ownership.  The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
""":"
work_dir=$(dirname $0)
base_name=$(basename $0)
cd $work_dir

if [ $HOD_PYTHON_HOME ]; then
  exec $HOD_PYTHON_HOME -OO -u $base_name ${1+"$@"}
elif [ -e /usr/bin/python ]; then
  exec /usr/bin/python -OO -u $base_name ${1+"$@"}
elif [ -e /usr/local/bin/python ]; then
  exec /usr/local/bin/python -OO -u $base_name ${1+"$@"}
else
  exec python -OO -u $base_name ${1+"$@"}
fi
":"""

from os import popen3
import os, sys
import re
import time
from datetime import datetime
from optparse import OptionParser

myName = os.path.basename(sys.argv[0])
myName = re.sub(".*/", "", myName)

reVersion = re.compile(".*(\d+_\d+).*")

VERSION = '$HeadURL: http://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.18/src/contrib/hod/support/logcondense.py $'

reMatch = reVersion.match(VERSION)
if reMatch:
  VERSION = reMatch.group(1)
  VERSION = re.sub("_", ".", VERSION)
else:
  VERSION = 'DEV'

options = ( {'short'   : "-p",
             'long'    : "--package",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "package",
             'metavar' : " ",
             'default' : 'hadoop',
             'help'    : "Bin file for hadoop"},

            {'short'   : "-d",
             'long'    : "--days",
             'type'    : "int",
             'action'  : "store",
             'dest'    : "days",
             'metavar' : " ",
             'default' : 7,
             'help'    : "Number of days before logs are deleted"},

            {'short'   : "-c",
             'long'    : "--config",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "config",
             'metavar' : " ",
             'default' : None,
             'help'    : "config directory for hadoop"},

            {'short'   : "-l",
             'long'    : "--logs",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "log",
             'metavar' : " ",
             'default' : "/user",
             'help'    : "directory prefix under which logs are stored per user"},

            {'short'   : "-n",
             'long'    : "--dynamicdfs",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "dynamicdfs",
             'metavar' : " ",
             'default' : "false",
             'help'    : "'true', if the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"}
            )

def getDfsCommand(options, args):
  if (options.config == None):
    cmd = options.package + " " + "dfs " + args
  else:
    cmd = options.package + " " + "--config " + options.config + " dfs " + args
  return cmd

def runcondense():
  import shutil

  options = process_args()
  # if the cluster is used to bring up dynamic dfs, we must leave NameNode and JobTracker logs,
  # otherwise only JobTracker logs. Likewise, in case of dynamic dfs, we must also look for
  # deleting datanode logs
  filteredNames = ['jobtracker']
  deletedNamePrefixes = ['0-tasktracker-*']
  if options.dynamicdfs == 'true':
    filteredNames.append('namenode')
    deletedNamePrefixes.append('1-tasktracker-*')
    deletedNamePrefixes.append('0-datanode-*')

  filepath = '%s/\*/hod-logs/' % (options.log)
  cmd = getDfsCommand(options, "-lsr " + filepath)
  (stdin, stdout, stderr) = popen3(cmd)
  lastjobid = 'none'
  toPurge = { }
  for line in stdout:
    try:
      m = re.match("^.*\s(.*)\n$", line)
      filename = m.group(1)
      # file name format: <prefix>/<user>/hod-logs/<jobid>/[0-1]-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz
      # first strip prefix:
      if filename.startswith(options.log):
        # str.lstrip() strips a *character set*, not a prefix, so slice the
        # prefix off by length instead.
        filename = filename[len(options.log):]
        if not filename.startswith('/'):
          filename = '/' + filename
      else:
        continue

      # Now get other details from filename.
      k = re.match("/(.*)/hod-logs/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename)
      if k:
        username = k.group(1)
        jobid = k.group(2)
        datetimefile = datetime(int(k.group(3)), int(k.group(4)), int(k.group(5)))
        datetimenow = datetime.utcnow()
        diff = datetimenow - datetimefile
        filedate = k.group(3) + k.group(4) + k.group(5)
        newdate = datetimenow.strftime("%Y%m%d")
        print "%s %s %s %d" % (filename, filedate, newdate, diff.days)

        # if the cluster is used to bring up dynamic dfs, we must also leave NameNode logs.
        foundFilteredName = False
        for name in filteredNames:
          if filename.find(name) >= 0:
            foundFilteredName = True
            break

        if foundFilteredName:
          continue

        if (diff.days > options.days):
          desttodel = filename
          if not toPurge.has_key(jobid):
            toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/hod-logs/" + jobid
    except Exception, e:
      print >> sys.stderr, e

  for job in toPurge.keys():
    try:
      for prefix in deletedNamePrefixes:
        cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
        print cmd
        ret = 0
        ret = os.system(cmd)
        if (ret != 0):
          print >> sys.stderr, "Command failed to delete file " + cmd
    except Exception, e:
      print >> sys.stderr, e


def process_args():
  global options, myName, VERSION

  usage = "usage: %s <ARGS>" % (myName)
  version = "%s %s" % (myName, VERSION)
  argParser = OptionParser(usage=usage, version=VERSION)

  for option_element in options:
    argParser.add_option(option_element['short'], option_element['long'],
                         type=option_element['type'], action=option_element['action'],
                         dest=option_element['dest'], default=option_element['default'],
                         metavar=option_element['metavar'], help=option_element['help'])

  (parsedOptions, args) = argParser.parse_args()

  if not os.path.exists(parsedOptions.package):
    argParser.error("Could not find path to hadoop binary: %s" % parsedOptions.package)
  # --config defaults to None; only validate the path when one was given
  # (os.path.exists(None) raises TypeError).
  if parsedOptions.config != None and not os.path.exists(parsedOptions.config):
    argParser.error("Could not find config: %s" % parsedOptions.config)
  if parsedOptions.days <= 0:
    argParser.error("Invalid number of days specified, must be > 0: %s" % parsedOptions.days)
  if parsedOptions.dynamicdfs != 'true' and parsedOptions.dynamicdfs != 'false':
    argParser.error("Invalid option for dynamicdfs, must be true or false: %s" % parsedOptions.dynamicdfs)

  return parsedOptions


if __name__ == '__main__':
  runcondense()
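Usage: this HOD (Hadoop On Demand) support script purges per-user HOD logs uploaded to HDFS once they are older than the configured number of days, and would typically be run periodically (e.g. from cron) by the cluster administrator. A minimal sketch of an invocation; the hadoop binary and config paths here are illustrative placeholders, not values taken from the script:

    logcondense.py -p /usr/bin/hadoop -d 7 -c /etc/hadoop/conf -l /user -n false

Passing -n true (for clusters used to bring up dynamic dfs) additionally deletes datanode logs while leaving namenode logs in place, per the filteredNames/deletedNamePrefixes logic above.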