📄 gnatsparse.py
字号:
# Psyco makes this script about 25% faster, but a bug in psyco's handling
# of eval causes it to use unlimited memory when the magic file is enabled,
# so it is left disabled.  To experiment with it anyway, enable:
#    import psyco
#    psyco.full()
#    from psyco.classes import *

import re
import base64
import cStringIO
import specialuu
import array
import email.Utils
import zlib
import magic

# Comment out if you don't want magic detection
magicf = magic.MagicFile()

# Open our output file
outfile = open("gnats2bz_data.sql", "w")

# List of GNATS fields
fieldnames = ("Number", "Category", "Synopsis", "Confidential", "Severity",
              "Priority", "Responsible", "State", "Quarter", "Keywords",
              "Date-Required", "Class", "Submitter-Id", "Arrival-Date",
              "Closed-Date", "Last-Modified", "Originator", "Release",
              "Organization", "Environment", "Description", "How-To-Repeat",
              "Fix", "Release-Note", "Audit-Trail", "Unformatted")

# Dictionary telling us which GNATS fields are multiline
multilinefields = {"Organization": 1, "Environment": 1, "Description": 1,
                   "How-To-Repeat": 1, "Fix": 1, "Release-Note": 1,
                   "Audit-Trail": 1, "Unformatted": 1}

# Mapping of GCC release to version.  Our version string is updated
# regularly, so we need to funnel every release with e.g. 3.4 in the string
# to version 3.4 for bug tracking purposes.
# Each entry is (regex to match, version it corresponds with).  The entries
# are tried in the order written, so a more specific pattern must come
# before the general one it overlaps with (3.2.2 before 3.2, the Red Hat
# 2.96 pattern before plain 2.96).
releasetover_patterns = (
    (r"3\.4", "3.4"),
    (r"3\.3", "3.3"),
    (r"3\.2\.2", "3.2.2"),
    (r"3\.2\.1", "3.2.1"),
    (r"3\.2", "3.2"),
    (r"3\.1\.2", "3.1.2"),
    (r"3\.1\.1", "3.1.1"),
    (r"3\.1", "3.1"),
    (r"3\.0\.4", "3.0.4"),
    (r"3\.0\.3", "3.0.3"),
    (r"3\.0\.2", "3.0.2"),
    (r"3\.0\.1", "3.0.1"),
    (r"3\.0", "3.0"),
    (r"2\.95\.4", "2.95.4"),
    (r"2\.95\.3", "2.95.3"),
    (r"2\.95\.2", "2.95.2"),
    (r"2\.95\.1", "2.95.1"),
    (r"2\.95", "2.95"),
    (r"2\.97", "2.97"),
    (r"2\.96.*[rR][eE][dD].*[hH][aA][tT]", "2.96 (redhat)"),
    (r"2\.96", "2.96"),
)

# Same mapping as a dictionary (regex -> version).  Do not iterate over it
# to do matching: dictionary order is not the order written above.
releasetovermap = dict(releasetover_patterns)

# These map the field name to the field id bugzilla assigns. We need
# the id when doing bug activity.
fieldids = {"State": 8, "Responsible": 15}

# These are the keywords we use in gcc bug tracking. They are transformed
# into bugzilla keywords. The format here is <keyword>-><bugzilla keyword id>
keywordids = {"wrong-code": 1, "ice-on-legal-code": 2,
              "ice-on-illegal-code": 3, "rejects-legal": 4,
              "accepts-illegal": 5, "pessimizes-code": 6}

# Map from GNATS states to Bugzilla states. Duplicates and reopened bugs
# are handled when parsing the audit trail, so no need for them here.
state_lookup = {"": "NEW", "open": "ASSIGNED", "analyzed": "ASSIGNED",
                "feedback": "WAITING", "closed": "CLOSED",
                "suspended": "SUSPENDED"}

# Table of versions that exist in the bugs, built up as we go along
versions_table = {}

# Delimiter gnatsweb uses for attachments
attachment_delimiter = "----gnatsweb-attachment----\n"

# Here start the various regular expressions we use

# Matches an entire GNATS single line field
gnatfieldre = re.compile(r"""^([>\w\-]+)\s*:\s*(.*)\s*$""")

# Matches the name of a GNATS field
fieldnamere = re.compile(r"""^>(.*)$""")

# Matches the useless part of an envelope
uselessre = re.compile(r"""^(\S*?):\s*""", re.MULTILINE)

# Matches the filename in a content disposition
dispositionre = re.compile("(\\S+);\\s*filename=\"([^\"]+)\"")

# Matches the last changed date in the entire text of a bug
# If you have other editable fields that get audit trail entries, modify this
# The field names are explicitly listed in order to speed up matching
lastdatere = re.compile(r"""^(?:(?:State|Responsible|Priority|Severity)-Changed-When: )(.+?)$""", re.MULTILINE)

# Matches the From line of an email or the first line of an audit trail entry
# We use this re to find the begin lines of all the audit trail entries
# The field names are explicitly listed in order to speed up matching
fromtore = re.compile(r"""^(?:(?:State|Responsible|Priority|Severity)-Changed-From-To: |From: )""", re.MULTILINE)

# These re's match the various parts of an audit trail entry
changedfromtore = re.compile(r"""^(\w+?)-Changed-From-To: (.+?)$""", re.MULTILINE)
changedbyre = re.compile(r"""^\w+?-Changed-By: (.+?)$""", re.MULTILINE)
changedwhenre = re.compile(r"""^\w+?-Changed-When: (.+?)$""", re.MULTILINE)
changedwhyre = re.compile(r"""^\w+?-Changed-Why:\s*(.*?)$""", re.MULTILINE)

# This re matches audit trail text saying that the current bug is a
# duplicate of another
duplicatere = re.compile(r"""(?:")?Dup(?:licate)?(?:d)?(?:")? of .*?(\d+)""", re.IGNORECASE | re.MULTILINE)

# Get the text of a From: line
fromre = re.compile(r"""^From: (.*?)$""", re.MULTILINE)

# Get the text of a Date: line
datere = re.compile(r"""^Date: (.*?)$""", re.MULTILINE)

# Map of the responsible file to email addresses
responsible_map = {}

# List of records in the responsible file
responsible_list = []

# List of records in the categories file
categories_list = []

# List of pr's in the index
pr_list = []

# Map usernames to user ids
usermapping = {}

# Start with this user id
userid_base = 2

# Name of gnats user
gnats_username = "gnats@gcc.gnu.org"

# Name of unassigned user
unassigned_username = "unassigned@gcc.gnu.org"

gnats_db_dir = "."
product = "gcc"
productdesc = "GNU Compiler Collection"
milestoneurl = "http://gcc.gnu.org"
defaultmilestone = "3.4"


def write_non_bug_tables():
    """ Write out the non-bug related tables, such as products, profiles, etc.

    Emits SQL to the module-level ``outfile``: two fix-up updates on the
    bugs table, then inserts for the product, one component per entry of
    ``categories_list``, every version in ``versions_table`` and one
    profile per entry of ``usermapping``, and finally a groupset update
    for the gcc.gnu.org logins.
    """
    # Set all non-unconfirmed bugs's everconfirmed flag
    print >>outfile, "update bugs set everconfirmed=1 where bug_status != 'UNCONFIRMED';"

    # Set all bugs assigned to the unassigned user to NEW
    # NOTE(review): assigned_to is set to the quoted string 'NULL', not SQL
    # NULL, and user id 3 is hard-coded here and as the component owner
    # below -- presumably the id handed out to unassigned_username; confirm.
    print >>outfile, "update bugs set bug_status='NEW',assigned_to='NULL' where bug_status='ASSIGNED' AND assigned_to=3;"

    # Insert the products.  The values go through SqlQuote like every other
    # insert below, so a quote in a description cannot break the statement.
    print >>outfile, "\ninsert into products ("
    print >>outfile, " product, description, milestoneurl, disallownew,"
    print >>outfile, " defaultmilestone, votestoconfirm) values ("
    print >>outfile, " %s, %s, %s, 0, %s, 1);" % (SqlQuote(product),
                                                 SqlQuote(productdesc),
                                                 SqlQuote(milestoneurl),
                                                 SqlQuote(defaultmilestone))

    # Insert the components; each category record is (name, description, ...)
    productstr = SqlQuote(product)
    initialowner = SqlQuote("3")
    for category in categories_list:
        component = SqlQuote(category[0])
        description = SqlQuote(category[1])
        print >>outfile, "\ninsert into components ("
        print >>outfile, " value, program, initialowner, initialqacontact,"
        print >>outfile, " description) values ("
        print >>outfile, " %s, %s, %s, '', %s);" % (component, productstr, initialowner, description)

    # Insert the versions
    for version_product, version_list in versions_table.items():
        quoted_product = SqlQuote(version_product)
        for version in version_list:
            print >>outfile, "\ninsert into versions (value, program) "
            print >>outfile, " values (%s, %s);" % (SqlQuote(version), quoted_product)

    # Insert the users
    for username, userid in usermapping.items():
        realname = SqlQuote(map_username_to_realname(username))
        login = SqlQuote(username)
        print >>outfile, "\ninsert into profiles ("
        print >>outfile, " userid, login_name, password, cryptpassword, realname, groupset"
        print >>outfile, ") values ("
        print >>outfile, "%s,%s,'password',encrypt('password'), %s, 0);" % (userid, login, realname)

    # The backslash is part of the emitted SQL (it escapes the @ there), so
    # it is written as an explicit "\\" in the Python literal.
    print >>outfile, "update profiles set groupset=1 << 32 where login_name like '%\\@gcc.gnu.org';"


def _unixdate_fields(unixdate):
    """ Return (year, month, day, hour, minute, second) of a date string.

    The remaining values returned by email.Utils.parsedate_tz, including
    the timezone offset, are discarded.  Raises TypeError when the date
    cannot be parsed (parsedate_tz returns None).
    """
    return tuple(email.Utils.parsedate_tz(unixdate)[:6])


def unixdate2datetime(unixdate):
    """ Convert a unix date to a datetime value (YYYY-MM-DD HH:MM:SS) """
    return "%d-%02d-%02d %02d:%02d:%02d" % _unixdate_fields(unixdate)


def unixdate2timestamp(unixdate):
    """ Convert a unix date to a timestamp value (YYYYMMDDHHMMSS) """
    return "%d%02d%02d%02d%02d%02d" % _unixdate_fields(unixdate)


def SqlQuote(str):
    """ Perform SQL quoting on a string

    Single quotes are doubled, backslashes are doubled and NUL bytes become
    the two characters backslash-zero; the result is wrapped in single
    quotes.  (The parameter keeps its historical name ``str``.)
    """
    escaped = str.replace("'", "''")
    escaped = escaped.replace("\\", "\\\\")
    escaped = escaped.replace("\0", "\\0")
    return "'%s'" % escaped


def convert_gccver_to_ver(gccver):
    """ Given a gcc version, convert it to a Bugzilla version.

    The release patterns are tried in the order they are written in
    ``releasetover_patterns`` and the first one found anywhere in
    ``gccver`` wins.  Failing that, a string of the form
    "<d.d> <digits> (experimental)" yields the <d.d> part.  Anything else
    is reported as "unknown".
    """
    # Iterating the dictionary instead would visit the patterns in
    # arbitrary order and could let 3.2 shadow 3.2.2.
    for pattern, version in releasetover_patterns:
        if re.search(pattern, gccver) is not None:
            return version
    result = re.search(r""".*(\d\.\d) \d+ \(experimental\).*""", gccver)
    if result is not None:
        return result.group(1)
    return "unknown"


def load_index(fname):
    """ Load in the GNATS index file

    Appends the first "|"-separated field of every line of ``fname`` to
    the module-level ``pr_list``.
    """
    ifp = open(fname)
    try:
        for record in ifp:
            pr_list.append(record.split("|")[0])
    finally:
        # Close the file even if reading fails part-way through
        ifp.close()


def load_categories(fname):
    """ Load in the GNATS categories file

    Every line of ``fname`` that does not start with "#" is split on ":"
    and the resulting list is appended to the module-level
    ``categories_list``.
    """
    cfp = open(fname)
    try:
        for record in cfp:
            if record.startswith("#"):
                continue
            categories_list.append(record.split(":"))
    finally:
        # Close the file even if reading fails part-way through
        cfp.close()


def map_username_to_realname(username):
    """ Given a username, find the real name

    A record of ``responsible_list`` matches when its first field equals
    the part of ``username`` before the "@", or when its third field
    equals ``username`` itself.  Returns the record's second field, or ""
    when nothing matches.
    """
    name = re.sub("@.*", "", username)
    for responsible_record in responsible_list:
        if responsible_record[0] == name:
            return responsible_record[1]
        if len(responsible_record) > 2:
            # The records are stored as split from the file, so the last
            # field still carries its line ending; strip before comparing.
            if responsible_record[2].rstrip() == username:
                return responsible_record[1]
    return ""


def get_userid(responsible):
    """ Given an email address, get the user id

    The address is lower-cased, "sources.redhat.com" is rewritten to
    "gcc.gnu.org" and the result is translated through
    ``responsible_map`` when it has an entry there.  An address seen for
    the first time is given the next id from ``userid_base``.  Returns -1
    when ``responsible`` is None.
    """
    global userid_base
    if responsible is None:
        return -1
    responsible = responsible.lower()
    # Literal replacement: as a regex the dots would match any character
    responsible = responsible.replace("sources.redhat.com", "gcc.gnu.org")
    if responsible in responsible_map:
        responsible = responsible_map[responsible]
    if responsible not in usermapping:
        usermapping[responsible] = userid_base
        userid_base += 1
    return usermapping[responsible]


def load_responsible(fname):
    """ Load in the GNATS responsible file """
    global responsible_map
    global responsible_list
    rfp = open(fname)
    # NOTE(review): the text of this function available for review stops
    # after the loop below and shows no close of rfp; its statements are
    # kept exactly as found.  Confirm against the complete file.
    for record in rfp.xreadlines():
        if re.search("^#", record) is not None:
            continue
        split_record = record.split(":")
        responsible_map[split_record[0]] = split_record[2].rstrip()
        responsible_list.append(record.split(":"))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -