rfc822.py
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 1,011 行 · 第 1/3 页
PY
1,011 行
elif self.field[self.pos] == '.': self.pos = self.pos + 1 sdlist.append('.') elif self.field[self.pos] in self.atomends: break else: sdlist.append(self.getatom()) return ''.join(sdlist) def getdelimited(self, beginchar, endchars, allowcomments = 1): """Parse a header fragment delimited by special characters. `beginchar' is the start character for the fragment. If self is not looking at an instance of `beginchar' then getdelimited returns the empty string. `endchars' is a sequence of allowable end-delimiting characters. Parsing stops when one of these is encountered. If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed within the parsed fragment. """ if self.field[self.pos] != beginchar: return '' slist = [''] quote = 0 self.pos = self.pos + 1 while self.pos < len(self.field): if quote == 1: slist.append(self.field[self.pos]) quote = 0 elif self.field[self.pos] in endchars: self.pos = self.pos + 1 break elif allowcomments and self.field[self.pos] == '(': slist.append(self.getcomment()) elif self.field[self.pos] == '\\': quote = 1 else: slist.append(self.field[self.pos]) self.pos = self.pos + 1 return ''.join(slist) def getquote(self): """Get a quote-delimited fragment from self's field.""" return self.getdelimited('"', '"\r', 0) def getcomment(self): """Get a parenthesis-delimited fragment from self's field.""" return self.getdelimited('(', ')\r', 1) def getdomainliteral(self): """Parse an RFC 2822 domain-literal.""" return '[%s]' % self.getdelimited('[', ']\r', 0) def getatom(self, atomends=None): """Parse an RFC 2822 atom. Optional atomends specifies a different set of end token delimiters (the default is to use self.atomends). This is used e.g. in getphraselist() since phrase endings must not include the `.' (which is legal in phrases).""" atomlist = [''] if atomends is None: atomends = self.atomends while self.pos < len(self.field): if self.field[self.pos] in atomends: break else: atomlist.append(self.field[self.pos]) self.pos = self.pos + 1 return ''.join(atomlist) def getphraselist(self): """Parse a sequence of RFC 2822 phrases. A phrase is a sequence of words, which are in turn either RFC 2822 atoms or quoted-strings. Phrases are canonicalized by squeezing all runs of continuous whitespace into one space. """ plist = [] while self.pos < len(self.field): if self.field[self.pos] in self.LWS: self.pos = self.pos + 1 elif self.field[self.pos] == '"': plist.append(self.getquote()) elif self.field[self.pos] == '(': self.commentlist.append(self.getcomment()) elif self.field[self.pos] in self.phraseends: break else: plist.append(self.getatom(self.phraseends)) return plistclass AddressList(AddrlistClass): """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" def __init__(self, field): AddrlistClass.__init__(self, field) if field: self.addresslist = self.getaddrlist() else: self.addresslist = [] def __len__(self): return len(self.addresslist) def __str__(self): return ", ".join(map(dump_address_pair, self.addresslist)) def __add__(self, other): # Set union newaddr = AddressList(None) newaddr.addresslist = self.addresslist[:] for x in other.addresslist: if not x in self.addresslist: newaddr.addresslist.append(x) return newaddr def __iadd__(self, other): # Set union, in-place for x in other.addresslist: if not x in self.addresslist: self.addresslist.append(x) return self def __sub__(self, other): # Set difference newaddr = AddressList(None) for x in self.addresslist: if not x in other.addresslist: newaddr.addresslist.append(x) return newaddr def __isub__(self, other): # Set difference, in-place for x in other.addresslist: if x in self.addresslist: self.addresslist.remove(x) return self def __getitem__(self, index): # Make indexing, slices, and 'in' work return self.addresslist[index]def dump_address_pair(pair): """Dump a (name, address) pair in a canonicalized form.""" if pair[0]: return '"' + pair[0] + '" <' + pair[1] + '>' else: return pair[1]# Parse a date field_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']# The timezone table does not include the military time zones defined# in RFC822, other than Z. According to RFC1123, the description in# RFC822 gets the signs wrong, so we can't rely on any such time# zones. RFC1123 recommends that numeric timezone indicators be used# instead of timezone names._timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) 'EST': -500, 'EDT': -400, # Eastern 'CST': -600, 'CDT': -500, # Central 'MST': -700, 'MDT': -600, # Mountain 'PST': -800, 'PDT': -700 # Pacific }def parsedate_tz(data): """Convert a date string to a time tuple. Accounts for military timezones. """ if not data: return None data = data.split() if data[0][-1] in (',', '.') or data[0].lower() in _daynames: # There's a dayname here. Skip it del data[0] if len(data) == 3: # RFC 850 date, deprecated stuff = data[0].split('-') if len(stuff) == 3: data = stuff + data[1:] if len(data) == 4: s = data[3] i = s.find('+') if i > 0: data[3:] = [s[:i], s[i+1:]] else: data.append('') # Dummy tz if len(data) < 5: return None data = data[:5] [dd, mm, yy, tm, tz] = data mm = mm.lower() if not mm in _monthnames: dd, mm = mm, dd.lower() if not mm in _monthnames: return None mm = _monthnames.index(mm)+1 if mm > 12: mm = mm - 12 if dd[-1] == ',': dd = dd[:-1] i = yy.find(':') if i > 0: yy, tm = tm, yy if yy[-1] == ',': yy = yy[:-1] if not yy[0].isdigit(): yy, tz = tz, yy if tm[-1] == ',': tm = tm[:-1] tm = tm.split(':') if len(tm) == 2: [thh, tmm] = tm tss = '0' elif len(tm) == 3: [thh, tmm, tss] = tm else: return None try: yy = int(yy) dd = int(dd) thh = int(thh) tmm = int(tmm) tss = int(tss) except ValueError: return None tzoffset = None tz = tz.upper() if _timezones.has_key(tz): tzoffset = _timezones[tz] else: try: tzoffset = int(tz) except ValueError: pass # Convert a timezone offset into seconds ; -0500 -> -18000 if tzoffset: if tzoffset < 0: tzsign = -1 tzoffset = -tzoffset else: tzsign = 1 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset) return tupledef parsedate(data): """Convert a time string to a time tuple.""" t = parsedate_tz(data) if type(t) == type( () ): return t[:9] else: return tdef mktime_tz(data): """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.""" if data[9] is None: # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: t = time.mktime(data[:8] + (0,)) return t - data[9] - time.timezonedef formatdate(timeval=None): """Returns time format preferred for Internet standards. Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 According to RFC 1123, day and month names must always be in English. If not for that, this code could use strftime(). It can't because strftime() honors the locale and could generated non-English names. """ if timeval is None: timeval = time.time() timeval = time.gmtime(timeval) return "%s, %02d %s %04d %02d:%02d:%02d GMT" % ( ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][timeval[6]], timeval[2], ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][timeval[1]-1], timeval[0], timeval[3], timeval[4], timeval[5])# When used as script, run a small test program.# The first command line argument must be a filename containing one# message in RFC-822 format.if __name__ == '__main__': import sys, os file = os.path.join(os.environ['HOME'], 'Mail/inbox/1') if sys.argv[1:]: file = sys.argv[1] f = open(file, 'r') m = Message(f) print 'From:', m.getaddr('from') print 'To:', m.getaddrlist('to') print 'Subject:', m.getheader('subject') print 'Date:', m.getheader('date') date = m.getdate_tz('date') tz = date[-1] date = time.localtime(mktime_tz(date)) if date: print 'ParsedDate:', time.asctime(date), hhmmss = tz hhmm, ss = divmod(hhmmss, 60) hh, mm = divmod(hhmm, 60) print "%+03d%02d" % (hh, mm), if ss: print ".%02d" % ss, print else: print 'ParsedDate:', None m.rewindbody() n = 0 while f.readline(): n = n + 1 print 'Lines:', n print '-'*70 print 'len =', len(m) if m.has_key('Date'): print 'Date =', m['Date'] if m.has_key('X-Nonsense'): pass print 'keys =', m.keys() print 'values =', m.values() print 'items =', m.items()
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?