rfc822.py

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 1,011 行 · 第 1/3 页
1,011 行
            elif self.field[self.pos] == '.':                self.pos = self.pos + 1                sdlist.append('.')            elif self.field[self.pos] in self.atomends:                break            else: sdlist.append(self.getatom())        return ''.join(sdlist)    def getdelimited(self, beginchar, endchars, allowcomments = 1):        """Parse a header fragment delimited by special characters.        `beginchar' is the start character for the fragment.  If self is not        looking at an instance of `beginchar' then getdelimited returns the        empty string.        `endchars' is a sequence of allowable end-delimiting characters.        Parsing stops when one of these is encountered.        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed        within the parsed fragment.        """        if self.field[self.pos] != beginchar:            return ''        slist = ['']        quote = 0        self.pos = self.pos + 1        while self.pos < len(self.field):            if quote == 1:                slist.append(self.field[self.pos])                quote = 0            elif self.field[self.pos] in endchars:                self.pos = self.pos + 1                break            elif allowcomments and self.field[self.pos] == '(':                slist.append(self.getcomment())            elif self.field[self.pos] == '\\':                quote = 1            else:                slist.append(self.field[self.pos])            self.pos = self.pos + 1        return ''.join(slist)    def getquote(self):        """Get a quote-delimited fragment from self's field."""        return self.getdelimited('"', '"\r', 0)    def getcomment(self):        """Get a parenthesis-delimited fragment from self's field."""        return self.getdelimited('(', ')\r', 1)    def getdomainliteral(self):        """Parse an RFC 2822 domain-literal."""        return '[%s]' % self.getdelimited('[', ']\r', 0)    def getatom(self, atomends=None):        """Parse an RFC 2822 atom.        Optional atomends specifies a different set of end token delimiters        (the default is to use self.atomends).  This is used e.g. in        getphraselist() since phrase endings must not include the `.' (which        is legal in phrases)."""        atomlist = ['']        if atomends is None:            atomends = self.atomends        while self.pos < len(self.field):            if self.field[self.pos] in atomends:                break            else: atomlist.append(self.field[self.pos])            self.pos = self.pos + 1        return ''.join(atomlist)    def getphraselist(self):        """Parse a sequence of RFC 2822 phrases.        A phrase is a sequence of words, which are in turn either RFC 2822        atoms or quoted-strings.  Phrases are canonicalized by squeezing all        runs of continuous whitespace into one space.        """        plist = []        while self.pos < len(self.field):            if self.field[self.pos] in self.LWS:                self.pos = self.pos + 1            elif self.field[self.pos] == '"':                plist.append(self.getquote())            elif self.field[self.pos] == '(':                self.commentlist.append(self.getcomment())            elif self.field[self.pos] in self.phraseends:                break            else:                plist.append(self.getatom(self.phraseends))        return plistclass AddressList(AddrlistClass):    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""    def __init__(self, field):        AddrlistClass.__init__(self, field)        if field:            self.addresslist = self.getaddrlist()        else:            self.addresslist = []    def __len__(self):        return len(self.addresslist)    def __str__(self):        return ", ".join(map(dump_address_pair, self.addresslist))    def __add__(self, other):        # Set union        newaddr = AddressList(None)        newaddr.addresslist = self.addresslist[:]        for x in other.addresslist:            if not x in self.addresslist:                newaddr.addresslist.append(x)        return newaddr    def __iadd__(self, other):        # Set union, in-place        for x in other.addresslist:            if not x in self.addresslist:                self.addresslist.append(x)        return self    def __sub__(self, other):        # Set difference        newaddr = AddressList(None)        for x in self.addresslist:            if not x in other.addresslist:                newaddr.addresslist.append(x)        return newaddr    def __isub__(self, other):        # Set difference, in-place        for x in other.addresslist:            if x in self.addresslist:                self.addresslist.remove(x)        return self    def __getitem__(self, index):        # Make indexing, slices, and 'in' work        return self.addresslist[index]def dump_address_pair(pair):    """Dump a (name, address) pair in a canonicalized form."""    if pair[0]:        return '"' + pair[0] + '" <' + pair[1] + '>'    else:        return pair[1]# Parse a date field_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',               'aug', 'sep', 'oct', 'nov', 'dec',               'january', 'february', 'march', 'april', 'may', 'june', 'july',               'august', 'september', 'october', 'november', 'december']_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']# The timezone table does not include the military time zones defined# in RFC822, other than Z.  According to RFC1123, the description in# RFC822 gets the signs wrong, so we can't rely on any such time# zones.  RFC1123 recommends that numeric timezone indicators be used# instead of timezone names._timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)              'EST': -500, 'EDT': -400,  # Eastern              'CST': -600, 'CDT': -500,  # Central              'MST': -700, 'MDT': -600,  # Mountain              'PST': -800, 'PDT': -700   # Pacific              }def parsedate_tz(data):    """Convert a date string to a time tuple.    Accounts for military timezones.    """    if not data:        return None    data = data.split()    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:        # There's a dayname here. Skip it        del data[0]    if len(data) == 3: # RFC 850 date, deprecated        stuff = data[0].split('-')        if len(stuff) == 3:            data = stuff + data[1:]    if len(data) == 4:        s = data[3]        i = s.find('+')        if i > 0:            data[3:] = [s[:i], s[i+1:]]        else:            data.append('') # Dummy tz    if len(data) < 5:        return None    data = data[:5]    [dd, mm, yy, tm, tz] = data    mm = mm.lower()    if not mm in _monthnames:        dd, mm = mm, dd.lower()        if not mm in _monthnames:            return None    mm = _monthnames.index(mm)+1    if mm > 12: mm = mm - 12    if dd[-1] == ',':        dd = dd[:-1]    i = yy.find(':')    if i > 0:        yy, tm = tm, yy    if yy[-1] == ',':        yy = yy[:-1]    if not yy[0].isdigit():        yy, tz = tz, yy    if tm[-1] == ',':        tm = tm[:-1]    tm = tm.split(':')    if len(tm) == 2:        [thh, tmm] = tm        tss = '0'    elif len(tm) == 3:        [thh, tmm, tss] = tm    else:        return None    try:        yy = int(yy)        dd = int(dd)        thh = int(thh)        tmm = int(tmm)        tss = int(tss)    except ValueError:        return None    tzoffset = None    tz = tz.upper()    if _timezones.has_key(tz):        tzoffset = _timezones[tz]    else:        try:            tzoffset = int(tz)        except ValueError:            pass    # Convert a timezone offset into seconds ; -0500 -> -18000    if tzoffset:        if tzoffset < 0:            tzsign = -1            tzoffset = -tzoffset        else:            tzsign = 1        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)    tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)    return tupledef parsedate(data):    """Convert a time string to a time tuple."""    t = parsedate_tz(data)    if type(t) == type( () ):        return t[:9]    else: return tdef mktime_tz(data):    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""    if data[9] is None:        # No zone info, so localtime is better assumption than GMT        return time.mktime(data[:8] + (-1,))    else:        t = time.mktime(data[:8] + (0,))        return t - data[9] - time.timezonedef formatdate(timeval=None):    """Returns time format preferred for Internet standards.    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123    According to RFC 1123, day and month names must always be in    English.  If not for that, this code could use strftime().  It    can't because strftime() honors the locale and could generated    non-English names.    """    if timeval is None:        timeval = time.time()    timeval = time.gmtime(timeval)    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (            ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][timeval[6]],            timeval[2],            ["Jan", "Feb", "Mar", "Apr", "May", "Jun",             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][timeval[1]-1],                                timeval[0], timeval[3], timeval[4], timeval[5])# When used as script, run a small test program.# The first command line argument must be a filename containing one# message in RFC-822 format.if __name__ == '__main__':    import sys, os    file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')    if sys.argv[1:]: file = sys.argv[1]    f = open(file, 'r')    m = Message(f)    print 'From:', m.getaddr('from')    print 'To:', m.getaddrlist('to')    print 'Subject:', m.getheader('subject')    print 'Date:', m.getheader('date')    date = m.getdate_tz('date')    tz = date[-1]    date = time.localtime(mktime_tz(date))    if date:        print 'ParsedDate:', time.asctime(date),        hhmmss = tz        hhmm, ss = divmod(hhmmss, 60)        hh, mm = divmod(hhmm, 60)        print "%+03d%02d" % (hh, mm),        if ss: print ".%02d" % ss,        print    else:        print 'ParsedDate:', None    m.rewindbody()    n = 0    while f.readline():        n = n + 1    print 'Lines:', n    print '-'*70    print 'len =', len(m)    if m.has_key('Date'): print 'Date =', m['Date']    if m.has_key('X-Nonsense'): pass    print 'keys =', m.keys()    print 'values =', m.values()    print 'items =', m.items()
rfc822.py - 源码说明

本页面展示了「mallet是自然语言处理、机器学习领域的一个开源项目。」中的 rfc822.py 源码文件，采用 Python 编程语言编写，共 1,011 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与mallet相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?