📄 pymeldlite.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 3 页
字号:
                doctype += ' "%s"' % syslit            if data:                doctype += ' [%s]>' % data            else:                doctype += '>'            self._pendingText.append(doctype)        def handle_comment(self, data):            self._pendingText.append('<!--%s-->' % data)        def handle_proc(self, name, data):            self._pendingText.append('<?%s %s ?>' % (name, data.strip()))        def handle_data(self, data):            self._pendingText.append(data)        def handle_charref(self, ref):            self._pendingText.append('&#%s;' % ref)        unknown_charref = handle_charref        def handle_entityref(self, ref):            self._pendingText.append('&%s;' % ref)        unknown_entityref = handle_entityref        def handle_cdata(self, data):            if self._pendingText:                self._collapsePendingText()            self._pendingText.append('<![CDATA[%s]]>' % data)        def unknown_starttag(self, tag, attributes):            if self._pendingText:                self._collapsePendingText()            newNode = _ElementNode(self._currentNode, tag, attributes)            self._currentNode.children.append(newNode)            self._currentNode = newNode        def unknown_endtag(self, tag):            if self._pendingText:                self._collapsePendingText()            self._currentNode = self._currentNode.parentelse:    # Entrian.Coverage: Pragma Stop    import xml.parsers.expat    class _TreeGenerator:        # Entrian.Coverage: Pragma Start        """An XML parser that generates a lightweight DOM tree.  Call `feed()`        with XML source, then `close()`, then `getTree()` will give you the        tree's `_RootNode`:        >>> g = _TreeGenerator()        >>> g.feed("<xml>Stuff. ")        >>> g.feed("More stuff.</xml>")        >>> g.close()        >>> tree = g.getTree()        >>> print tree.toText()        <xml>Stuff. More stuff.</xml>        """        def __init__(self):            self._tree = _RootNode()            self._currentNode = self._tree            self._pendingText = []            self._parser = xml.parsers.expat.ParserCreate()            self._parser.buffer_text = True            self._parser.DefaultHandler = self.DefaultHandler            self._parser.StartElementHandler = self.StartElementHandler            self._parser.EndElementHandler = self.EndElementHandler        # All entities and charrefs, like &bull; and &#160;, are considered        # valid - who are we to argue?  Expat thinks it knows better, so we        # fool it here.        def _mungeEntities(self, data):            return re.sub(r'&([A-Za-z0-9#]+);', r':PyMeldEntity:\1:', data)        def _unmungeEntities(self, data):            return re.sub(r':PyMeldEntity:([A-Za-z0-9#]+):', r'&\1;', data)        def feed(self, data):            """Call this with XML content to be parsed."""            data = self._mungeEntities(data)            self._parser.Parse(data)        def close(self):            """Call this when you've passed all your XML content to `feed`."""            self._parser.Parse("", True)        def getTree(self):            """Returns the generated tree; call `feed` then `close` first."""            return self._tree        def _collapsePendingText(self):            """Text (any content that isn't an open/close element) is built up            in `self._pendingText` until an open/close element is seen, at            which point it gets collapsed into a `_TextNode`."""            data = ''.join(self._pendingText)            data = self._unmungeEntities(data)            self._currentNode.children.append(_TextNode(data))            self._pendingText = []        def DefaultHandler(self, data):            """Expat handler."""            self._pendingText.append(str(data))        def StartElementHandler(self, tag, attributes):            """Expat handler."""            if self._pendingText:                self._collapsePendingText()            newAttributes = {}            for name, value in attributes.iteritems():                newAttributes[str(name)] = self._unmungeEntities(str(value))            newNode = _ElementNode(self._currentNode, str(tag), newAttributes)            self._currentNode.children.append(newNode)            self._currentNode = newNode        def EndElementHandler(self, tag):            """Expat handler."""            if self._pendingText:                self._collapsePendingText()            self._currentNode = self._currentNode.parentdef _generateTree(source):    """Given some XML source, generates a lightweight DOM tree rooted at a    `_RootNode`."""    # Lots of HTML files start with a DOCTYPE declaration like this:    #   <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">    # The fact that the DTD URL is missing is deliberate - it acts as a hint    # to the browser that it should emulate historical browser behaviour.  It    # also breaks xmllib, so we see whether we can spot it here and cope.    doctypeRE = r'(?i)^(\s*<!DOCTYPE\s+HTML\s+PUBLIC\s+"[^"]+"\s*>)'    match = re.search(doctypeRE, source)    if match:        source = source[match.end():]        doctype = match.group(1)    else:        doctype = ''    # Another hack for HTML: the DOCTYPE usually gives HTML (in upper case)    # as the root tag name, but xmllib will complain if you then use <html>    # (lowercase) for the tag.  We fix up the DOCTYPE here if it looks like    # that's the case.    rootRE = r'(?i)^\s*<!DOCTYPE\s+([-a-z0-9._:]+)\s+[^<]{1,200}<(\1)'    match = re.search(rootRE, source)    if match and match.group(1) != match.group(2):        source = source[:match.start(1)] + match.group(2) + \                 source[match.end(1):]    # Map characters not allowed in XML content to sensible things.    source = source.translate(badxml_map)    source = re.sub('([\x80-\xff])', replaceHighCharacters, source)    # Parse the XML and generate the tree.    g = _TreeGenerator()    g.feed(source)    g.close()    # Get the tree and put the DOCTYPE back in if we hacked it out above.    tree = g.getTree()    if doctype:        tree.children.insert(0, _TextNode(doctype))    # Return the tree.    return tree############################################################################### PyMeldLite.Meld is the business end of the module.##READ_ONLY_MESSAGE = "You can't modify this read-only Meld object"class ReadOnlyError(Exception):    """Raised if you try to modify a readonly PyMeldLite.Meld."""    passclass Meld:    """Represents an XML document, or a fragment of one.  Pass XML/XHTML    source to the constructor.  You can then access all the elements with    `id="name"` attributes as `object.name`, and all the attributes of the    outermost element as `object.attribute`."""    def __init__(self, source, readonly=False):        """Creates a `Meld` from XML source.  `readonly` does what it        says."""        self._readonly = readonly        if isinstance(source, str):            self._tree = _generateTree(source)        elif isinstance(source, _Node): # For internal use only.            self._tree = source        else:            raise TypeError, "Melds must be constructed from ASCII strings"    def _findByID(self, node, name):        """Returns the node with the given ID, or None."""        if node.attributes.get('id') == name:            return node        for child in node.children:            result = self._findByID(child, name)            if result:                return result    def _quoteAttribute(self, value):        """Minimally quotes an attribute value, using `&quot;`, `&amp;`,        `&lt;` and `&gt;`."""        if not isinstance(value, str):            value = str(value)        value = value.replace('"', '&quot;')        value = value.replace('<', '&lt;').replace('>', '&gt;')        value = re.sub(r'&(?![a-zA-Z0-9]+;)', '&amp;', value)        return value    def _unquoteAttribute(self, value):        """Unquotes an attribute value quoted by `_quoteAttribute()`."""        value = value.replace('&quot;', '"').replace('&amp;', '&')        return value.replace('&lt;', '<').replace('&gt;', '>')    def _nodeListFromSource(self, value):        """Given a snippet of XML source, returns a list of `_Node`s."""        tree = _generateTree("<x>"+value+"</x>")        return tree.children[0].children    def _replaceNodeContent(self, node, value):        """Replaces the content of the given node.  If `value` is a string, it        is parsed as XML.  If it is a Meld, it it cloned.  The existing        children are deleted, the new nodes are set as the children of        `node`."""        if isinstance(value, Meld):            node.children = [value._tree.getElementNode().clone()]        else:            if not isinstance(value, str):                value = str(value)            node.children = self._nodeListFromSource(value)    def clone(self, readonly=False):        """Creates a clone of a `Meld`, for instance to change an attribute        without affecting the original document:        >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>')        >>> q = p.clone()        >>> q.who = "Richie"        >>> print q.who        <b id="who">Richie</b>        >>> print p.who        <b id="who">World</b>        By default, clones are not readonly even if the Meld from which        they're cloned is readonly (the most common reason for taking a        clone is to create a modified clone of a piece of a document).  To        make a readonly clone, say `clone = object.clone(readonly=True)`."""        return Meld(self._tree.clone(), readonly)    def __getattr__(self, name):        """`object.<name>`, if this Meld contains an element with an `id`        attribute of `name`, returns a Meld representing that element.        Otherwise, `object.<name>` returns the value of the attribute with        the given name, as a string.  If no such attribute exists, an        AttributeError is raised.        `object._content` returns the content of the Meld, not including        the enclosing `<element></element>`, as a string.        >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>')        >>> print p.who        <b id="who">World</b>        >>> print p.style        one        >>> print p._content        Hello <b id="who">World</b>        >>> print p.who._content        World        """        if name == '_content':            return self._tree.getElementNode().childrenToText()        if name.startswith('_'):            try:                return self.__dict__[name]            except KeyError:                raise AttributeError, name        node = self._findByID(self._tree, name)        if node:            return Meld(node, self._readonly)        attribute = self._tree.getElementNode().attributes.get(name, _fail)        if attribute is not _fail:            return self._unquoteAttribute(attribute)        raise AttributeError, "No element or attribute named %r" % name    def __setattr__(self, name, value):        """`object.<name> = value` sets the XML content of the element with an        `id` of `name`, or if no such element exists, sets the value of the        `name` attribute on the outermost element.  If the attribute is not        already there, a new attribute is created.        >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>')        >>> p.who = "Richie"        >>> p.style = "two"        >>> p.align = "center"        >>> p.who.id = "newwho"        >>> print p        <p align="center" style="two">Hello <b id="newwho">Richie</b></p>        """        if name.startswith('_') and name != '_content':            self.__dict__[name] = value            return        if self._readonly:            raise ReadOnlyError, READ_ONLY_MESSAGE        node = self._findByID(self._tree, name)        if hasattr(value, '_tree') and value._tree is node:            return   # x.y = x.y        if not node and name == '_content':            node = self._tree.getElementNode()        if node:            self._replaceNodeContent(node, value)        else:            value = self._quoteAttribute(value)            self._tree.getElementNode().attributes[name] = value    def __delattr__(self, name):        """Deletes the named element or attribute from the `Meld`:        >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>')        >>> del p.who        >>> del p.style        >>> print p        <p>Hello </p>        """        if name == '_content':            self._tree.getElementNode().children = []            return        if name.startswith('_'):            try:                del self.__dict__[name]                return            except KeyError:                raise AttributeError, name        if self._readonly:            raise ReadOnlyError, READ_ONLY_MESSAGE        node = self._findByID(self._tree, name)        if node:            node.parent.children.remove(node)            return        node = self._tree.getElementNode()        attribute = node.attributes.get(name, _fail)        if attribute is not _fail:            del node.attributes[name]        else:            raise AttributeError, "No element or attribute named %r" % name    def __getitem__(self, name):        """`object[<name>]`, if this Meld contains an element with an `id`        attribute of `name`, returns a Meld representing that element.        If no such element exists, a KeyError is raised.        >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>')        >>> print p["who"]        <b id="who">World</b>        >>> print p["who"]_content        World        """        node = self._findByID(self._tree, name)        if node:            return Meld(node, self._readonly)        raise KeyError, "No element named %r" % name    def __setitem__(self, name, value):        """`object[<name>] = value` sets the XML content of the element with an        `id` of `name`.                If no such element exists, a KeyError is raised because there is no        info about the type of element to add.        >>> p = Meld('<p>Hello <b id="who">World</b></p>')        >>> p["who"] = "Richie"        >>> p["who"].id = "newwho"        >>> print p        <p>Hello <b id="newwho">Richie</b></p>        """        if self._readonly:            raise ReadOnlyError, READ_ONLY_MESSAGE        node = self._findByID(self._tree, name)        if hasattr(value, '_tree') and value._tree is node:            return   # x["y"] = x.y        if node:            self._replaceNodeContent(node, value)            return        raise KeyError, "No element named %r" % name    def __delitem__(self, name):        """Deletes the named element from the `Meld`:        >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>')        >>> del p["who"]        >>> print p        <p style="one">Hello </p>        """        if self._readonly:            raise ReadOnlyError, READ_ONLY_MESSAGE        node = self._findByID(self._tree, name)        if node:            node.parent.children.remove(node)
💿 文件大小 1791 K
👤 上传用户 guigong
📂 所属分类数学计算
🏷️ 相关标签

#python #邮件过滤
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -