📄 pymeldlite.py
字号:
doctype += ' "%s"' % syslit if data: doctype += ' [%s]>' % data else: doctype += '>' self._pendingText.append(doctype) def handle_comment(self, data): self._pendingText.append('<!--%s-->' % data) def handle_proc(self, name, data): self._pendingText.append('<?%s %s ?>' % (name, data.strip())) def handle_data(self, data): self._pendingText.append(data) def handle_charref(self, ref): self._pendingText.append('&#%s;' % ref) unknown_charref = handle_charref def handle_entityref(self, ref): self._pendingText.append('&%s;' % ref) unknown_entityref = handle_entityref def handle_cdata(self, data): if self._pendingText: self._collapsePendingText() self._pendingText.append('<![CDATA[%s]]>' % data) def unknown_starttag(self, tag, attributes): if self._pendingText: self._collapsePendingText() newNode = _ElementNode(self._currentNode, tag, attributes) self._currentNode.children.append(newNode) self._currentNode = newNode def unknown_endtag(self, tag): if self._pendingText: self._collapsePendingText() self._currentNode = self._currentNode.parentelse: # Entrian.Coverage: Pragma Stop import xml.parsers.expat class _TreeGenerator: # Entrian.Coverage: Pragma Start """An XML parser that generates a lightweight DOM tree. Call `feed()` with XML source, then `close()`, then `getTree()` will give you the tree's `_RootNode`: >>> g = _TreeGenerator() >>> g.feed("<xml>Stuff. ") >>> g.feed("More stuff.</xml>") >>> g.close() >>> tree = g.getTree() >>> print tree.toText() <xml>Stuff. More stuff.</xml> """ def __init__(self): self._tree = _RootNode() self._currentNode = self._tree self._pendingText = [] self._parser = xml.parsers.expat.ParserCreate() self._parser.buffer_text = True self._parser.DefaultHandler = self.DefaultHandler self._parser.StartElementHandler = self.StartElementHandler self._parser.EndElementHandler = self.EndElementHandler # All entities and charrefs, like • and  , are considered # valid - who are we to argue? Expat thinks it knows better, so we # fool it here. def _mungeEntities(self, data): return re.sub(r'&([A-Za-z0-9#]+);', r':PyMeldEntity:\1:', data) def _unmungeEntities(self, data): return re.sub(r':PyMeldEntity:([A-Za-z0-9#]+):', r'&\1;', data) def feed(self, data): """Call this with XML content to be parsed.""" data = self._mungeEntities(data) self._parser.Parse(data) def close(self): """Call this when you've passed all your XML content to `feed`.""" self._parser.Parse("", True) def getTree(self): """Returns the generated tree; call `feed` then `close` first.""" return self._tree def _collapsePendingText(self): """Text (any content that isn't an open/close element) is built up in `self._pendingText` until an open/close element is seen, at which point it gets collapsed into a `_TextNode`.""" data = ''.join(self._pendingText) data = self._unmungeEntities(data) self._currentNode.children.append(_TextNode(data)) self._pendingText = [] def DefaultHandler(self, data): """Expat handler.""" self._pendingText.append(str(data)) def StartElementHandler(self, tag, attributes): """Expat handler.""" if self._pendingText: self._collapsePendingText() newAttributes = {} for name, value in attributes.iteritems(): newAttributes[str(name)] = self._unmungeEntities(str(value)) newNode = _ElementNode(self._currentNode, str(tag), newAttributes) self._currentNode.children.append(newNode) self._currentNode = newNode def EndElementHandler(self, tag): """Expat handler.""" if self._pendingText: self._collapsePendingText() self._currentNode = self._currentNode.parentdef _generateTree(source): """Given some XML source, generates a lightweight DOM tree rooted at a `_RootNode`.""" # Lots of HTML files start with a DOCTYPE declaration like this: # <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> # The fact that the DTD URL is missing is deliberate - it acts as a hint # to the browser that it should emulate historical browser behaviour. It # also breaks xmllib, so we see whether we can spot it here and cope. doctypeRE = r'(?i)^(\s*<!DOCTYPE\s+HTML\s+PUBLIC\s+"[^"]+"\s*>)' match = re.search(doctypeRE, source) if match: source = source[match.end():] doctype = match.group(1) else: doctype = '' # Another hack for HTML: the DOCTYPE usually gives HTML (in upper case) # as the root tag name, but xmllib will complain if you then use <html> # (lowercase) for the tag. We fix up the DOCTYPE here if it looks like # that's the case. rootRE = r'(?i)^\s*<!DOCTYPE\s+([-a-z0-9._:]+)\s+[^<]{1,200}<(\1)' match = re.search(rootRE, source) if match and match.group(1) != match.group(2): source = source[:match.start(1)] + match.group(2) + \ source[match.end(1):] # Map characters not allowed in XML content to sensible things. source = source.translate(badxml_map) source = re.sub('([\x80-\xff])', replaceHighCharacters, source) # Parse the XML and generate the tree. g = _TreeGenerator() g.feed(source) g.close() # Get the tree and put the DOCTYPE back in if we hacked it out above. tree = g.getTree() if doctype: tree.children.insert(0, _TextNode(doctype)) # Return the tree. return tree############################################################################### PyMeldLite.Meld is the business end of the module.##READ_ONLY_MESSAGE = "You can't modify this read-only Meld object"class ReadOnlyError(Exception): """Raised if you try to modify a readonly PyMeldLite.Meld.""" passclass Meld: """Represents an XML document, or a fragment of one. Pass XML/XHTML source to the constructor. You can then access all the elements with `id="name"` attributes as `object.name`, and all the attributes of the outermost element as `object.attribute`.""" def __init__(self, source, readonly=False): """Creates a `Meld` from XML source. `readonly` does what it says.""" self._readonly = readonly if isinstance(source, str): self._tree = _generateTree(source) elif isinstance(source, _Node): # For internal use only. self._tree = source else: raise TypeError, "Melds must be constructed from ASCII strings" def _findByID(self, node, name): """Returns the node with the given ID, or None.""" if node.attributes.get('id') == name: return node for child in node.children: result = self._findByID(child, name) if result: return result def _quoteAttribute(self, value): """Minimally quotes an attribute value, using `"`, `&`, `<` and `>`.""" if not isinstance(value, str): value = str(value) value = value.replace('"', '"') value = value.replace('<', '<').replace('>', '>') value = re.sub(r'&(?![a-zA-Z0-9]+;)', '&', value) return value def _unquoteAttribute(self, value): """Unquotes an attribute value quoted by `_quoteAttribute()`.""" value = value.replace('"', '"').replace('&', '&') return value.replace('<', '<').replace('>', '>') def _nodeListFromSource(self, value): """Given a snippet of XML source, returns a list of `_Node`s.""" tree = _generateTree("<x>"+value+"</x>") return tree.children[0].children def _replaceNodeContent(self, node, value): """Replaces the content of the given node. If `value` is a string, it is parsed as XML. If it is a Meld, it it cloned. The existing children are deleted, the new nodes are set as the children of `node`.""" if isinstance(value, Meld): node.children = [value._tree.getElementNode().clone()] else: if not isinstance(value, str): value = str(value) node.children = self._nodeListFromSource(value) def clone(self, readonly=False): """Creates a clone of a `Meld`, for instance to change an attribute without affecting the original document: >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>') >>> q = p.clone() >>> q.who = "Richie" >>> print q.who <b id="who">Richie</b> >>> print p.who <b id="who">World</b> By default, clones are not readonly even if the Meld from which they're cloned is readonly (the most common reason for taking a clone is to create a modified clone of a piece of a document). To make a readonly clone, say `clone = object.clone(readonly=True)`.""" return Meld(self._tree.clone(), readonly) def __getattr__(self, name): """`object.<name>`, if this Meld contains an element with an `id` attribute of `name`, returns a Meld representing that element. Otherwise, `object.<name>` returns the value of the attribute with the given name, as a string. If no such attribute exists, an AttributeError is raised. `object._content` returns the content of the Meld, not including the enclosing `<element></element>`, as a string. >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>') >>> print p.who <b id="who">World</b> >>> print p.style one >>> print p._content Hello <b id="who">World</b> >>> print p.who._content World """ if name == '_content': return self._tree.getElementNode().childrenToText() if name.startswith('_'): try: return self.__dict__[name] except KeyError: raise AttributeError, name node = self._findByID(self._tree, name) if node: return Meld(node, self._readonly) attribute = self._tree.getElementNode().attributes.get(name, _fail) if attribute is not _fail: return self._unquoteAttribute(attribute) raise AttributeError, "No element or attribute named %r" % name def __setattr__(self, name, value): """`object.<name> = value` sets the XML content of the element with an `id` of `name`, or if no such element exists, sets the value of the `name` attribute on the outermost element. If the attribute is not already there, a new attribute is created. >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>') >>> p.who = "Richie" >>> p.style = "two" >>> p.align = "center" >>> p.who.id = "newwho" >>> print p <p align="center" style="two">Hello <b id="newwho">Richie</b></p> """ if name.startswith('_') and name != '_content': self.__dict__[name] = value return if self._readonly: raise ReadOnlyError, READ_ONLY_MESSAGE node = self._findByID(self._tree, name) if hasattr(value, '_tree') and value._tree is node: return # x.y = x.y if not node and name == '_content': node = self._tree.getElementNode() if node: self._replaceNodeContent(node, value) else: value = self._quoteAttribute(value) self._tree.getElementNode().attributes[name] = value def __delattr__(self, name): """Deletes the named element or attribute from the `Meld`: >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>') >>> del p.who >>> del p.style >>> print p <p>Hello </p> """ if name == '_content': self._tree.getElementNode().children = [] return if name.startswith('_'): try: del self.__dict__[name] return except KeyError: raise AttributeError, name if self._readonly: raise ReadOnlyError, READ_ONLY_MESSAGE node = self._findByID(self._tree, name) if node: node.parent.children.remove(node) return node = self._tree.getElementNode() attribute = node.attributes.get(name, _fail) if attribute is not _fail: del node.attributes[name] else: raise AttributeError, "No element or attribute named %r" % name def __getitem__(self, name): """`object[<name>]`, if this Meld contains an element with an `id` attribute of `name`, returns a Meld representing that element. If no such element exists, a KeyError is raised. >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>') >>> print p["who"] <b id="who">World</b> >>> print p["who"]_content World """ node = self._findByID(self._tree, name) if node: return Meld(node, self._readonly) raise KeyError, "No element named %r" % name def __setitem__(self, name, value): """`object[<name>] = value` sets the XML content of the element with an `id` of `name`. If no such element exists, a KeyError is raised because there is no info about the type of element to add. >>> p = Meld('<p>Hello <b id="who">World</b></p>') >>> p["who"] = "Richie" >>> p["who"].id = "newwho" >>> print p <p>Hello <b id="newwho">Richie</b></p> """ if self._readonly: raise ReadOnlyError, READ_ONLY_MESSAGE node = self._findByID(self._tree, name) if hasattr(value, '_tree') and value._tree is node: return # x["y"] = x.y if node: self._replaceNodeContent(node, value) return raise KeyError, "No element named %r" % name def __delitem__(self, name): """Deletes the named element from the `Meld`: >>> p = Meld('<p style="one">Hello <b id="who">World</b></p>') >>> del p["who"] >>> print p <p style="one">Hello </p> """ if self._readonly: raise ReadOnlyError, READ_ONLY_MESSAGE node = self._findByID(self._tree, name) if node: node.parent.children.remove(node)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -