⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 drv_libxml2.py

📁 libxml,在UNIX/LINUX下非常重要的一个库,为XML相关应用提供方便.目前上载的是最新版本,若要取得最新版本,请参考里面的readme.
💻 PY
字号:
# -*- coding: iso-8859-1 -*-
""" A SAX2 driver for libxml2, on top of its XmlReader API

USAGE
    # put this file (drv_libxml2.py) in PYTHONPATH
    import xml.sax
    reader = xml.sax.make_parser(["drv_libxml2"])
    # ...and the rest is standard python sax.

CAVEATS
    - Lexical handlers are supported, except for start/endEntity
      (waiting for XmlReader.ResolveEntity) and start/endDTD
    - Error callbacks are not exactly synchronous, they tend
      to be invoked before the corresponding content callback,
      because the underlying reader interface parses
      data by chunks of 512 bytes

TODO
    - search for TODO
    - some ErrorHandler events (warning)
    - some ContentHandler events (setDocumentLocator, skippedEntity)
    - EntityResolver (using libxml2.?)
    - DTDHandler (if/when libxml2 exposes such node types)
    - DeclHandler (if/when libxml2 exposes such node types)
    - property_xml_string?
    - feature_string_interning?
    - Incremental parser
    - additional performance tuning:
      - one might cache callbacks to avoid some name lookups
      - one might implement a smarter way to pass attributes to startElement
        (some kind of lazy evaluation?)
      - there might be room for improvement in start/endPrefixMapping
      - other?

"""

# The e-acute is written as an escape so the file stays pure ASCII and
# cannot be mangled by a wrong source-encoding guess.
__author__ = u"St\xe9phane Bidoul <sbi@skynet.be>"
__version__ = "0.3"

import codecs
import sys

try:
    # Python 2: byte strings and unicode strings are two distinct types.
    from types import StringType, UnicodeType
    StringTypes = (StringType, UnicodeType)
    _TextType = UnicodeType
except ImportError:
    # Interpreters whose ``types`` module no longer exports these names.
    StringTypes = (str,)
    _TextType = str

from xml.sax._exceptions import *
from xml.sax import xmlreader, saxutils
from xml.sax.handler import \
     feature_namespaces, \
     feature_namespace_prefixes, \
     feature_string_interning, \
     feature_validation, \
     feature_external_ges, \
     feature_external_pes, \
     property_lexical_handler, \
     property_declaration_handler, \
     property_dom_node, \
     property_xml_string

# libxml2 returns strings as UTF8
_decoder = codecs.lookup("utf8")[1]


def _d(s):
    """Decode a UTF-8 string coming from libxml2 into a text string.

    ``None`` is passed through unchanged, and so is a value that already
    is a text (unicode) string, since decoding it a second time would be
    wrong.
    """
    if s is None or isinstance(s, _TextType):
        return s
    return _decoder(s)[0]


try:
    import libxml2
except ImportError:
    # sys.exc_info() is used instead of binding the exception in the
    # except clause because that clause is spelled differently in
    # Python 2 ("except X, e") and Python 3 ("except X as e").
    raise SAXReaderNotAvailable("libxml2 not available: "
                                "import error was: %s" % sys.exc_info()[1])


class Locator(xmlreader.Locator):
    """SAX Locator adapter for libxml2.xmlTextReaderLocator"""

    def __init__(self, locator):
        # the wrapped libxml2 locator object
        self.__locator = locator

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        # always -1: no column is obtained from the wrapped locator
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return self.__locator.LineNumber()

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return self.__locator.BaseURI()


class LibXml2Reader(xmlreader.XMLReader):
    """SAX2 XMLReader that drives the handlers from a libxml2 text reader."""

    def __init__(self):
        xmlreader.XMLReader.__init__(self)
        # features
        self.__ns = 0
        self.__nspfx = 0
        self.__validate = 0
        self.__extparams = 1
        # parsing flag
        self.__parsing = 0
        # additional handlers
        self.__lex_handler = None
        self.__decl_handler = None
        # error messages accumulator
        self.__errors = None

    def _errorHandler(self, arg, msg, severity, locator):
        """Callback registered on the libxml2 reader: queue one error.

        The error is only stored here, as a (severity, SAXParseException)
        pair; _reportErrors() forwards it to the SAX error handler later.
        """
        if self.__errors is None:
            self.__errors = []
        self.__errors.append((severity,
                              SAXParseException(msg, None,
                                                Locator(locator))))

    def _reportErrors(self, fatal):
        """Forward every queued error to the SAX error handler.

        fatal -- true when the parse is about to stop; the last queued
                 non-warning error is then reported through fatalError().
        """
        for severity, exception in self.__errors:
            if severity in (libxml2.PARSER_SEVERITY_VALIDITY_WARNING,
                            libxml2.PARSER_SEVERITY_WARNING):
                self._err_handler.warning(exception)
            else:
                # when fatal is set, the parse will stop;
                # we consider that the last error reported
                # is the fatal one.
                if fatal and exception is self.__errors[-1][1]:
                    self._err_handler.fatalError(exception)
                else:
                    self._err_handler.error(exception)
        self.__errors = None

    def parse(self, source):
        """Parse *source* and fire the SAX events on the registered handlers.

        source -- a string (handed to libxml2 as a file name), or
                  anything accepted by saxutils.prepare_input_source().

        Raises SAXException when the reader yields an unexpected node
        type; whatever the registered handlers raise is propagated.
        """
        self.__parsing = 1
        # drop errors left over from an earlier parse that a handler
        # exception aborted before they could be reported
        self.__errors = None
        reader = None
        try:
            # prepare source and create reader
            if isinstance(source, StringTypes):
                reader = libxml2.newTextReaderFilename(source)
            else:
                source = saxutils.prepare_input_source(source)
                input_buffer = libxml2.inputBuffer(source.getByteStream())
                reader = input_buffer.newTextReader(source.getSystemId())
            reader.SetErrorHandler(self._errorHandler, None)
            # configure reader
            if self.__extparams:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
                reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
                reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
                reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
            else:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 0)
            # we reuse attribute maps (for a slight performance gain)
            if self.__ns:
                attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
            else:
                attributesImpl = xmlreader.AttributesImpl({})
            # prefixes to pop (for endPrefixMapping)
            prefixes = []
            # start loop
            self._cont_handler.startDocument()
            while 1:
                r = reader.Read()
                # check for errors
                if r == 1:
                    if self.__errors is not None:
                        self._reportErrors(0)
                elif r == 0:
                    if self.__errors is not None:
                        self._reportErrors(0)
                    break  # end of parse
                else:
                    if self.__errors is not None:
                        self._reportErrors(1)
                    else:
                        self._err_handler.fatalError(
                            SAXException("Read failed (no details available)"))
                    break  # fatal parse error
                # get node type
                nodeType = reader.NodeType()
                # Element
                if nodeType == 1:
                    if self.__ns:
                        eltName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        eltQName = _d(reader.Name())
                        attributesNSImpl._attrs = attrs = {}
                        attributesNSImpl._qnames = qnames = {}
                        newPrefixes = []
                        while reader.MoveToNextAttribute():
                            qname = _d(reader.Name())
                            value = _d(reader.Value())
                            # a namespace declaration is exactly "xmlns"
                            # or "xmlns:<prefix>"; an ordinary attribute
                            # whose name merely begins with "xmlns" is not
                            if qname == "xmlns" or qname.startswith("xmlns:"):
                                if len(qname) > 5:
                                    newPrefix = qname[6:]
                                else:
                                    newPrefix = None
                                newPrefixes.append(newPrefix)
                                self._cont_handler.startPrefixMapping(
                                    newPrefix, value)
                                if not self.__nspfx:
                                    continue  # don't report xmlns attribute
                            attName = (_d(reader.NamespaceUri()),
                                       _d(reader.LocalName()))
                            qnames[attName] = qname
                            attrs[attName] = value
                        reader.MoveToElement()
                        self._cont_handler.startElementNS(
                            eltName, eltQName, attributesNSImpl)
                        if reader.IsEmptyElement():
                            # no EndElement node follows an empty element,
                            # so close it and its prefix mappings here
                            self._cont_handler.endElementNS(eltName, eltQName)
                            for newPrefix in newPrefixes:
                                self._cont_handler.endPrefixMapping(newPrefix)
                        else:
                            prefixes.append(newPrefixes)
                    else:
                        eltName = _d(reader.Name())
                        attributesImpl._attrs = attrs = {}
                        while reader.MoveToNextAttribute():
                            attName = _d(reader.Name())
                            attrs[attName] = _d(reader.Value())
                        reader.MoveToElement()
                        self._cont_handler.startElement(
                            eltName, attributesImpl)
                        if reader.IsEmptyElement():
                            self._cont_handler.endElement(eltName)
                # EndElement
                elif nodeType == 15:
                    if self.__ns:
                        self._cont_handler.endElementNS(
                            (_d(reader.NamespaceUri()),
                             _d(reader.LocalName())),
                            _d(reader.Name()))
                        for prefix in prefixes.pop():
                            self._cont_handler.endPrefixMapping(prefix)
                    else:
                        self._cont_handler.endElement(_d(reader.Name()))
                # Text
                elif nodeType == 3:
                    self._cont_handler.characters(_d(reader.Value()))
                # Whitespace
                elif nodeType == 13:
                    self._cont_handler.ignorableWhitespace(_d(reader.Value()))
                # SignificantWhitespace
                elif nodeType == 14:
                    self._cont_handler.characters(_d(reader.Value()))
                # CDATA
                elif nodeType == 4:
                    if self.__lex_handler is not None:
                        self.__lex_handler.startCDATA()
                    self._cont_handler.characters(_d(reader.Value()))
                    if self.__lex_handler is not None:
                        self.__lex_handler.endCDATA()
                # EntityReference
                elif nodeType == 5:
                    # the event belongs to the lexical handler; the reader
                    # itself has no startEntity method
                    if self.__lex_handler is not None:
                        self.__lex_handler.startEntity(_d(reader.Name()))
                    # NOTE(review): the module docstring says start/endEntity
                    # are "waiting for XmlReader.ResolveEntity" -- confirm
                    # the installed libxml2 binding provides this method
                    reader.ResolveEntity()
                # EndEntity
                elif nodeType == 16:
                    if self.__lex_handler is not None:
                        self.__lex_handler.endEntity(_d(reader.Name()))
                # ProcessingInstruction
                elif nodeType == 7:
                    self._cont_handler.processingInstruction(
                        _d(reader.Name()), _d(reader.Value()))
                # Comment
                elif nodeType == 8:
                    if self.__lex_handler is not None:
                        self.__lex_handler.comment(_d(reader.Value()))
                # DocumentType
                elif nodeType == 10:
                    #if not self.__lex_handler is None:
                    #    self.__lex_handler.startDTD()
                    pass  # TODO (how to detect endDTD? on first non-dtd event?)
                # XmlDeclaration
                elif nodeType == 17:
                    pass  # TODO
                # Entity
                elif nodeType == 6:
                    pass  # TODO (entity decl)
                # Notation (decl)
                elif nodeType == 12:
                    pass  # TODO
                # Attribute (never in this loop)
                #elif nodeType == 2:
                #    pass
                # Document (not exposed)
                #elif nodeType == 9:
                #    pass
                # DocumentFragment (never returned by XmlReader)
                #elif nodeType == 11:
                #    pass
                # None
                #elif nodeType == 0:
                #    pass
                # -
                else:
                    raise SAXException("Unexpected node type %d" % nodeType)
            # endDocument is only fired when the input was read to its end
            if r == 0:
                self._cont_handler.endDocument()
        finally:
            self.__parsing = 0
            # close the reader on every exit path, including the one where
            # a handler raised; it is still None if creating it failed
            if reader is not None:
                reader.Close()

    def setDTDHandler(self, handler):
        """Always raises SAXNotSupportedException."""
        # TODO (when supported, the inherited method works just fine)
        raise SAXNotSupportedException("DTDHandler not supported")

    def setEntityResolver(self, resolver):
        """Always raises SAXNotSupportedException."""
        # TODO (when supported, the inherited method works just fine)
        raise SAXNotSupportedException("EntityResolver not supported")

    def getFeature(self, name):
        """Return the current state of the feature *name*.

        Raises SAXNotRecognizedException for a feature this driver does
        not know.
        """
        if name == feature_namespaces:
            return self.__ns
        elif name == feature_namespace_prefixes:
            return self.__nspfx
        elif name == feature_validation:
            return self.__validate
        elif name == feature_external_ges:
            return 1  # TODO (does that relate to PARSER_LOADDTD)?
        elif name == feature_external_pes:
            return self.__extparams
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def setFeature(self, name, state):
        """Set the feature *name* to *state*.

        Raises SAXNotSupportedException while a parse is running or when
        feature_external_ges is switched off, and
        SAXNotRecognizedException for a feature this driver does not know.
        """
        if self.__parsing:
            raise SAXNotSupportedException("Cannot set feature %s "
                                           "while parsing" % name)
        if name == feature_namespaces:
            self.__ns = state
        elif name == feature_namespace_prefixes:
            self.__nspfx = state
        elif name == feature_validation:
            self.__validate = state
        elif name == feature_external_ges:
            if state == 0:
                # TODO (does that relate to PARSER_LOADDTD)?
                raise SAXNotSupportedException("Feature '%s' not supported" %
                                               name)
        elif name == feature_external_pes:
            self.__extparams = state
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name) \
                if False else \
                SAXNotRecognizedException("Feature '%s' not recognized" %
                                          name)

    def getProperty(self, name):
        """Return the handler stored for the property *name*.

        Raises SAXNotRecognizedException for a property this driver does
        not know.
        """
        if name == property_lexical_handler:
            return self.__lex_handler
        elif name == property_declaration_handler:
            return self.__decl_handler
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    def setProperty(self, name, value):
        """Install *value* as the lexical handler.

        Raises SAXNotSupportedException for the declaration handler
        property and SAXNotRecognizedException for any other property.
        """
        if name == property_lexical_handler:
            self.__lex_handler = value
        elif name == property_declaration_handler:
            # TODO: if/when libxml2 supports dtd events, store the value
            # in self.__decl_handler instead of raising
            raise SAXNotSupportedException("Property '%s' not supported" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)


def create_parser():
    """Entry point looked up by xml.sax.make_parser(): return a new reader."""
    return LibXml2Reader()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -