⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlutils.py

📁 Python Development Environment (Python IDE plugin for Eclipse). Features editor, code completion, re
💻 PY
📖 第 1 页 / 共 2 页
字号:
                msg="%s'%s', " % (msg,wrap)
            self.report_error(3004,(msg[:-2],wraps[-1][0]))

        data=self.get_match(regexp)
        if not self.now_at(wrap):
            self.report_error(3005,wrap)

        return data

    #--- ERROR HANDLING

    def report_error(self,number,args=None):
        """Looks up the message for error 'number' in self.errors, fills in
        'args' (if given) with the % operator and passes the text to
        self.err. Numbers below 2000 go to err.warning, numbers below 3000
        to err.error and everything else to err.fatal."""
        try:
            text=self.errors[number]
            if args is not None:
                text=text % args
        except KeyError:
            # No such message; fall back to the 'unknown error' entry
            text=self.errors[4002]

        # Pick the reporting method from the number range
        if number>=3000:
            report=self.err.fatal
        elif number>=2000:
            report=self.err.error
        else:
            report=self.err.warning

        report(text)
    
    #--- USEFUL METHODS

    def get_current_sysid(self):
        "Gives the system identifier of the file currently being read."
        sysid=self.current_sysID
        return sysid

    def set_sysid(self,sysID):
        """Replaces the current system identifier with sysID. The previous
        value is overwritten, not saved."""
        setattr(self,"current_sysID",sysID)

    def get_offset(self):
        """Gives the current offset from the start of the stream: the offset
        of the current block plus the position inside it."""
        block_start=self.block_offset
        return block_start+self.pos
    
    def get_line(self):
        "Gives the current line number, after refreshing the position info."
        self.update_pos() # self.line is only read after this call
        current=self.line
        return current

    def get_column(self):
        """Gives the current column position: the distance between the
        current position and self.last_break, after refreshing the position
        info."""
        self.update_pos() # last_break is only read after this call
        here=self.pos
        return here-self.last_break

    def is_root_entity(self):
        """Tells whether the current entity is the root entity, i.e. whether
        the entity stack is empty."""
        stack=self.ent_stack
        return stack==[]

    def is_external(self):
        """Tells whether the current entity is an external entity. The root
        (or document) entity never counts as external; any other entity
        counts when its sysid differs from the sysid stored in the bottom
        entry of the entity stack."""
        return not (self.ent_stack==[] or \
                    self.ent_stack[0][0]==self.get_current_sysid())

    # --- Internal methods

    def _push_ent_stack(self,name="None"):
        """Saves the current reading state (sysid, data buffer, position and
        line bookkeeping, block offset, final flag) together with 'name' as
        one entry on top of the entity stack. Note that the default for
        'name' is the string "None"."""
        saved_state=(self.get_current_sysid(),
                     self.data,
                     self.pos,
                     self.line,
                     self.last_break,
                     self.datasize,
                     self.last_upd_pos,
                     self.block_offset,
                     self.final,
                     name)
        self.ent_stack.append(saved_state)

    def _pop_ent_stack(self):
        """Restores the reading state from the top entry of the entity stack
        and then removes that entry. The name stored in the entry is
        discarded."""
        top=self.ent_stack[-1]
        (self.current_sysID,
         self.data,
         self.pos,
         self.line,
         self.last_break,
         self.datasize,
         self.last_upd_pos,
         self.block_offset,
         self.final,
         unused_name)=top
        # Only drop the entry once the state has been restored
        self.ent_stack.pop()

# ==============================
# Common code for some parsers
# ==============================

class XMLCommonParser(EntityParser):
    """Parsing code common to some parsers: external identifiers, the
    XML/text declaration, processing instructions, comments, character
    references and names."""

    def parse_external_id(self,required=0,sysidreq=1):
        """Parses an external ID declaration and returns the tuple
        (pubid,sysid); a part that is not present is None. If 'required'
        is false neither a SYSTEM nor a PUBLIC identifier has to be there.
        If 'sysidreq' is false the SYSTEM identifier following a PUBLIC
        one is optional."""

        sysid_forms=[("\"",reg_sysid_quote),("'",reg_sysid_apo)]

        if self.now_at("SYSTEM"):
            self.skip_ws(1)
            return (None,self.get_wrapped_match(sysid_forms))

        if not self.now_at("PUBLIC"):
            # No external ID at all
            if required:
                self.report_error(3006)
            return (None,None)

        self.skip_ws(1)
        raw_pubid=self.get_wrapped_match([("\"",reg_pubid_quote),\
                                          ("'",reg_pubid_apo)])
        # Collapse every run of whitespace in the public id to one space
        words=string.split(raw_pubid)
        pub_id=string.join(words)

        sys_id=None
        if sysidreq:
            self.skip_ws(1)
            sys_id=self.get_wrapped_match(sysid_forms)
        else:
            # A quote directly after the public id: error 3002 is reported
            # before the whitespace is skipped
            if self.test_str("'") or self.test_str('"'):
                self.report_error(3002)
            self.skip_ws()
            if self.test_str("'") or self.test_str('"'):
                sys_id=self.get_wrapped_match(sysid_forms)

        return (pub_id,sys_id)

    def __get_quoted_string(self):
        """Returns the contents of the quoted string starting at the current
        position. If no quote character is found there, error 3004 is
        reported, input is scanned to '>' and "" is returned."""
        try:
            delim=self.data[self.pos]
        except IndexError:
            raise OutOfDataException()

        if self.now_at('"') or self.now_at("'"):
            # The string ends at the same character it started with
            return self.scan_to(delim)

        self.report_error(3004,("'\"'","'"))
        self.scan_to(">")
        return ""

    def __get_decl_value(self):
        """Reads the '=' and the quoted value that follow a keyword inside
        the declaration (whitespace around '=' is skipped) and returns the
        value without its quotes."""
        self.skip_ws()
        if not self.now_at("="):
            self.report_error(3005,"=")
        self.skip_ws()
        return self.__get_quoted_string()

    def parse_xml_decl(self,handler=None):
        """Parses the contents of the XML declaration from after the
        '<?xml'. Inside an external entity the construct is treated as a
        text declaration instead. If a handler is given, the version,
        encoding and standalone values found (None when absent) are passed
        to handler.set_entity_info."""

        # If this is an external entity, then this is a text declaration,
        # not an XML declaration
        is_textdecl=self.is_external()

        self.update_pos()
        misplaced=self.get_column()!=5 or self.get_line()!=1 or \
                   (self.ent_stack!=[] and not is_textdecl)
        if misplaced:
            if is_textdecl:
                self.report_error(3007)
            else:
                self.report_error(3008)

        if self.seen_xmldecl: # Set in parse_pi, to avoid block problems
            if is_textdecl:
                self.report_error(3009)
            else:
                self.report_error(3010)

        version=None
        encoding=None
        standalone=None
        self.skip_ws()

        # --- version
        if self.now_at("version"):
            version=self.__get_decl_value()

            whole=reg_ver.match(version)
            if whole is None or whole.end()!=len(version):
                self.report_error(3901,version)
            elif version!="1.0":
                self.report_error(3046)

            # The next keyword directly after the value, without whitespace
            if self.test_str("encoding") or self.test_str("standalone"):
                self.report_error(3002)
            self.skip_ws()
        elif not is_textdecl:
            self.report_error(3011)

        # --- encoding
        if self.now_at("encoding"):
            encoding=self.__get_decl_value()
            if reg_enc_name.match(encoding) is None:
                self.report_error(3902)

            # Setting up correct conversion
            convdb=charconv.convdb
            if convdb.can_convert(encoding,self.data_charset):
                self.charset_converter=\
                    convdb.get_converter(encoding,self.data_charset)
            else:
                self.report_error(1002,encoding)

            self.skip_ws()

        # --- standalone
        if self.now_at("standalone"):
            if is_textdecl:
                self.report_error(3012)
                standalone="yes"
            else:
                standalone=self.__get_decl_value()
                if reg_std_alone.match(standalone) is None:
                    self.report_error(3911)

                self.standalone=(standalone=="yes")

                self.skip_ws()

        self.skip_ws()

        if handler is not None:
            handler.set_entity_info(version,encoding,standalone)

    def parse_pi(self,handler,report_xml_decl=0):
        """Parses a processing instruction from after the '<?' to beyond
        the '?>'. A target of 'xml' is handed to parse_xml_decl (with the
        handler only if report_xml_decl is true); any other target is
        reported through handler.handle_pi."""
        target=self._get_name()

        if target=="xml":
            if not report_xml_decl:
                self.parse_xml_decl()
            else:
                self.parse_xml_decl(handler)

            if not self.now_at("?>"):
                self.report_error(3005,"?>")
            self.seen_xmldecl=1
            return

        remainder=""
        if not self.now_at("?>"):
            self.skip_ws(1)
            remainder=self.scan_to("?>") # OutOfDataException if not found

        # Targets matched by reg_res_pi are reported, except xml-stylesheet
        if reg_res_pi.match(target) is not None:
            if target=="xml:namespace":
                self.report_error(1003)
            elif target!="xml-stylesheet":
                self.report_error(3045)

        handler.handle_pi(target,remainder)

    def parse_comment(self,handler):
        "Parses the comment from after '<!--' to beyond '-->'."
        end=self.get_index("--")
        text=self.data[self.pos : end]
        handler.handle_comment(text)
        # Continue at the '--' so that the closing '-->' can be checked
        self.pos=end
        if not self.now_at("-->"):
            self.report_error(3005,"-->")

    def _read_char_ref(self):
        """Parses a character reference and returns the character. Only the
        codes 9, 10, 13 and 32-255 are turned into a character; for any
        other code an error is reported and "" is returned."""
        if self.now_at("x"):
            code=unhex(self.get_match(reg_hex_digits))
        else:
            code=int(self.get_match(reg_digits))

        if code in (9,10,13) or 32<=code<=255:
            return chr(code)

        if code>255:
            self.report_error(1005,code)
        else:
            self.report_error(3018,code)
        return ""

    def _get_name(self):
        """Parses the name at the current position and returns it (interned).
        An error is reported and "" returned if no name is present. Raises
        OutOfDataException if the name may continue beyond the data read
        so far."""
        if self.pos>self.datasize-5 and not self.final:
            raise OutOfDataException()

        data=self.data
        start=self.pos
        if data[start] not in namestart:
            self.report_error(3900)
            return ""

        end=start+1
        try:
            while data[end] in namechars:
                end=end+1
        except IndexError:
            # Ran off the end of the buffer while still inside the name
            self.pos=end
            if not self.final:
                raise OutOfDataException()
            return intern(data[start:])

        self.pos=end
        return intern(data[start:end])
    
# --- A collection of useful functions

# Utility functions

def unhex(hex_value):
    """Converts a string of hexadecimal digits (either case) to an integer.
    A character that is not a hex digit counts as the digit 0; an empty
    string gives 0."""

    total=0
    for digit in hex_value:
        code=ord(digit)

        if 48<=code<=57:     # '0'-'9'
            value=code-48
        elif 97<=code<=102:  # 'a'-'f'
            value=code-87
        elif 65<=code<=70:   # 'A'-'F'
            value=code-55
        else:
            # ERROR, but it can't occur here
            value=0

        total=total*16+value

    return total

def matches(regexp,str):
    """Tells whether the compiled regexp matches the whole of the string,
    not just a prefix of it."""
    found=regexp.match(str)
    # match() is anchored at the start, so the end offset is the match length
    return found is not None and found.end()==len(str)

def join_sysids_general(base,url):
    """Combines the system identifier 'url' with the system identifier
    'base' it is relative to. If 'base' has a URL scheme the two are joined
    as URLs. Otherwise 'url' is returned unchanged if it has a scheme, and
    joined as a file name onto the directory of 'base' if it has none."""
    if urlparse.urlparse(base)[0]!="":
        return urlparse.urljoin(base,url)

    if urlparse.urlparse(url)[0]!="":
        return url

    # Both are plain file names
    return os.path.join(os.path.dirname(base),url)

def join_sysids_win32(base,url):
    """Combines the system identifier 'url' with the system identifier
    'base' it is relative to. A scheme of fewer than two characters is not
    treated as a URL scheme, so that drive identifiers are handled
    correctly."""
    if len(urlparse.urlparse(base)[0])>=2:
        return urlparse.urljoin(base,url)

    if len(urlparse.urlparse(url)[0])>=2:
        return url

    # Both are plain file names (possibly with a drive identifier)
    return os.path.join(os.path.dirname(base),url)

# join_sysids(base,url) is bound here to the variant that fits the platform:
# the general one by default, the drive-letter aware one on win32

join_sysids=join_sysids_general
if sys.platform=="win32":
    join_sysids=join_sysids_win32
    
# --- Some useful regexps

namestart="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_:"+\
          "懒旅呐魄壬仕掏蜗醒矣哉重仝圮蒉哙徕沅彐玷殛腱眍镳耱篝貊

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -