⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dtdparser.py

📁 Python Development Environment (Python IDE plugin for Eclipse). Features editor, code completion, re
💻 PY
📖 第 1 页 / 共 2 页
字号:
"""
This module contains a DTD parser that reports DTD parse events to a listener.
Used by xmlproc to parse DTDs, but can be used for other purposes as well.

$Id$
"""

from types import StringType
import string

# Module-level alias for string.find, used by the scanning methods of
# DTDParser (scan_to, get_index) instead of looking the function up on the
# string module at every call.
string_find = string.find # optimization

from xmlutils import *
from xmldtd   import *

# ==============================
# A DTD parser
# ==============================
	    
class DTDParser(XMLCommonParser):
    "A parser for XML DTDs, both internal and external."

    # --- LOW-LEVEL SCANNING METHODS
    # Redefined here with extra checking for parameter entity processing

    def find_reg(self,regexp,required=1):
        """Moves to the next match of regexp and returns the text skipped.

        The search starts at self.pos. On a hit, self.pos is set to the
        start of the match and the data between the old and the new
        position is returned. On a miss one of three things happens:

          - if this is the final buffer and the match is not required,
            self.pos is moved to the end of the data and the rest of the
            data is returned,
          - otherwise, if a parameter entity reference is being read
            (self.in_peref), the entity is popped, whitespace is skipped
            and the search is retried from the new position,
          - otherwise OutOfDataException is raised.
        """
        start=self.pos
        match=regexp.search(self.data,start)

        if match is not None:
            self.pos=match.start(0)
            return self.data[start:self.pos]

        if self.final and not required:
            self.pos=len(self.data)   # Nothing found: consume the rest
            return self.data[start:]

        if not self.in_peref:
            raise OutOfDataException()

        # Ran out of data inside a parameter entity: leave it and retry.
        # Note that the text skipped before the pop is not returned.
        self.pop_entity()
        self.in_peref=0
        self._skip_ws()
        return self.find_reg(regexp,required)
    
    def scan_to(self,target):
        """Returns the data up to the next occurrence of target.

        self.pos is left just after target. If target does not occur in
        the remaining data and a parameter entity reference is being read,
        the entity is popped, whitespace is skipped and the scan is
        retried; otherwise OutOfDataException is raised.
        """
        hit=string_find(self.data,target,self.pos)

        if hit!=-1:
            skipped=self.data[self.pos:hit]
            self.pos=hit+len(target)
            return skipped

        if not self.in_peref:
            raise OutOfDataException()

        # Ran out of data inside a parameter entity: leave it and retry
        self.pop_entity()
        self.in_peref=0
        self._skip_ws()
        return self.scan_to(target)

    def get_index(self,target):
        """Returns the index in self.data of the next occurrence of target.

        The search starts at self.pos, which this method does not move
        itself. If target is not found and a parameter entity reference is
        being read, the entity is popped, whitespace is skipped and the
        search is retried; otherwise OutOfDataException is raised.
        """
        hit=string_find(self.data,target,self.pos)

        if hit!=-1:
            return hit

        if not self.in_peref:
            raise OutOfDataException()

        # Ran out of data inside a parameter entity: leave it and retry
        self.pop_entity()
        self.in_peref=0
        self._skip_ws()
        return self.get_index(target)
    
    def test_str(self,str):
        """Tells whether the data at the current position starts with str.

        Nothing is consumed. If fewer than len(str) characters remain and
        this is not the final buffer, the comparison cannot be made: inside
        a parameter entity reference the entity is popped, whitespace is
        skipped and the test is retried, otherwise OutOfDataException is
        raised.
        """
        wanted=len(str)

        if self.final or self.datasize-self.pos>=wanted:
            return self.data[self.pos:self.pos+wanted]==str

        if not self.in_peref:
            raise OutOfDataException()

        # Ran out of data inside a parameter entity: leave it and retry
        self.pop_entity()
        self.in_peref=0
        self._skip_ws()
        return self.test_str(str)
    
    def now_at(self,test_str):
        """Consumes test_str if the data at the current position starts
        with it.

        Returns 1 and moves self.pos past test_str on a match, returns 0
        and leaves self.pos alone otherwise. If fewer than len(test_str)
        characters remain and this is not the final buffer, then inside a
        parameter entity reference the entity is popped, whitespace is
        skipped and the test is retried; otherwise OutOfDataException is
        raised.
        """
        wanted=len(test_str)

        if self.datasize-self.pos<wanted and not self.final:
            if not self.in_peref:
                raise OutOfDataException()

            # Ran out of data inside a parameter entity: leave it and retry
            self.pop_entity()
            self.in_peref=0
            self._skip_ws()
            return self.now_at(test_str)

        if self.data[self.pos:self.pos+wanted]!=test_str:
            return 0

        self.pos=self.pos+wanted
        return 1

    def _skip_ws(self,necessary=0):
        """Skips whitespace without looking for parameter entity references.

        If necessary is true and no whitespace was skipped, error 3002 is
        reported, unless the next character is '%'. Running off the end of
        the data is detected through the IndexError raised by the indexing:
        inside a parameter entity reference the entity is popped and
        skipping continues (that retry does not enforce necessary);
        otherwise, when required whitespace is missing, error 3002 is
        reported for the final buffer and OutOfDataException is raised for
        a non-final one.
        """
        origin=self.pos

        try:
            while self.data[self.pos] in whitespace:
                self.pos=self.pos+1

            if necessary and self.pos==origin and self.data[self.pos]!="%":
                self.report_error(3002)
        except IndexError:
            # Hit the end of the current data
            if self.in_peref:
                self.pop_entity()
                self.in_peref=0
                self._skip_ws()
            elif necessary and self.pos==origin:
                if not self.final:
                    raise OutOfDataException()
                self.report_error(3002)
        
    def skip_ws(self,necessary=0):
        """Skips whitespace and, outside the internal subset, follows
        parameter entity references found after it.

        Whitespace is skipped with _skip_ws(necessary). If self.internal is
        false and a '%' follows, the name after it is read and looked up
        with self.dtd.resolve_pe:

          - a missing ';' after the name is reported as error 3005,
          - a name that resolve_pe rejects with KeyError is reported as
            error 3038 and the method returns,
          - an internal entity is pushed as the current data and
            self.in_peref is set,
          - any other entity is reported as error 4003 and not expanded.

        Afterwards the method calls itself again to skip whatever follows.
        """
        self._skip_ws(necessary)
        if self.internal:
            return

        try:
            at_peref=self.now_at("%")
        except OutOfDataException:
            return
        if not at_peref:
            return

        pe_name=self._get_name()

        if not self.now_at(";"):
            self.report_error(3005,";")

        try:
            entity=self.dtd.resolve_pe(pe_name)
        except KeyError:
            self.report_error(3038,pe_name)
            return

        if not entity.is_internal():
            self.report_error(4003)
        else:
            self.in_peref=1
            self.push_entity(self.get_current_sysid(),entity.value)
            self.final=1  # Reset by pop_ent, needed for buffer handling

        # At this point we need to try again, since the entity we just
        # tried may have contained only whitespace (or nothing at all).
        # Using self._skip_ws() makes us fail when an empty PE is followed
        # by a non-empty one. (DocBook has examples of this.)
        self.skip_ws()
        
    def test_reg(self,regexp):
        """Tells whether regexp matches at the current position.

        Nothing is consumed. When fewer than five characters remain and
        this is not the final buffer no attempt is made: inside a parameter
        entity reference the entity is popped, whitespace is skipped and
        the test is retried, otherwise OutOfDataException is raised.
        """
        if self.final or self.pos<=self.datasize-5:
            return regexp.match(self.data,self.pos)!=None

        if not self.in_peref:
            raise OutOfDataException()

        # Ran out of data inside a parameter entity: leave it and retry
        self.pop_entity()
        self.in_peref=0
        self._skip_ws()
        return self.test_reg(regexp)
	    
    def get_match(self,regexp):
        """Consumes and returns the text regexp matches at the current
        position.

        If regexp does not match, the error code registered for its pattern
        in reg2code is reported and "" is returned. When fewer than five
        characters remain in a non-final buffer, then inside a parameter
        entity reference the entity is popped, whitespace is skipped and
        the match is retried; otherwise OutOfDataException is raised.

        A match that reaches the very end of the data is treated specially:
        inside a parameter entity reference the entity is popped and the
        matched text is returned, otherwise OutOfDataException is raised.
        """
        if self.pos>self.datasize-5 and not self.final:
            if not self.in_peref:
                raise OutOfDataException()

            # Ran out of data inside a parameter entity: leave it and retry
            self.pop_entity()
            self.in_peref=0
            self._skip_ws()
            return self.get_match(regexp)

        match=regexp.match(self.data,self.pos)
        if match is None:
            self.report_error(reg2code[regexp.pattern])
            return ""

        stop=match.end(0) # Speeds us up slightly
        if stop!=self.datasize:
            self.pos=stop
            return match.group(0)

        # The match runs right up to the end of the data
        if not self.in_peref:
            raise OutOfDataException()

        # Whitespace is deliberately not skipped after this pop
        self.pop_entity()
        self.in_peref=0
        return match.group(0)

    # --- DTD Parser proper
    
    def __init__(self):
        """Creates a parser for an external DTD that reports to its own
        DTDConsumerPE object.

        Use set_dtd_consumer, set_dtd_object and set_internal to change
        where events go, where parameter entity information is kept and
        whether the DTD is treated as internal.
        """
        EntityParser.__init__(self)
        self.internal=0
        self.seen_xmldecl=0
        self.dtd=DTDConsumerPE()            # Keeps track of PE info
        self.dtd_consumer=self.dtd          # Where all events go
        self.in_peref=0
        self.ignores_entered=0
        self.includes_entered=0
        self.own_ent_stack=[]               # Keeps includes_entered
        # parseStart() reads this flag, but only reset() defined it. Set it
        # here as well so a freshly built parser does not depend on reset()
        # having been run first.
        self.dtd_start_called=0

    def reset(self):
        """Returns the parser to its initial state so it can be reused.

        Resets the EntityParser state, resets the DTD object if one has
        been set, and clears all flags and counters kept by this class.
        """
        EntityParser.reset(self)

        # self.dtd may not exist yet when this is called
        if hasattr(self,"dtd"):
            self.dtd.reset()

        self.internal=0
        self.seen_xmldecl=0
        self.in_peref=0
        self.ignores_entered=0
        self.includes_entered=0
        self.own_ent_stack=[]      # Keeps includes_entered

        # Set to 1 if parsing external subset from xmlproc.py (which has
        # called dtd_start...)
        self.dtd_start_called = 0
        
    def parseStart(self):
        """Sends dtd_start to the DTD consumer, unless dtd_start_called
        says that this has already been done."""
        if self.dtd_start_called:
            return

        self.dtd_consumer.dtd_start()

    def parseEnd(self):
        "Sends dtd_end to the DTD consumer."
        consumer=self.dtd_consumer
        consumer.dtd_end()
        
    def set_dtd_consumer(self,dtd):
        """Sets the object that receives the DTD events reported by the
        parser."""
        self.dtd_consumer=dtd

    def set_dtd_object(self,dtd):
        """Sets the object that parameter entity information is mirrored
        to (in addition to what goes to the DTD consumer) and that
        parameter entity references are resolved against."""
        self.dtd=dtd
        
    def set_internal(self,yesno):
        """Sets whether the DTD being parsed is internal (true) or external
        (false). skip_ws only follows parameter entity references when it
        is external."""
        self.internal=yesno

    def deref(self):
        """Removes circular references by dropping the parser's references
        to its collaborating objects. The parser is not usable for parsing
        afterwards unless they are set again."""
        self.ent=None
        self.dtd_consumer=None
        self.dtd=None
        self.app=None
        self.err=None
        
    def do_parse(self):
        """Does the actual parsing.

        Parses declarations from the current position for as long as there
        is data. If an ignored conditional section was open when the
        previous buffer ended, the rest of it is dealt with first.

        Running out of data in a non-final buffer is not an error: self.pos
        is moved back to the start of the declaration that could not be
        completed, so that it is parsed again when more data has arrived.
        In the final buffer OutOfDataException is raised instead (an
        IndexError is turned into one). Text that starts no known
        declaration is reported as error 3013 and skipped up to the next
        '>'. An included section still open at the end of the final buffer
        is reported as error 3043.
        """

        try:
            prepos=self.pos

            if self.ignores_entered>0:
                self.parse_ignored_data()

            self._skip_ws()
            while self.pos<self.datasize:
                if self.now_at("<!ELEMENT"):
                    self.parse_elem_type()
                elif self.now_at("<!ENTITY"):
                    self.parse_entity()
                elif self.now_at("<!ATTLIST"):
                    self.parse_attlist()
                elif self.now_at("<!NOTATION"):
                    self.parse_notation()
                elif self.test_reg(reg_pe_ref):
                    self.parse_pe_ref()
                elif self.now_at("<?"):
                    self.parse_pi(self.dtd_consumer)
                elif self.now_at("<!--"):
                    self.parse_comment(self.dtd_consumer)
                elif self.now_at("<!["):
                    self.parse_conditional()
                elif self.includes_entered>0 and self.now_at("]]>"):
                    # The counter is tested first so that a stray "]]>" is
                    # not consumed here; otherwise the scan_to(">") below
                    # would go on to swallow the following declaration.
                    self.includes_entered=self.includes_entered-1
                else:
                    self.report_error(3013)
                    self.scan_to(">")

                # Restart point if the next declaration cannot be completed
                prepos=self.pos
                self._skip_ws()

            if self.final and self.includes_entered>0:
                self.report_error(3043)

        except OutOfDataException:
            if self.final:
                raise  # Bare raise keeps the original traceback
            else:
                self.pos=prepos
        except IndexError:
            if self.final:
                raise OutOfDataException()
            else:
                self.pos=prepos

    def parse_entity(self):
        """Parses an entity declaration, starting just after '<!ENTITY'.

        Handles general and parameter entities, both internal (with a
        literal replacement text) and external (with an external id and,
        for general entities, an optional NDATA notation name). The result
        is reported to the DTD consumer; parameter entities are also
        reported to self.dtd when that is a different object, so that later
        references to them can be resolved.
        """

        EntityParser.skip_ws(self,1) # No PE refs allowed here
        if self.now_at("%"):
            pedecl=1
            EntityParser.skip_ws(self,1) # No PE refs allowed here
        else:
            pedecl=0

        ent_name=self._get_name()
        self.skip_ws(1)

        (pub_id,sys_id)=self.parse_external_id(0)

        if sys_id is None:
            internal=1
            ent_val=self.parse_ent_repltext()
        else:
            internal=0

        # NDATA directly after the entity value means the whitespace in
        # front of it is missing. Only peek at the keyword here: consuming
        # it would hide it from the NDATA branch below, which would then
        # report a spurious 3005 and lose the notation name.
        if self.test_str("NDATA"):
            self.report_error(3002)
        else:
            self.skip_ws()

        if not internal and self.now_at("NDATA"):
            # Parsing the optional NDataDecl
            if pedecl:
                self.report_error(3035)
            self.skip_ws()

            ndata=self._get_name()
            self.skip_ws()
        else:
            ndata=""

        if not self.now_at(">"):
            self.report_error(3005,">")

        if pedecl:
            # These are echoed to self.dtd so we remember this stuff
            if internal:
                self.dtd_consumer.new_parameter_entity(ent_name,ent_val)
                if self.dtd!=self.dtd_consumer:
                    self.dtd.new_parameter_entity(ent_name,ent_val)
            else:
                self.dtd_consumer.new_external_pe(ent_name,pub_id,sys_id)
                if self.dtd!=self.dtd_consumer:
                    self.dtd.new_external_pe(ent_name,pub_id,sys_id)
        else:
            if internal:
                self.dtd_consumer.new_general_entity(ent_name,ent_val)
            else:
                self.dtd_consumer.new_external_entity(ent_name,pub_id,sys_id,ndata)

    def parse_ent_repltext(self):
        """Parses a quoted entity replacement text.

        The text must be delimited by double or single quotes. What lies
        between them is handed to parse_ent_litval and the result of that
        is returned. If no quote is found, error 3004 is reported, the data
        up to and including the next '>' is skipped and None is returned.
        """

        # The double quote is tried first, then the single quote
        for delim in ('"',"'"):
            if self.now_at(delim):
                return self.parse_ent_litval(self.scan_to(delim))

        self.report_error(3004,("'","\""))
        self.scan_to(">")
        return None

    def parse_ent_litval(self,litval):
        pos=0
        val=""
        
        while 1:
            res=reg_litval_stop.search(litval,pos)
            
            if res==None:
                break

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -