📄 dtdparser.py
字号:
"""
This module contains a DTD parser that reports DTD parse events to a listener.
Used by xmlproc to parse DTDs, but can be used for other purposes as well.
$Id$
"""
from types import StringType
import string
string_find = string.find # optimization
from xmlutils import *
from xmldtd import *
# ==============================
# A DTD parser
# ==============================
class DTDParser(XMLCommonParser):
"A parser for XML DTDs, both internal and external."
# --- LOW-LEVEL SCANNING METHODS
# Redefined here with extra checking for parameter entity processing
def find_reg(self,regexp,required=1):
    """Scan forward to the next match of regexp and return the skipped text.

    On a match, self.pos is moved to the start of the match and the text
    between the previous position and the match is returned.

    When nothing matches, the outcomes are tried in this order:
      * the buffer is final and the match is not required: self.pos is
        moved to the end of the data and the remaining text is returned;
      * a parameter entity reference is being read (self.in_peref):
        pop_entity() is called, whitespace is skipped and the search is
        retried on the data that is current after the pop;
      * otherwise OutOfDataException is raised.
    """
    start = self.pos
    match = regexp.search(self.data, start)

    if match is not None:
        self.pos = match.start(0)
        return self.data[start:self.pos]

    if self.final and not required:
        self.pos = len(self.data)  # Just moved to the end
        return self.data[start:]

    if not self.in_peref:
        raise OutOfDataException()

    # Ran off the end of a parameter entity: leave it and try again.
    self.pop_entity()
    self.in_peref = 0
    self._skip_ws()
    return self.find_reg(regexp, required)
def scan_to(self,target):
    """Return the text before the next occurrence of target and step past it.

    self.pos ends up just after target.  If target is not found while a
    parameter entity reference is being read (self.in_peref), pop_entity()
    is called, whitespace is skipped and the scan is retried; otherwise
    OutOfDataException is raised.
    """
    hit = self.data.find(target, self.pos)

    if hit >= 0:
        text = self.data[self.pos:hit]
        self.pos = hit + len(target)
        return text

    if not self.in_peref:
        raise OutOfDataException()

    # Ran off the end of a parameter entity: leave it and try again.
    self.pop_entity()
    self.in_peref = 0
    self._skip_ws()
    return self.scan_to(target)
def get_index(self,target):
    """Return the index of the next occurrence of target at or after
    self.pos, leaving self.pos untouched.

    If target is not found while a parameter entity reference is being
    read (self.in_peref), pop_entity() is called, whitespace is skipped
    and the lookup is retried; otherwise OutOfDataException is raised.
    """
    hit = self.data.find(target, self.pos)
    if hit >= 0:
        return hit

    if not self.in_peref:
        raise OutOfDataException()

    # Ran off the end of a parameter entity: leave it and try again.
    self.pop_entity()
    self.in_peref = 0
    self._skip_ws()
    return self.get_index(target)
def test_str(self,str):
if self.datasize-self.pos<len(str) and not self.final:
if self.in_peref:
self.pop_entity()
self.in_peref=0
self._skip_ws()
return self.test_str(str)
raise OutOfDataException()
return self.data[self.pos:self.pos+len(str)]==str
def now_at(self,test_str):
    """Consume test_str if the data at the current position starts with it.

    Returns 1 and advances self.pos past test_str on success; returns 0
    and leaves self.pos alone otherwise.

    When fewer than len(test_str) characters remain and the buffer is not
    final: inside a parameter entity reference (self.in_peref)
    pop_entity() is called, whitespace is skipped and the check is
    retried; otherwise OutOfDataException is raised.
    """
    width = len(test_str)

    if not self.final and self.datasize - self.pos < width:
        if not self.in_peref:
            raise OutOfDataException()

        self.pop_entity()
        self.in_peref = 0
        self._skip_ws()
        return self.now_at(test_str)

    stop = self.pos + width
    if self.data[self.pos:stop] != test_str:
        return 0

    self.pos = stop
    return 1
def _skip_ws(self,necessary=0):
start=self.pos
try:
while self.data[self.pos] in whitespace:
self.pos=self.pos+1
if necessary and self.pos==start and self.data[self.pos]!="%":
self.report_error(3002)
except IndexError:
if self.in_peref:
self.pop_entity()
self.in_peref=0
self._skip_ws()
return
if necessary and start==self.pos:
if self.final:
self.report_error(3002)
else:
raise OutOfDataException()
def skip_ws(self,necessary=0):
    """Skip whitespace and, outside the internal subset, expand a
    parameter entity reference that follows it.

    necessary is passed on to _skip_ws() for the first skip only.

    When self.internal is false and the next character is "%", the entity
    name is read and looked up with self.dtd.resolve_pe():
      * unknown name (KeyError): error 3038 is reported and the method
        returns;
      * internal entity: its value is pushed with push_entity() and
        self.in_peref is set;
      * any other entity: error 4003 is reported.
    After that the whole procedure is repeated, so several consecutive
    references are handled.
    """
    self._skip_ws(necessary)
    if self.internal:
        return

    try:
        if not self.now_at("%"):
            return
    except OutOfDataException:
        # Too little data to decide; treat it as "no reference here".
        return

    name = self._get_name()

    if not self.now_at(";"):
        self.report_error(3005, ";")

    try:
        ent = self.dtd.resolve_pe(name)
    except KeyError:
        self.report_error(3038, name)
        return

    if ent.is_internal():
        self.in_peref = 1
        self.push_entity(self.get_current_sysid(), ent.value)
        self.final = 1  # Reset by pop_ent, needed for buffer handling
    else:
        self.report_error(4003)

    # At this point we need to try again, since the entity we just
    # tried may have contained only whitespace (or nothing at all).
    # Using self._skip_ws() makes us fail when an empty PE is followed
    # by a non-empty one. (DocBook has examples of this.)
    self.skip_ws()
def test_reg(self,regexp):
if self.pos>self.datasize-5 and not self.final:
if self.in_peref:
self.pop_entity()
self.in_peref=0
self._skip_ws()
return self.test_reg(regexp)
raise OutOfDataException()
return regexp.match(self.data,self.pos)!=None
def get_match(self,regexp):
    """Match regexp at the current position, consume the match and
    return the matched text.

    If regexp does not match, the error code found in reg2code under the
    pattern string is reported and "" is returned.

    Two situations are treated as running out of data:
      * fewer than five characters remain and the buffer is not final;
      * the match extends to the very end of the data.
    In the first case, inside a parameter entity reference
    (self.in_peref), pop_entity() is called, whitespace is skipped and
    the match is retried.  In the second case, inside a parameter entity
    reference, pop_entity() is called and the matched text is returned
    without updating self.pos.  Outside a parameter entity reference both
    cases raise OutOfDataException.
    """
    if not self.final and self.pos > self.datasize - 5:
        if not self.in_peref:
            raise OutOfDataException()

        self.pop_entity()
        self.in_peref = 0
        self._skip_ws()
        return self.get_match(regexp)

    found = regexp.match(self.data, self.pos)
    if found is None:
        self.report_error(reg2code[regexp.pattern])
        return ""

    stop = found.end(0)
    if stop != self.datasize:
        self.pos = stop
        return found.group(0)

    # The match reaches the end of the available data.
    if not self.in_peref:
        raise OutOfDataException()

    self.pop_entity()
    self.in_peref = 0
    return found.group(0)
# --- DTD Parser proper
def __init__(self):
    """Run EntityParser.__init__ and put the DTD parser in its initial
    state, with a new DTDConsumerPE serving both as the parameter entity
    bookkeeper (self.dtd) and as the event consumer (self.dtd_consumer)."""
    EntityParser.__init__(self)

    self.internal = self.seen_xmldecl = 0

    # self.dtd keeps track of PE info; self.dtd_consumer is where all
    # events go.  Both start out as the same object.
    self.dtd = self.dtd_consumer = DTDConsumerPE()

    self.in_peref = 0
    self.ignores_entered = self.includes_entered = 0
    self.own_ent_stack = []  # Keeps includes_entered
def reset(self):
    """Run EntityParser.reset and return the DTD parser's own flags and
    counters to their initial values.

    self.dtd is reset too, but only if the attribute exists already.
    """
    EntityParser.reset(self)
    if hasattr(self, "dtd"):
        self.dtd.reset()

    self.internal = self.seen_xmldecl = self.in_peref = 0
    self.ignores_entered = self.includes_entered = 0
    self.own_ent_stack = []  # Keeps includes_entered

    # Set to 1 if parsing external subset from xmlproc.py (which has
    # called dtd_start...)
    self.dtd_start_called = 0
def parseStart(self):
    """Send dtd_start() to the DTD consumer, unless self.dtd_start_called
    says that has been done already."""
    if self.dtd_start_called:
        return
    self.dtd_consumer.dtd_start()
def parseEnd(self):
    "Sends dtd_end() to the DTD consumer."
    consumer = self.dtd_consumer
    consumer.dtd_end()
def set_dtd_consumer(self,dtd):
    """Install dtd as the object that receives the DTD events.

    Only self.dtd_consumer is replaced; self.dtd is left as it is.
    """
    self.dtd_consumer = dtd
def set_dtd_object(self,dtd):
    """Install dtd as the object parameter entity information is mirrored
    to (in addition to what goes to the DTD consumer) and looked up from.

    Only self.dtd is replaced; self.dtd_consumer is left as it is.
    """
    self.dtd = dtd
def set_internal(self,yesno):
    """Record whether the DTD being parsed is internal (true value) or
    external (false value) by storing yesno in self.internal."""
    self.internal = yesno
def deref(self):
    """Break circular references by setting the ent, dtd_consumer, dtd,
    app and err attributes to None."""
    self.ent = None
    self.dtd_consumer = None
    self.dtd = None
    self.app = None
    self.err = None
def do_parse(self):
    """Parse as many complete DTD declarations as the buffered data holds.

    Ignored-section data is finished first if an IGNORE section is still
    open (self.ignores_entered > 0).  Then each construct at the current
    position is dispatched to its parse_* method until the data is used
    up.  Anything unrecognized is reported as error 3013 and skipped up
    to and including the next ">".  Error 3043 is reported if the buffer
    is final while an INCLUDE section is still open.

    Running out of data (OutOfDataException or IndexError) in a buffer
    that is not final rewinds self.pos to the start of the construct
    being parsed, so that it can be parsed again later.  In a final
    buffer OutOfDataException is raised.
    """
    try:
        prepos = self.pos

        if self.ignores_entered > 0:
            self.parse_ignored_data()

        self._skip_ws()
        while self.pos < self.datasize:
            if self.now_at("<!ELEMENT"):
                self.parse_elem_type()
            elif self.now_at("<!ENTITY"):
                self.parse_entity()
            elif self.now_at("<!ATTLIST"):
                self.parse_attlist()
            elif self.now_at("<!NOTATION"):
                self.parse_notation()
            elif self.test_reg(reg_pe_ref):
                self.parse_pe_ref()
            elif self.now_at("<?"):
                self.parse_pi(self.dtd_consumer)
            elif self.now_at("<!--"):
                self.parse_comment(self.dtd_consumer)
            elif self.now_at("<!["):
                self.parse_conditional()
            elif self.includes_entered > 0 and self.now_at("]]>"):
                # The counter is checked first so that a stray "]]>" is
                # not consumed here; otherwise the error recovery below
                # would skip the declaration that follows it as well.
                self.includes_entered = self.includes_entered - 1
            else:
                self.report_error(3013)
                self.scan_to(">")

            # A complete construct was handled: remember where the next
            # one starts in case we run out of data inside it.
            prepos = self.pos
            self._skip_ws()

        if self.final and self.includes_entered > 0:
            self.report_error(3043)

    except OutOfDataException:
        if self.final:
            raise  # re-raise unchanged, keeping the original traceback
        self.pos = prepos
    except IndexError:
        if self.final:
            raise OutOfDataException()
        self.pos = prepos
def parse_entity(self):
    """Parse an entity declaration and report it to the DTD consumer.

    Called after "<!ENTITY" has been consumed (see do_parse).  The method
    reads an optional "%" (parameter entity declaration), the entity
    name, and then either an external ID, optionally followed by
    "NDATA" and a name, or a literal entity value, and finally the
    closing ">".

    Reported events:
      * new_parameter_entity / new_external_pe for parameter entities;
        these are also sent to self.dtd when that is a different object
        from self.dtd_consumer;
      * new_general_entity / new_external_entity for general entities.

    Errors reported here: 3002 when "NDATA" directly follows the
    preceding token, 3035 when a parameter entity has an NDATA part and
    3005 when the closing ">" is missing.
    """
    EntityParser.skip_ws(self, 1)  # No PE refs allowed here
    if self.now_at("%"):
        pedecl = 1
        EntityParser.skip_ws(self, 1)  # No PE refs allowed here
    else:
        pedecl = 0

    ent_name = self._get_name()
    self.skip_ws(1)

    (pub_id, sys_id) = self.parse_external_id(0)

    if sys_id is None:
        internal = 1
        ent_val = self.parse_ent_repltext()
    else:
        internal = 0

    # Peek rather than consume: "NDATA" with nothing in front of it
    # means whitespace is missing, but the keyword has to stay in the
    # buffer so that the NDataDecl is still parsed below.
    if self.test_str("NDATA"):
        self.report_error(3002)
    else:
        self.skip_ws()

    if not internal and self.now_at("NDATA"):
        # Parsing the optional NDataDecl
        if pedecl:
            self.report_error(3035)

        self.skip_ws()
        ndata = self._get_name()
        self.skip_ws()
    else:
        ndata = ""

    if not self.now_at(">"):
        self.report_error(3005, ">")

    if pedecl:
        # These are echoed to self.dtd so we remember this stuff
        if internal:
            self.dtd_consumer.new_parameter_entity(ent_name, ent_val)
            if self.dtd != self.dtd_consumer:
                self.dtd.new_parameter_entity(ent_name, ent_val)
        else:
            self.dtd_consumer.new_external_pe(ent_name, pub_id, sys_id)
            if self.dtd != self.dtd_consumer:
                self.dtd.new_external_pe(ent_name, pub_id, sys_id)
    else:
        if internal:
            self.dtd_consumer.new_general_entity(ent_name, ent_val)
        else:
            self.dtd_consumer.new_external_entity(ent_name, pub_id, sys_id,
                                                  ndata)
def parse_ent_repltext(self):
    """Parse a quoted entity value and return what parse_ent_litval()
    makes of the text between the quotes.

    The value must start with a double or a single quote and runs to the
    next occurrence of the same quote character.  If neither quote is
    found, error 3004 is reported, the input is skipped up to and
    including the next ">" and "" is returned, so that the caller always
    gets a string.
    """
    if self.now_at('"'):
        delim = '"'
    elif self.now_at("'"):
        delim = "'"
    else:
        self.report_error(3004, ("'", "\""))
        self.scan_to(">")
        return ""

    return self.parse_ent_litval(self.scan_to(delim))
def parse_ent_litval(self,litval):
pos=0
val=""
while 1:
res=reg_litval_stop.search(litval,pos)
if res==None:
break
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -