📄 dtdparser.py
字号:
val=val+litval[pos:res.start(0)]
pos=res.start(0)
if litval[pos:pos+2]=="&#":
endpos=string_find(litval,";",pos)
if endpos==-1:
self.report_error(3005,";")
break
if litval[pos+2]=="x":
digs=unhex(litval[pos+3:endpos])
else:
digs=int(litval[pos+2:endpos])
if not (digs==9 or digs==10 or digs==13 or \
(digs>=32 and digs<=255)):
if digs>255:
self.report_error(1005,digs)
else:
self.report_error(3018,digs)
else:
val=val+chr(digs)
pos=endpos+1
elif litval[pos]=="%":
endpos=string_find(litval,";",pos)
if endpos==-1:
self.report_error(3005,";")
break
name=litval[pos+1:endpos]
try:
ent=self.dtd.resolve_pe(name)
if ent.is_internal():
val=val+self.parse_ent_litval(ent.value)
else:
self.report_error(3037) # FIXME: Easily solved now...?
except KeyError,e:
self.report_error(3038,name)
pos=endpos+1
else:
self.report_error(4001)
break
return val+litval[pos:]
def parse_notation(self):
    """Parses a notation declaration.

    Reads the notation name and its external identifier, checks that the
    declaration is closed by '>' (error 3005 is reported when it is not)
    and hands (name, pubid, sysid) to the DTD consumer."""
    self.skip_ws(1)
    notation_name=self._get_name()

    self.skip_ws(1)
    (pubid,sysid)=self.parse_external_id(1,0)

    self.skip_ws()
    closed=self.now_at(">")
    if not closed:
        self.report_error(3005,">")

    # The notation is reported even when the closing '>' was missing
    self.dtd_consumer.new_notation(notation_name,pubid,sysid)
def parse_pe_ref(self):
    """Parses a reference to a parameter entity.

    The entity is looked up through self.dtd.resolve_pe(); an unknown name
    is reported as error 3038 and the reference is otherwise ignored.
    Internal entities are pushed, parsed and popped in place. External
    entities are opened through open_entity() with the 'internal' flag
    switched off for the duration of the call."""
    # Drop the first and last character of the match to get the bare name
    matched=self.get_match(reg_pe_ref)
    pe_name=matched[1:-1]

    try:
        entity=self.dtd.resolve_pe(pe_name)
    except KeyError:
        self.report_error(3038,pe_name)
        return

    if not entity.is_internal():
        ext_sysid=self.pubres.resolve_pe_pubid(entity.get_pubid(),
                                               entity.get_sysid())
        was_internal=self.internal
        self.set_internal(0)
        try:
            self.open_entity(ext_sysid) # Does parsing and popping
        finally:
            # Restore the flag even if parsing the entity raised
            self.set_internal(was_internal)
        return

    self.push_entity(self.get_current_sysid(),entity.value)
    self.do_parse()
    self.pop_entity()
def parse_attlist(self):
    """Parses an attribute list declaration.

    Every attribute definition found before the closing '>' is passed to
    dtd_consumer.new_attribute(elem, attr, type, decl, default) where

      type    is the result of get_match(reg_attr_type), a
              ("NOTATION", names) tuple, or the list of enumerated tokens
      decl    is "#DEFAULT", "#IMPLIED", "#REQUIRED", "#FIXED", or None
              when no valid default declaration was found (error 3909)
      default is the parsed default value, or None when there is none

    An unrecognized attribute type is reported as error 3039; the rest of
    the declaration is then skipped with scan_to(">")."""
    self.skip_ws(1)
    elem_name=self._get_name()
    self.skip_ws(1)

    while not self.test_str(">"):
        attr_name=self._get_name()
        self.skip_ws(1)

        # --- Attribute type
        if self.test_reg(reg_attr_type):
            attr_type=self.get_match(reg_attr_type)
        elif self.now_at("NOTATION"):
            self.skip_ws(1)
            attr_type=("NOTATION",self.__parse_list(reg_name,"|"))
        elif self.now_at("("):
            self.pos-=1 # Does not expect '(' to be skipped
            attr_type=self.__parse_list(reg_nmtoken,"|")

            # Report every token that occurs more than once
            seen={}
            for tok in attr_type:
                if tok in seen:
                    self.report_error(3044,(tok,))
                else:
                    seen[tok]=1
        else:
            self.report_error(3039)
            self.scan_to(">")
            return

        self.skip_ws(1)

        # --- Default declaration
        if self.test_str("\"") or self.test_str("'"):
            decl,default="#DEFAULT",self.parse_ent_repltext()
        elif self.now_at("#IMPLIED"):
            decl,default="#IMPLIED",None
        elif self.now_at("#REQUIRED"):
            decl,default="#REQUIRED",None
        elif self.now_at("#FIXED"):
            self.skip_ws(1)
            decl,default="#FIXED",self.parse_ent_repltext()
        else:
            self.report_error(3909)
            decl,default=None,None

        self.skip_ws()
        self.dtd_consumer.new_attribute(elem_name,attr_name,attr_type,
                                        decl,default)

    self.pos+=1 # Skipping the '>'
def parse_elem_type(self):
    """Parses an element type declaration.

    The content specification handed to the DTD consumer is the string
    "EMPTY" or "ANY", or whatever _parse_content_model() returns for a
    parenthesized model. Anything else is reported as error 3004 and
    treated as "ANY". A missing closing '>' is reported as error 3005."""
    self.skip_ws(1)
    name=self._get_name()
    self.skip_ws(1)

    # content-spec: try the keyword forms first, in this order
    for keyword in ("EMPTY","ANY"):
        if self.now_at(keyword):
            content=keyword
            break
    else:
        if self.now_at("("):
            content=self._parse_content_model()
        else:
            self.report_error(3004,("EMPTY, ANY","("))
            content="ANY" # Just so things don't fall apart downstream

    self.skip_ws()
    if not self.now_at(">"):
        self.report_error(3005,">")

    self.dtd_consumer.new_element_type(name,content)
def _parse_content_model(self,level=0):
    """Parses the content model of an element type declaration.

    level tells the function whether we are on the top level (=0) or in
    a nested group (=1). The '(' has just been passed over; parsing
    continues past the matching ')' and any modifier following it.

    Returns a tuple (separator, contents, modifier). Each item in
    contents is either a (name, modifier) tuple or, for a nested group,
    the tuple returned by the recursive call. A top-level group starting
    with #PCDATA is delegated to parse_mixed_content_model()."""
    self.skip_ws()

    # now_at() must be asked before the level test, exactly as before
    if self.now_at("#PCDATA") and level==0:
        return self.parse_mixed_content_model()

    particles=[]
    separator=""

    while 1:
        self.skip_ws()
        if self.now_at("("):
            particle=self._parse_content_model(1)
        else:
            particle=self._get_name()

        # Optional occurrence indicator directly after the particle
        occurrence=""
        if self.test_str("?") or self.test_str("*") or self.test_str("+"):
            occurrence=self.data[self.pos]
            self.pos+=1

        # Names are paired with their modifier; nested groups are stored
        # as returned (they carry their own modifier)
        if type(particle)==StringType:
            particle=(particle,occurrence)
        particles.append(particle)

        self.skip_ws()
        if self.now_at(")"):
            break

        if separator!="":
            # Later separators must agree with the first one
            if not self.now_at(separator):
                self.report_error(3040)
                self.scan_to(")")
            continue

        # The first separator decides the kind of group
        if self.test_str("|") or self.test_str(","):
            separator=self.data[self.pos]
        else:
            self.report_error(3004,("'|'",","))
        self.pos+=1

    # Modifier applying to the group as a whole
    group_mod=""
    if self.test_str("+") or self.test_str("?") or self.test_str("*"):
        group_mod=self.data[self.pos]
        self.pos+=1

    return (separator,particles,group_mod)
def parse_mixed_content_model(self):
    """Parses mixed content models. Ie: ones containing #PCDATA.

    Called after "#PCDATA" has been consumed. Returns a tuple
    (separator, contents, modifier) where contents starts with
    ("#PCDATA","") followed by one (name,"") tuple per alternative.
    separator is "|" once an alternative has been seen, otherwise "".
    modifier is "*" when the group is followed by '*'; a group with
    alternatives but no '*' is reported as error 3005."""
    members=[("#PCDATA","")]
    separator=""

    while 1:
        try:
            self.skip_ws()
        except OutOfDataException:
            # Passed straight on to the caller
            raise

        if not self.now_at("|"):
            if self.now_at(")"):
                break
            self.report_error(3005,"|")
            # NOTE(review): after this recovery step the loop still goes
            # on to read a name below -- confirm that this is intended.
            self.scan_to(">")
        else:
            separator="|"

        self.skip_ws()
        members.append((self.get_match(reg_name),""))

    modifier=""
    if self.now_at("*"):
        modifier="*"
    elif separator=="|":
        self.report_error(3005,"*")

    return (separator,members,modifier)
def parse_conditional(self):
    """Parses a conditional section.

    When self.internal is set the section is rejected with error 3041
    and the input is skipped past the next "]]>".

    Otherwise the keyword decides what happens:

      IGNORE   self.ignores_entered is set to 1 and the section body is
               consumed by parse_ignored_data().
      INCLUDE  self.includes_entered is increased by exactly one once
               the opening '[' has been handled.

    Any other keyword is reported as error 3004; the parser then scans
    to the next '[' and carries on as for INCLUDE. A missing '[' is
    reported as error 3005.

    The include count used to be increased twice for a single section
    (once right after the keyword handling and once at the end). Only
    the final, deliberately delayed increase is kept."""
    if self.internal:
        self.report_error(3041)
        self.scan_to("]]>")
        return

    self.skip_ws()

    if self.now_at("IGNORE"):
        self.ignores_entered=1
        self.skip_ws()
        if not self.now_at("["):
            self.report_error(3005,"[")
        self.parse_ignored_data()
        return

    if not self.now_at("INCLUDE"):
        # Error recovery only; the include count is NOT touched here, it
        # is increased once at the end of this method.
        self.report_error(3004,("'IGNORE'","INCLUDE"))
        self.scan_to("[")

    self.skip_ws()
    if not self.now_at("["):
        self.report_error(3005,"[")

    # Doing an extra skip_ws and waiting until we get here
    # before increasing the include count, to avoid increasing
    # the count inside a PE, where it would be forgotten after pop.
    self.skip_ws()
    self.includes_entered=self.includes_entered+1
def parse_ignored_data(self):
    """Skips the contents of an ignored conditional section.

    Starts from the nesting count in self.ignores_entered and keeps
    calling find_reg(reg_cond_sect) until the count drops to zero: a
    "]]>" lowers the count, anything else found raises it. When the
    data runs out first, the current count is saved, the buffer is
    emptied and OutOfDataException is passed on to the caller (after
    reporting error 3043 if self.final is set)."""
    try:
        depth=self.ignores_entered
        while depth:
            self.find_reg(reg_cond_sect)
            if not self.now_at("]]>"):
                depth+=1
                # presumably steps over a three-character section
                # opener -- confirm against reg_cond_sect
                self.pos+=3
            else:
                depth-=1
    except OutOfDataException:
        if self.final:
            self.report_error(3043)

        # Remember how deep we were and throw away the consumed buffer
        self.ignores_entered=depth
        self.data=""
        self.pos=0
        self.datasize=0
        raise

    self.ignores_entered=0
def __parse_list(self, elem_regexp, separator):
    """Parses a '(' S? elem_regexp S? separator ... ')' list. (Internal.)

    Returns the list of get_match(elem_regexp) results. A missing '('
    is reported as error 3005; an item followed by neither ')' nor the
    separator is reported as error 3004 and ends the list."""
    items=[]

    self.skip_ws()
    if not self.now_at("("):
        self.report_error(3005,"(")

    while 1:
        self.skip_ws()
        items.append(self.get_match(elem_regexp))
        self.skip_ws()

        if self.now_at(")"):
            break
        if not self.now_at(separator):
            self.report_error(3004,("')'",separator))
            break

    return items
def is_external(self):
    "Returns the logical negation of the 'internal' flag."
    internal_flag=self.internal
    return not internal_flag
# --- Internal methods
def _push_ent_stack(self,name="None"):
    """Pushes a new entity on the entity stack.

    After the EntityParser bookkeeping, the current include count is
    saved on own_ent_stack and counting restarts from zero."""
    EntityParser._push_ent_stack(self,name)

    saved_includes=self.includes_entered
    self.own_ent_stack.append(saved_includes)
    self.includes_entered=0
def _pop_ent_stack(self):
    """Pops the current entity off the entity stack.

    After the EntityParser bookkeeping, the include count saved last on
    own_ent_stack is restored and removed from that stack."""
    EntityParser._pop_ent_stack(self)
    self.includes_entered=self.own_ent_stack.pop()
# --- Minimal DTD consumer
class DTDConsumerPE(DTDConsumer):
    """Minimal DTD consumer that only keeps track of parameter entities.

    Declared parameter entities are stored by name in param_ents; the
    first declaration of a name wins."""

    def __init__(self):
        DTDConsumer.__init__(self,None)
        self.param_ents={}
        self.used_notations = {}

    def new_parameter_entity(self,name,val):
        "Records an internal parameter entity unless the name is taken."
        if name in self.param_ents: # Keep first decl
            return
        self.param_ents[name]=InternalEntity(name,val)

    def new_external_pe(self,name,pubid,sysid):
        "Records an external parameter entity unless the name is taken."
        if name in self.param_ents: # Keep first decl
            return
        self.param_ents[name]=ExternalEntity(name,pubid,sysid,"")

    def resolve_pe(self,name):
        "Returns the entity stored under name; KeyError if there is none."
        return self.param_ents[name]

    def reset(self):
        "Forgets all recorded parameter entities."
        self.param_ents={}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -