📄 xmlproc.py
字号:
val=val+self.parse_literal_entval()
if not self.pos==self.datasize:
self.report_error(3001)
self.pop_entity()
else:
self.report_error(3020)
except KeyError,e:
self.report_error(3021,name)
del self.open_ents[-1]
else:
self.report_error(4001)
if not self.now_at(";"):
self.report_error(3005,";")
self.scan_to(">")
return val
def parse_end_tag(self):
    """Parse an end tag, from its leading '</' through the closing '>'.

    The element name is read with _get_name() and the top of the element
    stack is popped.  Error 3023 is reported when the popped element does
    not match the tag name, error 3024 when the stack was already empty.
    The application's handle_end_tag() is always called with the name
    that was read.
    """
    self.pos += 2  # step over the '</'
    name = self._get_name()

    if self.data[self.pos] == ">":
        # Common case: the name is directly followed by '>'
        self.pos += 1
    else:
        self.skip_ws()  # whitespace before the '>' is probably rare
        if not self.now_at(">"):
            self.report_error(3005, ">")

    try:
        elem = self.stack.pop()  # IndexError here means the stack was empty
        if name != elem:
            self.report_error(3023, (name, elem))

            # Guess at a recovery in case parsing continues: if the
            # element below matches, drop it as well; otherwise restore
            # the element that was just popped.
            if self.stack and self.stack[-1] == name:
                self.stack.pop()
            else:
                self.stack.append(elem)
    except IndexError:
        self.report_error(3024, name)

    self.app.handle_end_tag(name)
def parse_data(self):
    """Parse a run of character data starting at the current position.

    The run ends at the next '<' or '&', whichever comes first.  If
    neither is found, OutOfDataException is raised unless this is the
    final block, in which case the run extends to the end of the buffer.
    A ']]>' inside the run is reported as error 3025.  Inside an element
    the run is passed to the application's handle_data(); outside any
    element, anything but whitespace is reported as error 3029.
    """
    start = self.pos
    lt_idx = string_find(self.data, "<", start)

    if lt_idx != -1:
        # The data stops at the tag, or earlier if a reference precedes it
        end = lt_idx
        amp_idx = string_find(self.data, "&", start, lt_idx)
        if amp_idx != -1:
            end = amp_idx
    else:
        end = string_find(self.data, "&", start)
        if end == -1:
            if not self.final:
                raise OutOfDataException()
            end = self.datasize

    self.pos = end

    cdata_close = string_find(self.data, "]]>", start, end)
    if cdata_close != -1:
        # Point at the offending ']]>' so the error is located there
        self.pos = cdata_close
        self.report_error(3025)
        self.pos = self.pos + 3  # Skipping over it

    if self.stack != []:
        self.app.handle_data(self.data, start, end)
    else:
        # Outside the root element only whitespace is acceptable
        ws = reg_ws.match(self.data, start)
        if ws is None or ws.end(0) != end:
            self.report_error(3029)
def parse_charref(self):
    """Parse a character reference and deliver the character as data.

    A leading 'x' selects hexadecimal digits (converted with unhex),
    otherwise decimal digits are read; a decimal value int() rejects is
    reported as error 3027.  A missing ';' is reported as error 3005.
    Accepted code points are 9, 10, 13 and 32..255: values above 255
    report error 1005, other rejected values report error 3018.  An
    accepted character outside any element reports error 3028 and is
    still passed to the application's handle_data().
    """
    if self.now_at("x"):
        code = unhex(self.get_match(reg_hex_digits))
    else:
        try:
            code = int(self.get_match(reg_digits))
        except ValueError:
            self.report_error(3027)
            code = None

    if not self.now_at(";"):
        self.report_error(3005, ";")

    if code is None:
        return

    accepted = code in (9, 10, 13) or 32 <= code <= 255
    if accepted:
        if self.stack == []:
            self.report_error(3028)
        self.app.handle_data(chr(code), 0, 1)
    elif code > 255:
        self.report_error(1005, code)
    else:
        self.report_error(3018, code)
def parse_cdata(self):
    """Parse a CDATA marked section, starting just after '<![CDATA['.

    Everything up to the closing ']]>' is handed to the application's
    handle_data() and the position is moved past the ']]>'.  Error 3029
    is reported if the element stack is empty.
    """
    close_at = self.get_index("]]>")

    if len(self.stack) == 0:
        self.report_error(3029)

    self.app.handle_data(self.data, self.pos, close_at)
    self.pos = close_at + 3  # continue after the ']]>'
def parse_ent_ref(self):
    """Parse a general entity reference, starting just after the '&'.

    The entity is looked up with self.ent.resolve_ge(); an unknown name
    reports error 3021 and a name already in self.open_ents reports
    error 3019, and in both cases nothing more is done.  Internal
    entities are pushed and parsed in place; external ones are opened
    through the public identifier resolver.  Error 3042 is reported if
    the element stack depth differs before and after the entity.
    """
    name = self._get_name()
    if not self.now_at(";"):
        self.report_error(3005, ";")

    try:
        ent = self.ent.resolve_ge(name)
    except KeyError:
        self.report_error(3021, name)
        return

    if ent.name in self.open_ents:
        self.report_error(3019)
        return

    self.open_ents.append(ent.name)

    if self.stack == []:
        self.report_error(3030)

    # Remember the element stack depth so boundary crossings can be detected
    depth_before = len(self.stack)

    if not ent.is_internal():
        if ent.notation != "":
            self.report_error(3031)

        saved_xmldecl = self.seen_xmldecl
        self.seen_xmldecl = 0  # Avoid complaints
        self.seen_root = 0     # Haven't seen root in the new entity yet
        resolved_sysid = self.pubres.resolve_entity_pubid(ent.get_pubid(),
                                                          ent.get_sysid())
        self.open_entity(resolved_sysid, name)
        self.seen_root = 1  # Entity references only allowed inside elements
        self.seen_xmldecl = saved_xmldecl
    else:
        self.push_entity(self.get_current_sysid(), ent.value, name)
        try:
            self.do_parse()
        except OutOfDataException:  # Ran out of data before done
            self.report_error(3001)

        self.flush()
        self.pop_entity()

    # Did any elements cross the entity boundary?
    if len(self.stack) != depth_before:
        self.report_error(3042)

    self.open_ents.pop()
def parse_doctype(self):
    """Parse the document type declaration.

    Reports error 3032 if a doctype was already seen and error 3033 if
    the root element was already seen.  Reads the root element name and
    external id, notifies the application through handle_doctype(),
    calls dtd_start() on the DTD object, parses the internal subset if
    one is present and then, when an external id was given and
    read_external_subset is set, parses the external subset with a
    separate DTD parser.

    Exceptions raised while resolving the external identifier or while
    creating the DTD parser propagate unchanged.
    """
    if self.seen_doctype:
        self.report_error(3032)
    if self.seen_root:
        self.report_error(3033)

    self.skip_ws(1)
    rootname = self._get_name()
    self.skip_ws(1)

    (pub_id, sys_id) = self.parse_external_id()
    self.skip_ws()

    self.app.handle_doctype(rootname, pub_id, sys_id)
    self.dtd.dtd_start()

    if self.now_at("["):
        self.parse_internal_dtd()
    elif not self.now_at(">"):
        self.report_error(3005, ">")

    # External subset must be parsed _after_ the internal one
    if pub_id is not None or sys_id is not None:  # Was there an external id?
        if self.read_external_subset:
            # Resolve the id and create the parser *before* entering the
            # try block: the cleanup below needs `p`, and running it with
            # `p` unbound would replace the real failure with a
            # NameError/UnboundLocalError.
            sys_id = self.pubres.resolve_doctype_pubid(pub_id, sys_id)
            p = self._setup_dtd_parser(0)
            try:
                p.dtd_start_called = 1
                p.parse_resource(join_sysids(self.get_current_sysid(),
                                             sys_id))
            finally:
                p.deref()
                # _setup_dtd_parser pointed the error handler's locator
                # at the DTD parser; point it back at this parser.
                self.err.set_locator(self)

    if (pub_id is None and sys_id is None) or \
       not self.read_external_subset:
        # If we parse the external subset dtd_end is called for us by
        # the dtd parser. If we don't we must call it ourselves.
        self.dtd.dtd_end()

    self.seen_doctype = 1  # Has to be at the end to avoid block trouble
def parse_internal_dtd(self):
    """Parse the internal DTD subset, starting just after the '['.

    Scans forward to locate the terminating ']' (optionally followed by
    whitespace) and '>', stepping over quoted strings, processing
    instructions, comments and marked sections on the way.  The text in
    between is then passed to handle_internal_dtd() together with the
    line and last_break values recorded at the start of the subset.
    """
    self.set_start_point()  # Record start of int_subset, preserve data
    self.update_pos()
    start_line = self.line
    start_break = self.last_break
    last_part_size = 0

    # (opening delimiter, closing delimiter) of constructs whose contents
    # must not be searched for the end of the subset; tried in this order.
    skipped_constructs = (('"', '"'),
                          ("'", "'"),
                          ("<?", "?>"),
                          ("<!--", "-->"),
                          ("<![", "]]>"))

    while 1:
        self.find_reg(reg_int_dtd)

        for opener, closer in skipped_constructs:
            if self.now_at(opener):
                self.scan_to(closer)
                break
        else:
            # None of the skippable constructs matched
            if self.now_at("]"):
                after_bracket = self.pos
                self.skip_ws()
                if self.now_at(">"):
                    # whitespace + '>' consumed since the ']', plus the ']'
                    last_part_size = (self.pos - after_bracket) + 1
                    break

    # The slice cuts off the trailing ']', whitespace and '>'
    subset = self.get_region()[:-last_part_size]
    self.handle_internal_dtd(start_line, start_break, subset)
def handle_internal_dtd(self, doctype_line, doctype_lb, int_dtd):
    """Handle the internal DTD subset by feeding it to a DTD parser.

    doctype_line and doctype_lb are copied to the DTD parser's line and
    last_break attributes, and int_dtd is the subset text that is fed
    to it.  Running out of data inside the subset is reported as error
    3034.  The DTD parser is always dereferenced and the error handler's
    locator is pointed back at this parser afterwards.

    An exception raised while creating the DTD parser propagates
    unchanged.
    """
    # Create the parser before entering the try block: the cleanup below
    # needs `p`, and running it with `p` unbound would replace the real
    # failure with a NameError/UnboundLocalError.
    p = self._setup_dtd_parser(1)
    try:
        try:
            p.line = doctype_line
            p.last_break = doctype_lb

            p.set_sysid(self.get_current_sysid())
            p.final = 1
            p.feed(int_dtd)
        except OutOfDataException:
            self.report_error(3034)
    finally:
        p.deref()
        self.err.set_locator(self)
def _setup_dtd_parser(self, internal_subset):
    """Create a DTDParser configured from this parser's settings.

    The new parser receives this parser's error handler, DTD object (as
    both consumer and DTD object), error language, input source factory
    and public identifier resolver.  internal_subset is passed on to its
    set_internal().  The error handler's locator is switched to the new
    parser, which is then returned.
    """
    dtd_parser = DTDParser()

    dtd_parser.set_error_handler(self.err)
    dtd_parser.set_dtd_consumer(self.dtd)
    dtd_parser.set_error_language(self.err_lang)
    dtd_parser.set_inputsource_factory(self.isf)
    dtd_parser.set_pubid_resolver(self.pubres)
    dtd_parser.set_dtd_object(self.dtd)

    listener = self.dtd_listener
    if listener != None:
        self.dtd.set_dtd_listener(self.dtd_listener)

    dtd_parser.set_internal(internal_subset)

    # Errors reported from now on are located through the DTD parser
    self.err.set_locator(dtd_parser)
    return dtd_parser
# ===== The introspection methods =====
def get_elem_stack(self):
    """Return the internal element stack.

    Note: the list itself is returned, not a copy, so it is live.
    """
    elem_stack = self.stack
    return elem_stack
def get_data_buffer(self):
    """Return the data buffer currently being parsed (self.data)."""
    buf = self.data
    return buf
def get_construct_start(self):
    """Return the position at which the current construct (tag,
    comment, etc) starts."""
    construct_start = self.prepos
    return construct_start
def get_construct_end(self):
    """Return the position at which the current construct (tag,
    comment, etc) ends."""
    construct_end = self.pos
    return construct_end
def get_raw_construct(self):
    """Return the raw text of the current construct, i.e. the part of
    the data buffer between the construct's start and end positions."""
    first = self.prepos
    last = self.pos
    return self.data[first:last]
def get_current_ent_stack(self):
    """Return a snapshot of the entity stack.

    The result is a new list with one (system identifier, name) tuple
    per entry of self.ent_stack, taken from positions 0 and 9 of each
    entry.  The name is whatever the entry holds, if any.  Later changes
    to the entity stack do not affect the returned list.
    """
    # A comprehension rather than map(): map() returns a lazy iterator
    # on Python 3, which would be neither a list nor a snapshot.
    return [(ent[0], ent[9]) for ent in self.ent_stack]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -