⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlproc.py

📁 Python Development Environment (Python IDE plugin for Eclipse). Features editor, code completion, re
💻 PY
📖 第 1 页 / 共 2 页
字号:
"""
The main module of the parser. All other modules are imported into this
one, so this is the only module you need to import. For validating
parsing, import xmlval instead.
"""

# $Id$
   
import re,string,sys,urllib,urlparse

# Module-level aliases for two functions of the string module, so that the
# parser can call them without going through the module attribute each time.
string_translate=string.translate # optimization. made 10% difference!
string_find     =string.find

from dtdparser import *
from xmlutils import *
from xmlapp import *
from xmldtd import *

# Version string of this module.
version="0.70"
# Revision-control keyword placeholder (shown unexpanded here).
revision="$Revision$"
        
# ==============================
# A full well-formedness parser
# ==============================

class XMLProcessor(XMLCommonParser):
    "A parser that performs a complete well-formedness check."

    def __init__(self):
        """Sets up the parser with a default Application as event receiver
        and a WFCDTD instance that is used both as self.dtd and as the
        entity source self.ent."""
        EntityParser.__init__(self)

        # Various handlers
        self.app=Application()
        wfc_dtd=WFCDTD(self)
        self.dtd=wfc_dtd
        self.ent=wfc_dtd
        self.stop_on_wf=1
        self.dtd_listener=None
        
    def set_application(self,app):
        """Registers app as the object that receives data events, and
        passes this parser to it as its locator."""
        self.app=app
        app.set_locator(self)
        
    def set_dtd_listener(self,listener):
        """Stores listener as the object that is to receive DTD parse
        events."""
        self.dtd_listener=listener

    def set_data_after_wf_error(self,stop_on_wf=0):
        """Sets the parser policy on well-formedness errors.

        If stop_on_wf is 0 (the default of this method), data events are
        still delivered after a well-formedness error. With any true
        value no more data events reach the application after such
        errors.
        """
        self.stop_on_wf=stop_on_wf

    def set_read_external_subset(self,read_it):
        """Stores the flag that tells the parser whether the external
        subset of documents should be read (true) or not (false)."""
        self.read_external_subset=read_it
        
    def report_error(self,number,args=None):
        """Reports error number (with optional args) through EntityParser.

        When stop_on_wf is set and the error number is above 2999, the
        current application is first replaced by a fresh Application
        instance, so the registered application gets no more data events.
        """
        silence_app=self.stop_on_wf and number>2999
        if silence_app:
            self.app=Application() # No more data events reported

        EntityParser.report_error(self,number,args)
        
    def reset(self):
        """Resets the parser: the EntityParser state, the DTD object (if
        one has been created) and the parse state variables.

        NOTE(review): this also puts stop_on_wf back to 1 and
        read_external_subset back to 0, i.e. values set through the
        setter methods do not survive a reset -- confirm this is intended.
        """
        EntityParser.reset(self)

        # self.dtd may not exist yet -- presumably reset can be called
        # before __init__ has assigned it
        if hasattr(self,"dtd"):
            self.dtd.reset()

        # State vars
        self.stack=[]
        self.seen_root=self.seen_doctype=self.seen_xmldecl=0
        self.read_external_subset=0
        self.stop_on_wf=1

    def deref(self):
        """Breaks circular references by setting the attributes that refer
        to helper objects (dtd, ent, err, app, pubres) to None."""
        for attr_name in ("dtd","ent","err","app","pubres"):
            setattr(self,attr_name,None)

    def do_parse(self):
        """Does the actual parsing of the data currently in the buffer.

        Loops while the position is inside the data and dispatches on the
        character found there: markup starting with '<', references
        starting with '&', and character data otherwise.

        If the data runs out in the middle of a construct and this is not
        the final block, the position is moved back to the start of that
        construct (self.prepos).

        Raises OutOfDataException if the data runs out while self.final
        is set.
        """
        try:
            while self.pos<self.datasize:
                # Remember where this construct starts, in case it turns
                # out to be incomplete
                self.prepos=self.pos
                ch=self.data[self.pos]

                if ch=="<":
                    try:
                        t=self.data[self.pos+1] # Optimization
                    except IndexError:
                        # A lone '<' at the very end of the data
                        raise OutOfDataException()

                    if t=="/":
                        self.parse_end_tag()
                    elif t!="!" and t!="?":
                        self.parse_start_tag()
                    elif self.now_at("<!--"):
                        self.parse_comment(self.app)
                    elif self.now_at("<?"): # FIXME: use t and modify self.pos?
                        self.parse_pi(self.app,1)
                    elif self.now_at("<![CDATA["):
                        self.parse_cdata()
                    elif self.now_at("<!DOCTYPE"):
                        self.parse_doctype()
                    else:
                        self.report_error(3013)
                        self.scan_to(">") # Avoid endless loops
                elif ch=="&":
                    if self.now_at("&#"):
                        self.parse_charref()
                    else:
                        self.pos=self.pos+1  # Skipping the '&'
                        self.parse_ent_ref()
                else:
                    self.parse_data()
        except OutOfDataException:
            if self.final:
                # Bare raise re-raises the exception being handled and
                # keeps its original traceback
                raise
            else:
                self.pos=self.prepos  # Didn't complete the construct

    def parseStart(self):
        """Must be called before parsing starts. Tells the application
        that the document starts (doc_start)."""
        self.app.doc_start()

    def parseEnd(self):
        """Must be called when parsing is finished. Does some final checks
        and tells the application that the document ends (doc_end)."""
        unclosed=self.stack!=[] and self.ent_stack==[]

        if unclosed:
            # Elements are still open; report the innermost one
            self.report_error(3014,self.stack[-1])
        elif not self.seen_root:
            # No start tag was ever seen
            self.report_error(3015)

        self.app.doc_end()
	    
    def parse_start_tag(self):
        """Parses a start tag or empty-element tag at the current position.

        The attribute dict starts out as a copy of the one stored in
        self.dtd.attrinfo for the element name (empty if there is none)
        and is then updated with the attributes found in the tag. The
        application gets handle_start_tag for '>' and handle_start_tag
        followed by handle_end_tag for '/>'; only in the first case is the
        name pushed on the element stack.

        Errors reported here: 3005 (no '=' after an attribute name), 3016
        (attribute given twice), 2000 (value differs from the one in
        fixeds), 3017 (start tag after the root element was closed) and
        3004 (tag not closed by '>' or '/>').

        Raises OutOfDataException if the data ends right after the
        element name.
        """
        self.pos=self.pos+1 # Skips the '<'
        name=self._get_name()
        self.skip_ws()

        try:
            (attrs,fixeds)=self.dtd.attrinfo[name]
            attrs=attrs.copy() # Don't modify the dict held by the DTD
        except KeyError:
            attrs={}
            fixeds={}

        try:
            next_char=self.data[self.pos]
        except IndexError:
            # The data ended here. Signal it the way do_parse does, so
            # that it is handled like any other incomplete construct
            # instead of escaping as an IndexError
            raise OutOfDataException()

        if next_char!=">" and next_char!="/":
            seen={}
            while not self.test_str(">") and not self.test_str("/>"):
                a_name=self._get_name()
                self.skip_ws()
                if not self.now_at("="):
                    self.report_error(3005,"=")
                    self.scan_to(">") ## Panic! Get out of the tag!
                    break
                self.skip_ws()

                a_val=self.parse_att_val()
                if a_val==-1:
                    # WF error, we've skipped the rest of the tag
                    self.pos=self.pos-1      # Lets us find the '>'
                    if self.data[self.pos-1]=="/":
                        self.pos=self.pos-1  # Gets the '/>' cases right
                    break

                if seen.has_key(a_name):
                    self.report_error(3016,a_name)
                else:
                    seen[a_name]=1

                attrs[a_name]=a_val
                if fixeds.has_key(a_name) and fixeds[a_name]!=a_val:
                    self.report_error(2000,a_name)
                self.skip_ws()

        # --- Take care of the tag

        if self.stack==[] and self.seen_root:
            self.report_error(3017)

        self.seen_root=1

        if self.now_at(">"):
            self.app.handle_start_tag(name,attrs)
            self.stack.append(name)
        elif self.now_at("/>"):
            self.app.handle_start_tag(name,attrs)
            self.app.handle_end_tag(name)
        else:
            self.report_error(3004,("'>'","/>"))

    def parse_att_val(self):
        """Parses an attribute value and resolves all entity references in
        it.

        Returns the value as a string. If the value does not start with a
        single or double quote, error 3004 is reported, the parser scans
        to '>' and -1 is returned instead.

        Each piece of literal text is passed through string_translate
        with ws_trans (presumably whitespace normalization -- confirm
        against the definition of ws_trans). Character references are
        read with _read_char_ref, and internal general entities are
        expanded with parse_literal_entval.

        A reference to an entity that is already being expanded (its name
        is in self.open_ents) is reported as error 3019 and skipped;
        parsing of the value then continues, so that the caller always
        gets a string (or -1), never None.
        """

        val=""
        if self.now_at('"'):
            delim='"'
            reg_attval_stop=reg_attval_stop_quote
        elif self.now_at("'"):
            delim="'"
            reg_attval_stop=reg_attval_stop_sing
        else:
            self.report_error(3004,("'","\""))
            self.scan_to(">")
            return -1 # FIXME: Ugly. Should throw an exception instead

        while 1:
            piece=self.find_reg(reg_attval_stop)
            val=val+string_translate(piece,ws_trans)

            if self.now_at(delim):
                break

            if self.now_at("&#"):
                val=val+self._read_char_ref()
            elif self.now_at("&"):
                name=self._get_name()

                if name in self.open_ents:
                    # Recursive reference: report it and leave it out of
                    # the value. The ';' is consumed by the check below.
                    self.report_error(3019)
                else:
                    self.open_ents.append(name)

                    try:
                        ent=self.ent.resolve_ge(name)
                        if ent.is_internal():
                            # Doing all this here sucks a bit, but...
                            self.push_entity(self.get_current_sysid(),\
                                             ent.value,name)

                            self.final=1 # Only one block

                            val=val+self.parse_literal_entval()
                            if not self.pos==self.datasize:
                                self.report_error(3001) # Thing started, not compl

                            self.pop_entity()
                        else:
                            # Reference to a non-internal entity
                            self.report_error(3020)
                    except KeyError:
                        self.report_error(3021,name) ## FIXME: Check standalone dcl

                    del self.open_ents[-1]

            elif self.now_at("<"):
                self.report_error(3022)
                continue
            else:
                self.report_error(4001)
                self.pos=self.pos+1    # Avoid endless loop
                continue

            # Both kinds of reference must end with a ';'
            if not self.now_at(";"):
                self.report_error(3005,";")

        return val

    def parse_literal_entval(self):
	"Parses a literal entity value for insertion in an attribute value."

	val=""
        reg_stop=re.compile("&")
	        
        while 1:
            try:
                piece=self.find_reg(reg_stop)
            except OutOfDataException,e:
                # Only character data left
                val=val+string_translate(self.data[self.pos:],ws_trans)
                self.pos=self.datasize
                break
            
            val=val+string_translate(piece,ws_trans)

	    if self.now_at("&#"):
                val=val+self._read_char_ref()		
	    elif self.now_at("&"):
                name=self._get_name()

                if name in self.open_ents:
                    self.report_error(3019)
                    return ""
                else:
                    self.open_ents.append(name)
                
                try:
                    ent=self.ent.resolve_ge(name)
                    if ent.is_internal():
                        # Doing all this here sucks a bit, but...
                        self.push_entity(self.get_current_sysid(),\
                                         ent.value,name)

                        self.final=1 # Only one block

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -