sgmlparser.tcl

来自「Linux下的MSN聊天程序源码」· TCL 代码 · 共 2,131 行 · 第 1/5 页

TCL
2,131
字号
	# default: normal mode	# Bug: if the attribute list has a right angle bracket then the empty	# element marker will not be seen	set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]	switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty {	    0,0,, {		# Ignore empty tag - dealt with non-normal mode above	    }	    *,0,, {		# Start tag for an element.		# Check if the internal DTD entity is in an attribute value		regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param		set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg]		set state(haveDocElement) 1		switch $code {		    0 {# OK}		    3 {			# break			return {}		    }		    4 {			# continue			# Remember this tag and look for its close			set state(continue:tag) $tag			set state(continue:level) 1			set state(mode) continue			continue		    }		    default {			return -code $code -errorinfo $::errorInfo $msg		    }		}	    }	    *,0,/, {		# End tag for an element.		set code [catch {ParseEvent:ElementClose $tag [array get options]} msg]		switch $code {		    0 {# OK}		    3 {			# break			return {}		    }		    4 {			# continue			# skip sibling nodes			set state(continue:tag) [lindex $state(stack) end]			set state(continue:level) 1			set state(mode) continue			continue		    }		    default {			return -code $code -errorinfo $::errorInfo $msg		    }		}	    }	    *,0,,/ {		# Empty element		# The trailing slash sneaks through into the param variable		regsub -all /[cl $::sgml::Wsp]*\$ $param {} param		set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg]		set state(haveDocElement) 1		switch $code {		    0 {# OK}		    3 {			# break			return {}		    }		    4 {			# continue			# Pretty useless since it closes straightaway		    }		    default {			return -code $code -errorinfo $::errorInfo $msg		    }		}		set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg]		switch $code {		    0 {# OK}		    3 {			# break			return {}		    }		    4 {			# continue			# skip sibling nodes			set state(continue:tag) [lindex $state(stack) end]			set state(continue:level) 1			set state(mode) continue			continue		    }		    default {			return -code $code -errorinfo $::errorInfo $msg		    }		}	    }	    *,1,* {		# Processing instructions or XML declaration		switch -glob -- $tag {		    {\?xml} {			# XML Declaration			if {$state(haveXMLDecl)} {			    uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"]			} elseif {![regexp {\?$} $param]} {			    uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"]			} else {			    # We can do the parsing in one step with Tcl 8.1 RE's			    # This has the benefit of performing better WF checking			    set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp]			    if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} {				# Otherwise we must fallback to 8.0.				# This won't detect certain well-formedness errors				# Get the version number				if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} {				    if {[string compare $version "1.0"]} {					# Should we support future versions?					# At least 1.X?					uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"]				    }				} else {				    uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"]				}				# Get the encoding declaration				set encoding {}				regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding				regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding				# Get the standalone declaration				set standalone {}				regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone				regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone				# Invoke the callback				uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]			    } elseif {$matches == 0} {				uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"]			    } else {				# Invoke the callback				uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]			    }			}		    }		    {\?*} {			# Processing instruction			set tag [string range $tag 1 end]			if {[regsub {\?$} $tag {} tag]} {			    if {[string length [string trim $param]]} {				uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"]			    }			} elseif {![regexp ^$Name\$ $tag]} {			    uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""]			} elseif {[regexp {[xX][mM][lL]} $tag]} {			    uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""]			} elseif {![regsub {\?$} $param {} param]} {			    uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"]			}			set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg]			switch $code {			    0 {# OK}			    3 {				# break				return {}			    }			    4 {				# continue				# skip sibling nodes				set state(continue:tag) [lindex $state(stack) end]				set state(continue:level) 1				set state(mode) continue				continue			    }			    default {				return -code $code -errorinfo $::errorInfo $msg			    }			}		    }		    !DOCTYPE {			# External entity reference			# This should move into xml.tcl			# Parse the params supplied.  Looking for Name, ExternalID and MarkupDecl			set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param]			set state(doc_name) [Normalize $state(doc_name) $options(-normalize)]			set externalID {}			set pubidlit {}			set systemlit {}			set externalID {}			if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} {			    switch [string toupper $id] {				SYSTEM {				    if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {					set externalID [list SYSTEM $systemlit] ;# "				    } else {					uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}}				    }				}				PUBLIC {				    if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} {					if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {					    set externalID [list PUBLIC $pubidlit $systemlit]					} else {					    uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"]					}				    } else {					uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"]				    }				}			    }			    if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} {				lappend externalID $notation			    }			}			set state(inDTD) 1			ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd)			set state(inDTD) 0		    }		    !--* {			# Start of a comment			# See if it ends in the same tag, otherwise change the			# parsing mode			regexp {!--(.*)} $tag discard comm1			if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} {			    # processed comment (end in tag)			    uplevel #0 $options(-commentcommand) [list $comm1_1]			} elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} {			    # processed comment (end in attributes)			    uplevel #0 $options(-commentcommand) [list $comm1$comm2]			} elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} {			    # processed comment (end in text)			    uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2]			} else {			    # start of comment			    set state(mode) comment			    set state(commentdata) "$comm1$param$empty>$text"			    continue			}		    }		    {!\[CDATA\[*} {			regexp {!\[CDATA\[(.*)} $tag discard cdata1			if {[regexp {(.*)]]$} $cdata1 discard cdata2]} {			    # processed CDATA (end in tag)			    PCDATA [array get options] [subst -novariable -nocommand $cdata2]			    set text [subst -novariable -nocommand $text]			} elseif {[regexp {(.*)]]$} $param discard cdata2]} {			    # processed CDATA (end in attribute)			    # Backslashes in param are quoted at this stage			    PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2]			    set text [subst -novariable -nocommand $text]			} elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} {			    # processed CDATA (end in text)			    # Backslashes in param and text are quoted at this stage			    PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2]			    set text [subst -novariable -nocommand $text]			} else {			    # start CDATA			    set state(cdata) "$cdata1$param>$text"			    set state(mode) cdata			    continue			}		    }		    !ELEMENT -		    !ATTLIST -		    !ENTITY -		    !NOTATION {			uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"]		    }		    default {			uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"]		    }		}	    }	    *,1,* -	    *,0,/,/ {		# Syntax error	    	uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"]	    }	}	# Process character data	if {$state(haveDocElement) && [llength $state(stack)]} {	    # Check if the internal DTD entity is in the text	    regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text	    # Look for entity references	    if {([array size entities] || \		    [string length $options(-entityreferencecommand)]) && \		    $options(-defaultexpandinternalentities) && \		    [regexp {&[^;]+;} $text]} {		# protect Tcl specials		# NB. braces and backslashes may already be protected		regsub -all {\\({|}|\\)} $text {\1} text		regsub -all {([][$\\{}])} $text {\\\1} text		# Mark entity references		regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text		set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}"		eval $text	    } else {		# Restore protected special characters		regsub -all {\\([][{}\\])} $text {\1} text		PCDATA [array get options] $text	    }	} elseif {[string length [string trim $text]]} {	    uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"]	}    }    # If this is the end of the document, close all open containers    if {$options(-final) && [llength $state(stack)]} {	eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"]    }    return {}}# sgml::DeProtect --##	Invoke given command after removing protecting backslashes#	from given text.## Arguments:#	cmd	Command to invoke#	text	Text to deprotect## Results:#	Depends on commandproc sgml::DeProtect1 {cmd text} {    if {[string compare {} $text]} {	regsub -all {\\([]$[{}\\])} $text {\1} text	uplevel #0 $cmd [list $text]    }}proc sgml::DeProtect {cmd text} {    set text [lindex $text 0]    if {[string compare {} $text]} {	regsub -all {\\([]$[{}\\])} $text {\1} text	uplevel #0 $cmd [list $text]    }}# sgml::ParserDelete --##	Free all memory associated with parser## Arguments:#	var	global state array## Results:#	Variables unsetproc sgml::ParserDelete var {    upvar #0 $var state    if {![info exists state]} {	return -code error "unknown parser"    }    catch {unset $state(entities)}    catch {unset $state(parameterentities)}    catch {unset $state(elementdecls)}    catch {unset $state(attlistdecls)}    catch {unset $state(notationdecls)}    catch {unset $state(namespaces)}    unset state    return {}}# sgml::ParseEvent:ElementOpen --##	Start of an element.## Arguments:#	tag	Element name#	attr	Attribute list#	opts	Options#	args	further configuration options#

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?