sgmlparser.tcl

来自「Linux下的MSN聊天程序源码」· TCL 代码 · 共 2,131 行 · 第 1/5 页

TCL
2,131
字号
# Options:#	-empty boolean#		indicates whether the element was an empty element## Results:#	Modify state and invoke callbackproc sgml::ParseEvent:ElementOpen {tag attr opts args} {    variable Name    variable Wsp    array set options $opts    upvar #0 $options(-statevariable) state    array set cfg {-empty 0}    array set cfg $args    set handleEmpty 0    if {$options(-normalize)} {	set tag [string toupper $tag]    }    # Update state    lappend state(stack) $tag    # Parse attribute list into a key-value representation    if {[string compare $options(-parseattributelistcommand) {}]} {	if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} {	    if {[string compare [lindex $attr 0] "unterminated attribute value"]} {		uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]		set attr {}	    } else {		# It is most likely that a ">" character was in an attribute value.		# This manifests itself by ">" appearing in the element's text.		# In this case the callback should return a three element list;		# the message "unterminated attribute value", the attribute list it		# did manage to parse and the remainder of the attribute list.		foreach {msg attlist brokenattr} $attr break		upvar text elemText		if {[string first > $elemText] >= 0} {		    # Now piece the attribute list back together		    regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue		    regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText		    regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist		    # Gotcha: watch out for empty element syntax		    if {[string match */ [string trimright $remattlist]]} {			set remattlist [string range $remattlist 0 end-1]			set handleEmpty 1			set cfg(-empty) 1		    }		    append attvalue >$remattvalue		    lappend attlist $attname $attvalue		    # Complete parsing the attribute list		    if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} {			uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]			set attr {}			set attlist {}		    } else {			eval lappend attlist $attr		    }		    set attr $attlist		} else {		    uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]		    set attr {}		}	    }	}    }    set empty {}    if {$cfg(-empty) && $options(-reportempty)} {	set empty {-empty 1}    }    # Check for namespace declarations    upvar #0 $options(namespaces) namespaces    set nsdecls {}    if {[llength $attr]} {	array set attrlist $attr	foreach {attrName attrValue} [array get attrlist xmlns*] {	    unset attrlist($attrName)	    set colon [set prefix {}]	    if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} {		switch -glob [string length $colon],[string length $prefix] {		    0,0 {			# default NS declaration			lappend state(defaultNSURI) $attrValue			lappend state(defaultNS) [llength $state(stack)]			lappend nsdecls $attrValue {}		    }		    0,* {			# Huh?		    }		    *,0 {			# Error			uplevel #0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\""		    }		    default {			set namespaces($prefix,[llength $state(stack)]) $attrValue			lappend nsdecls $attrValue $prefix		    }		}	    }	}	if {[llength $nsdecls]} {	    set nsdecls [list -namespacedecls $nsdecls]	}	set attr [array get attrlist]    }    # Check whether this element has an expanded name    set ns {}    if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {	set nsspec [lsort -dictionary -decreasing [array names namespaces $prefix,*]]	if {[llength $nsspec]} {	    set nsuri $namespaces([lindex $nsspec 0])	    set ns [list -namespace $nsuri]	} else {	    uplevel #0 $options(-errorcommand) [list namespaceundeclared "no namespace declared for prefix \"$prefix\" in element $tag"]	}    } elseif {[llength $state(defaultNSURI)]} {	set ns [list -namespace [lindex $state(defaultNSURI) end]]    }    # Invoke callback    set code [catch {uplevel #0 $options(-elementstartcommand) [list $tag $attr] $empty $ns $nsdecls} msg]    # Sometimes empty elements must be handled here (see above)    if {$code == 0 && $handleEmpty} {	ParseEvent:ElementClose $tag $opts -empty 1    }    return -code $code -errorinfo $::errorInfo $msg}# sgml::ParseEvent:ElementClose --##	End of an element.## Arguments:#	tag	Element name#	opts	Options#	args	further configuration options## Options:#	-empty boolean#		indicates whether the element as an empty element## Results:#	Modify state and invoke callbackproc sgml::ParseEvent:ElementClose {tag opts args} {    array set options $opts    upvar #0 $options(-statevariable) state    array set cfg {-empty 0}    array set cfg $args    # WF check    if {[string compare $tag [lindex $state(stack) end]]} {	uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"]	return    }    # Check whether this element has an expanded name    upvar #0 $options(namespaces) namespaces    set ns {}    if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {	set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0])	set ns [list -namespace $nsuri]    } elseif {[llength $state(defaultNSURI)]} {	set ns [list -namespace [lindex $state(defaultNSURI) end]]    }    # Pop namespace stacks, if any    if {[llength $state(defaultNS)]} {	if {[llength $state(stack)] == [lindex $state(defaultNS) end]} {	    set state(defaultNS) [lreplace $state(defaultNS) end end]	}    }    foreach nsspec [array names namespaces *,[llength $state(stack)]] {	unset namespaces($nsspec)    }    # Update state    set state(stack) [lreplace $state(stack) end end]    set empty {}    if {$cfg(-empty) && $options(-reportempty)} {	set empty {-empty 1}    }    # Invoke callback    # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback.    set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg]    return -code $code -errorinfo $::errorInfo $msg}# sgml::PCDATA --##	Process PCDATA before passing to application## Arguments:#	opts	options#	pcdata	Character data to be processed## Results:#	Checks that characters are legal,#	checks -ignorewhitespace setting.proc sgml::PCDATA {opts pcdata} {    array set options $opts    if {$options(-ignorewhitespace) && \	    ![string length [string trim $pcdata]]} {	return {}    }    if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} {	upvar \#0 $options(-statevariable) state	uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"]    }    uplevel \#0 $options(-characterdatacommand) [list $pcdata]}# sgml::Normalize --##	Perform name normalization if required## Arguments:#	name	name to normalize#	req	normalization required## Results:#	Name returned as upper-case if normalization requiredproc sgml::Normalize {name req} {    if {$req} {	return [string toupper $name]    } else {	return $name    }}# sgml::Entity --##	Resolve XML entity references (syntax: &xxx;).## Arguments:#	opts		options#	entityrefcmd	application callback for entity references#	pcdatacmd	application callback for character data#	entities	name of array containing entity definitions.#	ref		entity reference (the "xxx" bit)## Results:#	Returns substitution text for given entity.proc sgml::Entity {opts entityrefcmd pcdatacmd entities ref} {    array set options $opts    upvar #0 $options(-statevariable) state    if {![string length $entities]} {	set entities [namespace current]::EntityPredef    }    switch -glob -- $ref {	%* {	    # Parameter entity - not recognised outside of a DTD	}	#x* {	    # Character entity - hex	    if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} {		return -code error "malformed character entity \"$ref\""	    }	    uplevel #0 $pcdatacmd [list $char]	    return {}	}	#* {	    # Character entity - decimal	    if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} {		return -code error "malformed character entity \"$ref\""	    }	    uplevel #0 $pcdatacmd [list $char]	    return {}	}	default {	    # General entity	    upvar #0 $entities map	    if {[info exists map($ref)]} {		if {![regexp {<|&} $map($ref)]} {		    # Simple text replacement - optimise		    uplevel #0 $pcdatacmd [list $map($ref)]		    return {}		}		# Otherwise an additional round of parsing is required.		# This only applies to XML, since HTML doesn't have general entities		# Must parse the replacement text for start & end tags, etc		# This text must be self-contained: balanced closing tags, and so on		set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr]		set options(-final) 0		eval parseEvent [list $tokenised] [array get options]		return {}	    } elseif {[string compare $entityrefcmd "::sgml::noop"]} {		set result [uplevel #0 $entityrefcmd [list $ref]]		if {[string length $result]} {		    uplevel #0 $pcdatacmd [list $result]		}		return {}	    } else {		# Reconstitute entity reference		uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""]		return {}	    }	}    }    # If all else fails leave the entity reference untouched    uplevel #0 $pcdatacmd [list &$ref\;]    return {}}###################################### DTD parser for SGML (XML).## This DTD actually only handles XML DTDs.  Other language's# DTD's, such as HTML, must be written in terms of a XML DTD.###################################### sgml::ParseEvent:DocTypeDecl --##	Entry point for DTD parsing## Arguments:#	opts	configuration options#	docEl	document element name#	pubId	public identifier#	sysId	system identifier (a URI)#	intSSet	internal DTD subsetproc sgml::ParseEvent:DocTypeDecl {opts docEl pubId sysId intSSet} {    array set options {}    array set options $opts    set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err]    switch $code {	3 {	    # break	    return {}	}	0 -	4 {	    # continue	}	default {	    return -code $code $err	}    }    # Otherwise we'll parse the DTD and report it piecemeal    # The internal DTD subset is processed first (XML 2.8)    # During this stage, parameter entities are only allowed    # between markup declarations    ParseDTD:Internal [array get options] $intSSet    # The external DTD subset is processed last (XML 2.8)    # During this stage, parameter entities may occur anywhere    # We must resolve the external identifier to obtain the    # DTD data.  The application may supply its own resolver.    if {[string length $pubId] || [string length $sysId]} {	uplevel #0 $options(-externalentitycommand) [list $options(-cmd) $options(-baseuri) $sysId $pubId]    }    return {}}# sgml::ParseDTD:Internal --##	Parse the internal DTD subset.##	Parameter entities are only allowed between markup declarations.## Arguments:#	opts	configuration options#	dtd	DTD data## Results:#	Markup declarations parsed may cause callback invocationproc sgml::ParseDTD:Internal {opts dtd} {    variable MarkupDeclExpr

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?