sgmlparser.tcl
来自「Linux下的MSN聊天程序源码」· TCL 代码 · 共 2,131 行 · 第 1/5 页
TCL
2,131 行
# Options:# -empty boolean# indicates whether the element was an empty element## Results:# Modify state and invoke callbackproc sgml::ParseEvent:ElementOpen {tag attr opts args} { variable Name variable Wsp array set options $opts upvar #0 $options(-statevariable) state array set cfg {-empty 0} array set cfg $args set handleEmpty 0 if {$options(-normalize)} { set tag [string toupper $tag] } # Update state lappend state(stack) $tag # Parse attribute list into a key-value representation if {[string compare $options(-parseattributelistcommand) {}]} { if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} { if {[string compare [lindex $attr 0] "unterminated attribute value"]} { uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] set attr {} } else { # It is most likely that a ">" character was in an attribute value. # This manifests itself by ">" appearing in the element's text. # In this case the callback should return a three element list; # the message "unterminated attribute value", the attribute list it # did manage to parse and the remainder of the attribute list. foreach {msg attlist brokenattr} $attr break upvar text elemText if {[string first > $elemText] >= 0} { # Now piece the attribute list back together regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist # Gotcha: watch out for empty element syntax if {[string match */ [string trimright $remattlist]]} { set remattlist [string range $remattlist 0 end-1] set handleEmpty 1 set cfg(-empty) 1 } append attvalue >$remattvalue lappend attlist $attname $attvalue # Complete parsing the attribute list if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} { uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] set attr {} set attlist {} } else { eval lappend attlist $attr } set attr $attlist } else { uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] set attr {} } } } } set empty {} if {$cfg(-empty) && $options(-reportempty)} { set empty {-empty 1} } # Check for namespace declarations upvar #0 $options(namespaces) namespaces set nsdecls {} if {[llength $attr]} { array set attrlist $attr foreach {attrName attrValue} [array get attrlist xmlns*] { unset attrlist($attrName) set colon [set prefix {}] if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} { switch -glob [string length $colon],[string length $prefix] { 0,0 { # default NS declaration lappend state(defaultNSURI) $attrValue lappend state(defaultNS) [llength $state(stack)] lappend nsdecls $attrValue {} } 0,* { # Huh? } *,0 { # Error uplevel #0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\"" } default { set namespaces($prefix,[llength $state(stack)]) $attrValue lappend nsdecls $attrValue $prefix } } } } if {[llength $nsdecls]} { set nsdecls [list -namespacedecls $nsdecls] } set attr [array get attrlist] } # Check whether this element has an expanded name set ns {} if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} { set nsspec [lsort -dictionary -decreasing [array names namespaces $prefix,*]] if {[llength $nsspec]} { set nsuri $namespaces([lindex $nsspec 0]) set ns [list -namespace $nsuri] } else { uplevel #0 $options(-errorcommand) [list namespaceundeclared "no namespace declared for prefix \"$prefix\" in element $tag"] } } elseif {[llength $state(defaultNSURI)]} { set ns [list -namespace [lindex $state(defaultNSURI) end]] } # Invoke callback set code [catch {uplevel #0 $options(-elementstartcommand) [list $tag $attr] $empty $ns $nsdecls} msg] # Sometimes empty elements must be handled here (see above) if {$code == 0 && $handleEmpty} { ParseEvent:ElementClose $tag $opts -empty 1 } return -code $code -errorinfo $::errorInfo $msg}# sgml::ParseEvent:ElementClose --## End of an element.## Arguments:# tag Element name# opts Options# args further configuration options## Options:# -empty boolean# indicates whether the element as an empty element## Results:# Modify state and invoke callbackproc sgml::ParseEvent:ElementClose {tag opts args} { array set options $opts upvar #0 $options(-statevariable) state array set cfg {-empty 0} array set cfg $args # WF check if {[string compare $tag [lindex $state(stack) end]]} { uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"] return } # Check whether this element has an expanded name upvar #0 $options(namespaces) namespaces set ns {} if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} { set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0]) set ns [list -namespace $nsuri] } elseif {[llength $state(defaultNSURI)]} { set ns [list -namespace [lindex $state(defaultNSURI) end]] } # Pop namespace stacks, if any if {[llength $state(defaultNS)]} { if {[llength $state(stack)] == [lindex $state(defaultNS) end]} { set state(defaultNS) [lreplace $state(defaultNS) end end] } } foreach nsspec [array names namespaces *,[llength $state(stack)]] { unset namespaces($nsspec) } # Update state set state(stack) [lreplace $state(stack) end end] set empty {} if {$cfg(-empty) && $options(-reportempty)} { set empty {-empty 1} } # Invoke callback # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback. set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg] return -code $code -errorinfo $::errorInfo $msg}# sgml::PCDATA --## Process PCDATA before passing to application## Arguments:# opts options# pcdata Character data to be processed## Results:# Checks that characters are legal,# checks -ignorewhitespace setting.proc sgml::PCDATA {opts pcdata} { array set options $opts if {$options(-ignorewhitespace) && \ ![string length [string trim $pcdata]]} { return {} } if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} { upvar \#0 $options(-statevariable) state uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"] } uplevel \#0 $options(-characterdatacommand) [list $pcdata]}# sgml::Normalize --## Perform name normalization if required## Arguments:# name name to normalize# req normalization required## Results:# Name returned as upper-case if normalization requiredproc sgml::Normalize {name req} { if {$req} { return [string toupper $name] } else { return $name }}# sgml::Entity --## Resolve XML entity references (syntax: &xxx;).## Arguments:# opts options# entityrefcmd application callback for entity references# pcdatacmd application callback for character data# entities name of array containing entity definitions.# ref entity reference (the "xxx" bit)## Results:# Returns substitution text for given entity.proc sgml::Entity {opts entityrefcmd pcdatacmd entities ref} { array set options $opts upvar #0 $options(-statevariable) state if {![string length $entities]} { set entities [namespace current]::EntityPredef } switch -glob -- $ref { %* { # Parameter entity - not recognised outside of a DTD } #x* { # Character entity - hex if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} { return -code error "malformed character entity \"$ref\"" } uplevel #0 $pcdatacmd [list $char] return {} } #* { # Character entity - decimal if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} { return -code error "malformed character entity \"$ref\"" } uplevel #0 $pcdatacmd [list $char] return {} } default { # General entity upvar #0 $entities map if {[info exists map($ref)]} { if {![regexp {<|&} $map($ref)]} { # Simple text replacement - optimise uplevel #0 $pcdatacmd [list $map($ref)] return {} } # Otherwise an additional round of parsing is required. # This only applies to XML, since HTML doesn't have general entities # Must parse the replacement text for start & end tags, etc # This text must be self-contained: balanced closing tags, and so on set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr] set options(-final) 0 eval parseEvent [list $tokenised] [array get options] return {} } elseif {[string compare $entityrefcmd "::sgml::noop"]} { set result [uplevel #0 $entityrefcmd [list $ref]] if {[string length $result]} { uplevel #0 $pcdatacmd [list $result] } return {} } else { # Reconstitute entity reference uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""] return {} } } } # If all else fails leave the entity reference untouched uplevel #0 $pcdatacmd [list &$ref\;] return {}}###################################### DTD parser for SGML (XML).## This DTD actually only handles XML DTDs. Other language's# DTD's, such as HTML, must be written in terms of a XML DTD.###################################### sgml::ParseEvent:DocTypeDecl --## Entry point for DTD parsing## Arguments:# opts configuration options# docEl document element name# pubId public identifier# sysId system identifier (a URI)# intSSet internal DTD subsetproc sgml::ParseEvent:DocTypeDecl {opts docEl pubId sysId intSSet} { array set options {} array set options $opts set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err] switch $code { 3 { # break return {} } 0 - 4 { # continue } default { return -code $code $err } } # Otherwise we'll parse the DTD and report it piecemeal # The internal DTD subset is processed first (XML 2.8) # During this stage, parameter entities are only allowed # between markup declarations ParseDTD:Internal [array get options] $intSSet # The external DTD subset is processed last (XML 2.8) # During this stage, parameter entities may occur anywhere # We must resolve the external identifier to obtain the # DTD data. The application may supply its own resolver. if {[string length $pubId] || [string length $sysId]} { uplevel #0 $options(-externalentitycommand) [list $options(-cmd) $options(-baseuri) $sysId $pubId] } return {}}# sgml::ParseDTD:Internal --## Parse the internal DTD subset.## Parameter entities are only allowed between markup declarations.## Arguments:# opts configuration options# dtd DTD data## Results:# Markup declarations parsed may cause callback invocationproc sgml::ParseDTD:Internal {opts dtd} { variable MarkupDeclExpr
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?