sgmlparser.tcl
来自「Linux下的MSN聊天程序源码」· TCL 代码 · 共 2,131 行 · 第 1/5 页
TCL
2,131 行
# default: normal mode # Bug: if the attribute list has a right angle bracket then the empty # element marker will not be seen set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]] switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty { 0,0,, { # Ignore empty tag - dealt with non-normal mode above } *,0,, { # Start tag for an element. # Check if the internal DTD entity is in an attribute value regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg] set state(haveDocElement) 1 switch $code { 0 {# OK} 3 { # break return {} } 4 { # continue # Remember this tag and look for its close set state(continue:tag) $tag set state(continue:level) 1 set state(mode) continue continue } default { return -code $code -errorinfo $::errorInfo $msg } } } *,0,/, { # End tag for an element. set code [catch {ParseEvent:ElementClose $tag [array get options]} msg] switch $code { 0 {# OK} 3 { # break return {} } 4 { # continue # skip sibling nodes set state(continue:tag) [lindex $state(stack) end] set state(continue:level) 1 set state(mode) continue continue } default { return -code $code -errorinfo $::errorInfo $msg } } } *,0,,/ { # Empty element # The trailing slash sneaks through into the param variable regsub -all /[cl $::sgml::Wsp]*\$ $param {} param set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg] set state(haveDocElement) 1 switch $code { 0 {# OK} 3 { # break return {} } 4 { # continue # Pretty useless since it closes straightaway } default { return -code $code -errorinfo $::errorInfo $msg } } set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg] switch $code { 0 {# OK} 3 { # break return {} } 4 { # continue # skip sibling nodes set state(continue:tag) [lindex $state(stack) end] set state(continue:level) 1 set state(mode) continue continue } default { return -code $code -errorinfo $::errorInfo $msg } } } *,1,* { # Processing instructions or XML declaration switch -glob -- $tag { {\?xml} { # XML Declaration if {$state(haveXMLDecl)} { uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"] } elseif {![regexp {\?$} $param]} { uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"] } else { # We can do the parsing in one step with Tcl 8.1 RE's # This has the benefit of performing better WF checking set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} { # Otherwise we must fallback to 8.0. # This won't detect certain well-formedness errors # Get the version number if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} { if {[string compare $version "1.0"]} { # Should we support future versions? # At least 1.X? uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"] } } else { uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"] } # Get the encoding declaration set encoding {} regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding # Get the standalone declaration set standalone {} regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone # Invoke the callback uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone] } elseif {$matches == 0} { uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"] } else { # Invoke the callback uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone] } } } {\?*} { # Processing instruction set tag [string range $tag 1 end] if {[regsub {\?$} $tag {} tag]} { if {[string length [string trim $param]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"] } } elseif {![regexp ^$Name\$ $tag]} { uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""] } elseif {[regexp {[xX][mM][lL]} $tag]} { uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""] } elseif {![regsub {\?$} $param {} param]} { uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"] } set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg] switch $code { 0 {# OK} 3 { # break return {} } 4 { # continue # skip sibling nodes set state(continue:tag) [lindex $state(stack) end] set state(continue:level) 1 set state(mode) continue continue } default { return -code $code -errorinfo $::errorInfo $msg } } } !DOCTYPE { # External entity reference # This should move into xml.tcl # Parse the params supplied. Looking for Name, ExternalID and MarkupDecl set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param] set state(doc_name) [Normalize $state(doc_name) $options(-normalize)] set externalID {} set pubidlit {} set systemlit {} set externalID {} if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} { switch [string toupper $id] { SYSTEM { if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} { set externalID [list SYSTEM $systemlit] ;# " } else { uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}} } } PUBLIC { if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} { if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} { set externalID [list PUBLIC $pubidlit $systemlit] } else { uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"] } } else { uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"] } } } if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} { lappend externalID $notation } } set state(inDTD) 1 ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd) set state(inDTD) 0 } !--* { # Start of a comment # See if it ends in the same tag, otherwise change the # parsing mode regexp {!--(.*)} $tag discard comm1 if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} { # processed comment (end in tag) uplevel #0 $options(-commentcommand) [list $comm1_1] } elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} { # processed comment (end in attributes) uplevel #0 $options(-commentcommand) [list $comm1$comm2] } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} { # processed comment (end in text) uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2] } else { # start of comment set state(mode) comment set state(commentdata) "$comm1$param$empty>$text" continue } } {!\[CDATA\[*} { regexp {!\[CDATA\[(.*)} $tag discard cdata1 if {[regexp {(.*)]]$} $cdata1 discard cdata2]} { # processed CDATA (end in tag) PCDATA [array get options] [subst -novariable -nocommand $cdata2] set text [subst -novariable -nocommand $text] } elseif {[regexp {(.*)]]$} $param discard cdata2]} { # processed CDATA (end in attribute) # Backslashes in param are quoted at this stage PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2] set text [subst -novariable -nocommand $text] } elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} { # processed CDATA (end in text) # Backslashes in param and text are quoted at this stage PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2] set text [subst -novariable -nocommand $text] } else { # start CDATA set state(cdata) "$cdata1$param>$text" set state(mode) cdata continue } } !ELEMENT - !ATTLIST - !ENTITY - !NOTATION { uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"] } default { uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"] } } } *,1,* - *,0,/,/ { # Syntax error uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"] } } # Process character data if {$state(haveDocElement) && [llength $state(stack)]} { # Check if the internal DTD entity is in the text regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text # Look for entity references if {([array size entities] || \ [string length $options(-entityreferencecommand)]) && \ $options(-defaultexpandinternalentities) && \ [regexp {&[^;]+;} $text]} { # protect Tcl specials # NB. braces and backslashes may already be protected regsub -all {\\({|}|\\)} $text {\1} text regsub -all {([][$\\{}])} $text {\\\1} text # Mark entity references regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}" eval $text } else { # Restore protected special characters regsub -all {\\([][{}\\])} $text {\1} text PCDATA [array get options] $text } } elseif {[string length [string trim $text]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"] } } # If this is the end of the document, close all open containers if {$options(-final) && [llength $state(stack)]} { eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"] } return {}}# sgml::DeProtect --## Invoke given command after removing protecting backslashes# from given text.## Arguments:# cmd Command to invoke# text Text to deprotect## Results:# Depends on commandproc sgml::DeProtect1 {cmd text} { if {[string compare {} $text]} { regsub -all {\\([]$[{}\\])} $text {\1} text uplevel #0 $cmd [list $text] }}proc sgml::DeProtect {cmd text} { set text [lindex $text 0] if {[string compare {} $text]} { regsub -all {\\([]$[{}\\])} $text {\1} text uplevel #0 $cmd [list $text] }}# sgml::ParserDelete --## Free all memory associated with parser## Arguments:# var global state array## Results:# Variables unsetproc sgml::ParserDelete var { upvar #0 $var state if {![info exists state]} { return -code error "unknown parser" } catch {unset $state(entities)} catch {unset $state(parameterentities)} catch {unset $state(elementdecls)} catch {unset $state(attlistdecls)} catch {unset $state(notationdecls)} catch {unset $state(namespaces)} unset state return {}}# sgml::ParseEvent:ElementOpen --## Start of an element.## Arguments:# tag Element name# attr Attribute list# opts Options# args further configuration options#
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?