sgmlparser.tcl
来自「Linux下的MSN聊天程序源码」· TCL 代码 · 共 2,131 行 · 第 1/5 页
TCL
2,131 行
variable MarkupDeclSub array set options {} array set options $opts upvar #0 $options(-statevariable) state upvar #0 $options(parameterentities) PEnts upvar #0 $options(externalparameterentities) ExtPEnts # Bug 583947: remove comments before further processing regsub -all {<!--.*?-->} $dtd {} dtd # Tokenize the DTD # Protect Tcl special characters regsub -all {([{}\\])} $dtd {\\\1} dtd regsub -all $MarkupDeclExpr $dtd $MarkupDeclSub dtd # Entities may have angle brackets in their replacement # text, which breaks the RE processing. So, we must # use a similar technique to processing doc instances # to rebuild the declarations from the pieces set mode {} ;# normal set delimiter {} set name {} set param {} set state(inInternalDTD) 1 # Process the tokens foreach {decl value text} [lrange "{} {} \{$dtd\}" 3 end] { # Keep track of line numbers incr state(line) [regsub -all \n $text {} discard] ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode replText text param # There may be parameter entity references between markup decls if {[regexp {%.*;} $text]} { # Protect Tcl special characters regsub -all {([{}\\])} $text {\\\1} text regsub -all %($::sgml::Name)\; $text "\} {\\1} \{" text set PElist "\{$text\}" set PElist [lreplace $PElist end end] foreach {text entref} $PElist { if {[string length [string trim $text]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text in internal DTD subset around line $state(line)"] } # Expand parameter entity and recursively parse # BUG: no checks yet for recursive entity references if {[info exists PEnts($entref)]} { set externalParser [$options(-cmd) entityparser] $externalParser parse $PEnts($entref) -dtdsubset internal } elseif {[info exists ExtPEnts($entref)]} { set externalParser [$options(-cmd) entityparser] $externalParser parse $ExtPEnts($entref) -dtdsubset external #$externalParser free } else { uplevel #0 $options(-errorcommand) [list illegalreference "reference to undeclared parameter entity \"$entref\""] } } } } return {}}# sgml::ParseDTD:EntityMode --## Perform special processing for various parser modes## Arguments:# opts configuration options# modeVar pass-by-reference mode variable# replTextVar pass-by-ref# declVar pass-by-ref# valueVar pass-by-ref# textVar pass-by-ref# delimiter delimiter currently in force# name# param## Results:# Depends on current modeproc sgml::ParseDTD:EntityMode {opts modeVar replTextVar declVar valueVar textVar delimiter name param} { upvar 1 $modeVar mode upvar 1 $replTextVar replText upvar 1 $declVar decl upvar 1 $valueVar value upvar 1 $textVar text array set options $opts switch $mode { {} { # Pass through to normal processing section } entity { # Look for closing delimiter if {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $decl discard val1 remainder]} { append replText <$val1 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter set decl / set text $remainder\ $value>$text set value {} set mode {} } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $value discard val2 remainder]} { append replText <$decl\ $val2 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter set decl / set text $remainder>$text set value {} set mode {} } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $text discard val3 remainder]} { append replText <$decl\ $value>$val3 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter set decl / set text $remainder set value {} set mode {} } else { # Remain in entity mode append replText <$decl\ $value>$text return -code continue } } ignore { upvar #0 $options(-statevariable) state if {[regexp {]](.*)$} $decl discard remainder]} { set state(condSections) [lreplace $state(condSections) end end] set decl $remainder set mode {} } elseif {[regexp {]](.*)$} $value discard remainder]} { set state(condSections) [lreplace $state(condSections) end end] regexp <[cl $::sgml::Wsp]*($::sgml::Name)(.*) $remainder discard decl value set mode {} } elseif {[regexp {]]>(.*)$} $text discard remainder]} { set state(condSections) [lreplace $state(condSections) end end] set decl / set value {} set text $remainder #regexp <[cl $::sgml::Wsp]*($::sgml::Name)([cl ^>]*)>(.*) $remainder discard decl value text set mode {} } else { set decl / } } comment { # Look for closing comment delimiter upvar #0 $options(-statevariable) state if {[regexp (.*?)--(.*)\$ $decl discard data1 remainder]} { } elseif {[regexp (.*?)--(.*)\$ $value discard data1 remainder]} { } elseif {[regexp (.*?)--(.*)\$ $text discard data1 remainder]} { } else { # comment continues append state(commentdata) <$decl\ $value>$text set decl / set value {} set text {} } } } return {}}# sgml::ParseDTD:ProcessMarkupDecl --## Process a single markup declaration## Arguments:# opts configuration options# declVar pass-by-ref# valueVar pass-by-ref# delimiterVar pass-by-ref for current delimiter in force# nameVar pass-by-ref# modeVar pass-by-ref for current parser mode# replTextVar pass-by-ref# textVar pass-by-ref# paramVar pass-by-ref## Results:# Depends on markup declaration. May change parser modeproc sgml::ParseDTD:ProcessMarkupDecl {opts declVar valueVar delimiterVar nameVar modeVar replTextVar textVar paramVar} { upvar 1 $modeVar mode upvar 1 $replTextVar replText upvar 1 $textVar text upvar 1 $declVar decl upvar 1 $valueVar value upvar 1 $nameVar name upvar 1 $delimiterVar delimiter upvar 1 $paramVar param variable declExpr variable ExternalEntityExpr array set options $opts upvar #0 $options(-statevariable) state switch -glob -- $decl { / { # continuation from entity processing } !ELEMENT { # Element declaration if {[regexp $declExpr $value discard tag cmodel]} { DTD:ELEMENT [array get options] $tag $cmodel } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed element declaration around line $state(line)"] } } !ATTLIST { # Attribute list declaration variable declExpr if {[regexp $declExpr $value discard tag attdefns]} { if {[catch {DTD:ATTLIST [array get options] $tag $attdefns} err]} { #puts stderr "Stack trace: $::errorInfo\n***\n" # Atttribute parsing has bugs at the moment #return -code error "$err around line $state(line)" return {} } } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed attribute list declaration around line $state(line)"] } } !ENTITY { # Entity declaration variable EntityExpr if {[regexp $EntityExpr $value discard param name value]} { # Entity replacement text may have a '>' character. # In this case, the real delimiter will be in the following # text. This is complicated by the possibility of there # being several '<','>' pairs in the replacement text. # At this point, we are searching for the matching quote delimiter. if {[regexp $ExternalEntityExpr $value]} { DTD:ENTITY [array get options] $name [string trim $param] $value } elseif {[regexp ("|')(.*?)\\1(.*) $value discard delimiter replText value]} { if {[string length [string trim $value]]} { uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] } else { DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter } } elseif {[regexp ("|')(.*) $value discard delimiter replText]} { append replText >$text set text {} set mode entity } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "no delimiter for entity declaration around line $state(line)"] } } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] } } !NOTATION { # Notation declaration if {[regexp $declExpr param discard tag notation]} { DTD:ENTITY [array get options] $tag $notation } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] } } !--* { # Start of a comment if {[regexp !--(.*?)--\$ $decl discard data]} { if {[string length [string trim $value]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$value\""] } uplevel #0 $options(-commentcommand) [list $data] set decl / set value {} } elseif {[regexp -- ^(.*?)--\$ $value discard data2]} { regexp !--(.*)\$ $decl discard data1 uplevel #0 $options(-commentcommand) [list $data1\ $data2] set decl / set value {} } elseif {[regexp (.*?)-->(.*)\$ $text discard data3 remainder]} { regexp !--(.*)\$ $decl discard data1 uplevel #0 $options(-commentcommand) [list $data1\ $value>$data3] set decl / set value {} set text $remainder } else { regexp !--(.*)\$ $decl discard data1 set state(commentdata) $data1\ $value>$text set decl / set value {} set text {} set mode comment } } !*INCLUDE* - !*IGNORE* { if {$state(inInternalDTD)} { uplevel #0 $options(-errorcommand) [list illegalsection "conditional section not permitted in internal DTD subset around line $state(line)"] } if {[regexp {^!\[INCLUDE\[(.*)} $decl discard remainder]} { # Push conditional section stack, popped by ]]> sequence if {[regexp {(.*?)]]$} $remainder discard r2]} { # section closed immediately if {[string length [string trim $r2]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] } } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} { # section closed immediately if {[string length [string trim $r2]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] } if {[string length [string trim $r3]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"] } } else { lappend state(condSections) INCLUDE set parser [$options(-cmd) entityparser] $parser parse $remainder\ $value> -dtdsubset external #$parser free if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} { if {[string length [string trim $t1]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] } if {![llength $state(condSections)]} { uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] } set state(condSections) [lreplace $state(condSections) end end] set text $t2 } } } elseif {[regexp {^!\[IGNORE\[(.*)} $decl discard remainder]} { # Set ignore mode. Still need a stack set mode ignore if {[regexp {(.*?)]]$} $remainder discard r2]} { # section closed immediately if {[string length [string trim $r2]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] } } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} { # section closed immediately if {[string length [string trim $r2]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] } if {[string length [string trim $r3]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"] } } else { lappend state(condSections) IGNORE if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} { if {[string length [string trim $t1]]} { uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] } if {![llength $state(condSections)]} { uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] } set state(condSections) [lreplace $state(condSections) end end] set text $t2 } } } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\" around line $state(line)"] } } default { if {[regexp {^\?(.*)} $decl discard target]} { # Processing instruction } else { uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\""] } } } return {}}# sgml::ParseDTD:External --## Parse the external DTD subset.## Parameter entities are allowed anywhere.## Arguments:# opts configuration options# dtd DTD data
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?