📄 uri.tcl
字号:
# uri.tcl --## URI parsing and fetch## Copyright (c) 2000 Zveno Pty Ltd# Steve Ball, http://www.zveno.com/# Derived from urls.tcl by Andreas Kupries## TODO:# Handle www-url-encoding details## CVS: $Id: uri.tcl 5913 2006-01-23 12:28:30Z tjikkun $package require Tcl 8.2namespace eval ::uri { namespace export split join namespace export resolve isrelative namespace export geturl namespace export canonicalize namespace export register variable file:counter 0 # extend these variable in the coming namespaces variable schemes {} variable schemePattern "" variable url "" variable url2part array set url2part {} # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # basic regular expressions used in URL syntax. namespace eval basic { variable loAlpha {[a-z]} variable hiAlpha {[A-Z]} variable digit {[0-9]} variable alpha {[a-zA-Z]} variable safe {[$_.+-]} variable extra {[!*'(,)]} # danger in next pattern, order important for [] variable national {[][|\}\{\^~`]} variable punctuation {[<>#%"]} ;#" fake emacs hilit variable reserved {[;/?:@&=]} variable hex {[0-9A-Fa-f]} variable alphaDigit {[A-Za-z0-9]} variable alphaDigitMinus {[A-Za-z0-9-]} # next is <national | punctuation> variable unsafe {[][<>"#%\{\}|\\^~`]} ;#" emacs hilit variable escape "%${hex}${hex}" # unreserved = alpha | digit | safe | extra # xchar = unreserved | reserved | escape variable unreserved {[a-zA-Z0-9$_.+!*'(,)-]} variable uChar "(${unreserved}|${escape})" variable xCharN {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]} variable xChar "(${xCharN}|${escape})" variable digits "${digit}+" variable toplabel \ "(${alpha}${alphaDigitMinus}*${alphaDigit}|${alpha})" variable domainlabel \ "(${alphaDigit}${alphaDigitMinus}*${alphaDigit}|${alphaDigit})" variable hostname \ "((${domainlabel}\\.)*${toplabel})" variable hostnumber \ "(${digits}\\.${digits}\\.${digits}\\.${digits})" variable host "(${hostname}|${hostnumber})" variable port $digits variable hostOrPort "${host}(:${port})?" variable usrCharN {[a-zA-Z0-9$_.+!*'(,);?&=-]} variable usrChar "(${usrCharN}|${escape})" variable user "${usrChar}*" variable password $user variable login "(${user}(:${password})?@)?${hostOrPort}" } ;# basic {}}# ::uri::register --## Register a scheme (and aliases) in the package. The command# creates a namespace below "::uri" with the same name as the# scheme and executes the script declaring the pattern variables# for this scheme in the new namespace. At last it updates the# uri variables keeping track of overall scheme information.## The script has to declare at least the variable "schemepart",# the pattern for an url of the registered scheme after the# scheme declaration. Not declaring this variable is an error.## Arguments:# schemeList Name of the scheme to register, plus aliases# script Script declaring the scheme patterns## Results:# None.proc ::uri::register {schemeList script} { variable schemes variable schemePattern variable url variable url2part # Check scheme and its aliases for existence. foreach scheme $schemeList { if {[lsearch -exact $schemes $scheme] >= 0} { return -code error \ "trying to register scheme (\"$scheme\") which is already known" } } # Get the main scheme set scheme [lindex $schemeList 0] if {[catch {namespace eval $scheme $script} msg]} { catch {namespace delete $scheme} return -code error \ "error while evaluating scheme script: $msg" } if {![info exists ${scheme}::schemepart]} { namespace delete $scheme return -code error \ "Variable \"schemepart\" is missing." } # Now we can extend the variables which keep track of the registered schemes. eval [linsert $schemeList 0 lappend schemes] set schemePattern "([::join $schemes |]):" foreach s $schemeList { # FRINK: nocheck set url2part($s) "${s}:[set ${scheme}::schemepart]" # FRINK: nocheck append url "(${s}:[set ${scheme}::schemepart])|" } set url [string trimright $url |] return}# ::uri::split --## Splits the given <a url> into its constituents.## Arguments:# url the URL to split## Results:# Tcl list containing constituents, suitable for 'array set'.proc ::uri::split {url {defaultscheme http}} { set url [string trim $url] set scheme {} # RFC 1738: scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] regexp -- {^([a-z0-9+.-][a-z0-9+.-]*):} $url dummy scheme if {$scheme == {}} { set scheme $defaultscheme } # ease maintenance: dynamic dispatch, able to handle all schemes # added in future! if {[::info procs Split[string totitle $scheme]] == {}} { error "unknown scheme '$scheme' in '$url'" } regsub -- "^${scheme}:" $url {} url set parts(scheme) $scheme array set parts [Split[string totitle $scheme] $url] # should decode all encoded characters! return [array get parts]}proc ::uri::SplitFtp {url} { # @c Splits the given ftp-<a url> into its constituents. # @a url: The url to split, without! scheme specification. # @r List containing the constituents, suitable for 'array set'. # general syntax: # //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode> # # additional rules: # # <user>:<password> are optional, detectable by presence of @. # <password> is optional too. # # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/" # <cwd1> "/" ..."/" <cwdN> "/" <name> [";type=" <typecode>] upvar \#0 [namespace current]::ftp::typepart ftptype array set parts {user {} pwd {} host {} port {} path {} type {}} # slash off possible type specification if {[regexp -indices -- "${ftptype}$" $url dummy ftype]} { set from [lindex $ftype 0] set to [lindex $ftype 1] set parts(type) [string range $url $from $to] set from [lindex $dummy 0] set url [string replace $url $from end] } # Handle user, password, host and port if {[string match "//*" $url]} { set url [string range $url 2 end] array set parts [GetUPHP url] } set parts(path) [string trimleft $url /] return [array get parts]}proc ::uri::JoinFtp args { array set components { user {} pwd {} host {} port {} path {} type {} } array set components $args set userPwd {} if {[string length $components(user)] || [string length $components(pwd)]} { set userPwd $components(user)[expr {[string length $components(pwd)] ? ":$components(pwd)" : {}}]@ } set port {} if {[string length $components(port)]} { set port :$components(port) } set type {} if {[string length $components(type)]} { set type \;type=$components(type) } return ftp://${userPwd}$components(host)${port}/[string trimleft $components(path) /]$type}proc ::uri::SplitHttps {url} { uri::SplitHttp $url}proc ::uri::SplitHttp {url} { # @c Splits the given http-<a url> into its constituents. # @a url: The url to split, without! scheme specification. # @r List containing the constituents, suitable for 'array set'. # general syntax: # //<host>:<port>/<path>?<searchpart> # # where <host> and <port> are as described in Section 3.1. If :<port> # is omitted, the port defaults to 80. No user name or password is # allowed. <path> is an HTTP selector, and <searchpart> is a query # string. The <path> is optional, as is the <searchpart> and its # preceding "?". If neither <path> nor <searchpart> is present, the "/" # may also be omitted. # # Within the <path> and <searchpart> components, "/", ";", "?" are # reserved. The "/" character may be used within HTTP to designate a # hierarchical structure. # # path == <cwd1> "/" ..."/" <cwdN> "/" <name> ["#" <fragment>] upvar #0 [namespace current]::http::search search upvar #0 [namespace current]::http::segment segment array set parts {host {} port {} path {} query {}} set searchPattern "\\?(${search})\$" set fragmentPattern "#(${segment})\$" # slash off possible query if {[regexp -indices -- $searchPattern $url match query]} { set from [lindex $query 0] set to [lindex $query 1] set parts(query) [string range $url $from $to] set url [string replace $url [lindex $match 0] end] } # slash off possible fragment if {[regexp -indices -- $fragmentPattern $url match fragment]} { set from [lindex $fragment 0] set to [lindex $fragment 1] set parts(fragment) [string range $url $from $to] set url [string replace $url [lindex $match 0] end] } if {[string match "//*" $url]} { set url [string range $url 2 end] array set parts [GetUPHP url] } set parts(path) [string trimleft $url /] return [array get parts]}proc ::uri::JoinHttp {args} { eval [linsert $args 0 uri::JoinHttpInner http 80]}proc ::uri::JoinHttps {args} { eval [linsert $args 0 uri::JoinHttpInner https 443]}proc ::uri::JoinHttpInner {scheme defport args} { array set components [list \ host {} port $defport path {} query {} \ ] array set components $args set port {} if {[string length $components(port)] && $components(port) != $defport} { set port :$components(port) } set query {} if {[string length $components(query)]} { set query ?$components(query) } regsub -- {^/} $components(path) {} components(path) if { [info exists components(fragment)] && $components(fragment) != "" } { set components(fragment) "#$components(fragment)" } else { set components(fragment) "" } return $scheme://$components(host)$port/$components(path)$components(fragment)$query}proc ::uri::SplitFile {url} { # @c Splits the given file-<a url> into its constituents. # @a url: The url to split, without! scheme specification. # @r List containing the constituents, suitable for 'array set'. upvar #0 [namespace current]::basic::hostname hostname upvar #0 [namespace current]::basic::hostnumber hostnumber if {[string match "//*" $url]} { set url [string range $url 2 end] set hostPattern "^($hostname|$hostnumber)" switch -exact -- $::tcl_platform(platform) { windows { # Catch drive letter append hostPattern :? } default { # Proceed as usual } } if {[regexp -indices -- $hostPattern $url match host]} { set fh [lindex $host 0] set th [lindex $host 1] set parts(host) [string range $url $fh $th] set matchEnd [lindex $match 1] incr matchEnd set url [string range $url $matchEnd end] } } set parts(path) $url return [array get parts]}proc ::uri::JoinFile args { array set components { host {} port {} path {} } array set components $args switch -exact -- $::tcl_platform(platform) { windows { if {[string length $components(host)]} { return file://$components(host):$components(path) } else { return file://$components(path) } } default { return file://$components(host)$components(path) } }}proc ::uri::SplitMailto {url} { # @c Splits the given mailto-<a url> into its constituents. # @a url: The url to split, without! scheme specification. # @r List containing the constituents, suitable for 'array set'. if {[string match "*@*" $url]} { set url [::split $url @] return [list user [lindex $url 0] host [lindex $url 1]] } else { return [list user $url] }}proc ::uri::JoinMailto args { array set components { user {} host {} } array set components $args return mailto:$components(user)@$components(host)}proc ::uri::SplitNews {url} { if { [string first @ $url] >= 0 } { return [list message-id $url] } else { return [list newsgroup-name $url] }}proc ::uri::JoinNews args { array set components { message-id {} newsgroup-name {} } array set components $args return news:$components(message-id)$components(newsgroup-name)}proc ::uri::GetUPHP {urlvar} { # @c Parse user, password host and port out of the url stored in # @c variable <a urlvar>. # @d Side effect: The extracted information is removed from the given url. # @r List containing the extracted information in a format suitable for # @r 'array set'. # @a urlvar: Name of the variable containing the url to parse.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -