📄 uni_parse2.tcl
字号:
# uni_parse2.tcl --## This program parses the UnicodeData file and generates the# corresponding uni_norm.c file with compressed character# data tables. The input to this program should be the latest# UnicodeData.txt and CompositionExclusions.txt files from:# ftp://ftp.unicode.org/Public/UNIDATA/## Copyright (c) 1998-1999 by Scriptics Corporation.# All rights reserved.## Modified for ejabberd by Alexey Shchepin# # RCS: @(#) $Id: uni_parse2.tcl,v 1.3 2003/10/09 18:08:34 aleksey Exp $namespace eval uni { set cclass_shift 6 set decomp_shift 5 set comp_shift 5 set shift 5; # number of bits of data within a page # This value can be adjusted to find the # best split to minimize table size variable pMap; # map from page to page index, each entry is # an index into the pages table, indexed by # page number variable pages; # map from page index to page info, each # entry is a list of indices into the groups # table, the list is indexed by the offset variable groups; # list of character info values, indexed by # group number, initialized with the # unassigned character group variable categories { Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So }; # Ordered list of character categories, must # match the enumeration in the header file. variable titleCount 0; # Count of the number of title case # characters. This value is used in the # regular expression code to allocate enough # space for the title case variants.}proc uni::getValue {items index} { variable categories variable titleCount # Extract character info set category [lindex $items 2] if {[scan [lindex $items 12] %4x toupper] == 1} { set toupper [expr {$index - $toupper}] } else { set toupper {} } if {[scan [lindex $items 13] %4x tolower] == 1} { set tolower [expr {$tolower - $index}] } else { set tolower {} } if {[scan [lindex $items 14] %4x totitle] == 1} { set totitle [expr {$index - $totitle}] } else { set totitle {} } set categoryIndex [lsearch -exact $categories $category] if {$categoryIndex < 0} { puts "Unexpected character category: $index($category)" set categoryIndex 0 } elseif {$category == "Lt"} { incr titleCount } return "$categoryIndex,$toupper,$tolower,$totitle"}proc uni::getGroup {value} { variable groups set gIndex [lsearch -exact $groups $value] if {$gIndex == -1} { set gIndex [llength $groups] lappend groups $value } return $gIndex}proc uni::addPage {info} { variable pMap variable pages set pIndex [lsearch -exact $pages $info] if {$pIndex == -1} { set pIndex [llength $pages] lappend pages $info } lappend pMap $pIndex return}proc uni::addPage {map_var pages_var info} { variable $map_var variable $pages_var set pIndex [lsearch -exact [set $pages_var] $info] if {$pIndex == -1} { set pIndex [llength [set $pages_var]] lappend $pages_var $info } lappend $map_var $pIndex return}proc uni::load_exclusions {data} { variable exclusions foreach line [split $data \n] { if {$line == ""} continue set items [split $line " "] if {[lindex $items 0] == "#"} continue scan [lindex $items 0] %x index set exclusions($index) "" }}proc uni::load_tables {data} { variable cclass_map variable decomp_map variable comp_map variable comp_first variable comp_second variable exclusions foreach line [split $data \n] { if {$line == ""} continue set items [split $line \;] scan [lindex $items 0] %x index set cclass [lindex $items 3] set decomp [lindex $items 5] set cclass_map($index) $cclass #set decomp_map($index) $cclass if {$decomp != ""} { if {[string index [lindex $decomp 0] 0] == "<"} { set decomp1 [lreplace $decomp 0 0] set decomp {} foreach ch $decomp1 { scan $ch %x ch lappend decomp $ch } set decomp_map($index) $decomp } else { switch -- [llength $decomp] { 1 { scan $decomp %x ch set decomp_map($index) $ch } 2 { scan $decomp "%x %x" ch1 ch2 set decomp [list $ch1 $ch2] set decomp_map($index) $decomp # hackish if {(![info exists cclass_map($ch1)] || \ $cclass_map($ch1) == 0) && \ ![info exists exclusions($index)]} { if {[info exists comp_first($ch1)]} { incr comp_first($ch1) } else { set comp_first($ch1) 1 } if {[info exists comp_second($ch2)]} { incr comp_second($ch2) } else { set comp_second($ch2) 1 } set comp_map($decomp) $index } else { puts "Excluded $index" } } default { puts "Bad canonical decomposition: $line" } } } #puts "[format 0x%0.4x $index]\t$cclass\t$decomp_map($index)" } } #puts [array get comp_first] #puts [array get comp_second]}proc uni::buildTables {} { variable cclass_shift variable decomp_shift variable comp_shift variable cclass_map variable cclass_pmap {} variable cclass_pages {} variable decomp_map variable decomp_pmap {} variable decomp_pages {} variable decomp_list {} variable comp_map variable comp_pmap {} variable comp_pages {} variable comp_first variable comp_second variable comp_first_list {} variable comp_second_list {} variable comp_x_list {} variable comp_y_list {} variable comp_both_map {} set cclass_info {} set decomp_info {} set comp_info {} set cclass_mask [expr {(1 << $cclass_shift) - 1}] set decomp_mask [expr {(1 << $decomp_shift) - 1}] set comp_mask [expr {(1 << $comp_shift) - 1}] foreach comp [array names comp_map] { set ch1 [lindex $comp 0] set ch2 [lindex $comp 1] if {$comp_first($ch1) == 1} { set i [llength $comp_first_list] lappend comp_first_list [list $ch2 $comp_map($comp)] set comp_info_map($ch1) [expr {$i | (1 << 16)}] } elseif {$comp_second($ch2) == 1} { set i [llength $comp_second_list] lappend comp_second_list [list $ch1 $comp_map($comp)] set comp_info_map($ch2) [expr {$i | (1 << 16)}] } else { if {[lsearch -exact $comp_x_list $ch1] < 0} { set i [llength $comp_x_list] lappend comp_x_list $ch1 set comp_info_map($ch1) $i } if {[lsearch -exact $comp_y_list $ch2] < 0} { set i [llength $comp_y_list] lappend comp_y_list $ch2 set comp_info_map($ch2) $i } } } set next 0 for {set i 0} {$i <= 0xffff} {incr i} { #set gIndex [getGroup [getValue $i]] set cclass_offset [expr {$i & $cclass_mask}] if {[info exists cclass_map($i)]} { set cclass $cclass_map($i) } else { set cclass 0 } lappend cclass_info $cclass if {$cclass_offset == $cclass_mask} { addPage cclass_pmap cclass_pages $cclass_info set cclass_info {} } set decomp_offset [expr {$i & $decomp_mask}] if {[info exists decomp_map($i)]} { set decomp $decomp_map($i) #puts -$decomp while {[info exists decomp_map([set ch1 [lindex $decomp 0]])]} { set decomp [concat $decomp_map($ch1) [lreplace $decomp 0 0]] #puts +$decomp } if {[info exists decomp_used($decomp)]} { lappend decomp_info $decomp_used($decomp) } else { set val [expr {([llength $decomp] << 16) + \ [llength $decomp_list]}] #set val [expr {[llength $decomp_list]}] lappend decomp_info $val set decomp_used($decomp) $val #puts "$val $decomp" foreach d $decomp { lappend decomp_list $d } } } else { lappend decomp_info -1 } if {$decomp_offset == $decomp_mask} { addPage decomp_pmap decomp_pages $decomp_info set decomp_info {} } set comp_offset [expr {$i & $comp_mask}] if {[info exists comp_info_map($i)]} { set comp $comp_info_map($i) } else { set comp -1 } lappend comp_info $comp if {$comp_offset == $comp_mask} { addPage comp_pmap comp_pages $comp_info set comp_info {} } } #puts [array get decomp_map] #puts $decomp_list return}proc uni::main {} { global argc argv0 argv variable cclass_shift
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -