⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uni_parse2.tcl

📁 ejabberd-0.7.5 分布式Jabber服务器
💻 TCL
📖 第 1 页 / 共 2 页
字号:
# uni_parse2.tcl --##	This program parses the UnicodeData file and generates the#	corresponding uni_norm.c file with compressed character#	data tables.  The input to this program should be the latest#	UnicodeData.txt and CompositionExclusions.txt files from:#	    ftp://ftp.unicode.org/Public/UNIDATA/## Copyright (c) 1998-1999 by Scriptics Corporation.# All rights reserved.## Modified for ejabberd by Alexey Shchepin# # RCS: @(#) $Id: uni_parse2.tcl,v 1.3 2003/10/09 18:08:34 aleksey Exp $namespace eval uni {    set cclass_shift 6    set decomp_shift 5    set comp_shift 5    set shift 5;		# number of bits of data within a page				# This value can be adjusted to find the				# best split to minimize table size    variable pMap;		# map from page to page index, each entry is				# an index into the pages table, indexed by				# page number    variable pages;		# map from page index to page info, each				# entry is a list of indices into the groups				# table, the list is indexed by the offset    variable groups;		# list of character info values, indexed by				# group number, initialized with the				# unassigned character group    variable categories {	Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp	Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So    };				# Ordered list of character categories, must				# match the enumeration in the header file.    variable titleCount 0;	# Count of the number of title case				# characters.  This value is used in the				# regular expression code to allocate enough				# space for the title case variants.}proc uni::getValue {items index} {    variable categories    variable titleCount    # Extract character info    set category [lindex $items 2]    if {[scan [lindex $items 12] %4x toupper] == 1} {	set toupper [expr {$index - $toupper}]    } else {	set toupper {}    }    if {[scan [lindex $items 13] %4x tolower] == 1} {	set tolower [expr {$tolower - $index}]    } else {	set tolower {}    }    if {[scan [lindex $items 14] %4x totitle] == 1} {	set totitle [expr {$index - $totitle}]    } else {	set totitle {}    }    set categoryIndex [lsearch -exact $categories $category]    if {$categoryIndex < 0} {	puts "Unexpected character category: $index($category)"	set categoryIndex 0    } elseif {$category == "Lt"} {	incr titleCount    }    return "$categoryIndex,$toupper,$tolower,$totitle"}proc uni::getGroup {value} {    variable groups    set gIndex [lsearch -exact $groups $value]    if {$gIndex == -1} {	set gIndex [llength $groups]	lappend groups $value    }    return $gIndex}proc uni::addPage {info} {    variable pMap    variable pages        set pIndex [lsearch -exact $pages $info]    if {$pIndex == -1} {	set pIndex [llength $pages]	lappend pages $info    }    lappend pMap $pIndex    return}proc uni::addPage {map_var pages_var info} {    variable $map_var    variable $pages_var        set pIndex [lsearch -exact [set $pages_var] $info]    if {$pIndex == -1} {	set pIndex [llength [set $pages_var]]	lappend $pages_var $info    }    lappend $map_var $pIndex    return}proc uni::load_exclusions {data} {    variable exclusions    foreach line [split $data \n] {	if {$line == ""} continue	set items [split $line " "]	if {[lindex $items 0] == "#"} continue	scan [lindex $items 0] %x index	set exclusions($index) ""    }}proc uni::load_tables {data} {    variable cclass_map    variable decomp_map    variable comp_map    variable comp_first    variable comp_second    variable exclusions    foreach line [split $data \n] {	if {$line == ""} continue	set items [split $line \;]	scan [lindex $items 0] %x index	set cclass [lindex $items 3]	set decomp [lindex $items 5]	set cclass_map($index) $cclass	#set decomp_map($index) $cclass	if {$decomp != ""} {	    if {[string index [lindex $decomp 0] 0] == "<"} {		set decomp1 [lreplace $decomp 0 0]		set decomp {}		foreach ch $decomp1 {		    scan $ch %x ch		    lappend decomp $ch		}		set decomp_map($index) $decomp	    } else {		switch -- [llength $decomp] {		    1 {			scan $decomp %x ch			set decomp_map($index) $ch		    }		    2 {			scan $decomp "%x %x" ch1 ch2			set decomp [list $ch1 $ch2]			set decomp_map($index) $decomp			# hackish			if {(![info exists cclass_map($ch1)] || \				 $cclass_map($ch1) == 0) && \				![info exists exclusions($index)]} {			    if {[info exists comp_first($ch1)]} {				incr comp_first($ch1)			    } else {				set comp_first($ch1) 1			    }			    if {[info exists comp_second($ch2)]} {				incr comp_second($ch2)			    } else {				set comp_second($ch2) 1			    }			    set comp_map($decomp) $index			} else {			    puts "Excluded $index"			}		    }		    default {			puts "Bad canonical decomposition: $line"		    } 		}	    }	    #puts "[format 0x%0.4x $index]\t$cclass\t$decomp_map($index)"	}    }    #puts [array get comp_first]    #puts [array get comp_second]}proc uni::buildTables {} {    variable cclass_shift    variable decomp_shift    variable comp_shift    variable cclass_map    variable cclass_pmap {}    variable cclass_pages {}    variable decomp_map    variable decomp_pmap {}    variable decomp_pages {}    variable decomp_list {}    variable comp_map    variable comp_pmap {}    variable comp_pages {}    variable comp_first    variable comp_second    variable comp_first_list {}    variable comp_second_list {}    variable comp_x_list {}    variable comp_y_list {}    variable comp_both_map {}    set cclass_info {}    set decomp_info {}    set comp_info {}        set cclass_mask [expr {(1 << $cclass_shift) - 1}]    set decomp_mask [expr {(1 << $decomp_shift) - 1}]    set comp_mask [expr {(1 << $comp_shift) - 1}]    foreach comp [array names comp_map] {	set ch1 [lindex $comp 0]	set ch2 [lindex $comp 1]	if {$comp_first($ch1) == 1} {	    set i [llength $comp_first_list]	    lappend comp_first_list [list $ch2 $comp_map($comp)]	    set comp_info_map($ch1) [expr {$i | (1 << 16)}]	} elseif {$comp_second($ch2) == 1} {	    set i [llength $comp_second_list]	    lappend comp_second_list [list $ch1 $comp_map($comp)]	    set comp_info_map($ch2) [expr {$i | (1 << 16)}]	} else {	    if {[lsearch -exact $comp_x_list $ch1] < 0} {		set i [llength $comp_x_list]		lappend comp_x_list $ch1		set comp_info_map($ch1) $i	    }	    if {[lsearch -exact $comp_y_list $ch2] < 0} {		set i [llength $comp_y_list]		lappend comp_y_list $ch2		set comp_info_map($ch2) $i	    }	}    }    set next 0    for {set i 0} {$i <= 0xffff} {incr i} {	#set gIndex [getGroup [getValue $i]]	set cclass_offset [expr {$i & $cclass_mask}]	if {[info exists cclass_map($i)]} {	    set cclass $cclass_map($i)	} else {	    set cclass 0	}	lappend cclass_info $cclass	if {$cclass_offset == $cclass_mask} {	    addPage cclass_pmap cclass_pages $cclass_info	    set cclass_info {}	}	set decomp_offset [expr {$i & $decomp_mask}]	if {[info exists decomp_map($i)]} {	    set decomp $decomp_map($i)	    #puts -$decomp	    while {[info exists decomp_map([set ch1 [lindex $decomp 0]])]} {		set decomp [concat $decomp_map($ch1) [lreplace $decomp 0 0]]		#puts +$decomp	    }	    if {[info exists decomp_used($decomp)]} {		lappend decomp_info $decomp_used($decomp)	    } else {		set val [expr {([llength $decomp] << 16) + \				   [llength $decomp_list]}]		#set val [expr {[llength $decomp_list]}]		lappend decomp_info $val		set decomp_used($decomp) $val		#puts "$val $decomp"		foreach d $decomp {		    lappend decomp_list $d		}	    }	} else {	    lappend decomp_info -1	}	if {$decomp_offset == $decomp_mask} {	    addPage decomp_pmap decomp_pages $decomp_info	    set decomp_info {}	}	set comp_offset [expr {$i & $comp_mask}]	if {[info exists comp_info_map($i)]} {	    set comp $comp_info_map($i)	} else {	    set comp -1	}	lappend comp_info $comp	if {$comp_offset == $comp_mask} {	    addPage comp_pmap comp_pages $comp_info	    set comp_info {}	}    }    #puts [array get decomp_map]    #puts $decomp_list    return}proc uni::main {} {    global argc argv0 argv    variable cclass_shift

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -