📄 uni_parse2.tcl
字号:
variable cclass_pmap variable cclass_pages variable decomp_shift variable decomp_pmap variable decomp_pages variable decomp_list variable comp_shift variable comp_map variable comp_pmap variable comp_pages variable comp_first_list variable comp_second_list variable comp_x_list variable comp_y_list variable pages variable groups {} variable titleCount if {$argc != 3} { puts stderr "\nusage: $argv0 <datafile> <exclusionsfile> <outdir>\n" exit 1 } set f [open [lindex $argv 1] r] set data [read $f] close $f load_exclusions $data set f [open [lindex $argv 0] r] set data [read $f] close $f load_tables $data buildTables #puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" #set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}] #puts "shift = 6, space = $size" #puts "title case count = $titleCount" set f [open [file join [lindex $argv 2] uni_norm.c] w] fconfigure $f -translation lf puts $f "/* * uni_norm.c -- * * Declarations of Unicode character information tables. This file is * automatically generated by the uni_parse2.tcl script. Do not * modify this file by hand. * * Copyright (c) 1998 by Scriptics Corporation. * All rights reserved. * * Modified for ejabberd by Alexey Shchepin * * RCS: @(#) \$Id\$ *//* * A 16-bit Unicode character is split into two parts in order to index * into the following tables. The lower CCLASS_OFFSET_BITS comprise an offset * into a page of characters. The upper bits comprise the page number. */#define CCLASS_OFFSET_BITS $cclass_shift/* * The pageMap is indexed by page number and returns an alternate page number * that identifies a unique page of characters. Many Unicode characters map * to the same alternate page number. */static unsigned char cclassPageMap\[\] = {" set line " " set last [expr {[llength $cclass_pmap] - 1}] for {set i 0} {$i <= $last} {incr i} { append line [lindex $cclass_pmap $i] if {$i != $last} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } puts $f $line puts $f "};/* * The cclassGroupMap is indexed by combining the alternate page number with * the page offset and returns a combining class number. */static unsigned char cclassGroupMap\[\] = {" set line " " set lasti [expr {[llength $cclass_pages] - 1}] for {set i 0} {$i <= $lasti} {incr i} { set page [lindex $cclass_pages $i] set lastj [expr {[llength $page] - 1}] for {set j 0} {$j <= $lastj} {incr j} { append line [lindex $page $j] if {$j != $lastj || $i != $lasti} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } } puts $f $line puts $f "};#define GetUniCharCClass(ch) (cclassGroupMap\[(cclassPageMap\[(((int)(ch)) & 0xffff) >> CCLASS_OFFSET_BITS\] << CCLASS_OFFSET_BITS) | ((ch) & ((1 << CCLASS_OFFSET_BITS)-1))\])#define DECOMP_OFFSET_BITS $decomp_shift/* * The pageMap is indexed by page number and returns an alternate page number * that identifies a unique page of characters. Many Unicode characters map * to the same alternate page number. */static unsigned char decompPageMap\[\] = {" set line " " set last [expr {[llength $decomp_pmap] - 1}] for {set i 0} {$i <= $last} {incr i} { append line [lindex $decomp_pmap $i] if {$i != $last} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } puts $f $line puts $f "};/* * The decompGroupMap is indexed by combining the alternate page number with * the page offset and returns a group number that identifies a length and * shift of decomposition sequence in decompList */static int decompGroupMap\[\] = {" set line " " set lasti [expr {[llength $decomp_pages] - 1}] for {set i 0} {$i <= $lasti} {incr i} { set page [lindex $decomp_pages $i] set lastj [expr {[llength $page] - 1}] for {set j 0} {$j <= $lastj} {incr j} { append line [lindex $page $j] if {$j != $lastj || $i != $lasti} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } } puts $f $line puts $f "};/* * List of decomposition sequences */static int decompList\[\] = {" set line " " set last [expr {[llength $decomp_list] - 1}] for {set i 0} {$i <= $last} {incr i} { set val [lindex $decomp_list $i] append line [format "%d" $val] if {$i != $last} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } puts $f $line puts $f "};/* * This macro extracts the information about a character from the * Unicode character tables. */#define GetUniCharDecompInfo(ch) (decompGroupMap\[(decompPageMap\[(((int)(ch)) & 0xffff) >> DECOMP_OFFSET_BITS\] << DECOMP_OFFSET_BITS) | ((ch) & ((1 << DECOMP_OFFSET_BITS)-1))\])#define GetDecompShift(info) ((info) & 0xffff)#define GetDecompLen(info) ((info) >> 16)#define COMP_OFFSET_BITS $comp_shift/* * The pageMap is indexed by page number and returns an alternate page number * that identifies a unique page of characters. Many Unicode characters map * to the same alternate page number. */static unsigned char compPageMap\[\] = {" set line " " set last [expr {[llength $comp_pmap] - 1}] for {set i 0} {$i <= $last} {incr i} { append line [lindex $comp_pmap $i] if {$i != $last} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } puts $f $line puts $f "};/* * The groupMap is indexed by combining the alternate page number with * the page offset and returns a group number that identifies a unique * set of character attributes. */static int compGroupMap\[\] = {" set line " " set lasti [expr {[llength $comp_pages] - 1}] for {set i 0} {$i <= $lasti} {incr i} { set page [lindex $comp_pages $i] set lastj [expr {[llength $page] - 1}] for {set j 0} {$j <= $lastj} {incr j} { append line [lindex $page $j] if {$j != $lastj || $i != $lasti} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } } puts $f $line puts $f "};/* * Lists of compositions for characters that appears only in one composition */static int compFirstList\[\]\[2\] = {" set line " " set last [expr {[llength $comp_first_list] - 1}] for {set i 0} {$i <= $last} {incr i} { set val [lindex $comp_first_list $i] append line [format "{%d, %d}" [lindex $val 0] [lindex $val 1]] if {$i != $last} { append line ", " } if {[string length $line] > 60} { puts $f $line set line " " } } puts $f $line puts $f "};static int compSecondList\[\]\[2\] = {" set line " " set last [expr {[llength $comp_second_list] - 1}] for {set i 0} {$i <= $last} {incr i} { set val [lindex $comp_second_list $i] append line [format "{%d, %d}" [lindex $val 0] [lindex $val 1]] if {$i != $last} { append line ", " } if {[string length $line] > 60} { puts $f $line set line " " } } puts $f $line puts $f "};/* * Compositions matrix */static int compBothList\[[llength $comp_x_list]\]\[[llength $comp_y_list]\] = {" set lastx [expr {[llength $comp_x_list] - 1}] set lasty [expr {[llength $comp_y_list] - 1}] for {set i 0} {$i <= $lastx} {incr i} { puts $f " \{" set line " " for {set j 0} {$j <= $lasty} {incr j} { set comp [list [lindex $comp_x_list $i] [lindex $comp_y_list $j]] if {[info exists comp_map($comp)]} { set val $comp_map($comp) } else { set val 0 } append line [format "%d" $val] if {$j != $lasty} { append line ", " } if {[string length $line] > 70} { puts $f $line set line " " } } puts $f $line if {$j != $lasty} { puts $f " \}," } else { puts $f " \}" } } puts $f "};#define GetUniCharCompInfo(ch) (compGroupMap\[(compPageMap\[(((int)(ch)) & 0xffff) >> COMP_OFFSET_BITS\] << COMP_OFFSET_BITS) | ((ch) & ((1 << COMP_OFFSET_BITS)-1))\])#define CompSingleMask (1 << 16)#define CompMask ((1 << 16) - 1)" close $f}uni::mainreturn
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -