⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uni_parse2.tcl

📁 ejabberd-0.7.5 分布式Jabber服务器
💻 TCL
📖 第 1 页 / 共 2 页
字号:
    variable cclass_pmap    variable cclass_pages    variable decomp_shift    variable decomp_pmap    variable decomp_pages    variable decomp_list    variable comp_shift    variable comp_map    variable comp_pmap    variable comp_pages    variable comp_first_list    variable comp_second_list    variable comp_x_list    variable comp_y_list    variable pages    variable groups {}    variable titleCount    if {$argc != 3} {	puts stderr "\nusage: $argv0 <datafile> <exclusionsfile> <outdir>\n"	exit 1    }    set f [open [lindex $argv 1] r]    set data [read $f]    close $f    load_exclusions $data    set f [open [lindex $argv 0] r]    set data [read $f]    close $f    load_tables $data    buildTables    #puts "X = [llength $pMap]  Y= [llength $pages]  A= [llength $groups]"    #set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}]    #puts "shift = 6, space = $size"    #puts "title case count = $titleCount"    set f [open [file join [lindex $argv 2] uni_norm.c] w]    fconfigure $f -translation lf    puts $f "/* * uni_norm.c -- * *	Declarations of Unicode character information tables.  This file is *	automatically generated by the uni_parse2.tcl script.  Do not *	modify this file by hand. * * Copyright (c) 1998 by Scriptics Corporation. * All rights reserved. * * Modified for ejabberd by Alexey Shchepin * * RCS: @(#) \$Id\$ *//* * A 16-bit Unicode character is split into two parts in order to index * into the following tables.  The lower CCLASS_OFFSET_BITS comprise an offset * into a page of characters.  The upper bits comprise the page number. */#define CCLASS_OFFSET_BITS $cclass_shift/* * The pageMap is indexed by page number and returns an alternate page number * that identifies a unique page of characters.  Many Unicode characters map * to the same alternate page number. */static unsigned char cclassPageMap\[\] = {"    set line "    "    set last [expr {[llength $cclass_pmap] - 1}]    for {set i 0} {$i <= $last} {incr i} {	append line [lindex $cclass_pmap $i]	if {$i != $last} {	    append line ", "	}	if {[string length $line] > 70} {	    puts $f $line	    set line "    "	}    }    puts $f $line    puts $f "};/* * The cclassGroupMap is indexed by combining the alternate page number with * the page offset and returns a combining class number. */static unsigned char cclassGroupMap\[\] = {"    set line "    "    set lasti [expr {[llength $cclass_pages] - 1}]    for {set i 0} {$i <= $lasti} {incr i} {	set page [lindex $cclass_pages $i]	set lastj [expr {[llength $page] - 1}]	for {set j 0} {$j <= $lastj} {incr j} {	    append line [lindex $page $j]	    if {$j != $lastj || $i != $lasti} {		append line ", "	    }	    if {[string length $line] > 70} {		puts $f $line		set line "    "	    }	}    }    puts $f $line    puts $f "};#define GetUniCharCClass(ch) (cclassGroupMap\[(cclassPageMap\[(((int)(ch)) & 0xffff) >> CCLASS_OFFSET_BITS\] << CCLASS_OFFSET_BITS) | ((ch) & ((1 << CCLASS_OFFSET_BITS)-1))\])#define DECOMP_OFFSET_BITS $decomp_shift/* * The pageMap is indexed by page number and returns an alternate page number * that identifies a unique page of characters.  Many Unicode characters map * to the same alternate page number. */static unsigned char decompPageMap\[\] = {"    set line "    "    set last [expr {[llength $decomp_pmap] - 1}]    for {set i 0} {$i <= $last} {incr i} {	append line [lindex $decomp_pmap $i]	if {$i != $last} {	    append line ", "	}	if {[string length $line] > 70} {	    puts $f $line	    set line "    "	}    }    puts $f $line    puts $f "};/* * The decompGroupMap is indexed by combining the alternate page number with * the page offset and returns a group number that identifies a length and * shift of decomposition sequence in decompList */static int decompGroupMap\[\] = {"    set line "    "    set lasti [expr {[llength $decomp_pages] - 1}]    for {set i 0} {$i <= $lasti} {incr i} {	set page [lindex $decomp_pages $i]	set lastj [expr {[llength $page] - 1}]	for {set j 0} {$j <= $lastj} {incr j} {	    append line [lindex $page $j]	    if {$j != $lastj || $i != $lasti} {		append line ", "	    }	    if {[string length $line] > 70} {		puts $f $line		set line "    "	    }	}    }    puts $f $line    puts $f "};/* * List of decomposition sequences */static int decompList\[\] = {"    set line "    "    set last [expr {[llength $decomp_list] - 1}]    for {set i 0} {$i <= $last} {incr i} {	set val [lindex $decomp_list $i]	append line [format "%d" $val]	if {$i != $last} {	    append line ", "	}	if {[string length $line] > 70} {	    puts $f $line	    set line "    "	}    }    puts $f $line    puts $f "};/* * This macro extracts the information about a character from the * Unicode character tables. */#define GetUniCharDecompInfo(ch) (decompGroupMap\[(decompPageMap\[(((int)(ch)) & 0xffff) >> DECOMP_OFFSET_BITS\] << DECOMP_OFFSET_BITS) | ((ch) & ((1 << DECOMP_OFFSET_BITS)-1))\])#define GetDecompShift(info) ((info) & 0xffff)#define GetDecompLen(info) ((info) >> 16)#define COMP_OFFSET_BITS $comp_shift/* * The pageMap is indexed by page number and returns an alternate page number * that identifies a unique page of characters.  Many Unicode characters map * to the same alternate page number. */static unsigned char compPageMap\[\] = {"    set line "    "    set last [expr {[llength $comp_pmap] - 1}]    for {set i 0} {$i <= $last} {incr i} {	append line [lindex $comp_pmap $i]	if {$i != $last} {	    append line ", "	}	if {[string length $line] > 70} {	    puts $f $line	    set line "    "	}    }    puts $f $line    puts $f "};/* * The groupMap is indexed by combining the alternate page number with * the page offset and returns a group number that identifies a unique * set of character attributes. */static int compGroupMap\[\] = {"    set line "    "    set lasti [expr {[llength $comp_pages] - 1}]    for {set i 0} {$i <= $lasti} {incr i} {	set page [lindex $comp_pages $i]	set lastj [expr {[llength $page] - 1}]	for {set j 0} {$j <= $lastj} {incr j} {	    append line [lindex $page $j]	    if {$j != $lastj || $i != $lasti} {		append line ", "	    }	    if {[string length $line] > 70} {		puts $f $line		set line "    "	    }	}    }    puts $f $line    puts $f "};/* * Lists of compositions for characters that appears only in one composition */static int compFirstList\[\]\[2\] = {"    set line "    "    set last [expr {[llength $comp_first_list] - 1}]    for {set i 0} {$i <= $last} {incr i} {	set val [lindex $comp_first_list $i]	append line [format "{%d, %d}" [lindex $val 0] [lindex $val 1]]	if {$i != $last} {	    append line ", "	}	if {[string length $line] > 60} {	    puts $f $line	    set line "    "	}    }    puts $f $line    puts $f "};static int compSecondList\[\]\[2\] = {"    set line "    "    set last [expr {[llength $comp_second_list] - 1}]    for {set i 0} {$i <= $last} {incr i} {	set val [lindex $comp_second_list $i]	append line [format "{%d, %d}" [lindex $val 0] [lindex $val 1]]	if {$i != $last} {	    append line ", "	}	if {[string length $line] > 60} {	    puts $f $line	    set line "    "	}    }    puts $f $line    puts $f "};/* * Compositions matrix */static int compBothList\[[llength $comp_x_list]\]\[[llength $comp_y_list]\] = {"    set lastx [expr {[llength $comp_x_list] - 1}]    set lasty [expr {[llength $comp_y_list] - 1}]    for {set i 0} {$i <= $lastx} {incr i} {	puts $f "    \{"	set line "        "	for {set j 0} {$j <= $lasty} {incr j} {	    set comp [list [lindex $comp_x_list $i] [lindex $comp_y_list $j]]	    if {[info exists comp_map($comp)]} {		set val $comp_map($comp)	    } else {		set val 0	    }	    	    append line [format "%d" $val]	    if {$j != $lasty} {		append line ", "	    }	    if {[string length $line] > 70} {		puts $f $line		set line "        "	    }	}	puts $f $line	if {$j != $lasty} {	    puts $f "    \},"	} else {	    puts $f "    \}"	}    }    puts $f "};#define GetUniCharCompInfo(ch) (compGroupMap\[(compPageMap\[(((int)(ch)) & 0xffff) >> COMP_OFFSET_BITS\] << COMP_OFFSET_BITS) | ((ch) & ((1 << COMP_OFFSET_BITS)-1))\])#define CompSingleMask (1 << 16)#define CompMask ((1 << 16) - 1)"    close $f}uni::mainreturn

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -