⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mktables.pl

📁 UNIX下perl实现代码
💻 PL
字号:
#!../../miniperluse bytes;$UnicodeData = "Unicode.301";$SyllableData = "syllables.txt";$PropData = "PropList.txt";# Note: we try to keep filenames unique within first 8 chars.  Using# subdirectories for the following helps.mkdir "In", 0755;mkdir "Is", 0755;mkdir "To", 0755;@todo = (# typical    # 005F: SPACING UNDERSCROE    ['IsWord',   '$cat =~ /^[LMN]/ or $code eq "005F"',	''],    ['IsAlnum',  '$cat =~ /^[LMN]/',	''],    ['IsAlpha',  '$cat =~ /^[LM]/',	''],    # 0009: HORIZONTAL TABULATION    # 000A: LINE FEED    # 000B: VERTICAL TABULATION    # 000C: FORM FEED    # 000D: CARRIAGE RETURN    # 0020: SPACE    ['IsSpace',  '$cat  =~ /^Z/ ||                  $code =~ /^(0009|000A|000B|000C|000D)$/',	''],    ['IsSpacePerl',                 '$cat  =~ /^Z/ ||                  $code =~ /^(0009|000A|000C|000D)$/',		''],    ['IsBlank',  '$code =~ /^(0020|0009)$/ ||		  $cat  =~ /^Z[^lp]$/',	''],    ['IsDigit',  '$cat =~ /^Nd$/',	''],    ['IsUpper',  '$cat =~ /^L[ut]$/',	''],    ['IsLower',  '$cat =~ /^Ll$/',	''],    ['IsASCII',  '$code le "007f"',	''],    ['IsCntrl',  '$cat =~ /^C/',	''],    ['IsGraph',  '$cat =~ /^([LMNPS]|Co)/',	''],    ['IsPrint',  '$cat =~ /^([LMNPS]|Co|Zs)/',	''],    ['IsPunct',  '$cat =~ /^P/',	''],    # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f    ['IsXDigit', '$code =~ /^00(3[0-9]|[46][1-6])$/',	''],    ['ToUpper',  '$up',			'$up'],    ['ToLower',  '$down',		'$down'],    ['ToTitle',  '$title',		'$title'],    ['ToDigit',  '$dec ne ""',		'$dec'],# Name    ['Name',	'$name',		'$name'],# Category    ['Category', '$cat',		'$cat'],# Normative    ['IsM',	'$cat =~ /^M/',		''],	# Mark    ['IsMn',	'$cat eq "Mn"',		''],	# Mark, Non-Spacing     ['IsMc',	'$cat eq "Mc"',		''],	# Mark, Combining    ['IsMe',	'$cat eq "Me"',		''],    # Mark, Enclosing    ['IsN',	'$cat =~ /^N/',		''],	# Number    ['IsNd',	'$cat eq "Nd"',		''],	# Number, Decimal Digit    ['IsNo',	'$cat eq "No"',		''],	# Number, Other    ['IsNl',	'$cat eq "Nl"',		''],    # Number, Letter    ['IsZ',	'$cat =~ /^Z/',		''],	# Separator    ['IsZs',	'$cat eq "Zs"',		''],	# Separator, Space    ['IsZl',	'$cat eq "Zl"',		''],	# Separator, Line    ['IsZp',	'$cat eq "Zp"',		''],	# Separator, Paragraph    ['IsC',	'$cat =~ /^C/',		''],	# Crazy    ['IsCc',	'$cat eq "Cc"',		''],	# Other, Control or Format    ['IsCo',	'$cat eq "Co"',		''],	# Other, Private Use    ['IsCn',	'$cat eq "Cn"',		''],	# Other, Not Assigned    ['IsCf',	'$cat eq "Cf"',		''],    # Other, Format    ['IsCs',	'$cat eq "Cs"',		''],    # Other, Surrogate    ['IsCn',	'Unassigned Code Value',$PropData],	# Other, Not Assigned # Informative    ['IsL',	'$cat =~ /^L/',		''],	# Letter    ['IsLu',	'$cat eq "Lu"',		''],	# Letter, Uppercase    ['IsLl',	'$cat eq "Ll"',		''],	# Letter, Lowercase    ['IsLt',	'$cat eq "Lt"',		''],	# Letter, Titlecase     ['IsLm',	'$cat eq "Lm"',		''],	# Letter, Modifier    ['IsLo',	'$cat eq "Lo"',		''],	# Letter, Other     ['IsP',	'$cat =~ /^P/',		''],	# Punctuation    ['IsPd',	'$cat eq "Pd"',		''],	# Punctuation, Dash    ['IsPs',	'$cat eq "Ps"',		''],	# Punctuation, Open    ['IsPe',	'$cat eq "Pe"',		''],	# Punctuation, Close    ['IsPo',	'$cat eq "Po"',		''],	# Punctuation, Other    ['IsPc',	'$cat eq "Pc"',		''],	# Punctuation, Connector    ['IsPi',	'$cat eq "Pi"',		''],	# Punctuation, Initial quote    ['IsPf',	'$cat eq "Pf"',		''],	# Punctuation, Final quote    ['IsS',	'$cat =~ /^S/',		''],	# Symbol    ['IsSm',	'$cat eq "Sm"',		''],	# Symbol, Math    ['IsSk',	'$cat eq "Sk"',		''],	# Symbol, Modifier    ['IsSc',	'$cat eq "Sc"',		''],	# Symbol, Currency    ['IsSo',	'$cat eq "So"',		''],	# Symbol, Other# Combining class    ['CombiningClass', '$comb',		'$comb'],# BIDIRECTIONAL PROPERTIES     ['Bidirectional', '$bid',		'$bid'],# Strong types:    ['IsBidiL',	'$bid eq "L"',		''],	# Left-Right; Most alphabetic,						# syllabic, and logographic						# characters (e.g., CJK						# ideographs)    ['IsBidiR',	'$bid eq "R"',		''],	# Right-Left; Arabic, Hebrew,						# and punctuation specific to						# those scripts    ['IsBidiLRE', '$bid eq "LRE"',       ''],    # Left-to-Right Embedding    ['IsBidiLRO', '$bid eq "LRO"',       ''],    # Left-to-Right Override    ['IsBidiAL', '$bid eq "AL"',         ''],    # Right-to-Left Arabic    ['IsBidiRLE', '$bid eq "RLE"',       ''],    # Right-to-Left Embedding    ['IsBidiRLO', '$bid eq "RLO"',       ''],    # Right-to-Left Override    ['IsBidiPDF', '$bid eq "PDF"',       ''],    # Pop Directional Format    ['IsBidiNSM', '$bid eq "NSM"',       ''],    # Non-Spacing Mark    ['IsBidiBN', '$bid eq "BN"',         ''],    # Boundary Neutral# Weak types:    ['IsBidiEN','$bid eq "EN"',		''],	# European Number    ['IsBidiES','$bid eq "ES"',		''],	# European Number Separator    ['IsBidiET','$bid eq "ET"',		''],	# European Number Terminator    ['IsBidiAN','$bid eq "AN"',		''],	# Arabic Number    ['IsBidiCS','$bid eq "CS"',		''],	# Common Number Separator# Separators:    ['IsBidiB',	'$bid eq "B"',		''],	# Block Separator    ['IsBidiS',	'$bid eq "S"',		''],	# Segment Separator# Neutrals:    ['IsBidiWS','$bid eq "WS"',		''],	# Whitespace    ['IsBidiON','$bid eq "ON"',		''],	# Other Neutrals ; All other						# characters: punctuation,						# symbols# Decomposition    ['Decomposition',	'$decomp',	'$decomp'],    ['IsDecoCanon',	'$decomp && $decomp !~ /^</',	''],    ['IsDecoCompat',	'$decomp =~ /^</',		''],    ['IsDCfont',	'$decomp =~ /^<font>/',		''],    ['IsDCnoBreak',	'$decomp =~ /^<noBreak>/',	''],    ['IsDCinitial',	'$decomp =~ /^<initial>/',	''],    ['IsDCmedial',	'$decomp =~ /^<medial>/',	''],    ['IsDCfinal',	'$decomp =~ /^<final>/',	''],    ['IsDCisolated',	'$decomp =~ /^<isolated>/',	''],    ['IsDCcircle',	'$decomp =~ /^<circle>/',	''],    ['IsDCsuper',	'$decomp =~ /^<super>/',	''],    ['IsDCsub',		'$decomp =~ /^<sub>/',		''],    ['IsDCvertical',	'$decomp =~ /^<vertical>/',	''],    ['IsDCwide',	'$decomp =~ /^<wide>/',		''],    ['IsDCnarrow',	'$decomp =~ /^<narrow>/',	''],    ['IsDCsmall',	'$decomp =~ /^<small>/',	''],    ['IsDCsquare',	'$decomp =~ /^<square>/',	''],    ['IsDCfraction',	'$decomp =~ /^<fraction>/',	''],    ['IsDCcompat',	'$decomp =~ /^<compat>/',	''],# Number    ['Number', 	'$num ne ""',		'$num'],# Mirrored    ['IsMirrored', '$mir eq "Y"',	''],# Arabic    ['ArabLink', 	'1',		'$link'],    ['ArabLnkGrp', 	'1',		'$linkgroup'],# Jamo    ['JamoShort',	'1',		'$short'],# Syllables    syllable_defs(),# Line break properties - Normative    ['IsLbrkBK','$brk eq "BK"',		''],	# Mandatory Break    ['IsLbrkCR','$brk eq "CR"',		''],	# Carriage Return    ['IsLbrkLF','$brk eq "LF"',		''],	# Line Feed    ['IsLbrkCM','$brk eq "CM"',		''],	# Attached Characters and Combining Marks    ['IsLbrkSG','$brk eq "SG"',		''],	# Surrogates    ['IsLbrkGL','$brk eq "GL"',		''],	# Non-breaking (Glue)    ['IsLbrkCB','$brk eq "CB"',		''],	# Contingent Break Opportunity    ['IsLbrkSP','$brk eq "SP"',		''],	# Space    ['IsLbrkZW','$brk eq "ZW"',		''],	# Zero Width Space# Line break properties - Informative    ['IsLbrkXX','$brk eq "XX"',		''],	# Unknown    ['IsLbrkOP','$brk eq "OP"',		''],	# Opening Punctuation    ['IsLbrkCL','$brk eq "CL"',		''],	# Closing Punctuation    ['IsLbrkQU','$brk eq "QU"',		''],	# Ambiguous Quotation    ['IsLbrkNS','$brk eq "NS"',		''],	# Non Starter    ['IsLbrkEX','$brk eq "EX"',		''],	# Exclamation/Interrogation    ['IsLbrkSY','$brk eq "SY"',		''],	# Symbols Allowing Breaks    ['IsLbrkIS','$brk eq "IS"',		''],	# Infix Separator (Numeric)    ['IsLbrkPR','$brk eq "PR"',		''],	# Prefix (Numeric)    ['IsLbrkPO','$brk eq "PO"',		''],	# Postfix (Numeric)    ['IsLbrkNU','$brk eq "NU"',		''],	# Numeric    ['IsLbrkAL','$brk eq "AL"',		''],	# Ordinary Alphabetic and Symbol Characters    ['IsLbrkID','$brk eq "ID"',		''],	# Ideographic    ['IsLbrkIN','$brk eq "IN"',		''],	# Inseparable    ['IsLbrkHY','$brk eq "HY"',		''],	# Hyphen    ['IsLbrkBB','$brk eq "BB"',		''],	# Break Opportunity Before    ['IsLbrkBA','$brk eq "BA"',		''],	# Break Opportunity After    ['IsLbrkSA','$brk eq "SA"',		''],	# Complex Context (South East Asian)    ['IsLbrkAI','$brk eq "AI"',		''],	# Ambiguous (Alphabetic or Ideographic)    ['IsLbrkB2','$brk eq "B2"',		''],	# Break Opportunity Before and After);# This is not written for speed...foreach $file (@todo) {    my ($table, $wanted, $val) = @$file;    next if @ARGV and not grep { $_ eq $table } @ARGV;    print $table,"\n";    if ($table =~ /^(Is|In|To)(.*)/) {	open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";    }    else {	open(OUT, ">$table.pl") or die "Can't create $table.pl: $!\n";    }    print OUT <<EOH;# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! # This file is built by $0 from e.g. $UnicodeData.# Any changes made here will be lost!EOH    print OUT <<"END";return <<'END';END    print OUT proplist($table, $wanted, $val);    print OUT "END\n";    close OUT;}# Must treat blocks specially.exit if @ARGV and not grep { $_ eq Block } @ARGV;print "Block\n";open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n";print OUT <<EOH;# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! # This file is built by $0 from e.g. $UnicodeData.# Any changes made here will be lost!EOHprint OUT <<"END";return <<'END';ENDwhile (<UD>) {    next if /^#/;    next if /^$/;    chomp;    ($code, $last, $name) = split(/; */);    if ($name) {	print OUT "$code	$last	$name\n";	$name =~ s/\s+//g;	open(BLOCK, ">In/$name.pl");	print BLOCK <<EOH;# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! # This file is built by $0 from e.g. $UnicodeData.# Any changes made here will be lost!EOH	print BLOCK <<"END2";return <<'END';$code	$lastENDEND2	close BLOCK;    }}print OUT "END\n";close OUT;##################################################sub proplist {    my ($table, $wanted, $val) = @_;    my @wanted;    my $out;    my $split;    return listFromPropFile($wanted) if $val eq $PropData;    if ($table =~ /^Arab/) {	open(UD, "ArabShap.txt") or warn "Can't open $table: $!";	$split = '($code, $name, $link, $linkgroup) = split(/; */);';    }    elsif ($table =~ /^Jamo/) {	open(UD, "Jamo.txt") or warn "Can't open $table: $!";	$split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';    }    elsif ($table =~ /^IsSyl/) {	open(UD, $SyllableData) or warn "Can't open $table: $!";	$split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;';    }    elsif ($table =~ /^IsLbrk/) {	open(UD, "LineBrk.txt") or warn "Can't open $table: $!";	$split = '($code, $brk, $name) = split(/;/);';    }    else {	open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";	$split = '($code, $name, $cat, $comb, $bid, $decomp, $dec, $dig, $num, $mir, $uni1,		$comment, $up, $down, $title) = split(/;/);';    }    if ($table =~ /^(?:To|Is)[A-Z]/) {	eval <<"END";	    while (<UD>) {		next if /^#/;		next if /^\\s/;		s/\\s+\$//;		$split		if ($wanted) {		    push(\@wanted, [hex \$code, hex $val, \$name =~ /, First>\$/]);		}	    }END	die $@ if $@;	while (@wanted) {	    $beg = shift @wanted;	    $last = $beg;	    while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and		(not $val or $wanted[0]->[1] == $last->[1] + 1)) {		    $last = shift @wanted;	    }	    $out .= sprintf "%04x", $beg->[0];	    if ($beg->[2]) {		$last = shift @wanted;	    }	    if ($beg == $last) {		$out .= "\t";	    }	    else {		$out .= sprintf "\t%04x", $last->[0];	    }	    $out .= sprintf "\t%04x", $beg->[1] if $val;	    $out .= "\n";	}    }    else {	eval <<"END";	    while (<UD>) {		next if /^#/;		next if /^\\s*\$/;		chop;		$split		if ($wanted) {		    push(\@wanted, [hex \$code, $val, \$name =~ /, First>\$/]);		}	    }END	die $@ if $@;	while (@wanted) {	    $beg = shift @wanted;	    $last = $beg;	    while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and		($wanted[0]->[1] eq $last->[1])) {		    $last = shift @wanted;	    }	    $out .= sprintf "%04x", $beg->[0];	    if ($beg->[2]) {		$last = shift @wanted;	    }	    if ($beg == $last) {		$out .= "\t";	    }	    else {		$out .= sprintf "\t%04x", $last->[0];	    }	    $out .= sprintf "\t%s\n", $beg->[1];	}    }    $out;}sub listFromPropFile {    my ($wanted) = @_;    my $out;    open (UD, $PropData) or die "Can't open $PropData: $!\n";    local($/) = "\n" . '*' x 43 . "\n\nProperty dump for:";   # not 42?    <UD>;    while (<UD>) {        chomp;        if (s/0x[\d\w]+\s+\((.*?)\)// and $wanted eq $1) {            s/\(\d+ chars\)//g;            s/^\s+//mg;            s/\s+$//mg;            s/\.\./\t/g;	    $out = lc $_;	    last;        }    }    close (UD);    "$out\n";}sub syllable_defs {    my @defs;    my %seen;    open (SD, $SyllableData) or die "Can't open $SyllableData: $!\n";    while (<SD>) {        next if /^\s*(#|$)/;        s/\s+$//;        ($code, $name, $syl) = split /; */;        next unless $syl;        push (@defs, ["IsSyl$syl", qq{\$syl eq "$syl"}, ''])                                                     unless $seen{$syl}++;    }    close (SD);    return (@defs);}# eof

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -