📄 mktables
字号:
0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, 0x3000 );

        ## Blank: whitespace, minus the line-breaking characters
        ## (LF, VT, FF, CR, NEL) and the Zl/Zp categories.
        $Cat{Blank}->$op($code) if $isspace
                                && !($code == 0x000A ||
                                     $code == 0x000B ||
                                     $code == 0x000C ||
                                     $code == 0x000D ||
                                     $code == 0x0085 ||
                                     $cat =~ /^Z[lp]/);

        $Cat{Digit}->$op($code) if $cat eq "Nd";
        $Cat{Upper}->$op($code) if $cat eq "Lu";
        $Cat{Lower}->$op($code) if $cat eq "Ll";
        $Cat{Title}->$op($code) if $cat eq "Lt";
        $Cat{ASCII}->$op($code) if $code <= 0x007F;
        $Cat{Cntrl}->$op($code) if $cat =~ /^C/;

        ## Graph: anything that is neither whitespace nor Cc/Cs/Cn.
        ## Print: Graph plus whitespace.
        my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/;
        $Cat{Graph}->$op($code) if $isgraph;
        $Cat{Print}->$op($code) if $isgraph || $isspace;
        $Cat{Punct}->$op($code) if $cat =~ /^P/;

        $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39)  ## 0..9
                                 || ($code >= 0x41 && $code <= 0x46)  ## A..F
                                 || ($code >= 0x61 && $code <= 0x66); ## a..f
    }

    ## open and read file.....
    if (not open IN, "UnicodeData.txt") {
        die "$0: UnicodeData.txt: $!\n";
    }

    ##
    ## For building \p{_CombAbove} and \p{_CanonDCIJ}
    ##
    my %_Above_HexCodes; ## Hexcodes for chars with $comb == 230 ("ABOVE")

    my %CodeToDeco;      ## Maps code to decomp. list for chars with first
                         ## decomp. char an "i" or "j" (for \p{_CanonDCIJ})

    ## This is filled in as we go....
    my $CombAbove = Table->New(Is    => '_CombAbove',
                               Desc  => '(for internal casefolding use)',
                               Fuzzy => 0);

    while (<IN>)
    {
        ## Only data lines (those starting with a hex code point and ';').
        next unless /^[0-9A-Fa-f]+;/;
        s/\s+$//;

        my ($hexcode,   ## code point in hex (e.g. "0041")
            $name,      ## character name (e.g. "LATIN CAPITAL LETTER A")
            $cat,       ## category (e.g. "Lu")
            $comb,      ## Canonical combining class (e.g. "230")
            $bidi,      ## directional category (e.g. "L")
            $deco,      ## decomposition mapping
            $decimal,   ## decimal digit value
            $digit,     ## digit value
            $number,    ## numeric value
            $mirrored,  ## mirrored
            $unicode10, ## name in Unicode 1.0
            $comment,   ## comment field
            $upper,     ## uppercase mapping
            $lower,     ## lowercase mapping
            $title,     ## titlecase mapping
           ) = split(/\s*;\s*/);

        # Note that in Unicode 3.2 there will be names like
        # LINE FEED (LF), which probably means that \N{} needs
        # to cope also with LINE FEED and LF.
        $name = $unicode10 if $name eq '<control>' && $unicode10 ne '';

        my $code = hex($hexcode);

        if ($comb and $comb == 230) {
            $CombAbove->Append($code);
            $_Above_HexCodes{$hexcode} = 1;
        }

        ## Used in building \p{_CanonDCIJ}
        ## (decomposition starts with 0069 "i" or 006A "j")
        if ($deco and $deco =~ m/^006[9A]\b/) {
            $CodeToDeco{$code} = $deco;
        }

        ##
        ## There are a few pairs of lines like:
        ##   AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
        ##   D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
        ## that define ranges.
        ##
        if ($name =~ /^<(.+), (First|Last)>$/)
        {
            $name = $1;
            ## The "First" line starts an entry, the "Last" line extends it.
            gencat($name, $cat, $code, $2 eq 'First' ? 'Append' : 'Extend');
            #New_Prop(In => $name, $General{$name}, Fuzzy => 1);
        }
        else
        {
            ## normal (single-character) lines
            gencat($name, $cat, $code, 'Append');

            # No Append() here since several codes may map into one.
            $To{Upper}->RawAppendRange($code, $code, $upper) if $upper;
            $To{Lower}->RawAppendRange($code, $code, $lower) if $lower;
            $To{Title}->RawAppendRange($code, $code, $title) if $title;
            $To{Digit}->Append($code, $decimal) if length $decimal;

            $Bidi->Append($code, $bidi);
            $Comb->Append($code, $comb) if $comb;
            $Number->Append($code, $number) if length $number;

            ## Numeric type: the first non-empty field of decimal / digit /
            ## numeric selects the table (De / Di / Nu).
            ## NOTE(review): the and/or chain only stops at the first match
            ## if Append() returns a true value -- confirm against Table.
            length($decimal) and ($Number{De} ||= Table->New())->Append($code)
              or
            length($digit)   and ($Number{Di} ||= Table->New())->Append($code)
              or
            length($number)  and ($Number{Nu} ||= Table->New())->Append($code);

            $Mirrored->Append($code) if $mirrored eq "Y";

            $Bidi{$bidi} ||= Table->New();#Is    => "bt/$bidi",
                                          #Desc  => "Bi-directional category '$bidi'",
                                          #Fuzzy => 0);
            $Bidi{$bidi}->Append($code);

            if ($deco)
            {
                $Deco->Append($code, $deco);
                ## A leading "<tag>" marks a compatibility decomposition;
                ## everything else is recorded as canonical.
                if ($deco =~/^<(\w+)>/)
                {
                    my $dshort = $PVA_reverse{dt}{ucfirst lc $1};
                    $DC{com}->Append($code);

                    $DC{$dshort} ||= Table->New();
                    $DC{$dshort}->Append($code);
                }
                else
                {
                    $DC{can}->Append($code);
                }
            }
        }
    }
    close IN;

    ##
    ## Tidy up a few special cases....
    ##

    $Cat{Cn} = $Assigned->Invert; ## Cn is everything that doesn't exist
    New_Prop(Is => 'Cn',
             $Cat{Cn},
             Desc => "General Category 'Cn' [not functional in Perl]",
             Fuzzy => 0);

    ## Unassigned is the same as 'Cn'
    New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0);

    $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn}));  ## Now merge in Cn into C

    # LC is Ll, Lu, and Lt.
    # (used to be L& or L_, but PropValueAliases.txt defines it as LC)
    New_Prop(Is => 'LC',
             Table->Merge(@Cat{qw[Ll Lu Lt]}),
             Desc  => '[\p{Ll}\p{Lu}\p{Lt}]',
             Fuzzy => 0);

    ## Any and All are all code points.
    my $Any = Table->New(Is    => 'Any',
                         Desc  => sprintf("[\\x{0000}-\\x{%X}]",
                                          $LastUnicodeCodepoint),
                         Fuzzy => 0);
    $Any->RawAppendRange(0, $LastUnicodeCodepoint);

    New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0);

    ##
    ## Build special properties for Perl's internal case-folding needs:
    ##    \p{_CaseIgnorable}
    ##    \p{_CanonDCIJ}
    ##    \p{_CombAbove}
    ## _CombAbove was built above. Others are built here....
    ##

    ## \p{_CaseIgnorable} is [\p{Mn}\0x00AD\x2010]
    New_Prop(Is => '_CaseIgnorable',
             Table->Merge($Cat{Mn},
                          0x00AD,    #SOFT HYPHEN
                          0x2010),   #HYPHEN
             Desc  => '(for internal casefolding use)',
             Fuzzy => 0);

    ## \p{_CanonDCIJ} is fairly complex...
    my $CanonCDIJ = Table->New(Is    => '_CanonDCIJ',
                               Desc  => '(for internal casefolding use)',
                               Fuzzy => 0);
    ## It contains the ASCII 'i' and 'j'....
    $CanonCDIJ->Append(0x0069); # ASCII ord("i")
    $CanonCDIJ->Append(0x006A); # ASCII ord("j")
    ## ...and any character with a decomposition that starts with either of
    ## those code points, but only if the decomposition does not have any
    ## combining character with the "ABOVE" canonical combining class.
    for my $code (sort { $a <=> $b} keys %CodeToDeco)
    {
        ## Need to ensure that all decomposition characters do not have
        ## a %HexCodeToComb in %AboveCombClasses.
        my $want = 1;
        for my $deco_hexcode (split / /, $CodeToDeco{$code})
        {
            if (exists $_Above_HexCodes{$deco_hexcode}) {
                ## one of the decomposition chars has an ABOVE combination
                ## class, so we're not interested in this one
                $want = 0;
                last;
            }
        }
        if ($want) {
            $CanonCDIJ->Append($code);
        }
    }

    ##
    ## Now dump the files.
    ##
    $Name->Write("Name.pl");

    {
        ## PVA.pl: $HEADER followed by a dump of each %utf8:: alias hash.
        my @PVA = $HEADER;
        foreach my $name (qw (PropertyAlias PA_reverse PropValueAlias
                              PVA_reverse PVA_abbr_map)) {
            # Should I really jump through typeglob hoops just to avoid a
            # symbolic reference? (%{"utf8::$name})
            push @PVA, "\n", "\%utf8::$name = (\n",
              simple_dumper (%{$utf8::{$name}}), ");\n";
        }
        push @PVA, "1;\n";
        WriteIfChanged("PVA.pl", @PVA);
    }

    # $Bidi->Write("Bidirectional.pl");
    for (keys %Bidi) {
        $Bidi{$_}->Write(
            ["lib","bc","$_.pl"],
            "BidiClass category '$PropValueAlias{bc}{$_}'"
        );
    }

    $Comb->Write("CombiningClass.pl");
    for (keys %{ $PropValueAlias{ccc} }) {
        ## Each ccc alias entry is a (code, name) pair; a one-entry table
        ## is written per alias.
        my ($code, $name) = @{ $PropValueAlias{ccc}{$_} };
        (my $c = Table->New())->Append($code);
        $c->Write(
            ["lib","ccc","$_.pl"],
            "CombiningClass category '$name'"
        );
    }

    $Deco->Write("Decomposition.pl");
    for (keys %DC) {
        $DC{$_}->Write(
            ["lib","dt","$_.pl"],
            "DecompositionType category '$PropValueAlias{dt}{$_}'"
        );
    }

    # $Number->Write("Number.pl");
    for (keys %Number) {
        $Number{$_}->Write(
            ["lib","nt","$_.pl"],
            "NumericType category '$PropValueAlias{nt}{$_}'"
        );
    }

    # $General->Write("Category.pl");

    for my $to (sort keys %To) {
        $To{$to}->Write(["To","$to.pl"]);
    }

    for (keys %{ $PropValueAlias{gc} }) {
        New_Alias(Is => $PropValueAlias{gc}{$_}, SameAs => $_, Fuzzy => 1);
    }
}

##
## Process LineBreak.txt
##
## Data lines look like "XXXX;class" or "XXXX..YYYY;class" (hex code points).
## One table is built per line-break class and written to lib/lb/<class>.pl.
## A combined table ($Lbrk) is also built, but its Write() is commented out.
## Dies if LineBreak.txt cannot be opened.
##
sub LineBreak_Txt()
{
    if (not open IN, "LineBreak.txt") {
        die "$0: LineBreak.txt: $!\n";
    }

    my $Lbrk = Table->New();
    my %Lbrk;

    while (<IN>)
    {
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;

        ## With no "..YYYY" part $2 is undef, so $last is hex("") == 0
        ## and the range extension below is skipped.
        my ($first, $last, $lbrk) = (hex($1), hex($2||""), $3);

        $Lbrk->Append($first, $lbrk);

        $Lbrk{$lbrk} ||= Table->New();
        $Lbrk{$lbrk}->Append($first);

        if ($last) {
            $Lbrk->Extend($last);
            $Lbrk{$lbrk}->Extend($last);
        }
    }
    close IN;

    # $Lbrk->Write("Lbrk.pl");

    for (keys %Lbrk) {
        $Lbrk{$_}->Write(
            ["lib","lb","$_.pl"],
            "Linebreak category '$PropValueAlias{lb}{$_}'"
        );
    }
}

##
## Process ArabicShaping.txt.
##
## Data lines are ';'-separated: code point (hex); name; link; link group.
## One table is built per link value and written to lib/jt/<link>.pl.
## $ArabLink and $ArabLinkGroup are filled in as well, but their Write()
## calls are commented out.
## Dies if ArabicShaping.txt cannot be opened.
##
sub ArabicShaping_txt()
{
    if (not open IN, "ArabicShaping.txt") {
        die "$0: ArabicShaping.txt: $!\n";
    }

    my $ArabLink      = Table->New();
    my $ArabLinkGroup = Table->New();

    my %JoinType;

    while (<IN>)
    {
        next unless /^[0-9A-Fa-f]+;/;
        s/\s+$//;

        my ($hexcode, $name, $link, $linkgroup) = split(/\s*;\s*/);
        my $code = hex($hexcode);
        $ArabLink->Append($code, $link);
        $ArabLinkGroup->Append($code, $linkgroup);

        $JoinType{$link} ||= Table->New(Is => "JoinType$link");
        $JoinType{$link}->Append($code);
    }
    close IN;

    # $ArabLink->Write("ArabLink.pl");
    # $ArabLinkGroup->Write("ArabLnkGrp.pl");

    for (keys %JoinType) {
        $JoinType{$_}->Write(
            ["lib","jt","$_.pl"],
            "JoiningType category '$PropValueAlias{jt}{$_}'"
        );
    }
}

##
## Process EastAsianWidth.txt.
##
## Data lines look like "XXXX;value" or "XXXX..YYYY;value", optionally
## followed by a '#' comment, which is stripped before splitting.
## One table is built per width value and written to lib/ea/<value>.pl.
## Dies if EastAsianWidth.txt cannot be opened.
##
sub EastAsianWidth_txt()
{
    if (not open IN, "EastAsianWidth.txt") {
        die "$0: EastAsianWidth.txt: $!\n";
    }

    my %EAW;

    while (<IN>)
    {
        next unless /^[0-9A-Fa-f]+(\.\.[0-9A-Fa-f]+)?;/;
        s/#.*//;
        s/\s+$//;

        my ($hexcodes, $pv) = split(/\s*;\s*/);
        $EAW{$pv} ||= Table->New(Is => "EastAsianWidth$pv");
        ## $end stays undef for a single code point (no ".." to split on).
        my ($start, $end) = split(/\.\./, $hexcodes);
        if (defined $end) {
            $EAW{$pv}->AppendRange(hex($start), hex($end));
        } else {
            $EAW{$pv}->Append(hex($start));
        }
    }
    close IN;

    for (keys %EAW) {
        $EAW{$_}->Write(
            ["lib","ea","$_.pl"],
            "EastAsianWidth category '$PropValueAlias{ea}{$_}'"
        );
    }
}

##
## Process HangulSyllableType.txt.
##
## Data lines look like "XXXX;type" or "XXXX..YYYY;type" (hex code points).
## One table is built per syllable type and written to lib/hst/<type>.pl.
## Dies if HangulSyllableType.txt cannot be opened.
##
sub HangulSyllableType_txt()
{
    if (not open IN, "HangulSyllableType.txt") {
        die "$0: HangulSyllableType.txt: $!\n";
    }

    my %HST;

    while (<IN>)
    {
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
        ## With no "..YYYY" part $2 is undef, so $last is hex("") == 0
        ## and no Extend() is done.
        my ($first, $last, $pv) = (hex($1), hex($2||""), $3);

        $HST{$pv} ||= Table->New(Is => "HangulSyllableType$pv");
        $HST{$pv}->Append($first);

        if ($last) { $HST{$pv}->Extend($last) }
    }
    close IN;

    for (keys %HST) {
        $HST{$_}->Write(
            ["lib","hst","$_.pl"],
            "HangulSyllableType category '$PropValueAlias{hst}{$_}'"
        );
    }
}

##
## Process Jamo.txt.
##
sub Jamo_txt()
{
    if (not open IN, "Jamo.txt") {
        die "$0: Jamo.txt: $!\n";
    }
    my $Short = Table->New();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -