📄 mktables
字号:
while (<IN>) { next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/; my ($code, $short) = (hex($1), $2); $Short->Append($code, $short); } close IN; # $Short->Write("JamoShort.pl");}#### Process Scripts.txt.##sub Scripts_txt(){ my @ScriptInfo; if (not open(IN, "Scripts.txt")) { die "$0: Scripts.txt: $!\n"; } while (<IN>) { next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/; # Wait until all the scripts have been read since # they are not listed in numeric order. push @ScriptInfo, [ hex($1), hex($2||""), $3 ]; } close IN; # Now append the scripts properties in their code point order. my %Script; my $Scripts = Table->New(); for my $script (sort { $a->[0] <=> $b->[0] } @ScriptInfo) { my ($first, $last, $name) = @$script; $Scripts->Append($first, $name); $Script{$name} ||= Table->New(Is => $name, Desc => "Script '$name'", Fuzzy => 1); $Script{$name}->Append($first, $name); if ($last) { $Scripts->Extend($last); $Script{$name}->Extend($last); } } # $Scripts->Write("Scripts.pl"); ## Common is everything not explicitly assigned to a Script ## ## ***shouldn't this be intersected with \p{Assigned}? ****** ## New_Prop(Is => 'Common', $Scripts->Invert, Desc => 'Pseudo-Script of codepoints not in other Unicode scripts', Fuzzy => 1);}#### Given a name like "Close Punctuation", return a regex (that when applied## with /i) matches any valid form of that name (e.g. "ClosePunctuation",## "Close-Punctuation", etc.)#### Accept any space, dash, or underbar where in the official name there is## space or a dash (or underbar, but there never is).####sub NameToRegex($){ my $Name = shift; $Name =~ s/[- _]/(?:[-_]|\\s+)?/g; return $Name;}#### Process Blocks.txt.##sub Blocks_txt(){ my $Blocks = Table->New(); my %Blocks; if (not open IN, "Blocks.txt") { die "$0: Blocks.txt: $!\n"; } while (<IN>) { #next if not /Private Use$/; next if not /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/; my ($first, $last, $name) = (hex($1), hex($2), $3); $Blocks->Append($first, $name); $Blocks{$name} ||= Table->New(In => $name, Desc => "Block '$name'", Fuzzy => 1); $Blocks{$name}->Append($first, $name); if ($last and $last != $first) { $Blocks->Extend($last); $Blocks{$name}->Extend($last); } } close IN; # $Blocks->Write("Blocks.pl");}#### Read in the PropList.txt. It contains extended properties not## listed in the UnicodeData.txt, such as 'Other_Alphabetic':## alphabetic but not of the general category L; many modifiers## belong to this extended property category: while they are not## alphabets, they are alphabetic in nature.##sub PropList_txt(){ my @PropInfo; if (not open IN, "PropList.txt") { die "$0: PropList.txt: $!\n"; } while (<IN>) { next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/; # Wait until all the extended properties have been read since # they are not listed in numeric order. push @PropInfo, [ hex($1), hex($2||""), $3 ]; } close IN; # Now append the extended properties in their code point order. my $Props = Table->New(); my %Prop; for my $prop (sort { $a->[0] <=> $b->[0] } @PropInfo) { my ($first, $last, $name) = @$prop; $Props->Append($first, $name); $Prop{$name} ||= Table->New(Is => $name, Desc => "Extended property '$name'", Fuzzy => 1); $Prop{$name}->Append($first, $name); if ($last) { $Props->Extend($last); $Prop{$name}->Extend($last); } } for (keys %Prop) { (my $file = $PA_reverse{$_}) =~ tr/_//d; # XXX I'm assuming that the names from %Prop don't suffer 8.3 clashes. $BaseNames{lc $file}++; $Prop{$_}->Write( ["lib","gc_sc","$file.pl"], "Binary property '$_'" ); } # Alphabetic is L, Nl, and Other_Alphabetic. New_Prop(Is => 'Alphabetic', Table->Merge($Cat{L}, $Cat{Nl}, $Prop{Other_Alphabetic}), Desc => '[\p{L}\p{Nl}\p{OtherAlphabetic}]', # canonical names Fuzzy => 1); # Lowercase is Ll and Other_Lowercase. New_Prop(Is => 'Lowercase', Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}), Desc => '[\p{Ll}\p{OtherLowercase}]', # canonical names Fuzzy => 1); # Uppercase is Lu and Other_Uppercase. New_Prop(Is => 'Uppercase', Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}), Desc => '[\p{Lu}\p{OtherUppercase}]', # canonical names Fuzzy => 1); # Math is Sm and Other_Math. New_Prop(Is => 'Math', Table->Merge($Cat{Sm}, $Prop{Other_Math}), Desc => '[\p{Sm}\p{OtherMath}]', # canonical names Fuzzy => 1); # ID_Start is Ll, Lu, Lt, Lm, Lo, Nl, and Other_ID_Start. New_Prop(Is => 'ID_Start', Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}, $Prop{Other_ID_Start}), Desc => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\p{OtherIDStart}]', Fuzzy => 1); # ID_Continue is ID_Start, Mn, Mc, Nd, Pc, and Other_ID_Continue. New_Prop(Is => 'ID_Continue', Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}, @Prop{qw[Other_ID_Start Other_ID_Continue]}), Desc => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{OtherIDContinue}]', Fuzzy => 1); # Default_Ignorable_Code_Point = Other_Default_Ignorable_Code_Point # + Cf + Cc + Cs + Noncharacter + Variation_Selector # - WhiteSpace - FFF9..FFFB (Annotation Characters) my $Annotation = Table->New(); $Annotation->RawAppendRange(0xFFF9, 0xFFFB); New_Prop(Is => 'Default_Ignorable_Code_Point', Table->Merge(@Cat{qw[Cf Cc Cs]}, $Prop{Noncharacter_Code_Point}, $Prop{Variation_Selector}, $Prop{Other_Default_Ignorable_Code_Point}) ->Invert ->Merge($Prop{White_Space}, $Annotation) ->Invert, Desc => '(?![\p{WhiteSpace}\x{FFF9}-\x{FFFB}])[\p{Cf}\p{Cc}'. '\p{Cs}\p{NoncharacterCodePoint}\p{VariationSelector}'. '\p{OtherDefaultIgnorableCodePoint}]', Fuzzy => 1);}#### These are used in:## MakePropTestScript()## WriteAllMappings()## for making the test script.##my %FuzzyNameToTest;my %ExactNameToTest;## This used only for making the test scriptsub GenTests($$$$){ my $FH = shift; my $Prop = shift; my $MatchCode = shift; my $FailCode = shift; if (defined $MatchCode) { printf $FH qq/Expect(1, "\\x{%04X}", '\\p{$Prop}' );\n/, $MatchCode; printf $FH qq/Expect(0, "\\x{%04X}", '\\p{^$Prop}');\n/, $MatchCode; printf $FH qq/Expect(0, "\\x{%04X}", '\\P{$Prop}' );\n/, $MatchCode; printf $FH qq/Expect(1, "\\x{%04X}", '\\P{^$Prop}');\n/, $MatchCode; } if (defined $FailCode) { printf $FH qq/Expect(0, "\\x{%04X}", '\\p{$Prop}' );\n/, $FailCode; printf $FH qq/Expect(1, "\\x{%04X}", '\\p{^$Prop}');\n/, $FailCode; printf $FH qq/Expect(1, "\\x{%04X}", '\\P{$Prop}' );\n/, $FailCode; printf $FH qq/Expect(0, "\\x{%04X}", '\\P{^$Prop}');\n/, $FailCode; }}## This used only for making the test scriptsub ExpectError($$){ my $FH = shift; my $prop = shift; print $FH qq/Error('\\p{$prop}');\n/; print $FH qq/Error('\\P{$prop}');\n/;}## This used only for making the test scriptmy @GoodSeps = ( " ", "-", " \t ", "", "", "_", );my @BadSeps = ( "--", "__", " _", "/" );## This used only for making the test scriptsub RandomlyFuzzifyName($;$){ my $Name = shift; my $WantError = shift; ## if true, make an error my @parts; for my $part (split /[-\s_]+/, $Name) { if (@parts) { if ($WantError and rand() < 0.3) { push @parts, $BadSeps[rand(@BadSeps)]; $WantError = 0; } else { push @parts, $GoodSeps[rand(@GoodSeps)]; } } my $switch = int rand(4); if ($switch == 0) { push @parts, uc $part; } elsif ($switch == 1) { push @parts, lc $part; } elsif ($switch == 2) { push @parts, ucfirst $part; } else { push @parts, $part; } } my $new = join('', @parts); if ($WantError) { if (rand() >= 0.5) { $new .= $BadSeps[rand(@BadSeps)]; } else { $new = $BadSeps[rand(@BadSeps)] . $new; } } return $new;}## This used only for making the test scriptsub MakePropTestScript(){ ## this written directly -- it's huge. force_unlink ("TestProp.pl"); if (not open OUT, ">TestProp.pl") { die "$0: TestProp.pl: $!\n"; } print OUT <DATA>; while (my ($Name, $Table) = each %ExactNameToTest) { GenTests(*OUT, $Name, $Table->ValidCode, $Table->InvalidCode); ExpectError(*OUT, uc $Name) if uc $Name ne $Name; ExpectError(*OUT, lc $Name) if lc $Name ne $Name; } while (my ($Name, $Table) = each %FuzzyNameToTest) { my $Orig = $CanonicalToOrig{$Name}; my %Names = ( $Name => 1, $Orig => 1, RandomlyFuzzifyName($Orig) => 1 ); for my $N (keys %Names) { GenTests(*OUT, $N, $Table->ValidCode, $Table->InvalidCode); } ExpectError(*OUT, RandomlyFuzzifyName($Orig, 'ERROR')); } print OUT "Finished();\n"; close OUT;}#### These are used only in:## RegisterFileForName()## WriteAllMappings()##my %Exact; ## will become %utf8::Exact;my %Canonical; ## will become %utf8::Canonical;my %CaComment; ## Comment for %Canonical entry of same key#### Given info about a name and a datafile that it should be associated with,## register that assocation in %Exact and %Canonical.sub RegisterFileForName($$$$){ my $Type = shift; my $Name = shift; my $IsFuzzy = shift; my $filename = shift; ## ## Now in details for the mapping. $Type eq 'Is' has the ## Is removed, as it will be removed in utf8_heavy when this ## data is being checked. In keeps its "In", but a second ## sans-In record is written if it doesn't conflict with ## anything already there. ## if (not $IsFuzzy) { if ($Type eq 'Is') { die "oops[$Name]" if $Exact{$Name}; $Exact{$Name} = $filename; } else { die "oops[$Type$Name]" if $Exact{"$Type$Name"}; $Exact{"$Type$Name"} = $filename; $Exact{$Name} = $filename if not $Exact{$Name}; } } else { my $CName = lc $Name; if ($Type eq 'Is') { die "oops[$CName]" if $Canonical{$CName}; $Canonical{$CName} = $filename; $CaComment{$CName} = $Name if $Name =~ tr/A-Z// >= 2; } else { die "oops[$Type$CName]" if $Canonical{lc "$Type$CName"}; $Canonical{lc "$Type$CName"} = $filename; $CaComment{lc "$Type$CName"} = "$Type$Name"; if (not $Canonical{$CName}) { $Canonical{$CName} = $filename; $CaComment{$CName} = "$Type$Name"; } } }}#### Writes the info accumulated in#### %TableInfo;## %FuzzyNames;## %AliasInfo;####sub WriteAllMappings(){ my @MAP; ## 'Is' *MUST* come first, so its names have precidence over 'In's for my $Type ('Is', 'In') { my %RawNameToFile; ## a per-$Type cache for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}}) { ## Note: $Name is already canonical my $Table = $TableInfo{$Type}->{$Name}; my $IsFuzzy = $FuzzyNames{$Type}->{$Name}; ## Need an 8.3 safe filename (which means "an 8 safe" $filename) my $filename;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -