📄 mktables
字号:
##sub Table::ExtendNoCheck{ ## Optmized adding: Assumes $Table and $codepoint as parms $_[0]->[-1]->[RANGE_END] = $_[1];}#### Given a code point range start and end (and optional name), blindly## append them to the list of ranges for the Table.#### NOTE: Code points must be added in strictly ascending numeric order.##sub Table::RawAppendRange{ my $Table = shift; #self my $start = shift; my $end = shift; my $name = shift; $name = "" if not defined $name; ## warning: $name can be "0" push @$Table, [ $start, # RANGE_START $end, # RANGE_END $name ]; # RANGE_NAME}#### Given a code point (and optional name), add it to the Table.#### NOTE: Code points must be added in strictly ascending numeric order.##sub Table::Append{ my $Table = shift; #self my $codepoint = shift; my $name = shift; $name = "" if not defined $name; ## warning: $name can be "0" ## ## If we've already got a range working, and this code point is the next ## one in line, and if the name is the same, just extend the current range. ## my $last = $Table->[-1]; if ($last and $last->[RANGE_END] == $codepoint - 1 and $last->[RANGE_NAME] eq $name) { $Table->ExtendNoCheck($codepoint); } else { $Table->RawAppendRange($codepoint, $codepoint, $name); }}#### Given a code point range starting value and ending value (and name),## Add the range to teh Table.#### NOTE: Code points must be added in strictly ascending numeric order.##sub Table::AppendRange{ my $Table = shift; #self my $start = shift; my $end = shift; my $name = shift; $name = "" if not defined $name; ## warning: $name can be "0" $Table->Append($start, $name); $Table->Extend($end) if $end > $start;}#### Return a new Table that represents all code points not in the Table.##sub Table::Invert{ my $Table = shift; #self my $New = Table->New(); my $max = -1; for my $range (@$Table) { my $start = $range->[RANGE_START]; my $end = $range->[RANGE_END]; if ($start-1 >= $max+1) { $New->AppendRange($max+1, $start-1, ""); } $max = $end; } if ($max+1 < $LastUnicodeCodepoint) { $New->AppendRange($max+1, $LastUnicodeCodepoint); } return $New;}#### Merges any number of other tables with $self, returning the new table.## (existing tables are not modified)###### Args may be Tables, or individual code points (as integers).#### Can be called as either a constructor or a method.##sub Table::Merge{ shift(@_) if not ref $_[0]; ## if called as a constructor, lose the class my @Tables = @_; ## Accumulate all records from all tables my @Records; for my $Arg (@Tables) { if (ref $Arg) { ## arg is a table -- get its ranges push @Records, @$Arg; } else { ## arg is a codepoint, make a range push @Records, [ $Arg, $Arg ] } } ## sort by range start, with longer ranges coming first. my ($first, @Rest) = sort { ($a->[RANGE_START] <=> $b->[RANGE_START]) or ($b->[RANGE_END] <=> $b->[RANGE_END]) } @Records; my $New = Table->New(); ## Ensuring the first range is there makes the subsequent loop easier $New->AppendRange($first->[RANGE_START], $first->[RANGE_END]); ## Fold in records so long as they add new information. for my $set (@Rest) { my $start = $set->[RANGE_START]; my $end = $set->[RANGE_END]; if ($start > $New->Max) { $New->AppendRange($start, $end); } elsif ($end > $New->Max) { $New->ExtendNoCheck($end); } } return $New;}#### Given a filename, write a representation of the Table to a file.## May have an optional comment as a 2nd arg.## Filename may actually be an arrayref of directories##sub Table::Write{ my $Table = shift; #self my $filename = shift; my $comment = shift; my @OUT = $HEADER; if (defined $comment) { $comment =~ s/\s+\Z//; $comment =~ s/^/# /gm; push @OUT, "#\n$comment\n#\n"; } push @OUT, "return <<'END';\n"; for my $set (@$Table) { my $start = $set->[RANGE_START]; my $end = $set->[RANGE_END]; my $name = $set->[RANGE_NAME]; if ($start == $end) { push @OUT, sprintf "%04X\t\t%s\n", $start, $name; } else { push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name; } } push @OUT, "END\n"; WriteIfChanged($filename, @OUT);}## This used only for making the test script.## helper functionsub IsUsable($){ my $code = shift; return 0 if $code <= 0x0000; ## don't use null return 0 if $code >= $LastUnicodeCodepoint; ## keep in range return 0 if ($code >= 0xD800 and $code <= 0xDFFF); ## no surrogates return 0 if ($code >= 0xFDD0 and $code <= 0xFDEF); ## utf8.c says no good return 0 if (($code & 0xFFFF) == 0xFFFE); ## utf8.c says no good return 0 if (($code & 0xFFFF) == 0xFFFF); ## utf8.c says no good return 1;}## Return a code point that's part of the table.## Returns nothing if the table is empty (or covers only surrogates).## This used only for making the test script.sub Table::ValidCode{ my $Table = shift; #self for my $set (@$Table) { return $set->[RANGE_END] if IsUsable($set->[RANGE_END]); } return ();}## Return a code point that's not part of the table## Returns nothing if the table covers all code points.## This used only for making the test script.sub Table::InvalidCode{ my $Table = shift; #self return 0x1234 if not @$Table; for my $set (@$Table) { if (IsUsable($set->[RANGE_END] + 1)) { return $set->[RANGE_END] + 1; } if (IsUsable($set->[RANGE_START] - 1)) { return $set->[RANGE_START] - 1; } } return ();}##################################################################################################################################################################################################################################### Called like:## New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1);#### The args must be in that order, although the Fuzzy pair may be omitted.#### This creates 'IsAll' as an alias for 'IsAny'##sub New_Alias($$$@){ my $Type = shift; ## "Is" or "In" my $Alias = shift; my $SameAs = shift; # expecting "SameAs" -- just ignored my $Name = shift; ## remaining args are optional key/val my %Args = @_; my $Fuzzy = delete $Args{Fuzzy}; ## sanity check a few args if (%Args or ($Type ne 'Is' and $Type ne 'In') or $SameAs ne 'SameAs') { confess "$0: bad args to New_Alias" } $Alias = CanonicalName($Alias) if $Fuzzy; if (not $TableInfo{$Type}->{$Name}) { my $CName = CanonicalName($Name); if ($TableInfo{$Type}->{$CName}) { confess "$0: Use canonical form '$CName' instead of '$Name' for alias."; } else { confess "$0: don't have original $Type => $Name to make alias\n"; } } if ($TableInfo{$Alias}) { confess "$0: already have original $Type => $Alias; can't make alias"; } $AliasInfo{$Type}->{$Name} = $Alias; if ($Fuzzy) { $FuzzyNames{$Type}->{$Alias} = $Name; }}## All assigned code pointsmy $Assigned = Table->New(Is => 'Assigned', Desc => "All assigned code points", Fuzzy => 0);my $Name = Table->New(); ## all characters, individually by namemy $General = Table->New(); ## all characters, grouped by categorymy %General;my %Cat;## Simple Data::Dumper alike. Good enough for our needs. We can't use the real## thing as we have to run under miniperlsub simple_dumper { my @lines; my $item; foreach $item (@_) { if (ref $item) { if (ref $item eq 'ARRAY') { push @lines, "[\n", simple_dumper (@$item), "],\n"; } elsif (ref $item eq 'HASH') { push @lines, "{\n", simple_dumper (%$item), "},\n"; } else { die "Can't cope with $item"; } } else { if (defined $item) { my $copy = $item; $copy =~ s/([\'\\])/\\$1/gs; push @lines, "'$copy',\n"; } else { push @lines, "undef,\n"; } } } @lines;}#### Process UnicodeData.txt (Categories, etc.)##sub UnicodeData_Txt(){ my $Bidi = Table->New(); my $Deco = Table->New(); my $Comb = Table->New(); my $Number = Table->New(); my $Mirrored = Table->New();#Is => 'Mirrored', #Desc => "Mirrored in bidirectional text", #Fuzzy => 0); my %DC; my %Bidi; my %Number; $DC{can} = Table->New(); $DC{com} = Table->New(); ## Initialize Perl-generated categories ## (Categories from UnicodeData.txt are auto-initialized in gencat) $Cat{Alnum} = Table->New(Is => 'Alnum', Desc => "[[:Alnum:]]", Fuzzy => 0); $Cat{Alpha} = Table->New(Is => 'Alpha', Desc => "[[:Alpha:]]", Fuzzy => 0); $Cat{ASCII} = Table->New(Is => 'ASCII', Desc => "[[:ASCII:]]", Fuzzy => 0); $Cat{Blank} = Table->New(Is => 'Blank', Desc => "[[:Blank:]]", Fuzzy => 0); $Cat{Cntrl} = Table->New(Is => 'Cntrl', Desc => "[[:Cntrl:]]", Fuzzy => 0); $Cat{Digit} = Table->New(Is => 'Digit', Desc => "[[:Digit:]]", Fuzzy => 0); $Cat{Graph} = Table->New(Is => 'Graph', Desc => "[[:Graph:]]", Fuzzy => 0); $Cat{Lower} = Table->New(Is => 'Lower', Desc => "[[:Lower:]]", Fuzzy => 0); $Cat{Print} = Table->New(Is => 'Print', Desc => "[[:Print:]]", Fuzzy => 0); $Cat{Punct} = Table->New(Is => 'Punct', Desc => "[[:Punct:]]", Fuzzy => 0); $Cat{Space} = Table->New(Is => 'Space', Desc => "[[:Space:]]", Fuzzy => 0); $Cat{Title} = Table->New(Is => 'Title', Desc => "[[:Title:]]", Fuzzy => 0); $Cat{Upper} = Table->New(Is => 'Upper', Desc => "[[:Upper:]]", Fuzzy => 0); $Cat{XDigit} = Table->New(Is => 'XDigit', Desc => "[[:XDigit:]]", Fuzzy => 0); $Cat{Word} = Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0); $Cat{SpacePerl} = Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0); $Cat{VertSpace} = Table->New(Is => 'VertSpace', Desc => '\v', Fuzzy => 0); $Cat{HorizSpace} = Table->New(Is => 'HorizSpace', Desc => '\h', Fuzzy => 0); my %To; $To{Upper} = Table->New(); $To{Lower} = Table->New(); $To{Title} = Table->New(); $To{Digit} = Table->New(); sub gencat($$$$) { my ($name, ## Name ("LATIN CAPITAL LETTER A") $cat, ## Category ("Lu", "Zp", "Nd", etc.) $code, ## Code point (as an integer) $op) = @_; my $MajorCat = substr($cat, 0, 1); ## L, M, Z, S, etc $Assigned->$op($code); $Name->$op($code, $name); $General->$op($code, $cat); ## add to the sub category (e.g. "Lu", "Nd", "Cf", ..) $Cat{$cat} ||= Table->New(Is => $cat, Desc => "General Category '$cat'", Fuzzy => 0); $Cat{$cat}->$op($code); ## add to the major category (e.g. "L", "N", "C", ...) $Cat{$MajorCat} ||= Table->New(Is => $MajorCat, Desc => "Major Category '$MajorCat'", Fuzzy => 0); $Cat{$MajorCat}->$op($code); ($General{$name} ||= Table->New)->$op($code, $name); # 005F: SPACING UNDERSCORE $Cat{Word}->$op($code) if $cat =~ /^[LMN]|Pc/; $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/; $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/; my $isspace = ($cat =~ /Zs|Zl|Zp/ && $code != 0x200B) # 200B is ZWSP which is for line break control # and therefore it is not part of "space" even while it is "Zs". || $code == 0x0009 # 0009: HORIZONTAL TAB || $code == 0x000A # 000A: LINE FEED || $code == 0x000B # 000B: VERTICAL TAB || $code == 0x000C # 000C: FORM FEED || $code == 0x000D # 000D: CARRIAGE RETURN || $code == 0x0085 # 0085: NEL ; $Cat{Space}->$op($code) if $isspace; $Cat{SpacePerl}->$op($code) if $isspace && $code != 0x000B; # Backward compat. $Cat{VertSpace}->$op($code) if grep {$code == $_} ( 0x0A..0x0D,0x85,0x2028,0x2029 ); $Cat{HorizSpace}->$op($code) if grep {$code == $_} ( 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, 0x2001, 0x2002,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -