📄 enc2xs

📁 source of perl for linux application,
💻
📖 第 1 页 / 共 3 页
字号:
 my $type; while ($type = <$fh>)  {   last if $type !~ /^\s*#/;  } chomp($type); return if $type eq 'E'; # Do the hash lookup once, rather than once per function call. 4% speedup. my $type_func = $encode_types{$type}; my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); warn "$type encoded $name\n"; my $rep = ''; # Save a defined test by setting these to defined values. my $min_el = ~0; # A very big integer my $max_el = 0;  # Anything must be longer than 0 {  my $v = hex($def);  $rep = &$type_func($v & 0xFF, ($v >> 8) & 0xffe); } my $errors; my $seen; # use -Q to silence the seen test. Makefile.PL uses this by default. $seen = {} unless $opt{Q}; do  {   my $line = <$fh>;   chomp($line);   my $page = hex($line);   my $ch = 0;   my $i = 16;   do    {     # So why is it 1% faster to leave the my here?     my $line = <$fh>;     $line =~ s/\r\n$/\n/;     die "$.:${line}Line should be exactly 65 characters long including     newline (".length($line).")" unless length ($line) == 65;     # Split line into groups of 4 hex digits, convert groups to ints     # This takes 65.35		     # map {hex $_} $line =~ /(....)/g     # This takes 63.75 (2.5% less time)     # unpack "n*", pack "H*", $line     # There's an implicit loop in map. Loops are bad, m'kay. Ops are bad, m'kay     # Doing it as while ($line =~ /(....)/g) took 74.63     foreach my $val (unpack "n*", pack "H*", $line)      {       next if $val == 0xFFFD;       my $ech = &$type_func($ch,$page);       if ($val || (!$ch && !$page))        {         my $el  = length($ech);         $max_el = $el if $el > $max_el;         $min_el = $el if $el < $min_el;         my $uch = encode_U($val);         if ($seen) {           # We're doing the test.           # We don't need to read this quickly, so storing it as a scalar,           # rather than 3 (anon array, plus the 2 scalars it holds) saves           # RAM and may make us faster on low RAM systems. [see __END__]           if (exists $seen->{$uch})             {               warn sprintf("U%04X is %02X%02X and %04X\n",                            $val,$page,$ch,$seen->{$uch});               $errors++;             }           else             {               $seen->{$uch} = $page << 8 | $ch;             }         }         # Passing 2 extra args each time is 3.6% slower!         # Even with having to add $fallback ||= 0 later         enter_fb0($e2u,$ech,$uch);         enter_fb0($u2e,$uch,$ech);        }       else        {         # No character at this position         # enter($e2u,$ech,undef,$e2u);        }       $ch++;      }    } while --$i;  } while --$pages; die "\$min_el=$min_el, \$max_el=$max_el - seems we read no lines"   if $min_el > $max_el; die "$errors mapping conflicts\n" if ($errors && $opt{'S'}); $encoding{$name} = [$e2u,$u2e,$rep,$min_el,$max_el];}# my ($a,$s,$d,$t,$fb) = @_;sub enter {  my ($current,$inbytes,$outbytes,$next,$fallback) = @_;  # state we shift to after this (multibyte) input character defaults to same  # as current state.  $next ||= $current;  # Making sure it is defined seems to be faster than {no warnings;} in  # &process, or passing it in as 0 explicity.  # XXX $fallback ||= 0;  # Start at the beginning and work forwards through the string to zero.  # effectively we are removing 1 character from the front each time  # but we don't actually edit the string. [this alone seems to be 14% speedup]  # Hence -$pos is the length of the remaining string.  my $pos = -length $inbytes;  while (1) {    my $byte = substr $inbytes, $pos, 1;    #  RAW_NEXT => 0,    #  RAW_IN_LEN => 1,    #  RAW_OUT_BYTES => 2,    #  RAW_FALLBACK => 3,    # to unicode an array would seem to be better, because the pages are dense.    # from unicode can be very sparse, favouring a hash.    # hash using the bytes (all length 1) as keys rather than ord value,    # as it's easier to sort these in &process.    # It's faster to always add $fallback even if it's undef, rather than    # choosing between 3 and 4 element array. (hence why we set it defined    # above)    my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,'',$fallback];    # When $pos was -1 we were at the last input character.    unless (++$pos) {      $do_now->[RAW_OUT_BYTES] = $outbytes;      $do_now->[RAW_NEXT] = $next;      return;    }    # Tail recursion. The intermdiate state may not have a name yet.    $current = $do_now->[RAW_NEXT];  }}# This is purely for optimistation. It's just &enter hard coded for $fallback# of 0, using only a 3 entry array ref to save memory for every entry.sub enter_fb0 {  my ($current,$inbytes,$outbytes,$next) = @_;  $next ||= $current;  my $pos = -length $inbytes;  while (1) {    my $byte = substr $inbytes, $pos, 1;    my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,''];    unless (++$pos) {      $do_now->[RAW_OUT_BYTES] = $outbytes;      $do_now->[RAW_NEXT] = $next;      return;    }    $current = $do_now->[RAW_NEXT];  }}sub process{  my ($name,$a) = @_;  $name =~ s/\W+/_/g;  $a->{Cname} = $name;  my $raw = $a->{Raw};  my ($l, $agg_max_in, $agg_next, $agg_in_len, $agg_out_len, $agg_fallback);  my @ent;  $agg_max_in = 0;  foreach my $key (sort keys %$raw) {    #  RAW_NEXT => 0,    #  RAW_IN_LEN => 1,    #  RAW_OUT_BYTES => 2,    #  RAW_FALLBACK => 3,    my ($next, $in_len, $out_bytes, $fallback) = @{$raw->{$key}};    # Now we are converting from raw to aggregate, switch from 1 byte strings    # to numbers    my $b = ord $key;    $fallback ||= 0;    if ($l &&        # If this == fails, we're going to reset $agg_max_in below anyway.        $b == ++$agg_max_in &&        # References in numeric context give the pointer as an int.        $agg_next == $next &&        $agg_in_len == $in_len &&        $agg_out_len == length $out_bytes &&        $agg_fallback == $fallback        # && length($l->[AGG_OUT_BYTES]) < 16       ) {      #     my $i = ord($b)-ord($l->[AGG_MIN_IN]);      # we can aggregate this byte onto the end.      $l->[AGG_MAX_IN] = $b;      $l->[AGG_OUT_BYTES] .= $out_bytes;    } else {      # AGG_MIN_IN => 0,      # AGG_MAX_IN => 1,      # AGG_OUT_BYTES => 2,      # AGG_NEXT => 3,      # AGG_IN_LEN => 4,      # AGG_OUT_LEN => 5,      # AGG_FALLBACK => 6,      # Reset the last thing we saw, plus set 5 lexicals to save some derefs.      # (only gains .6% on euc-jp  -- is it worth it?)      push @ent, $l = [$b, $agg_max_in = $b, $out_bytes, $agg_next = $next,                       $agg_in_len = $in_len, $agg_out_len = length $out_bytes,                       $agg_fallback = $fallback];    }    if (exists $next->{Cname}) {      $next->{'Forward'} = 1 if $next != $a;    } else {      process(sprintf("%s_%02x",$name,$b),$next);    }  }  # encengine.c rules say that last entry must be for 255  if ($agg_max_in < 255) {    push @ent, [1+$agg_max_in, 255,undef,$a,0,0];  }  $a->{'Entries'} = \@ent;}sub addstrings{ my ($fh,$a) = @_; my $name = $a->{'Cname'}; # String tables foreach my $b (@{$a->{'Entries'}})  {   next unless $b->[AGG_OUT_LEN];   $strings{$b->[AGG_OUT_BYTES]} = undef;  } if ($a->{'Forward'})  {   my $cpp = ($Config{d_cplusplus} || '') eq 'define';   my $var = $^O eq 'MacOS' || $cpp ? 'extern' : 'static';   my $const = $cpp ? '' : 'const';   print $fh "$var $const encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";  } $a->{'DoneStrings'} = 1; foreach my $b (@{$a->{'Entries'}})  {   my ($s,$e,$out,$t,$end,$l) = @$b;   addstrings($fh,$t) unless $t->{'DoneStrings'};  }}sub outbigstring{  my ($fh,$name) = @_;  $string_acc = '';  # Make the big string in the string accumulator. Longest first, on the hope  # that this makes it more likely that we find the short strings later on.  # Not sure if it helps sorting strings of the same length lexcically.  foreach my $s (sort {length $b <=> length $a || $a cmp $b} keys %strings) {    my $index = index $string_acc, $s;    if ($index >= 0) {      $saved += length($s);      $strings_in_acc{$s} = $index;    } else {    OPTIMISER: {    if ($opt{'O'}) {      my $sublength = length $s;      while (--$sublength > 0) {        # progressively lop characters off the end, to see if the start of        # the new string overlaps the end of the accumulator.        if (substr ($string_acc, -$sublength)        eq substr ($s, 0, $sublength)) {          $subsave += $sublength;          $strings_in_acc{$s} = length ($string_acc) - $sublength;          # append the last bit on the end.          $string_acc .= substr ($s, $sublength);          last OPTIMISER;        }        # or if the end of the new string overlaps the start of the        # accumulator        next unless substr ($string_acc, 0, $sublength)          eq substr ($s, -$sublength);        # well, the last $sublength characters of the accumulator match.        # so as we're prepending to the accumulator, need to shift all our        # existing offsets forwards        $_ += $sublength foreach values %strings_in_acc;        $subsave += $sublength;        $strings_in_acc{$s} = 0;        # append the first bit on the start.        $string_acc = substr ($s, 0, -$sublength) . $string_acc;        last OPTIMISER;      }    }    # Optimiser (if it ran) found nothing, so just going have to tack the    # whole thing on the end.    $strings_in_acc{$s} = length $string_acc;    $string_acc .= $s;      };    }  }  $strings = length $string_acc;  my $cpp = ($Config{d_cplusplus} || '') eq 'define';  my $var = $cpp ? '' : 'static';  my $definition = "\n$var const U8 $name\[$strings] = { " .    join(',',unpack "C*",$string_acc);  # We have a single long line. Split it at convenient commas.  print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs;  print $fh substr ($definition, pos $definition), " };\n";}sub findstring {  my ($name,$s) = @_;  my $offset = $strings_in_acc{$s};  die "Can't find string " . join (',',unpack "C*",$s) . " in accumulator"    unless defined $offset;  "$name + $offset";}sub outtable{ my ($fh,$a,$bigname) = @_; my $name = $a->{'Cname'}; $a->{'Done'} = 1; foreach my $b (@{$a->{'Entries'}})  {   my ($s,$e,$out,$t,$end,$l) = @$b;   outtable($fh,$t,$bigname) unless $t->{'Done'};  } my $cpp = ($Config{d_cplusplus} || '') eq 'define'; my $var = $cpp ? '' : 'static'; my $const = $cpp ? '' : 'const'; print $fh "\n$var $const encpage_t $name\[",   scalar(@{$a->{'Entries'}}), "] = {\n"; foreach my $b (@{$a->{'Entries'}})  {   my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;   # $end |= 0x80 if $fb; # what the heck was on your mind, Nick?  -- Dan   print  $fh "{";   if ($l)    {     printf $fh findstring($bigname,$out);    }   else    {     print  $fh "0";    }   print  $fh ",",$t->{Cname};   printf $fh ",0x%02x,0x%02x,$l,$end},\n",$sc,$ec;  } print $fh "};\n";}sub output_enc{ my ($fh,$name,$a) = @_; die "Changed - fix me for new structure"; foreach my $b (sort keys %$a)  {   my ($s,$e,$out,$t,$end,$l,$fb) = @{$a->{$b}};  }}sub decode_U{ my $s = shift;}my @uname;sub char_names{ my $s = do "unicore/Name.pl"; die "char_names: unicore/Name.pl: $!\n" unless defined $s; pos($s) = 0; while ($s =~ /\G([0-9a-f]+)\t([0-9a-f]*)\t(.*?)\s*\n/igc)  {   my $name = $3;   my $s = hex($1);   last if $s >= 0x10000;   my $e = length($2) ? hex($2) : $s;   for (my $i = $s; $i <= $e; $i++)    {     $uname[$i] = $name;#    print sprintf("U%04X $name\n",$i);    }  }}sub output_ucm_page{  my ($cmap,$a,$t,$pre) = @_;  # warn sprintf("Page %x\n",$pre);  my $raw = $t->{Raw};  foreach my $key (sort keys %$raw) {    #  RAW_NEXT => 0,    #  RAW_IN_LEN => 1,    #  RAW_OUT_BYTES => 2,    #  RAW_FALLBACK => 3,    my ($next, $in_len, $out_bytes, $fallback) = @{$raw->{$key}};    my $u = ord $key;    $fallback ||= 0;    if ($next != $a && $next != $t) {      output_ucm_page($cmap,$a,$next,(($pre|($u &0x3F)) << 6)&0xFFFF);    } elsif (length $out_bytes) {      if ($pre) {        $u = $pre|($u &0x3f);      }      my $s = sprintf "<U%04X> ",$u;      #foreach my $c (split(//,$out_bytes)) {      #  $s .= sprintf "\\x%02X",ord($c);      #}      # 9.5% faster changing that loop to this:      $s .= sprintf +("\\x%02X" x length $out_bytes), unpack "C*", $out_bytes;      $s .= sprintf " |%d # %s\n",($fallback ? 1 : 0),$uname[$u];      push(@$cmap,$s);    } else {      warn join(',',$u, @{$raw->{$key}},$a,$t);    }  }}sub output_ucm{ my ($fh,$name,$h,$rep,$min_el,$max_el) = @_; print $fh "# $0 @orig_ARGV\n" unless $opt{'q'}; print $fh "<code_set_name> \"$name\"\n"; char_names(); if (defined $min_el)  {   print $fh "<mb_cur_min> $min_el\n";  } if (defined $max_el)  {   print $fh "<mb_cur_max> $max_el\n";  } if (defined $rep)  {   print $fh "<subchar> ";   foreach my $c (split(//,$rep))    {     printf $fh "\\x%02X",ord($c);    }   print $fh "\n";  } my @cmap; output_ucm_page(\@cmap,$h,$h,0); print $fh "#\nCHARMAP\n"; foreach my $line (sort { substr($a,8) cmp substr($b,8) } @cmap)  {   print $fh $line;  } print $fh "END CHARMAP\n";}use vars qw(    $_Enc2xs    $_Version    $_Inc    $_E2X     $_Name    $_TableFiles    $_Now);sub find_e2x{    eval { require File::Find; };    my (@inc, %e2x_dir);    for my $inc (@INC){    push @inc, $inc unless $inc eq '.'; #skip current dir    }    File::Find::find(         sub {         my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,             $atime,$mtime,$ctime,$blksize,$blocks)             = lstat($_) or return;         -f _ or return;         if (/^.*\.e2x$/o){             no warnings 'once';             $e2x_dir{$File::Find::dir} ||= $mtime;         }         return;
💿 文件大小 15230 K
👤 上传用户 gankai1983
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#application #source #linux #perl
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -