📄 iconv_mktbl
字号:
#! /usr/bin/perl
#
# Copyright (c) 1999, 2000
#	Konstantin Chuguev.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	iconv (Charset Conversion Library) v2.0
#

use strict;
use warnings;
use integer;
use Getopt::Std;

# Command-line switches; Getopt::Std::getopts() fills these package variables.
our ($opt_a, $opt_C, $opt_c, $opt_M, $opt_m, $opt_o, $opt_p, $opt_u);

# Number of table bytes emitted so far in C-source mode (-C).  Needed because
# in that mode the tables are text, so length() cannot give the binary size.
my $array_size = 0;

# Charset properties discovered while reading the input (see set_val).
my ($control0, $control1, $delete) = (0, 0, 0);
my ($has_7bit, $has_8bit, $has_14bit, $has_16bit);

# Two-level conversion tables indexed as [high byte][low byte].
my (@to_ucs, @from_ucs);

# Format four bytes as a C macro call, e.g. pack_hex('1l', "\0\0\0\x08")
# returns "_1l(0x00, 0x00, 0x00, 0x08)".
#   $macro - macro name suffix ('1l' or '2s' in this script)
#   $bytes - packed string; its first four bytes are printed
sub pack_hex {
    my ($macro, $bytes) = @_;
    my @octets = map { sprintf("0x%02X", $_) } unpack('C4', $bytes);
    return "_${macro}(" . join(", ", @octets) . ")";
}

# Serialize $size values taken from @$array_ref.
#   $format - 'N' for 32-bit values, anything else is treated as 16-bit ('n')
# Without -C the result is a packed big-endian binary string.  With -C the
# result is C initializer text (four bytes per macro call) and $array_size is
# advanced by the number of bytes the text represents.
sub pack_array {
    my ($size, $format, $array_ref) = @_;

    return pack("$format$size", @$array_ref) unless $opt_C;

    my $res = '';
    if ($format eq 'N') {
        # two 32-bit values per output line
        for (my $i = 0; $i < $size; $i += 2) {
            $res .= "\t"
                . pack_hex('1l', pack("N", $$array_ref[$i])) . ", "
                . pack_hex('1l', pack("N", $$array_ref[$i + 1])) . ",\n";
        }
        $array_size += $size * 4;
    }
    else {
        # four 16-bit values per output line, two per macro call
        for (my $i = 0; $i < $size; $i += 4) {
            $res .= "\t"
                . pack_hex('2s', pack("n2", $$array_ref[$i], $$array_ref[$i + 1])) . ", "
                . pack_hex('2s', pack("n2", $$array_ref[$i + 2], $$array_ref[$i + 3])) . ",\n";
        }
        $array_size += $size * 2;
    }
    return $res;
}

# Create an array of short/long values in network byte order.
# Undefined slots in the first $size elements of @$array_ref are replaced by
# $default (the referenced array is modified), then the array is serialized
# with pack_array.
sub build_array {
    my ($size, $format, $default, $array_ref) = @_;

    # An absent row (e.g. no input at all) becomes a table full of defaults.
    $array_ref = [] unless defined $array_ref;

    for my $i (0 .. $size - 1) {
        $$array_ref[$i] = $default unless defined $$array_ref[$i];
    }
    return pack_array($size, $format, $array_ref);
}

# Build a one-level table of $size 16-bit entries; unmapped slots get 0xFFFE.
sub build_table1 {
    my ($size, $array_ref) = @_;
    return build_array($size, "n", 0xFFFE, $array_ref);
}

# Build a two-level table: an index of $size 32-bit offsets (0 for a missing
# row) followed by one build_table1 subtable for every defined row of
# @$array_ref.  Offsets count bytes from the start of the index.  The number
# of subtables is reported on STDERR.
sub build_table2 {
    my ($size, $array_ref) = @_;

    my $offset = $size * 4;    # first subtable starts right after the index
    my @offs;
    for my $i (0 .. $size - 1) {
        next unless defined $$array_ref[$i];
        $offs[$i] = $offset;
        $offset += $size * 2;
    }

    my $data = build_array($size, "N", 0, \@offs);
    my $n    = 0;
    for my $i (0 .. $size - 1) {
        next unless defined $$array_ref[$i];
        $n++;
        $data .= build_table1($size, $$array_ref[$i]);
    }
    printf STDERR "%d subtables.\n", $n;
    return $data;
}

# Set a value in two charset conversion tables; update charset properties.
# ($cs, $ucs) = (local charset code, Unicode)
# With -a, codes above 0x7F are ignored.
sub set_val {
    my ($cs, $ucs) = @_;

    return if $opt_a && $cs > 0x7F;

    $to_ucs[$cs >> 8][$cs & 0xFF]     = $ucs;
    $from_ucs[$ucs >> 8][$ucs & 0xFF] = $cs;

    # Bits 5 and 6 clear: low byte is in 0x00-0x1F or 0x80-0x9F.
    if (($cs & 0x60) == 0) {
        if ($cs & 0x80) {
            $control1 = 1;
        }
        else {
            $control0 = 1;
        }
    }
    $delete = 1 if $cs == 0x7F;

    if ($cs < 0x80) {
        $has_7bit = 1;
    }
    elsif ($cs < 0x100) {
        $has_8bit = 1;
    }
    elsif ($cs & 0x8080) {
        $has_16bit = 1;
    }
    else {
        $has_14bit = 1;
    }
    return;
}

# Set a range of equal codes to charset conversion tables
# (each given code maps to the same Unicode value).
sub set_range {
    for my $code (@_) {
        set_val($code, $code);
    }
    return;
}

# Read the RFC1345 mnemonic table $file and return a hash mapping each
# mnemonic to its hexadecimal code string.  Only lines that start with exactly
# one space and carry at least two fields are used.
sub read_mnemonics {
    my ($file) = @_;
    my %map;

    open(my $map_fh, '<', $file) or die "Cannot open $file: $!\n";
    while (my $line = <$map_fh>) {
        chomp $line;
        next unless $line =~ /^ [^ ]/;
        my @fields = split ' ', $line;
        next if @fields < 2;
        $map{ $fields[0] } = $fields[1];
    }
    close($map_fh);
    return %map;
}

# Parse an RFC1345-style charset description from the files in @ARGV (or
# STDIN).  "&code N" lines set the current code position; every other line is
# a list of mnemonics assigned to consecutive codes, '??' marking a hole.
sub read_rfc1345_charset {
    my (%map) = @_;
    my $code = 0;

    while (my $line = <>) {
        chomp $line;
        $line =~ s/^ *//;
        if ($line =~ /^&[a-z]/) {
            my @fields = split ' ', substr($line, 1);
            $code = $fields[1] if $fields[0] eq 'code' && defined $fields[1];
            next;
        }
        for my $mnemonic (split ' ', $line) {
            if ($mnemonic ne '??') {
                if (defined $map{$mnemonic}) {
                    set_val($code, hex "0x$map{$mnemonic}");
                }
                else {
                    # Without this check the code would silently map to U+0000.
                    warn "unknown mnemonic '$mnemonic' at code $code skipped\n";
                }
            }
            $code++;
        }
    }
    return;
}

# Parse a whitespace-separated mapping table from the files in @ARGV (or
# STDIN).  Field $opt_c holds the charset code (must start with the prefix
# pattern $opt_p), field $opt_u the Unicode value; '#' starts a comment.
sub read_mapping_table {
    while (my $line = <>) {
        $line =~ s/[#\n].*//;
        my @fields = split ' ', $line;
        next if @fields < 2;    # too few fields
        next unless defined $fields[$opt_c] && defined $fields[$opt_u];
        # local charset code prefix is invalid
        next unless $fields[$opt_c] =~ s/^$opt_p/0x/;
        set_val(hex $fields[$opt_c], hex $fields[$opt_u]);
    }
    return;
}

# Script entry point: parse options, read the charset, emit the tables either
# as a binary file or (with -C) as a C source file.
sub main {
    getopts('aCc:Mm:o:p:u:')
        or die "usage: $0 [-aCM] [-c N] [-u N] [-p prefix] [-m file] [-o file] [file ...]\n";
    #        ||| || | | +- u N: field number for Unicode character codes
    #        ||| || | +--- p str: prefix
    #        ||| || +----- o file: output file name
    #        ||| |+------- m file: character mnemonic table from RFC1345
    #        ||| +-------- M: Macintosh newline (<CR> only)
    #        ||+---------- c N: field number for charset character codes
    #        |+----------- C: make C source file
    #        +------------ a: ignore 8 bit (for ASCII)

    $opt_c = 0    unless defined $opt_c;
    $opt_p = '0x' unless defined $opt_p;
    $opt_u = 1    unless defined $opt_u;

    # Fail before doing any work: the C identifier is derived from -o.
    die "-o option is mandatory with -C" if $opt_C && !$opt_o;

    if ($opt_o) {
        # The output file itself is created with '-' replaced by '_'.
        $opt_o =~ tr/-/_/;
        open(STDOUT, '>', $opt_o) or die "Cannot open $opt_o for writing: $!\n";
        $opt_o =~ s/\.c$//;
    }
    binmode(STDOUT) unless $opt_C;    # binary tables must not be newline-translated

    local $/ = "\cM" if $opt_M;

    if ($opt_m) {
        read_rfc1345_charset(read_mnemonics($opt_m));
    }
    else {
        read_mapping_table();
    }

    my ($to, $nbits, $type);
    if (!$has_16bit && !$has_14bit) {
        if ($has_8bit) {
            print STDERR "8bit charset";
            if (!$control0) {
                set_range(0 .. 0x1F);
                print STDERR "; control0 chars added";
            }
            if (!$control1) {
                set_range(0x80 .. 0x9F);
                print STDERR "; control1 chars added";
            }
            if (!$delete) {
                set_range(0x7F);
                print STDERR "; delete char added";
            }
            $nbits = 8;
            $type  = 1;
        }
        else {
            print STDERR "7bit charset";
            $nbits = 7;
            $type  = 0;
        }
        print STDERR ".\n";
        $to = build_table1($has_8bit ? 256 : 128, $to_ucs[0]);
    }
    elsif ($has_16bit) {
        print STDERR "16bit charset";
        if (!$has_7bit && !$has_8bit) {
            set_range(0 .. 0x7F);
            print STDERR "; ASCII subset added";
        }
        elsif (!$control0) {
            set_range(0 .. 0x1F);
            print STDERR "; control0 chars added";
        }
        print STDERR ".\n";
        $to    = build_table2(256, \@to_ucs);
        $nbits = 16;
        $type  = 3;
    }
    else {
        print STDERR "14bit charset.\n";
        $to    = build_table2(128, \@to_ucs);
        $nbits = 14;
        $type  = 2;
    }

    # Must be sampled before the "from" table is built: in -C mode
    # $array_size keeps growing with every table emitted.
    my $to_size = $opt_C ? $array_size : length($to);
    my $from    = build_table2(256, \@from_ucs);

    if ($opt_C) {
        my $ident = $opt_o;    # already has '-' -> '_' and the ".c" suffix removed
        (my $name = $ident) =~ tr/a-z/A-Z/;

        print "#include \"../lib/deps.h\"\n\n";
        print "#ifdef _ICONV_CONVERTER_$name\n";
        print "#include \"../lib/endian.h\"\n\n";
        print "_CONST unsigned char _iconv_ccs_table_$ident" . "[] = {\n";
        print "\t3, 'C', 'S', 'C', 'T', ICONV_ORDER, $nbits, $type,\n";
        print pack_array(2, 'N', [8, 8 + $to_size]);
        print $to;
        print $from;
        print "};\n\n";
        print "#endif /* #ifdef _ICONV_CONVERTER_$name */\n\n";
    }
    else {
        # 8-byte header, then the offsets of the "to" and "from" tables.
        print pack("A5CCCNN", "\003CSCT", 0, $nbits, $type, 8, 8 + $to_size);
        print $to;
        print $from;
    }

    # Buffered write errors (disk full etc.) only surface at close.
    close(STDOUT) or die "Error writing output: $!\n";
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without consuming input.
main() unless caller;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -