📄 unimap.pm
字号:
package PDF::API2::UniMap;
# Package globals: the module version plus the variables Exporter consults.
use vars qw ($VERSION @EXPORT @EXPORT_OK @EXPORT_TAGS @ISA);
BEGIN {
    # Exporter has to be loaded for the @ISA entry below to supply import().
    # Without this, `use PDF::API2::UniMap qw(utf8_to_ucs2)` silently exports
    # nothing unless some other module happened to load Exporter first.
    require Exporter;
    @ISA = qw(Exporter);
    # Nothing is exported by default; the converters are available on request.
    @EXPORT = qw ();
    @EXPORT_OK = qw ( utf8_to_ucs2 utf16_to_ucs2 );
    # NOTE(review): Exporter reads %EXPORT_TAGS (a hash); this empty array is
    # kept only so the `use vars` declaration above stays satisfied.
    @EXPORT_TAGS = qw ();
    # Pull the version number out of the revision keyword string.
    ( $VERSION ) = '$Revisioning: 20020418.102155 $ ' =~ /\$Revisioning:\s+([^\s]+)/;
}
# Decode the first UTF-8 sequence of a byte string into a UCS-4 value.
#
# Argument:
#   $string - byte string; only its leading sequence is examined.
#
# Returns the list ($ucs4, $len):
#   $ucs4 - the decoded value packed as a 32-bit big-endian integer ('N')
#   $len  - number of bytes the sequence occupies (1..6), or 0 when the lead
#           byte cannot start a sequence (a continuation byte, 0xFE or 0xFF).
#
# Continuation bytes are not validated, and bytes past the end of the string
# read as 0 (that is what vec() yields there).
sub utf8c_to_ucs4c {
    my ($string) = @_;
    my $lead = vec($string, 0, 8);

    # Classify the lead byte by its high-order bit pattern: the sequence
    # length and the mask selecting the payload bits carried by the lead byte.
    my ($len, $payload_mask);
    if (($lead & 0x80) == 0x00) {
        ($len, $payload_mask) = (1, 0x7f);    # 0xxxxxxx: 7-bit ASCII
    }
    elsif (($lead & 0xc0) == 0x80) {
        ($len, $payload_mask) = (0, 0x00);    # 10xxxxxx: not a valid lead byte
    }
    elsif (($lead & 0xe0) == 0xc0) {
        ($len, $payload_mask) = (2, 0x1f);    # 110xxxxx: 2-byte utf8
    }
    elsif (($lead & 0xf0) == 0xe0) {
        ($len, $payload_mask) = (3, 0x0f);    # 1110xxxx: 3-byte utf8
    }
    elsif (($lead & 0xf8) == 0xf0) {
        ($len, $payload_mask) = (4, 0x07);    # 11110xxx: 4-byte utf8
    }
    elsif (($lead & 0xfc) == 0xf8) {
        ($len, $payload_mask) = (5, 0x03);    # 111110xx: 5-byte utf8
    }
    elsif (($lead & 0xfe) == 0xfc) {
        ($len, $payload_mask) = (6, 0x01);    # 1111110x: 6-byte utf8
    }
    else {
        ($len, $payload_mask) = (0, 0x00);    # 0xFE / 0xFF: not valid
    }

    # Each following byte contributes its low six bits.
    my $code = $lead & $payload_mask;
    foreach my $idx (1 .. $len - 1) {
        $code = ($code << 6) | (vec($string, $idx, 8) & 0x3f);
    }

    return (pack('N', ($code & 0xffffffff)), $len);
}
# Decode the first UTF-8 sequence of a byte string into one UCS-2 code unit.
#
# Argument:
#   $string - byte string; only its leading sequence is examined.
#
# Returns the list ($ucs2, $len):
#   $ucs2 - the code point packed as a 16-bit big-endian integer ('n'), or ''
#           when it cannot be represented in UCS-2
#   $len  - the sequence length reported by utf8c_to_ucs4c (0 when the lead
#           byte is invalid).
sub utf8c_to_ucs2c {
    my ($string) = @_;
    my ($ucs4, $len) = utf8c_to_ucs4c($string);
    my $code = unpack('N', $ucs4);

    # UCS-2 only holds 16 bits. Previously just 5- and 6-byte sequences were
    # dropped, while 4-byte sequences (code points above 0xFFFF) were masked
    # down to an unrelated 16-bit character. Drop everything that does not fit.
    if ($len > 4 || $code > 0xffff) {
        return ('', $len);
    }

    return (pack('n', $code), $len);
}
# Convert a UTF-8 byte string into a string of big-endian UCS-2 code units.
#
# Argument:
#   $string - UTF-8 encoded byte string.
#
# Returns the concatenated code units produced by utf8c_to_ucs2c. An empty or
# undefined input yields '' (the old do/while loop always ran once and
# returned a spurious NUL unit for it).
#
# Conversion stops at the first sequence whose reported length is 0 (an
# invalid lead byte); whatever utf8c_to_ucs2c returned for it is still
# appended, as before.
sub utf8_to_ucs2 {
    my $string = shift @_;
    my $final = '';
    return($final) unless defined($string) && length($string);

    while (length($string) > 0) {
        my ($ucs, $len) = utf8c_to_ucs2c($string);
        $final .= $ucs;
        last if $len <= 0;
        # Drop the consumed bytes; a truncated trailing sequence may claim
        # more bytes than remain, in which case nothing is left.
        $string = ($len < length($string)) ? substr($string, $len) : '';
    }

    return($final);
}
# Pass a UTF-16 string through as UCS-2.
#
# No conversion is performed: the first argument is returned unchanged.
sub utf16_to_ucs2 {
    my ($utf16) = @_;
    return $utf16;
}
# Constructor: build the character-code <-> Unicode tables for one encoding.
#
# Arguments:
#   $class    - class name to bless the object into
#   $encoding - encoding name; lower-cased, defaults to 'latin1', and stripped
#               of every character outside [a-z0-9-].
#               'uni<N>' builds a synthetic table mapping codes 0..255 onto
#               N*256 + code. Any other name is loaded from the file
#               PDF/API2/UniMap/<encoding>.map found under an @INC directory.
#
# Returns the object, or ($object, $encoding) in list context.
# Dies when no map file exists for the encoding or it cannot be opened.
#
# Object fields: 'enc' (encoding name), 'u2c' (Unicode -> code),
# 'c2u' (code -> Unicode), 'c2n' (code -> glyph name, taken from %u2n or
# generated as 'uniXXXX').
sub new {
    my $class = shift(@_);
    my $encoding = lc(shift @_) || 'latin1';
    $encoding =~ s/[^a-z0-9\-]+//gi;

    my $this = {};
    bless($this, $class);

    my $unimap = '';
    my $is_uni = ($encoding =~ /^uni(\d+)$/);
    my $uct = $is_uni ? $1 * 256 : 0;

    if (!$is_uni) {
        # Plain loop instead of map in void context. The last matching @INC
        # directory wins, exactly as in the previous implementation.
        foreach my $dir (@INC) {
            my $candidate = "$dir/PDF/API2/UniMap/$encoding.map";
            $unimap = $candidate if (-e $candidate);
        }
        if (! -e $unimap) {
            die " encoding='$encoding' not supported.";
        }
    }

    $this->{'enc'} = $encoding;
    $this->{'u2c'} = {};
    $this->{'c2u'} = {};
    $this->{'c2n'} = {};

    if ($is_uni) {
        foreach my $ch (0..255) {
            my $um = $ch + $uct;
            $this->{'u2c'}->{$um} = $ch;
            $this->{'c2u'}->{$ch} = $um;
            $this->{'c2n'}->{$ch} = $u2n{$um} || sprintf('uni%04X', $um);
        }
    }
    else {
        # Three-arg open on a lexical handle, with the result checked: an
        # unreadable file used to produce a silently empty table.
        open(my $inf, '<', $unimap)
            or die " encoding='$encoding': cannot open '$unimap': $!";
        binmode($inf);

        my $buf;
        read($inf, $buf, 4);    # the first 4 bytes are skipped, not interpreted

        # Each record is two 16-bit big-endian values: code, then Unicode.
        # Only complete records are used; a short trailing read used to be
        # unpacked into an undefined Unicode value.
        while ((read($inf, $buf, 4) || 0) == 4) {
            my ($ch, $um) = unpack('nn', $buf);
            $this->{'u2c'}->{$um} = $ch;
            $this->{'c2u'}->{$ch} = $um;
            $this->{'c2n'}->{$ch} = $u2n{$um} || sprintf('uni%04X', $um);
        }
        close($inf);
    }

    return wantarray ? ($this, $encoding) : $this;
}
# Release hook for the object.
#
# Only the local copy of the reference is cleared; the caller's variable and
# the object it points to are not modified here.
sub end {
    my ($self) = @_;
    undef $self;
}
# Look up the character code stored for a Unicode value.
#
# Arguments: the object and the Unicode value used as key in its 'u2c' table.
# Returns the stored code, or undef when the table has no such key.
sub u2c {
    my ($self, $unicode) = @_;
    return $self->{'u2c'}{$unicode};
}
# Look up the Unicode value stored for a character code.
#
# Arguments: the object and the character code used as key in its 'c2u' table.
# Returns the stored Unicode value, or undef when the table has no such key.
sub c2u {
    my ($self, $code) = @_;
    return $self->{'c2u'}{$code};
}
# Look up the glyph name stored for a character code.
#
# Arguments: the object and the character code used as key in its 'c2n' table.
# Returns the stored name, or undef when the table has no such key.
sub c2n {
    my ($self, $code) = @_;
    return $self->{'c2n'}{$code};
}
# List the glyph names for character codes 0 through 255, in code order.
#
# Codes without a (true) entry in the object's 'c2n' table are reported as
# '.notdef'. Returns a 256-element list.
sub glyphs {
    my ($self) = @_;
    my @names;
    for my $code (0 .. 255) {
        my $name = $self->{'c2n'}->{$code};
        push(@names, $name ? $name : '.notdef');
    }
    return @names;
}
# List the encodings for which a map file is installed.
#
# Every @INC directory is searched for PDF/API2/UniMap/*.map. Each hit is
# reduced to its file name without directory and '.map' suffix, then
# lower-cased. A name found under several directories appears once per hit.
sub unimaps {
    my @names;
    foreach my $dir (@INC) {
        foreach my $path (glob("$dir/PDF/API2/UniMap/*.map")) {
            my $name = $path;
            $name =~ s/^.*\/([^\/]+)\.map$/$1/cgi;
            push(@names, lc($name));
        }
    }
    return @names;
}
# Report whether a map file is installed for the given encoding.
#
# Argument:
#   $encoding - encoding name; lower-cased and stripped of every character
#               outside [a-z0-9-], the same normalisation new() applies.
#
# Returns undef when no name is given, otherwise the number of installed maps
# whose name equals the normalised encoding (0 when there is none).
sub isMap {
    my $encoding = lc(shift @_);
    return(undef) if(!$encoding);
    $encoding =~ s/[^a-z0-9\-]+//gi;
    # Compare whole names. The former unanchored regex match accepted any
    # substring (isMap('latin') was true when only 'latin1' existed), which
    # disagreed with the exact file lookup in new(); a name that sanitised to
    # the empty string produced an empty pattern.
    return(scalar grep { $_ eq $encoding } PDF::API2::UniMap::unimaps());
}
1;
BEGIN {
%u2n=(
'32'=>'space',
'33'=>'exclam',
'34'=>'quotedbl',
'35'=>'numbersign',
'36'=>'dollar',
'37'=>'percent',
'38'=>'ampersand',
'39'=>'quotesingle',
'40'=>'parenleft',
'41'=>'parenright',
'42'=>'asterisk',
'43'=>'plus',
'44'=>'comma',
'45'=>'hyphen',
'46'=>'period',
'47'=>'slash',
'48'=>'zero',
'49'=>'one',
'50'=>'two',
'51'=>'three',
'52'=>'four',
'53'=>'five',
'54'=>'six',
'55'=>'seven',
'56'=>'eight',
'57'=>'nine',
'58'=>'colon',
'59'=>'semicolon',
'60'=>'less',
'61'=>'equal',
'62'=>'greater',
'63'=>'question',
'64'=>'at',
'65'=>'A',
'66'=>'B',
'67'=>'C',
'68'=>'D',
'69'=>'E',
'70'=>'F',
'71'=>'G',
'72'=>'H',
'73'=>'I',
'74'=>'J',
'75'=>'K',
'76'=>'L',
'77'=>'M',
'78'=>'N',
'79'=>'O',
'80'=>'P',
'81'=>'Q',
'82'=>'R',
'83'=>'S',
'84'=>'T',
'85'=>'U',
'86'=>'V',
'87'=>'W',
'88'=>'X',
'89'=>'Y',
'90'=>'Z',
'91'=>'bracketleft',
'92'=>'backslash',
'93'=>'bracketright',
'94'=>'asciicircum',
'95'=>'underscore',
'96'=>'grave',
'97'=>'a',
'98'=>'b',
'99'=>'c',
'100'=>'d',
'101'=>'e',
'102'=>'f',
'103'=>'g',
'104'=>'h',
'105'=>'i',
'106'=>'j',
'107'=>'k',
'108'=>'l',
'109'=>'m',
'110'=>'n',
'111'=>'o',
'112'=>'p',
'113'=>'q',
'114'=>'r',
'115'=>'s',
'116'=>'t',
'117'=>'u',
'118'=>'v',
'119'=>'w',
'120'=>'x',
'121'=>'y',
'122'=>'z',
'123'=>'braceleft',
'124'=>'bar',
'125'=>'braceright',
'126'=>'asciitilde',
'127'=>'bullet',
'128'=>'Euro',
'129'=>'bullet',
'130'=>'quotesinglbase',
'131'=>'florin',
'132'=>'quotedblbase',
'133'=>'ellipsis',
'134'=>'dagger',
'135'=>'daggerdbl',
'136'=>'circumflex',
'137'=>'perthousand',
'138'=>'Scaron',
'139'=>'guilsinglleft',
'140'=>'OE',
'141'=>'bullet',
'142'=>'Zcaron',
'143'=>'bullet',
'144'=>'bullet',
'145'=>'quoteleft',
'146'=>'quoteright',
'147'=>'quotedblleft',
'148'=>'quotedblright',
'149'=>'bullet',
'150'=>'endash',
'151'=>'emdash',
'152'=>'tilde',
'153'=>'trademark',
'154'=>'scaron',
'155'=>'guilsinglright',
'156'=>'oe',
'157'=>'bullet',
'158'=>'zcaron',
'159'=>'Ydieresis',
'160'=>'space',
'161'=>'exclamdown',
'162'=>'cent',
'163'=>'sterling',
'164'=>'currency',
'165'=>'yen',
'166'=>'brokenbar',
'167'=>'section',
'168'=>'dieresis',
'169'=>'copyright',
'170'=>'ordfeminine',
'171'=>'guillemotleft',
'172'=>'logicalnot',
'173'=>'hyphen',
'174'=>'registered',
'175'=>'macron',
'176'=>'degree',
'177'=>'plusminus',
'178'=>'twosuperior',
'179'=>'threesuperior',
'180'=>'acute',
'181'=>'mu',
'182'=>'paragraph',
'183'=>'periodcentered',
'184'=>'cedilla',
'185'=>'onesuperior',
'186'=>'ordmasculine',
'187'=>'guillemotright',
'188'=>'onequarter',
'189'=>'onehalf',
'190'=>'threequarters',
'191'=>'questiondown',
'192'=>'Agrave',
'193'=>'Aacute',
'194'=>'Acircumflex',
'195'=>'Atilde',
'196'=>'Adieresis',
'197'=>'Aring',
'198'=>'AE',
'199'=>'Ccedilla',
'200'=>'Egrave',
'201'=>'Eacute',
'202'=>'Ecircumflex',
'203'=>'Edieresis',
'204'=>'Igrave',
'205'=>'Iacute',
'206'=>'Icircumflex',
'207'=>'Idieresis',
'208'=>'Eth',
'209'=>'Ntilde',
'210'=>'Ograve',
'211'=>'Oacute',
'212'=>'Ocircumflex',
'213'=>'Otilde',
'214'=>'Odieresis',
'215'=>'multiply',
'216'=>'Oslash',
'217'=>'Ugrave',
'218'=>'Uacute',
'219'=>'Ucircumflex',
'220'=>'Udieresis',
'221'=>'Yacute',
'222'=>'Thorn',
'223'=>'germandbls',
'224'=>'agrave',
'225'=>'aacute',
'226'=>'acircumflex',
'227'=>'atilde',
'228'=>'adieresis',
'229'=>'aring',
'230'=>'ae',
'231'=>'ccedilla',
'232'=>'egrave',
'233'=>'eacute',
'234'=>'ecircumflex',
'235'=>'edieresis',
'236'=>'igrave',
'237'=>'iacute',
'238'=>'icircumflex',
'239'=>'idieresis',
'240'=>'eth',
'241'=>'ntilde',
'242'=>'ograve',
'243'=>'oacute',
'244'=>'ocircumflex',
'245'=>'otilde',
'246'=>'odieresis',
'247'=>'divide',
'248'=>'oslash',
'249'=>'ugrave',
'250'=>'uacute',
'251'=>'ucircumflex',
'252'=>'udieresis',
'253'=>'yacute',
'254'=>'thorn',
'255'=>'ydieresis',
'256'=>'Amacron',
'257'=>'amacron',
'258'=>'Abreve',
'259'=>'abreve',
'260'=>'Aogonek',
'261'=>'aogonek',
'262'=>'Cacute',
'263'=>'cacute',
'264'=>'Ccircumflex',
'265'=>'ccircumflex',
'266'=>'Cdotaccent',
'267'=>'cdotaccent',
'268'=>'Ccaron',
'269'=>'ccaron',
'270'=>'Dcaron',
'271'=>'dcaron',
'272'=>'Dcroat',
'273'=>'dcroat',
'274'=>'Emacron',
'275'=>'emacron',
'276'=>'Ebreve',
'277'=>'ebreve',
'278'=>'Edotaccent',
'279'=>'edotaccent',
'280'=>'Eogonek',
'281'=>'eogonek',
'282'=>'Ecaron',
'283'=>'ecaron',
'284'=>'Gcircumflex',
'285'=>'gcircumflex',
'286'=>'Gbreve',
'287'=>'gbreve',
'288'=>'Gdotaccent',
'289'=>'gdotaccent',
'290'=>'Gcommaaccent',
'291'=>'gcommaaccent',
'292'=>'Hcircumflex',
'293'=>'hcircumflex',
'294'=>'Hbar',
'295'=>'hbar',
'296'=>'Itilde',
'297'=>'itilde',
'298'=>'Imacron',
'299'=>'imacron',
'300'=>'Ibreve',
'301'=>'ibreve',
'302'=>'Iogonek',
'303'=>'iogonek',
'304'=>'Idotaccent',
'305'=>'dotlessi',
'306'=>'IJ',
'307'=>'ij',
'308'=>'Jcircumflex',
'309'=>'jcircumflex',
'310'=>'Kcommaaccent',
'311'=>'kcommaaccent',
'312'=>'kgreenlandic',
'313'=>'Lacute',
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -