⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unimap.pm

📁 PDF-API2-0.2.3.7_dev.tar.gz
💻 PM
📖 第 1 页 / 共 3 页
字号:
package PDF::API2::UniMap;


use vars qw ($VERSION @EXPORT @EXPORT_OK @EXPORT_TAGS @ISA);

BEGIN {
	# Exporter is named in @ISA, so make sure it is actually loaded;
	# otherwise the inherited import() only works when some other module
	# happens to have pulled Exporter in first.
	require Exporter;

	@ISA         = qw(Exporter);
	# Nothing is exported by default; the two converters are on request.
	@EXPORT      = qw ();
	@EXPORT_OK   = qw ( utf8_to_ucs2 utf16_to_ucs2 );
	# NOTE(review): Exporter consults the hash %EXPORT_TAGS, not an array
	# of this name; kept as-is because it is declared via 'use vars'.
	@EXPORT_TAGS = qw ();
	# Pull the timestamp out of the revision-keyword string.
	( $VERSION ) = '$Revisioning: 20020418.102155 $ ' =~ /\$Revisioning:\s+([^\s]+)/;
}

# Decode the single UTF-8 sequence that starts at byte 0 of the argument.
#
# Returns a two-element list:
#   - the decoded value packed as a 4-byte big-endian integer (pack 'N')
#   - the sequence length announced by the lead byte (1..6)
# A lead byte of 0x80-0xBF, 0xFE or 0xFF is rejected: the packed value is
# then zero and the length is 0.
#
# Continuation bytes are not validated; only their low six bits are used,
# and vec() yields 0 for positions beyond the end of the string.
sub utf8c_to_ucs4c {
	my ($string)=@_;
	my $lead=vec($string,0,8);
	my ($len,$mask);

	# Classify the lead byte: sequence length and the payload bits it holds.
	if($lead<0x80) {
		($len,$mask)=(1,0x7f);		# ASCII-7bits
	} elsif($lead<0xc0) {
		($len,$mask)=(0,0x00);		# stray continuation byte: not valid
	} elsif($lead<0xe0) {
		($len,$mask)=(2,0x1f);
	} elsif($lead<0xf0) {
		($len,$mask)=(3,0x0f);
	} elsif($lead<0xf8) {
		($len,$mask)=(4,0x07);
	} elsif($lead<0xfc) {
		($len,$mask)=(5,0x03);
	} elsif($lead<0xfe) {
		($len,$mask)=(6,0x01);
	} else {
		($len,$mask)=(0,0x00);		# 0xFE / 0xFF: not valid
	}

	# Fold in six bits from each continuation byte, most significant first.
	my $code=$lead & $mask;
	foreach my $pos (1..$len-1) {
		$code=($code<<6) | (vec($string,$pos,8) & 0x3f);
	}

	return(pack('N',($code & 0xffffffff)),$len);
}

# Decode the leading UTF-8 sequence of the argument to one UCS-2 unit.
#
# Returns ($ucs2,$len): $len is the length reported by utf8c_to_ucs4c();
# $ucs2 is the low 16 bits of the decoded value packed big-endian
# (pack 'n'), or the empty string when $len is greater than 4.
sub utf8c_to_ucs2c {
	my $string=shift @_;
	my ($ucs4,$len)=utf8c_to_ucs4c($string);
	# 5- and 6-byte sequences produce no output at all.
	my $ucs2=($len>4) ? '' : pack('n',(unpack('N',$ucs4) & 0xffff));
	return($ucs2,$len);
}

# Convert a UTF-8 byte string to a string of big-endian UCS-2 units.
#
# The input is consumed one sequence at a time through utf8c_to_ucs2c().
# Conversion stops at the first sequence reported with length 0 (an invalid
# lead byte); whatever utf8c_to_ucs2c() returned for it is still appended,
# and the remainder of the input is dropped.
#
# An undefined or empty input yields the empty string (previously the loop
# body ran once regardless and emitted a spurious "\0\0").
sub utf8_to_ucs2 {
	my $string=shift @_;
	my $final='';
	return($final) if(!defined($string) || length($string)==0);

	while(length($string)>0) {
		my ($ucs,$len)=utf8c_to_ucs2c($string);
		$final.=$ucs;
		last if($len<=0);
		# A truncated trailing sequence may announce more bytes than are
		# left; treat that as "input exhausted" rather than slicing
		# outside the string.
		$string=($len<length($string)) ? substr($string,$len) : '';
	}
	return($final);
}

# Pass-through: the argument is handed back unchanged (no conversion or
# surrogate handling is performed here).
sub utf16_to_ucs2 {
	my ($final)=@_;
	return($final);
}

# Constructor: build the code <-> Unicode lookup tables for one encoding.
#
# Arguments:
#   $class    - class name to bless the object into
#   $encoding - encoding name; lower-cased, defaults to 'latin1', and every
#               character outside [a-z0-9-] is stripped
#
# Two kinds of encoding are handled:
#   * 'uniN' (N decimal): a synthetic table where code C (0..255) maps to
#     Unicode value N*256+C.
#   * anything else: read from PDF/API2/UniMap/<encoding>.map located via
#     @INC. The first 4 bytes of the file are skipped (presumably a header
#     -- TODO confirm against the map-file format); the rest is read as
#     4-byte records of two big-endian 16-bit values (code, Unicode).
#
# The object holds: 'enc' (encoding name), 'u2c' (Unicode -> code),
# 'c2u' (code -> Unicode) and 'c2n' (code -> glyph name, taken from the
# package table %u2n or synthesised as 'uniXXXX').
#
# Returns the object in scalar context and ($object,$encoding) in list
# context. Dies when no map file exists for the encoding or when the map
# file cannot be opened.
sub new {
	my $class=shift(@_);
	my $encoding=shift @_;
	$encoding=lc(defined($encoding) ? $encoding : '') || 'latin1';
	$encoding=~s/[^a-z0-9\-]+//g;

	my $this=bless({},$class);

	# Record one code <-> Unicode pair in all three tables.
	my $assign=sub {
		my ($ch,$um)=@_;
		$this->{'u2c'}->{$um}=$ch;
		$this->{'c2u'}->{$ch}=$um;
		$this->{'c2n'}->{$ch}=$u2n{$um} || sprintf('uni%04X',$um);
	};

	if($encoding=~/^uni(\d+)$/) {
		my $uct=$1*256;
		$this->{'enc'} = $encoding;
		$this->{'u2c'} = {};
		$this->{'c2u'} = {};
		$this->{'c2n'} = {};
		foreach my $ch (0..255) {
			$assign->($ch,$ch+$uct);
		}
	} else {
		# Scan all of @INC; as before, the LAST directory holding a
		# matching map file wins.
		my $unimap='';
		foreach my $dir (@INC) {
			my $candidate="$dir/PDF/API2/UniMap/$encoding.map";
			$unimap=$candidate if(-e $candidate);
		}
		if($unimap eq '') {
			die " encoding='$encoding' not supported.";
		}

		$this->{'enc'} = $encoding;
		$this->{'u2c'} = {};
		$this->{'c2u'} = {};
		$this->{'c2n'} = {};

		# Three-argument open on a lexical handle, and fail loudly instead
		# of silently returning an object with empty tables.
		open(my $inf,'<',$unimap)
			or die " cannot open map file '$unimap' for encoding='$encoding': $!";
		binmode($inf);

		my $buf;
		read($inf,$buf,4);	# skip the leading 4 bytes
		# Only complete 4-byte records are consumed; a short trailing
		# fragment is ignored rather than unpacked into undef entries.
		while((read($inf,$buf,4) || 0)==4) {
			my ($ch,$um)=unpack('nn',$buf);
			$assign->($ch,$um);
		}
		close($inf);
	}

	return(wantarray ? ($this,$encoding) : $this);
}

# Drop this sub's own copy of the object reference and return undef.
# Only the local variable is cleared; the caller's reference is untouched.
sub end {
	my ($this)=@_;
	return undef($this);
}

# Look up the character code stored for Unicode value $um in the 'u2c'
# table; yields undef when there is no entry.
sub u2c {
	my ($this,$um)=@_;
	return $this->{'u2c'}{$um};
}

# Look up the Unicode value stored for character code $ch in the 'c2u'
# table; yields undef when there is no entry.
sub c2u {
	my ($this,$ch)=@_;
	return $this->{'c2u'}{$ch};
}

# Look up the glyph name stored for character code $ch in the 'c2n'
# table; yields undef when there is no entry.
sub c2n {
	my ($this,$ch)=@_;
	return $this->{'c2n'}{$ch};
}

# Return the glyph names for codes 0..255, in code order, as a 256-element
# list. Codes with no (or a false) entry in the 'c2n' table are reported
# as '.notdef'.
sub glyphs {
	my ($this)=@_;
	my @names;
	foreach my $code (0..255) {
		my $glyph=$this->{'c2n'}->{$code};
		push(@names,($glyph ? $glyph : '.notdef'));
	}
	return(@names);
}

# List the encodings for which a map file is available.
#
# Every directory in @INC is searched for PDF/API2/UniMap/*.map; each hit
# is reduced to its base name (directory and '.map' suffix removed) and
# lower-cased. The same name is returned more than once if it is found in
# several @INC directories. In scalar context the number of hits is
# returned.
sub unimaps {
	# bsd_glob() treats its argument as ONE pattern. The core glob()
	# splits on whitespace, which breaks @INC directories that contain
	# spaces.
	require File::Glob;

	my @names;
	foreach my $dir (@INC) {
		next if(ref($dir));	# @INC hooks (code/array refs) are not paths
		foreach my $file (File::Glob::bsd_glob("$dir/PDF/API2/UniMap/*.map")) {
			# Work on a copy rather than rewriting $_ in place.
			(my $name=$file)=~s/^.*\/([^\/]+)\.map$/$1/i;
			push(@names,lc($name));
		}
	}
	return(@names);
}

# Report whether a map file exists for the given encoding name.
#
# The name is lower-cased and stripped of every character outside
# [a-z0-9-], the same normalisation the constructor applies, and then
# compared for EXACT equality against the names from unimaps(). The
# earlier unanchored pattern match also accepted mere substrings such as
# 'latin' for 'latin1', although new() would reject that name.
#
# Returns undef for a missing/empty/false name, otherwise the number of
# matching map files (0 when there is none).
sub isMap {
	my $encoding=shift @_;
	return(undef) if(!defined($encoding));
	$encoding=lc($encoding);
	return(undef) if(!$encoding);
	$encoding=~s/[^a-z0-9\-]+//g;
	return(scalar grep { $_ eq $encoding } PDF::API2::UniMap::unimaps());
}

1;

BEGIN {

%u2n=(
  '32'=>'space',
  '33'=>'exclam',
  '34'=>'quotedbl',
  '35'=>'numbersign',
  '36'=>'dollar',
  '37'=>'percent',
  '38'=>'ampersand',
  '39'=>'quotesingle',
  '40'=>'parenleft',
  '41'=>'parenright',
  '42'=>'asterisk',
  '43'=>'plus',
  '44'=>'comma',
  '45'=>'hyphen',
  '46'=>'period',
  '47'=>'slash',
  '48'=>'zero',
  '49'=>'one',
  '50'=>'two',
  '51'=>'three',
  '52'=>'four',
  '53'=>'five',
  '54'=>'six',
  '55'=>'seven',
  '56'=>'eight',
  '57'=>'nine',
  '58'=>'colon',
  '59'=>'semicolon',
  '60'=>'less',
  '61'=>'equal',
  '62'=>'greater',
  '63'=>'question',
  '64'=>'at',
  '65'=>'A',
  '66'=>'B',
  '67'=>'C',
  '68'=>'D',
  '69'=>'E',
  '70'=>'F',
  '71'=>'G',
  '72'=>'H',
  '73'=>'I',
  '74'=>'J',
  '75'=>'K',
  '76'=>'L',
  '77'=>'M',
  '78'=>'N',
  '79'=>'O',
  '80'=>'P',
  '81'=>'Q',
  '82'=>'R',
  '83'=>'S',
  '84'=>'T',
  '85'=>'U',
  '86'=>'V',
  '87'=>'W',
  '88'=>'X',
  '89'=>'Y',
  '90'=>'Z',
  '91'=>'bracketleft',
  '92'=>'backslash',
  '93'=>'bracketright',
  '94'=>'asciicircum',
  '95'=>'underscore',
  '96'=>'grave',
  '97'=>'a',
  '98'=>'b',
  '99'=>'c',
  '100'=>'d',
  '101'=>'e',
  '102'=>'f',
  '103'=>'g',
  '104'=>'h',
  '105'=>'i',
  '106'=>'j',
  '107'=>'k',
  '108'=>'l',
  '109'=>'m',
  '110'=>'n',
  '111'=>'o',
  '112'=>'p',
  '113'=>'q',
  '114'=>'r',
  '115'=>'s',
  '116'=>'t',
  '117'=>'u',
  '118'=>'v',
  '119'=>'w',
  '120'=>'x',
  '121'=>'y',
  '122'=>'z',
  '123'=>'braceleft',
  '124'=>'bar',
  '125'=>'braceright',
  '126'=>'asciitilde',
  '127'=>'bullet',    		
  '128'=>'Euro',      		
  '129'=>'bullet',     		
  '130'=>'quotesinglbase',      
  '131'=>'florin',       	
  '132'=>'quotedblbase', 	
  '133'=>'ellipsis',     	
  '134'=>'dagger',       	
  '135'=>'daggerdbl',    	
  '136'=>'circumflex',   	
  '137'=>'perthousand',  	
  '138'=>'Scaron',       	
  '139'=>'guilsinglleft',       
  '140'=>'OE',   		
  '141'=>'bullet',      	
  '142'=>'Zcaron',      	
  '143'=>'bullet',      	
  '144'=>'bullet',      	
  '145'=>'quoteleft',   	
  '146'=>'quoteright',  	
  '147'=>'quotedblleft', 	
  '148'=>'quotedblright',       
  '149'=>'bullet',      	
  '150'=>'endash',       	
  '151'=>'emdash',       	
  '152'=>'tilde',        	
  '153'=>'trademark',    	
  '154'=>'scaron',       	
  '155'=>'guilsinglright',      
  '156'=>'oe',   		
  '157'=>'bullet',      	
  '158'=>'zcaron',      	
  '159'=>'Ydieresis',   	
  '160'=>'space',      		
  '161'=>'exclamdown',
  '162'=>'cent',
  '163'=>'sterling',
  '164'=>'currency',
  '165'=>'yen',
  '166'=>'brokenbar',
  '167'=>'section',
  '168'=>'dieresis',
  '169'=>'copyright',
  '170'=>'ordfeminine',
  '171'=>'guillemotleft',
  '172'=>'logicalnot',
  '173'=>'hyphen',
  '174'=>'registered',
  '175'=>'macron',
  '176'=>'degree',
  '177'=>'plusminus',
  '178'=>'twosuperior',
  '179'=>'threesuperior',
  '180'=>'acute',
  '181'=>'mu',
  '182'=>'paragraph',
  '183'=>'periodcentered',
  '184'=>'cedilla',
  '185'=>'onesuperior',
  '186'=>'ordmasculine',
  '187'=>'guillemotright',
  '188'=>'onequarter',
  '189'=>'onehalf',
  '190'=>'threequarters',
  '191'=>'questiondown',
  '192'=>'Agrave',
  '193'=>'Aacute',
  '194'=>'Acircumflex',
  '195'=>'Atilde',
  '196'=>'Adieresis',
  '197'=>'Aring',
  '198'=>'AE',
  '199'=>'Ccedilla',
  '200'=>'Egrave',
  '201'=>'Eacute',
  '202'=>'Ecircumflex',
  '203'=>'Edieresis',
  '204'=>'Igrave',
  '205'=>'Iacute',
  '206'=>'Icircumflex',
  '207'=>'Idieresis',
  '208'=>'Eth',
  '209'=>'Ntilde',
  '210'=>'Ograve',
  '211'=>'Oacute',
  '212'=>'Ocircumflex',
  '213'=>'Otilde',
  '214'=>'Odieresis',
  '215'=>'multiply',
  '216'=>'Oslash',
  '217'=>'Ugrave',
  '218'=>'Uacute',
  '219'=>'Ucircumflex',
  '220'=>'Udieresis',
  '221'=>'Yacute',
  '222'=>'Thorn',
  '223'=>'germandbls',
  '224'=>'agrave',
  '225'=>'aacute',
  '226'=>'acircumflex',
  '227'=>'atilde',
  '228'=>'adieresis',
  '229'=>'aring',
  '230'=>'ae',
  '231'=>'ccedilla',
  '232'=>'egrave',
  '233'=>'eacute',
  '234'=>'ecircumflex',
  '235'=>'edieresis',
  '236'=>'igrave',
  '237'=>'iacute',
  '238'=>'icircumflex',
  '239'=>'idieresis',
  '240'=>'eth',
  '241'=>'ntilde',
  '242'=>'ograve',
  '243'=>'oacute',
  '244'=>'ocircumflex',
  '245'=>'otilde',
  '246'=>'odieresis',
  '247'=>'divide',
  '248'=>'oslash',
  '249'=>'ugrave',
  '250'=>'uacute',
  '251'=>'ucircumflex',
  '252'=>'udieresis',
  '253'=>'yacute',
  '254'=>'thorn',
  '255'=>'ydieresis',
  '256'=>'Amacron',
  '257'=>'amacron',
  '258'=>'Abreve',
  '259'=>'abreve',
  '260'=>'Aogonek',
  '261'=>'aogonek',
  '262'=>'Cacute',
  '263'=>'cacute',
  '264'=>'Ccircumflex',
  '265'=>'ccircumflex',
  '266'=>'Cdotaccent',
  '267'=>'cdotaccent',
  '268'=>'Ccaron',
  '269'=>'ccaron',
  '270'=>'Dcaron',
  '271'=>'dcaron',
  '272'=>'Dcroat',
  '273'=>'dcroat',
  '274'=>'Emacron',
  '275'=>'emacron',
  '276'=>'Ebreve',
  '277'=>'ebreve',
  '278'=>'Edotaccent',
  '279'=>'edotaccent',
  '280'=>'Eogonek',
  '281'=>'eogonek',
  '282'=>'Ecaron',
  '283'=>'ecaron',
  '284'=>'Gcircumflex',
  '285'=>'gcircumflex',
  '286'=>'Gbreve',
  '287'=>'gbreve',
  '288'=>'Gdotaccent',
  '289'=>'gdotaccent',
  '290'=>'Gcommaaccent',
  '291'=>'gcommaaccent',
  '292'=>'Hcircumflex',
  '293'=>'hcircumflex',
  '294'=>'Hbar',
  '295'=>'hbar',
  '296'=>'Itilde',
  '297'=>'itilde',
  '298'=>'Imacron',
  '299'=>'imacron',
  '300'=>'Ibreve',
  '301'=>'ibreve',
  '302'=>'Iogonek',
  '303'=>'iogonek',
  '304'=>'Idotaccent',
  '305'=>'dotlessi',
  '306'=>'IJ',
  '307'=>'ij',
  '308'=>'Jcircumflex',
  '309'=>'jcircumflex',
  '310'=>'Kcommaaccent',
  '311'=>'kcommaaccent',
  '312'=>'kgreenlandic',
  '313'=>'Lacute',

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -