⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mkheader

📁 source of perl for linux application,
💻
字号:
#!perl## This auxiliary script makes five header files# used for building XSUB of Unicode::Normalize.## Usage:#    <do 'mkheader'> in perl, or <perl mkheader> in command line## Input files:#    unicore/CombiningClass.pl (or unicode/CombiningClass.pl)#    unicore/Decomposition.pl (or unicode/Decomposition.pl)#    unicore/CompositionExclusions.txt (or unicode/CompExcl.txt)## Output files:#    unfcan.h#    unfcpt.h#    unfcmb.h#    unfcmp.h#    unfexc.h#use 5.006;use strict;use warnings;use Carp;use File::Spec;BEGIN {    unless ("A" eq pack('U', 0x41)) {	die "Unicode::Normalize cannot stringify a Unicode code point\n";    }}our $PACKAGE = 'Unicode::Normalize, mkheader';our $Combin = do "unicore/CombiningClass.pl"    || do "unicode/CombiningClass.pl"    || croak "$PACKAGE: CombiningClass.pl not found";our $Decomp = do "unicore/Decomposition.pl"    || do "unicode/Decomposition.pl"    || croak "$PACKAGE: Decomposition.pl not found";our %Combin;	# $codepoint => $number    : combination classour %Canon;	# $codepoint => \@codepoints : canonical decomp.our %Compat;	# $codepoint => \@codepoints : compat. decomp.# after _U_stringify(), ($codepoint => $hexstring) for %Canon and %Compatour %Exclus;	# $codepoint => 1          : composition exclusionsour %Single;	# $codepoint => 1          : singletonsour %NonStD;	# $codepoint => 1          : non-starter decompositionsour %Comp1st;	# $codepoint => $listname  : may be composed with a next char.our %Comp2nd;	# $codepoint => 1          : may be composed with a prev char.our %CompList;	# $listname,$2nd  => $codepoint : compositeour $prefix = "UNF_";our $structname = "${prefix}complist";########## definition of Hangul constants ##########use constant SBase  => 0xAC00;use constant SFinal => 0xD7A3; # SBase -1 + SCountuse constant SCount =>  11172; # LCount * NCountuse constant NCount =>    588; # VCount * TCountuse constant LBase  => 0x1100;use constant LFinal => 0x1112;use constant LCount =>     19;use constant VBase  => 0x1161;use constant VFinal => 0x1175;use constant VCount =>     21;use constant TBase  => 0x11A7;use constant TFinal => 0x11C2;use constant TCount =>     28;sub decomposeHangul {    my $SIndex = $_[0] - SBase;    my $LIndex = int( $SIndex / NCount);    my $VIndex = int(($SIndex % NCount) / TCount);    my $TIndex =      $SIndex % TCount;    my @ret = (       LBase + $LIndex,       VBase + $VIndex,      $TIndex ? (TBase + $TIndex) : (),    );    return @ret;}########## getting full decomposion ##########{    my($f, $fh);    foreach my $d (@INC) {	$f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");	last if open($fh, $f);	$f = File::Spec->catfile($d, "unicode", "CompExcl.txt");	last if open($fh, $f);	$f = undef;    }    croak "$PACKAGE: neither unicore/CompositionExclusions.txt "	. "nor unicode/CompExcl.txt is found in @INC" unless defined $f;    while (<$fh>) {	next if /^#/ or /^$/;	s/#.*//;	$Exclus{ hex($1) } = 1 if /([0-9A-Fa-f]+)/;    }    close $fh;}#### converts string "hhhh hhhh hhhh" to a numeric list##sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }while ($Combin =~ /(.+)/g) {    my @tab = split /\t/, $1;    my $ini = hex $tab[0];    if ($tab[1] eq '') {	$Combin{ $ini } = $tab[2];    } else {	$Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);    }}while ($Decomp =~ /(.+)/g) {    my @tab = split /\t/, $1;    my $compat = $tab[2] =~ s/<[^>]+>//;    my $dec = [ _getHexArray($tab[2]) ]; # decomposition    my $ini = hex($tab[0]); # initial decomposable character    my $listname =	@$dec == 2 ? sprintf("${structname}_%06x", $dec->[0]) : 'USELESS';		# %04x is bad since it'd place _3046 after _1d157.    if ($tab[1] eq '') {	$Compat{ $ini } = $dec;	if (! $compat) {	    $Canon{ $ini } = $dec;	    if (@$dec == 2) {		if ($Combin{ $dec->[0] }) {		    $NonStD{ $ini } = 1;		} else {		    $CompList{ $listname }{ $dec->[1] } = $ini;		    $Comp1st{ $dec->[0] } = $listname;		    $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$ini};		}	    } elsif (@$dec == 1) {		$Single{ $ini } = 1;	    } else {		croak("Weird Canonical Decomposition of U+$tab[0]");	    }	}    } else {	foreach my $u ($ini .. hex($tab[1])) {	    $Compat{ $u } = $dec;	    if (! $compat) {		$Canon{ $u } = $dec;		if (@$dec == 2) {		    if ($Combin{ $dec->[0] }) {			$NonStD{ $u } = 1;		    } else {			$CompList{ $listname }{ $dec->[1] } = $u;			$Comp1st{ $dec->[0] } = $listname;			$Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};		    }		} elsif (@$dec == 1) {		    $Single{ $u } = 1;		} else {		    croak("Weird Canonical Decomposition of U+$tab[0]");		}	    }	}    }}# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamoforeach my $j (0x1161..0x1175, 0x11A8..0x11C2) {    $Comp2nd{$j} = 1;}sub getCanonList {    my @src = @_;    my @dec = map {	(SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)	    : $Canon{$_} ? @{ $Canon{$_} } : $_		} @src;    return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);    # condition @src == @dec is not ok.}sub getCompatList {    my @src = @_;    my @dec = map {	(SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)	    : $Compat{$_} ? @{ $Compat{$_} } : $_		} @src;    return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);    # condition @src == @dec is not ok.}# exhaustive decompositionforeach my $key (keys %Canon) {    $Canon{$key}  = [ getCanonList($key) ];}# exhaustive decompositionforeach my $key (keys %Compat) {    $Compat{$key} = [ getCompatList($key) ];}sub _pack_U {    return pack('U*', @_);}sub split_into_char {    use bytes;    my $uni = shift;    my $len = length($uni);    my @ary;    for(my $i = 0; $i < $len; ++$i) {	push @ary, ord(substr($uni,$i,1));    }    return @ary;}sub _U_stringify {    sprintf '"%s"', join '',	map sprintf("\\x%02x", $_), split_into_char(_pack_U(@_));}foreach my $hash (\%Canon, \%Compat) {    foreach my $key (keys %$hash) {	$hash->{$key} = _U_stringify( @{ $hash->{$key} } );    }}########## writing header files ##########my @boolfunc = (    {	name => "Exclusion",	type => "bool",	hash => \%Exclus,    },    {	name => "Singleton",	type => "bool",	hash => \%Single,    },    {	name => "NonStDecomp",	type => "bool",	hash => \%NonStD,    },    {	name => "Comp2nd",	type => "bool",	hash => \%Comp2nd,    },);my $file = "unfexc.h";open FH, ">$file" or croak "$PACKAGE: $file can't be made";binmode FH; select FH;    print << 'EOF';/* * This file is auto-generated by mkheader. * Any changes here will be lost! */EOFforeach my $tbl (@boolfunc) {    my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};    my $type = $tbl->{type};    my $name = $tbl->{name};    print "$type is$name (UV uv)\n{\nreturn\n\t";    while (@temp) {	my $cur = shift @temp;	if (@temp && $cur + 1 == $temp[0]) {	    print "($cur <= uv && uv <= ";	    while (@temp && $cur + 1 == $temp[0]) {		$cur = shift @temp;	    }	    print "$cur)";	    print "\n\t|| " if @temp;	} else {	    print "uv == $cur";	    print "\n\t|| " if @temp;	}    }    print "\n\t? TRUE : FALSE;\n}\n\n";}close FH;####################################my $compinit =    "typedef struct { UV nextchar; UV composite; } $structname;\n\n";foreach my $i (sort keys %CompList) {    $compinit .= "$structname $i [] = {\n";    $compinit .= join ",\n",	map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),	    sort {$a <=> $b } keys %{ $CompList{$i} };    $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel}my @tripletable = (    {	file => "unfcmb",	name => "combin",	type => "STDCHAR",	hash => \%Combin,	null =>  0,    },    {	file => "unfcan",	name => "canon",	type => "char*",	hash => \%Canon,	null => "NULL",    },    {	file => "unfcpt",	name => "compat",	type => "char*",	hash => \%Compat,	null => "NULL",    },    {	file => "unfcmp",	name => "compos",	type => "$structname *",	hash => \%Comp1st,	null => "NULL",	init => $compinit,    },);foreach my $tbl (@tripletable) {    my $file = "$tbl->{file}.h";    my $head = "${prefix}$tbl->{name}";    my $type = $tbl->{type};    my $hash = $tbl->{hash};    my $null = $tbl->{null};    my $init = $tbl->{init};    open FH, ">$file" or croak "$PACKAGE: $file can't be made";    binmode FH; select FH;    my %val;    print FH << 'EOF';/* * This file is auto-generated by mkheader. * Any changes here will be lost! */EOF    print $init if defined $init;    foreach my $uv (keys %$hash) {	croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)	    unless $uv <= 0x10FFFF;	my @c = unpack 'CCCC', pack 'N', $uv;	$val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};    }    foreach my $p (sort { $a <=> $b } keys %val) {	next if ! $val{ $p };	for (my $r = 0; $r < 256; $r++) {	    next if ! $val{ $p }{ $r };	    printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;	    for (my $c = 0; $c < 256; $c++) {		print "\t", defined $val{$p}{$r}{$c}		    ? "($type)".$val{$p}{$r}{$c}		    : $null;		print ','  if $c != 255;		print "\n" if $c % 8 == 7;	    }	    print "};\n\n";	}    }    foreach my $p (sort { $a <=> $b } keys %val) {	next if ! $val{ $p };	printf "static $type* ${head}_%02x [256] = {\n", $p;	for (my $r = 0; $r < 256; $r++) {	    print $val{ $p }{ $r }		? sprintf("${head}_%02x_%02x", $p, $r)		: "NULL";	    print ','  if $r != 255;	    print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;	}	print "};\n\n";    }    print "static $type** $head [] = {\n";    for (my $p = 0; $p <= 0x10; $p++) {	print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";	print ','  if $p != 0x10;	print "\n";    }    print "};\n\n";    close FH;}1;__END__

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -