⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ucmlint

📁 Perl source code for a Linux application
💻
字号:
#!/usr/local/bin/perl
#
# $Id: ucmlint,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
# ucmlint - sanity checker for UCM character-map files.
#
# For every file named on the command line the script reads the header
# lines (<key> value), then the lines that follow the CHARMAP marker
# (<Uxxxx> \xNN |f), reports duplicate mappings and round-trip failures
# and prints a per-file error summary.  A sample input file is shown
# after __END__.
#
use strict;
use warnings;
our  $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

use Getopt::Std;

our %Opt;    # command-line switches, filled in by main()

# Per-file state.  lint_file() resets all of it before reading a file.
my %Hdr;               # header key => value
my %U2E;               # "hex,hex" Unicode string => "hex,hex" byte string
my %E2U;               # "hex,hex" byte string    => "hex,hex" Unicode string
my $in_charmap = 0;    # true once the CHARMAP line has been seen
my $nerror     = 0;    # number of level-0 (error) reports
my $nwarning   = 0;    # number of level-1 (warning) reports; counted only

# Run only when executed as a script, so the subs below can be loaded
# with require/do without parsing @ARGV or exiting.
main() unless caller;

# Entry point: parse switches, then lint every file left in @ARGV.
sub main {
    # An unknown switch makes getopts() return false: show usage and stop.
    getopts("Dehfv", \%Opt) or help();
    if ($Opt{e}) {
        eval { require Encode; };
        $@ and die "can't load Encode : $@";
    }
    $Opt{h} and help();
    @ARGV   or help();

    $| = 1;
    # $ARGV is deliberately the loop variable: nit() prefixes every
    # report with it.
    for $ARGV (@ARGV) {
        lint_file($ARGV);
    }
    return;
}

# Print the usage text and terminate the program.
sub help {
    print <<"END_USAGE";
$0 -[Dehfv] [ucm files ...]
  -D debug mode on
  -e test with Encode module also (requires perl 5.7.3 or higher)
  -h shows this message
  -f forces roundtrip check even for |[123]
  -v verbose mode
END_USAGE
    exit;
}

# nit($msg [, $level])
# Print one report line "<file>:<severity> in line <n>: <msg>".
#   $level 2 => notice, 1 => warning (bumps $nwarning),
#   anything else / omitted => error (bumps $nerror).
sub nit {
    my ($msg, $level) = @_;
    $level = 0 unless defined $level;
    my $lstr;
    if ($level == 2) {
        $lstr = 'notice';
    }
    elsif ($level == 1) {
        $lstr = 'warning';
        $nwarning++;
    }
    else {
        $lstr = 'error';
        $nerror++;
    }
    # Both can be undefined when nothing has been read yet.
    my $file   = defined $ARGV ? $ARGV : '';
    my $lineno = defined $.    ? $.    : 0;
    print "$file:$lstr in line $lineno: $msg\n";
    return;
}

# lint_file($path)
# Check one UCM file and print its error summary.  Dies if the file
# cannot be opened.
sub lint_file {
    my $path = shift;

    # Three-argument open: the file name is never interpreted as a mode
    # or a pipe.
    open my $ucm, '<', $path or die "$path:$!";
    %Hdr = ();
    %U2E = ();
    %E2U = ();
    $in_charmap = $nerror = $nwarning = 0;

    while (my $line = <$ucm>) {
        chomp $line;
        $line =~ s/\s*#.*$//;    # strip comments
        $line =~ s/\s+$//;       # strip trailing blanks and a CR from CRLF
        $line eq '' and next;

        if ($line eq "CHARMAP") {
            _check_required_headers();
            $in_charmap = 1;
            next;
        }
        if ($in_charmap) {
            _lint_mapping($line);
        }
        else {
            _lint_header($line);
        }
    }
    $in_charmap or nit("Where is CHARMAP?");
    checkRT();
    # Closed only now: closing resets $., which nit() prints.
    close $ucm;

    printf("%s: %s error%s found\n",
        $path,
        ($nerror == 0 ? 'no' : $nerror),
        ($nerror > 1  ? 's'  : ''));
    return;
}

# Report required header keys that are missing when CHARMAP starts, and
# an mb_cur_min that exceeds mb_cur_max.
sub _check_required_headers {
    for my $must (qw/code_set_name mb_cur_min mb_cur_max/) {
        exists $Hdr{$must} or nit("<$must> nonexistent");
    }
    # Compare only when both exist; a missing one was reported above.
    if (exists $Hdr{mb_cur_min} and exists $Hdr{mb_cur_max}
        and $Hdr{mb_cur_min} > $Hdr{mb_cur_max})
    {
        nit(sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
                $Hdr{mb_cur_min}, $Hdr{mb_cur_max}));
    }
    return;
}

# Record one header line in %Hdr, reporting a repeated <code_set_name>
# and an alias equal to the name.  Lines that do not look like
# "<key> value" are ignored.
sub _lint_header {
    my $line = shift;
    my ($hkey, $hvalue) = $line =~ /^<(\S+)>\s+[\"\']?([^\"\']+)/ or return;
    $Opt{D} and warn "$hkey => $hvalue";
    if ($hkey eq "code_set_name") {    # name check
        exists $Hdr{code_set_name}
            and nit("Duplicate <code_set_name>: $hvalue");
    }
    if ($hkey eq "code_set_alias") {   # alias check
        exists $Hdr{code_set_name}
            and $hvalue eq $Hdr{code_set_name}
            and nit(qq(alias "$hvalue" is already in <code_set_name>));
    }
    $Hdr{$hkey} = $hvalue;
    return;
}

# Check one "<Uxxxx> \xNN |f" line from the CHARMAP section and record it
# in %U2E / %E2U.  Lines with fewer than three fields are ignored.
sub _lint_mapping {
    my $line = shift;
    my $name = $Hdr{code_set_name};
    my ($unistr, $encstr, $fb) = $line =~ /^(\S+)\s+(\S+)\s+(\S+)/ or return;
    $Opt{v} and nit($line, 2);
    my $uni = uniparse($unistr);
    my $enc = encparse($encstr);
    if ($fb =~ /^\|([0123])$/) {
        $fb = $1;
    }
    else {
        nit("malformed fallback: $fb");
        $fb = 0;    # keep checking the mapping itself as a |0 entry
    }
    $Opt{f} and $fb = 0;
    # The Encode checks need an encoding name; its absence has already
    # been reported at the CHARMAP line.
    my $use_encode = ($Opt{e} and defined $name);

    # NOTE(review): ICU's .ucm documentation describes |1 as a
    # Unicode-to-codepage-only fallback and |3 as codepage-to-Unicode
    # only, which would suggest skipping the decode table for |1 and the
    # encode table for |3.  The two tests below are kept as they were;
    # confirm the intended semantics before changing them.
    unless ($fb == 1) {    # check uni -> enc
        if (exists $U2E{$uni}) {
            nit("dupe encode map: U$uni => $U2E{$uni} and $enc", 1);
        }
        else {
            $U2E{$uni} = $enc;
            if ($use_encode and $fb != 3) {
                my $e  = hex2enc($enc);
                my $u  = hex2uni($uni);
                my $eu = Encode::encode($name, $u);
                $e eq $eu
                    or nit(qq(encode('$name', $uni) != $enc));
            }
        }
    }
    unless ($fb == 3) {    # check enc -> uni
        if (exists $E2U{$enc}) {
            nit("dupe decode map: $enc => U$E2U{$enc} and U$uni", 1);
        }
        else {
            $E2U{$enc} = $uni;
            if ($use_encode and $fb != 1) {
                my $e = hex2enc($enc);
                my $u = hex2uni($uni);
                $Opt{D} and warn "$uni, $enc";
                my $de = Encode::decode($name, $e);
                $de eq $u
                    or nit(qq(decode('$name', $enc) != $uni));
            }
        }
    }
    return;
}

# hex2enc("41,42") => the byte string "AB".
sub hex2enc {
    return pack("C*", map { hex($_) } split(",", shift));
}

# hex2uni("0041,3042") => the character string "A\x{3042}".
sub hex2uni {
    return join("", map { chr(hex($_)) } split(",", shift));
}

# Report every mapping in %U2E / %E2U that does not survive a round trip.
sub checkRT {
    nit($_) for _rt_failures(\%U2E, \%E2U);
    return;
}

# _rt_failures(\%u2e, \%e2u)
# Return the list of "RT failure" messages for the two tables:
#   - every Unicode key of %u2e must map back to itself through %e2u;
#   - every encoding key of %e2u must map back to itself through %u2e.
# Keys are visited in sorted order so that the report is reproducible.
# A missing reverse entry counts as a failure and prints as empty.
sub _rt_failures {
    my ($u2e, $e2u) = @_;
    my @failures;
    for my $uni (sort keys %$u2e) {
        my $enc  = $u2e->{$uni};
        my $back = exists $e2u->{$enc} ? $e2u->{$enc} : '';
        $back eq $uni
            or push @failures, "RT failure: U$uni => $enc =>U$back";
    }
    for my $enc (sort keys %$e2u) {
        my $uni  = $e2u->{$enc};
        my $back = exists $u2e->{$uni} ? $u2e->{$uni} : '';
        $back eq $enc
            or push @failures, "RT failure: $enc => U$uni => $back";
    }
    return @failures;
}

# uniparse("<U0041><U0301>") => "0041,0301".
# Each code point must consist of hex digits only; anything else is
# reported but still kept in the result so the mapping stays
# distinguishable.  A field with no <U...> at all is reported too.
sub uniparse {
    my $str = shift;
    my @u;
    push @u, $1 while ($str =~ /\G<U(.*?)>/ig);
    @u or nit("malformed Unicode character: $str");
    for my $u (@u) {
        $u =~ /^[0-9A-Fa-f]+$/
            or nit("malformed Unicode character: $u");
    }
    return join(',', @u);
}

# encparse('\x8E\xA1') => "8E,A1".
# Each byte must be one or two hex digits; anything else is reported but
# still kept in the result so the mapping stays distinguishable.
sub encparse {
    my $str = shift;
    my @e;
    for my $e (split /\\x/i, $str) {
        # Only the empty field before the first \x is skipped; a plain
        # truth test would also throw away the byte "0".
        $e eq '' and next;
        $e =~ /^[0-9A-Fa-f]{1,2}$/
            or nit("Hex $e in $str is bogus");
        push @e, $e;
    }
    return join(',', @e);
}

1;
__END__
A UCM file looks like this.

  #
  # Comments
  #
  <code_set_name> "US-ascii" # Required
  <code_set_alias> "ascii"   # Optional
  <mb_cur_min> 1             # Required; usually 1
  <mb_cur_max> 1             # Max. # of bytes/char
  <subchar> \x3F             # Substitution char
  #
  CHARMAP
  <U0000> \x00 |0 # <control>
  <U0001> \x01 |0 # <control>
  <U0002> \x02 |0 # <control>
  ....
  <U007C> \x7C |0 # VERTICAL LINE
  <U007D> \x7D |0 # RIGHT CURLY BRACKET
  <U007E> \x7E |0 # TILDE
  <U007F> \x7F |0 # <control>
  END CHARMAP

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -