📄 hz.pm

📁 source of perl for linux application,
💻 PM
字号:
package Encode::CN::HZ;use strict;use warnings;use vars qw($VERSION);$VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };use Encode qw(:fallbacks);use base qw(Encode::Encoding);__PACKAGE__->Define('hz');# HZ is a combination of ASCII and escaped GB, so we implement it# with the GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.# not ported for EBCDIC.  Which should be used, "~" or "\x7E"?sub needs_lines { 1 }sub decode ($$;$) {    my ( $obj, $str, $chk ) = @_;    my $GB  = Encode::find_encoding('gb2312-raw');    my $ret = '';    my $in_ascii = 1;    # default mode is ASCII.    while ( length $str ) {        if ($in_ascii) {    # ASCII mode            if ( $str =~ s/^([\x00-\x7D\x7F]+)// ) {    # no '~' => ASCII                $ret .= $1;                # EBCDIC should need ascii2native, but not ported.            }            elsif ( $str =~ s/^\x7E\x7E// ) {           # escaped tilde                $ret .= '~';            }            elsif ( $str =~ s/^\x7E\cJ// ) {    # '\cJ' == LF in ASCII                1;                              # no-op            }            elsif ( $str =~ s/^\x7E\x7B// ) {    # '~{'                $in_ascii = 0;                   # to GB            }            else {    # encounters an invalid escape, \x80 or greater                last;            }        }        else {        # GB mode; the byte ranges are as in RFC 1843.            no warnings 'uninitialized';            if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {                $ret .= $GB->decode( $1, $chk );            }            elsif ( $str =~ s/^\x7E\x7D// ) {    # '~}'                $in_ascii = 1;            }            else {                               # invalid                last;            }        }    }    $_[1] = '' if $chk;    # needs_lines guarantees no partial character    return $ret;}sub cat_decode {    my ( $obj, undef, $src, $pos, $trm, $chk ) = @_;    my ( $rdst, $rsrc, $rpos ) = \@_[ 1 .. 3 ];    my $GB  = Encode::find_encoding('gb2312-raw');    my $ret = '';    my $in_ascii = 1;      # default mode is ASCII.    my $ini_pos = pos($$rsrc);    substr( $src, 0, $pos ) = '';    my $ini_len = bytes::length($src);    # $trm is the first of the pair '~~', then 2nd tilde is to be removed.    # XXX: Is better C<$src =~ s/^\x7E// or die if ...>?    $src =~ s/^\x7E// if $trm eq "\x7E";    while ( length $src ) {        my $now;        if ($in_ascii) {    # ASCII mode            if ( $src =~ s/^([\x00-\x7D\x7F])// ) {    # no '~' => ASCII                $now = $1;            }            elsif ( $src =~ s/^\x7E\x7E// ) {          # escaped tilde                $now = '~';            }            elsif ( $src =~ s/^\x7E\cJ// ) {    # '\cJ' == LF in ASCII                next;            }            elsif ( $src =~ s/^\x7E\x7B// ) {    # '~{'                $in_ascii = 0;                   # to GB                next;            }            else {    # encounters an invalid escape, \x80 or greater                last;            }        }        else {        # GB mode; the byte ranges are as in RFC 1843.            if ( $src =~ s/^((?:[\x21-\x77][\x21-\x7F])+)// ) {                $now = $GB->decode( $1, $chk );            }            elsif ( $src =~ s/^\x7E\x7D// ) {    # '~}'                $in_ascii = 1;                next;            }            else {                               # invalid                last;            }        }        next if !defined $now;        $ret .= $now;        if ( $now eq $trm ) {            $$rdst .= $ret;            $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);            pos($$rsrc) = $ini_pos;            return 1;        }    }    $$rdst .= $ret;    $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);    pos($$rsrc) = $ini_pos;    return '';    # terminator not found}sub encode($$;$) {    my ( $obj, $str, $chk ) = @_;    my $GB  = Encode::find_encoding('gb2312-raw');    my $ret = '';    my $in_ascii = 1;    # default mode is ASCII.    no warnings 'utf8';  # $str may be malformed UTF8 at the end of a chunk.    while ( length $str ) {        if ( $str =~ s/^([[:ascii:]]+)// ) {            my $tmp = $1;            $tmp =~ s/~/~~/g;    # escapes tildes            if ( !$in_ascii ) {                $ret .= "\x7E\x7D";    # '~}'                $in_ascii = 1;            }            $ret .= pack 'a*', $tmp;    # remove UTF8 flag.        }        elsif ( $str =~ s/(.)// ) {            my $s = $1;            my $tmp = $GB->encode( $s, $chk );            last if !defined $tmp;            if ( length $tmp == 2 ) {    # maybe a valid GB char (XXX)                if ($in_ascii) {                    $ret .= "\x7E\x7B";    # '~{'                    $in_ascii = 0;                }                $ret .= $tmp;            }            elsif ( length $tmp ) {        # maybe FALLBACK in ASCII (XXX)                if ( !$in_ascii ) {                    $ret .= "\x7E\x7D";    # '~}'                    $in_ascii = 1;                }                $ret .= $tmp;            }        }        else {    # if $str is malformed UTF8 *and* if length $str != 0.            last;        }    }    $_[1] = $str if $chk;    # The state at the end of the chunk is discarded, even if in GB mode.    # That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".    # Parhaps it is harmless, but further investigations may be required...    if ( !$in_ascii ) {        $ret .= "\x7E\x7D";    # '~}'        $in_ascii = 1;    }    return $ret;}1;__END__=head1 NAMEEncode::CN::HZ -- internally used by Encode::CN=cut
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -