📄 collate.pm

📁 视频监控网络部分的协议ddns,的模块的实现代码,请大家大胆指正.
💻 PM
📖 第 1 页 / 共 4 页
字号:
#### "hhhh hhhh hhhh" to (dddd, dddd, dddd)##sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }## $code *must* be in Hangul syllable.# Check it before you enter here.#sub _decompHangul {    my $code = shift;    my $si = $code - Hangul_SBase;    my $li = int( $si / Hangul_NCount);    my $vi = int(($si % Hangul_NCount) / Hangul_TCount);    my $ti =      $si % Hangul_TCount;    return (	Hangul_LBase + $li,	Hangul_VBase + $vi,	$ti ? (Hangul_TBase + $ti) : (),    );}sub _isIllegal {    my $code = shift;    return ! defined $code                      # removed	|| ($code < 0 || 0x10FFFF < $code)      # out of range	|| (($code & 0xFFFE) == 0xFFFE)         # ??FFF[EF] (cf. utf8.c)	|| (0xD800 <= $code && $code <= 0xDFFF) # unpaired surrogates	|| (0xFDD0 <= $code && $code <= 0xFDEF) # other non-characters    ;}# Hangul Syllable Typesub getHST {    my $u = shift;    return	Hangul_LIni <= $u && $u <= Hangul_LFin || $u == Hangul_LFill ? "L" :	Hangul_VIni <= $u && $u <= Hangul_VFin	     ? "V" :	Hangul_TIni <= $u && $u <= Hangul_TFin	     ? "T" :	Hangul_SIni <= $u && $u <= Hangul_SFin ?	    ($u - Hangul_SBase) % Hangul_TCount ? "LVT" : "LV" : "";}#### bool _nonIgnorAtLevel(arrayref weights, int level)##sub _nonIgnorAtLevel($$){    my $wt = shift;    return if ! defined $wt;    my $lv = shift;    return grep($wt->[$_-1] != 0, MinLevel..$lv) ? TRUE : FALSE;}#### bool _eqArray(##    arrayref of arrayref[weights] source,##    arrayref of arrayref[weights] substr,##    int level)## * comparison of graphemes vs graphemes.##   @$source >= @$substr must be true (check it before call this);##sub _eqArray($$$){    my $source = shift;    my $substr = shift;    my $lev = shift;    for my $g (0..@$substr-1){	# Do the $g'th graphemes have the same number of AV weigths?	return if @{ $source->[$g] } != @{ $substr->[$g] };	for my $w (0..@{ $substr->[$g] }-1) {	    for my $v (0..$lev-1) {		return if $source->[$g][$w][$v] != $substr->[$g][$w][$v];	    }	}    }    return 1;}#### (int position, int length)## int position = index(string, substring, position, [undoc'ed grobal])#### With "grobal" (only for the list context),##  returns list of arrayref[position, length].##sub index{    my $self = shift;    my $str  = shift;    my $len  = length($str);    my $subE = $self->splitEnt(shift);    my $pos  = @_ ? shift : 0;       $pos  = 0 if $pos < 0;    my $grob = shift;    my $lev  = $self->{level};    my $v2i  = $self->{UCA_Version} >= 9 &&		$self->{variable} ne 'non-ignorable';    if (! @$subE) {	my $temp = $pos <= 0 ? 0 : $len <= $pos ? $len : $pos;	return $grob	    ? map([$_, 0], $temp..$len)	    : wantarray ? ($temp,0) : $temp;    }    $len < $pos	and return wantarray ? () : NOMATCHPOS;    my $strE = $self->splitEnt($pos ? substr($str, $pos) : $str, TRUE);    @$strE	or return wantarray ? () : NOMATCHPOS;    my(@strWt, @iniPos, @finPos, @subWt, @g_ret);    my $last_is_variable;    for my $vwt (map $self->getWt($_), @$subE) {	my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);	my $to_be_pushed = _nonIgnorAtLevel(\@wt,$lev);	# "Ignorable (L1, L2) after Variable" since track. v. 9	if ($v2i) {	    if ($var) {		$last_is_variable = TRUE;	    }	    elsif (!$wt[0]) { # ignorable		$to_be_pushed = FALSE if $last_is_variable;	    }	    else {		$last_is_variable = FALSE;	    }	}	if (@subWt && !$var && !$wt[0]) {	    push @{ $subWt[-1] }, \@wt if $to_be_pushed;	} else {	    push @subWt, [ \@wt ];	}    }    my $count = 0;    my $end = @$strE - 1;    $last_is_variable = FALSE; # reuse    for (my $i = 0; $i <= $end; ) { # no $i++	my $found_base = 0;	# fetch a grapheme	while ($i <= $end && $found_base == 0) {	    for my $vwt ($self->getWt($strE->[$i][0])) {		my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);		my $to_be_pushed = _nonIgnorAtLevel(\@wt,$lev);		# "Ignorable (L1, L2) after Variable" since track. v. 9		if ($v2i) {		    if ($var) {			$last_is_variable = TRUE;		    }		    elsif (!$wt[0]) { # ignorable			$to_be_pushed = FALSE if $last_is_variable;		    }		    else {			$last_is_variable = FALSE;		    }		}		if (@strWt && !$var && !$wt[0]) {		    push @{ $strWt[-1] }, \@wt if $to_be_pushed;		    $finPos[-1] = $strE->[$i][2];		} elsif ($to_be_pushed) {		    push @strWt, [ \@wt ];		    push @iniPos, $found_base ? NOMATCHPOS : $strE->[$i][1];		    $finPos[-1] = NOMATCHPOS if $found_base;		    push @finPos, $strE->[$i][2];		    $found_base++;		}		# else ===> no-op	    }	    $i++;	}	# try to match	while ( @strWt > @subWt || (@strWt == @subWt && $i > $end) ) {	    if ($iniPos[0] != NOMATCHPOS &&		    $finPos[$#subWt] != NOMATCHPOS &&			_eqArray(\@strWt, \@subWt, $lev)) {		my $temp = $iniPos[0] + $pos;		if ($grob) {		    push @g_ret, [$temp, $finPos[$#subWt] - $iniPos[0]];		    splice @strWt,  0, $#subWt;		    splice @iniPos, 0, $#subWt;		    splice @finPos, 0, $#subWt;		}		else {		    return wantarray			? ($temp, $finPos[$#subWt] - $iniPos[0])			:  $temp;		}	    }	    shift @strWt;	    shift @iniPos;	    shift @finPos;	}    }    return $grob	? @g_ret	: wantarray ? () : NOMATCHPOS;}#### scalarref to matching part = match(string, substring)##sub match{    my $self = shift;    if (my($pos,$len) = $self->index($_[0], $_[1])) {	my $temp = substr($_[0], $pos, $len);	return wantarray ? $temp : \$temp;	# An lvalue ref \substr should be avoided,	# since its value is affected by modification of its referent.    }    else {	return;    }}#### arrayref matching parts = gmatch(string, substring)##sub gmatch{    my $self = shift;    my $str  = shift;    my $sub  = shift;    return map substr($str, $_->[0], $_->[1]),		$self->index($str, $sub, 0, 'g');}#### bool subst'ed = subst(string, substring, replace)##sub subst{    my $self = shift;    my $code = ref $_[2] eq 'CODE' ? $_[2] : FALSE;    if (my($pos,$len) = $self->index($_[0], $_[1])) {	if ($code) {	    my $mat = substr($_[0], $pos, $len);	    substr($_[0], $pos, $len, $code->($mat));	} else {	    substr($_[0], $pos, $len, $_[2]);	}	return TRUE;    }    else {	return FALSE;    }}#### int count = gsubst(string, substring, replace)##sub gsubst{    my $self = shift;    my $code = ref $_[2] eq 'CODE' ? $_[2] : FALSE;    my $cnt = 0;    # Replacement is carried out from the end, then use reverse.    for my $pos_len (reverse $self->index($_[0], $_[1], 0, 'g')) {	if ($code) {	    my $mat = substr($_[0], $pos_len->[0], $pos_len->[1]);	    substr($_[0], $pos_len->[0], $pos_len->[1], $code->($mat));	} else {	    substr($_[0], $pos_len->[0], $pos_len->[1], $_[2]);	}	$cnt++;    }    return $cnt;}1;__END__=head1 NAMEUnicode::Collate - Unicode Collation Algorithm=head1 SYNOPSIS  use Unicode::Collate;  #construct  $Collator = Unicode::Collate->new(%tailoring);  #sort  @sorted = $Collator->sort(@not_sorted);  #compare  $result = $Collator->cmp($a, $b); # returns 1, 0, or -1.  # If %tailoring is false (i.e. empty),  # $Collator should do the default collation.=head1 DESCRIPTIONThis module is an implementation of Unicode Technical Standard #10(a.k.a. UTS #10) - Unicode Collation Algorithm (a.k.a. UCA).=head2 Constructor and TailoringThe C<new> method returns a collator object.   $Collator = Unicode::Collate->new(      UCA_Version => $UCA_Version,      alternate => $alternate, # deprecated: use of 'variable' is recommended.      backwards => $levelNumber, # or \@levelNumbers      entry => $element,      hangul_terminator => $term_primary_weight,      ignoreName => qr/$ignoreName/,      ignoreChar => qr/$ignoreChar/,      katakana_before_hiragana => $bool,      level => $collationLevel,      normalization  => $normalization_form,      overrideCJK => \&overrideCJK,      overrideHangul => \&overrideHangul,      preprocess => \&preprocess,      rearrange => \@charList,      table => $filename,      undefName => qr/$undefName/,      undefChar => qr/$undefChar/,      upper_before_lower => $bool,      variable => $variable,   );=over 4=item UCA_VersionIf the tracking version number of UCA is given,behavior of that tracking version is emulated on collating.If omitted, the return value of C<UCA_Version()> is used.C<UCA_Version()> should return the latest tracking version supported.The supported tracking version: 8, 9, 11, or 14.     UCA       Unicode Standard         DUCET (@version)     ---------------------------------------------------      8              3.1                3.0.1 (3.0.1d9)      9     3.1 with Corrigendum 3      3.1.1 (3.1.1)     11              4.0                4.0.0 (4.0.0)     14             4.1.0               4.1.0 (4.1.0)Note: Recent UTS #10 renames "Tracking Version" to "Revision."=item alternate-- see 3.2.2 Alternate Weighting, version 8 of UTS #10For backward compatibility, C<alternate> (old name) can be usedas an alias for C<variable>.=item backwards-- see 3.1.2 French Accents, UTS #10.     backwards => $levelNumber or \@levelNumbersWeights in reverse order; ex. level 2 (diacritic ordering) in French.If omitted, forwards at all the levels.=item entry-- see 3.1 Linguistic Features; 3.2.1 File Format, UTS #10.If the same character (or a sequence of characters) existsin the collation element table through C<table>,mapping to collation elements is overrided.If it does not exist, the mapping is defined additionally.    entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)0063 0068 ; [.0E6A.0020.0002.0063] # ch0043 0068 ; [.0E6A.0020.0007.0043] # Ch0043 0048 ; [.0E6A.0020.0008.0043] # CH006C 006C ; [.0F4C.0020.0002.006C] # ll004C 006C ; [.0F4C.0020.0007.004C] # Ll004C 004C ; [.0F4C.0020.0008.004C] # LL00F1      ; [.0F7B.0020.0002.00F1] # n-tilde006E 0303 ; [.0F7B.0020.0002.00F1] # n-tilde00D1      ; [.0F7B.0020.0008.00D1] # N-tilde004E 0303 ; [.0F7B.0020.0008.00D1] # N-tildeENTRY    entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)00E6 ; [.0E33.0020.0002.00E6][.0E8B.0020.0002.00E6] # ae ligature as <a><e>00C6 ; [.0E33.0020.0008.00C6][.0E8B.0020.0008.00C6] # AE ligature as <A><E>ENTRYB<NOTE:> The code point in the UCA file format (before C<';'>)B<must> be a Unicode code point (defined as hexadecimal),but not a native code point.So C<0063> must always denote C<U+0063>,but not a character of C<"\x63">.Weighting may vary depending on collation element table.So ensure the weights defined in C<entry> will be consistent withthose in the collation element table loaded via C<table>.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -