📄 balanced.pm
字号:
# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.# FOR FULL DOCUMENTATION SEE Balanced.poduse 5.005;use strict;package Text::Balanced;use Exporter;use SelfLoader;use vars qw { $VERSION @ISA %EXPORT_TAGS };use version; $VERSION = qv('2.0.0');@ISA = qw ( Exporter ); %EXPORT_TAGS = ( ALL => [ qw( &extract_delimited &extract_bracketed &extract_quotelike &extract_codeblock &extract_variable &extract_tagged &extract_multiple &gen_delimited_pat &gen_extract_tagged &delimited_pat ) ] );Exporter::export_ok_tags('ALL');# PROTOTYPESsub _match_bracketed($$$$$$);sub _match_variable($$);sub _match_codeblock($$$$$$$);sub _match_quotelike($$$$);# HANDLE RETURN VALUES IN VARIOUS CONTEXTSsub _failmsg { my ($message, $pos) = @_; $@ = bless { error=>$message, pos=>$pos }, "Text::Balanced::ErrorMsg";}sub _fail{ my ($wantarray, $textref, $message, $pos) = @_; _failmsg $message, $pos if $message; return (undef,$$textref,undef) if $wantarray; return undef;}sub _succeed{ $@ = undef; my ($wantarray,$textref) = splice @_, 0, 2; my ($extrapos, $extralen) = @_>18 ? splice(@_, -2, 2) : (0,0); my ($startlen, $oppos) = @_[5,6]; my $remainderpos = $_[2]; if ($wantarray) { my @res; while (my ($from, $len) = splice @_, 0, 2) { push @res, substr($$textref,$from,$len); } if ($extralen) { # CORRECT FILLET my $extra = substr($res[0], $extrapos-$oppos, $extralen, "\n"); $res[1] = "$extra$res[1]"; eval { substr($$textref,$remainderpos,0) = $extra; substr($$textref,$extrapos,$extralen,"\n")} ; #REARRANGE HERE DOC AND FILLET IF POSSIBLE pos($$textref) = $remainderpos-$extralen+1; # RESET \G } else { pos($$textref) = $remainderpos; # RESET \G } return @res; } else { my $match = substr($$textref,$_[0],$_[1]); substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen; my $extra = $extralen ? substr($$textref, $extrapos, $extralen)."\n" : ""; eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE pos($$textref) = $_[4]; # RESET \G return $match; }}# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRINGsub gen_delimited_pat($;$) # ($delimiters;$escapes){ my ($dels, $escs) = @_; return "" unless $dels =~ /\S/; $escs = '\\' unless $escs; $escs .= substr($escs,-1) x (length($dels)-length($escs)); my @pat = (); my $i; for ($i=0; $i<length $dels; $i++) { my $del = quotemeta substr($dels,$i,1); my $esc = quotemeta substr($escs,$i,1); if ($del eq $esc) { push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del"; } else { push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del"; } } my $pat = join '|', @pat; return "(?:$pat)";}*delimited_pat = \&gen_delimited_pat;# THE EXTRACTION FUNCTIONSsub extract_delimited (;$$$$){ my $textref = defined $_[0] ? \$_[0] : \$_; my $wantarray = wantarray; my $del = defined $_[1] ? $_[1] : qq{\'\"\`}; my $pre = defined $_[2] ? $_[2] : '\s*'; my $esc = defined $_[3] ? $_[3] : qq{\\}; my $pat = gen_delimited_pat($del, $esc); my $startpos = pos $$textref || 0; return _fail($wantarray, $textref, "Not a delimited pattern", 0) unless $$textref =~ m/\G($pre)($pat)/gc; my $prelen = length($1); my $matchpos = $startpos+$prelen; my $endpos = pos $$textref; return _succeed $wantarray, $textref, $matchpos, $endpos-$matchpos, # MATCH $endpos, length($$textref)-$endpos, # REMAINDER $startpos, $prelen; # PREFIX}sub extract_bracketed (;$$$){ my $textref = defined $_[0] ? \$_[0] : \$_; my $ldel = defined $_[1] ? $_[1] : '{([<'; my $pre = defined $_[2] ? $_[2] : '\s*'; my $wantarray = wantarray; my $qdel = ""; my $quotelike; $ldel =~ s/'//g and $qdel .= q{'}; $ldel =~ s/"//g and $qdel .= q{"}; $ldel =~ s/`//g and $qdel .= q{`}; $ldel =~ s/q//g and $quotelike = 1; $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds; my $rdel = $ldel; unless ($rdel =~ tr/[({</])}>/) { return _fail $wantarray, $textref, "Did not find a suitable bracket in delimiter: \"$_[1]\"", 0; } my $posbug = pos; $ldel = join('|', map { quotemeta $_ } split('', $ldel)); $rdel = join('|', map { quotemeta $_ } split('', $rdel)); pos = $posbug; my $startpos = pos $$textref || 0; my @match = _match_bracketed($textref,$pre, $ldel, $qdel, $quotelike, $rdel); return _fail ($wantarray, $textref) unless @match; return _succeed ( $wantarray, $textref, $match[2], $match[5]+2, # MATCH @match[8,9], # REMAINDER @match[0,1], # PREFIX );}sub _match_bracketed($$$$$$) # $textref, $pre, $ldel, $qdel, $quotelike, $rdel{ my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_; my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0); unless ($$textref =~ m/\G$pre/gc) { _failmsg "Did not find prefix: /$pre/", $startpos; return; } $ldelpos = pos $$textref; unless ($$textref =~ m/\G($ldel)/gc) { _failmsg "Did not find opening bracket after prefix: \"$pre\"", pos $$textref; pos $$textref = $startpos; return; } my @nesting = ( $1 ); my $textlen = length $$textref; while (pos $$textref < $textlen) { next if $$textref =~ m/\G\\./gcs; if ($$textref =~ m/\G($ldel)/gc) { push @nesting, $1; } elsif ($$textref =~ m/\G($rdel)/gc) { my ($found, $brackettype) = ($1, $1); if ($#nesting < 0) { _failmsg "Unmatched closing bracket: \"$found\"", pos $$textref; pos $$textref = $startpos; return; } my $expected = pop(@nesting); $expected =~ tr/({[</)}]>/; if ($expected ne $brackettype) { _failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"}, pos $$textref; pos $$textref = $startpos; return; } last if $#nesting < 0; } elsif ($qdel && $$textref =~ m/\G([$qdel])/gc) { $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next; _failmsg "Unmatched embedded quote ($1)", pos $$textref; pos $$textref = $startpos; return; } elsif ($quotelike && _match_quotelike($textref,"",1,0)) { next; } else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs } } if ($#nesting>=0) { _failmsg "Unmatched opening bracket(s): " . join("..",@nesting)."..", pos $$textref; pos $$textref = $startpos; return; } $endpos = pos $$textref; return ( $startpos, $ldelpos-$startpos, # PREFIX $ldelpos, 1, # OPENING BRACKET $ldelpos+1, $endpos-$ldelpos-2, # CONTENTS $endpos-1, 1, # CLOSING BRACKET $endpos, length($$textref)-$endpos, # REMAINDER );}sub _revbracket($){ my $brack = reverse $_[0]; $brack =~ tr/[({</])}>/; return $brack;}my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*};sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options){ my $textref = defined $_[0] ? \$_[0] : \$_; my $ldel = $_[1]; my $rdel = $_[2]; my $pre = defined $_[3] ? $_[3] : '\s*'; my %options = defined $_[4] ? %{$_[4]} : (); my $omode = defined $options{fail} ? $options{fail} : ''; my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) : defined($options{reject}) ? $options{reject} : '' ; my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) : defined($options{ignore}) ? $options{ignore} : '' ; if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; } $@ = undef; my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); return _fail(wantarray, $textref) unless @match; return _succeed wantarray, $textref, $match[2], $match[3]+$match[5]+$match[7], # MATCH @match[8..9,0..1,2..7]; # REM, PRE, BITS}sub _match_tagged # ($$$$$$$){ my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_; my $rdelspec; my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 ); unless ($$textref =~ m/\G($pre)/gc) { _failmsg "Did not find prefix: /$pre/", pos $$textref; goto failed; } $opentagpos = pos($$textref); unless ($$textref =~ m/\G$ldel/gc) { _failmsg "Did not find opening tag: /$ldel/", pos $$textref; goto failed; } $textpos = pos($$textref); if (!defined $rdel) { $rdelspec = substr($$textref, $-[0], $+[0] - $-[0]); unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes) { _failmsg "Unable to construct closing tag to match: $rdel", pos $$textref; goto failed; } } else { $rdelspec = eval "qq{$rdel}" || do { my $del; for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',) { next if $rdel =~ /\Q$_/; $del = $_; last } unless ($del) { use Carp; croak "Can't interpolate right delimiter $rdel" } eval "qq$del$rdel$del"; }; } while (pos($$textref) < length($$textref)) { next if $$textref =~ m/\G\\./gc; if ($$textref =~ m/\G(\n[ \t]*\n)/gc ) { $parapos = pos($$textref) - length($1) unless defined $parapos; } elsif ($$textref =~ m/\G($rdelspec)/gc ) { $closetagpos = pos($$textref)-length($1); goto matched; } elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc) { next; } elsif ($bad && $$textref =~ m/\G($bad)/gcs) { pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS goto short if ($omode eq 'PARA' || $omode eq 'MAX'); _failmsg "Found invalid nested tag: $1", pos $$textref; goto failed; } elsif ($$textref =~ m/\G($ldel)/gc) { my $tag = $1; pos($$textref) -= length($tag); # REWIND TO NESTED TAG unless (_match_tagged(@_)) # MATCH NESTED TAG { goto short if $omode eq 'PARA' || $omode eq 'MAX'; _failmsg "Found unbalanced nested tag: $tag", pos $$textref; goto failed; } } else { $$textref =~ m/./gcs } }short: $closetagpos = pos($$textref); goto matched if $omode eq 'MAX'; goto failed unless $omode eq 'PARA'; if (defined $parapos) { pos($$textref) = $parapos } else { $parapos = pos($$textref) } return ( $startpos, $opentagpos-$startpos, # PREFIX $opentagpos, $textpos-$opentagpos, # OPENING TAG $textpos, $parapos-$textpos, # TEXT $parapos, 0, # NO CLOSING TAG $parapos, length($$textref)-$parapos, # REMAINDER ); matched: $endpos = pos($$textref); return ( $startpos, $opentagpos-$startpos, # PREFIX $opentagpos, $textpos-$opentagpos, # OPENING TAG $textpos, $closetagpos-$textpos, # TEXT $closetagpos, $endpos-$closetagpos, # CLOSING TAG $endpos, length($$textref)-$endpos, # REMAINDER );failed: _failmsg "Did not find closing tag", pos $$textref unless $@; pos($$textref) = $startpos; return;}sub extract_variable (;$$){ my $textref = defined $_[0] ? \$_[0] : \$_; return ("","","") unless defined $$textref; my $pre = defined $_[1] ? $_[1] : '\s*'; my @match = _match_variable($textref,$pre); return _fail wantarray, $textref unless @match; return _succeed wantarray, $textref, @match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX}sub _match_variable($$){# $## $^# $$ my ($textref, $pre) = @_; my $startpos = pos($$textref) = pos($$textref)||0; unless ($$textref =~ m/\G($pre)/gc) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -