📄 blackbox.pm
字号:
# Make $formatting and the previous line be exactly the same length, # with $formatting having a " " as the last character. DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; my @new_line; while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { #print "Format matches $1\n"; if($2) { #print "SKIPPING <$2>\n"; push @new_line, substr($p->[$i-1], pos($formatting)-length($1), length($1)); } else { #print "SNARING $+\n"; push @new_line, [ ( $3 ? 'VerbatimB' : $4 ? 'VerbatimI' : $5 ? 'VerbatimBI' : die("Should never get called") ), {}, substr($p->[$i-1], pos($formatting)-length($1), length($1)) ]; #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; } } my @nixed = splice @$p, $i-1, 2, @new_line; # replace myself and the next line DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; DEBUG > 6 and print "New version of the above line is these tokens (", scalar(@new_line), "):", map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; $i--; # So the next line we scrutinize is the line before the one # that we just went and formatted } $p->[0] = 'VerbatimFormatted'; # Collapse adjacent text nodes, just for kicks. for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; $p->[$i] .= splice @$p, $i+1, 1; # merge --$i; # and back up } } # Now look for the last text token, and remove the terminal newline for( my $i = $#$p; $i >= 2; $i-- ) { # work backwards over the tokens, even the first if( !ref($p->[$i]) ) { if($p->[$i] =~ s/\n$//s) { DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; } else { DEBUG > 5 and print "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; } last; # we only want the next one } } return;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub _treelet_from_formatting_codes { # Given a paragraph, returns a treelet. Full of scary tokenizing code. # Like [ '~Top', {'start_line' => $start_line}, # "I like ", # [ 'B', {}, "pie" ], # "!" # ] my($self, $para, $start_line, $preserve_space) = @_; my $treelet = ['~Top', {'start_line' => $start_line},]; unless ($preserve_space || $self->{'preserve_whitespace'}) { $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'}; $para =~ s/\s+/ /g; # collapse and trim all whitespace first. $para =~ s/ $//; $para =~ s/^ //; } # Only apparent problem the above code is that N<< >> turns into # N<< >>. But then, word wrapping does that too! So don't do that! my @stack; my @lineage = ($treelet); DEBUG > 4 and print "Paragraph:\n$para\n\n"; # Here begins our frightening tokenizer RE. The following regex matches # text in four main parts: # # * Start-codes. The first alternative matches C< or C<<, the latter # followed by some whitespace. $1 will hold the entire start code # (including any space following a multiple-angle-bracket delimiter), # and $2 will hold only the additional brackets past the first in a # multiple-bracket delimiter. length($2) + 1 will be the number of # closing brackets we have to find. # # * Closing brackets. Match some amount of whitespace followed by # multiple close brackets. The logic to see if this closes anything # is down below. Note that in order to parse C<< >> correctly, we # have to use look-behind (?<=\s\s), since the match of the starting # code will have consumed the whitespace. # # * A single closing bracket, to close a simple code like C<>. # # * Something that isn't a start or end code. We have to be careful # about accepting whitespace, since perlpodspec says that any whitespace # before a multiple-bracket closing delimiter should be ignored. # while($para =~ m/\G (?: # Match starting codes, including the whitespace following a # multiple-delimiter start code. $1 gets the whole start code and # $2 gets all but one of the <s in the multiple-bracket case. ([A-Z]<(?:(<+)\s+)?) | # Match multiple-bracket end codes. $3 gets the whitespace that # should be discarded before an end bracket but kept in other cases # and $4 gets the end brackets themselves. (\s+|(?<=\s\s))(>{2,}) | (\s?>) # $5: simple end-codes | ( # $6: stuff containing no start-codes or end-codes (?: [^A-Z\s>] | (?: [A-Z](?!<) ) | (?: \s(?!\s*>) ) )+ ) ) /xgo ) { DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; if(defined $1) { if(defined $2) { DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; push @stack, length($2) + 1; # length of the necessary complex end-code string } else { DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; push @stack, 0; # signal that we're looking for simple } push @lineage, [ substr($1,0,1), {}, ]; # new node object push @{ $lineage[-2] }, $lineage[-1]; } elsif(defined $4) { DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; # This is where it gets messy... if(! @stack) { # We saw " >>>>" but needed nothing. This is ALL just stuff then. DEBUG > 4 and print " But it's really just stuff.\n"; push @{ $lineage[-1] }, $3, $4; next; } elsif(!$stack[-1]) { # We saw " >>>>" but needed only ">". Back pos up. DEBUG > 4 and print " And that's more than we needed to close simple.\n"; push @{ $lineage[-1] }, $3; # That was a for-real space, too. pos($para) = pos($para) - length($4) + 1; } elsif($stack[-1] == length($4)) { # We found " >>>>", and it was exactly what we needed. Commonest case. DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; } elsif($stack[-1] < length($4)) { # We saw " >>>>" but needed only " >>". Back pos up. DEBUG > 4 and print " And that's more than we needed to close complex.\n"; pos($para) = pos($para) - length($4) + $stack[-1]; } else { # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; push @{ $lineage[-1] }, $3, $4; next; } #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; # Keep the element from being childless pop @stack; pop @lineage; } elsif(defined $5) { DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n"; if(@stack and ! $stack[-1]) { # We're indeed expecting a simple end-code DEBUG > 4 and print " It's indeed an end-code.\n"; if(length($5) == 2) { # There was a space there: " >" push @{ $lineage[-1] }, ' '; } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element push @{ $lineage[-1] }, ''; # keep it from being really childless } pop @stack; pop @lineage; } else { DEBUG > 4 and print " It's just stuff.\n"; push @{ $lineage[-1] }, $5; } } elsif(defined $6) { DEBUG > 3 and print "Found stuff \"$6\"\n"; push @{ $lineage[-1] }, $6; } else { # should never ever ever ever happen DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; die "SPORK 512512!"; } } if(@stack) { # Uhoh, some sequences weren't closed. my $x= "..."; while(@stack) { push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; # Hmmmmm! my $code = (pop @lineage)->[0]; my $ender_length = pop @stack; if($ender_length) { --$ender_length; $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); } else { $x = $code . "<$x>"; } } DEBUG > 1 and print "Unterminated $x sequence\n"; $self->whine($start_line, "Unterminated $x sequence", ); } return $treelet;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) return stringify_lol($_[1]);}sub stringify_lol { # function: stringify_lol($lol) my $string_form = ''; _stringify_lol( $_[0] => \$string_form ); return $string_form;}sub _stringify_lol { # the real recursor my($lol, $to) = @_; use UNIVERSAL (); for(my $i = 2; $i < @$lol; ++$i) { if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { _stringify_lol( $lol->[$i], $to); # recurse! } else { $$to .= $lol->[$i]; } } return;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub _dump_curr_open { # return a string representation of the stack my $curr_open = $_[0]{'curr_open'}; return '[empty]' unless @$curr_open; return join '; ', map {; ($_->[0] eq '=for') ? ( ($_->[1]{'~really'} || '=over') . ' ' . $_->[1]{'target'}) : $_->[0] } @$curr_open ;}###########################################################################my %pretty_form = ( "\a" => '\a', # ding! "\b" => '\b', # BS "\e" => '\e', # ESC "\f" => '\f', # FF "\t" => '\t', # tab "\cm" => '\cm', "\cj" => '\cj', "\n" => '\n', # probably overrides one of either \cm or \cj '"' => '\"', '\\' => '\\\\', '$' => '\\$', '@' => '\\@', '%' => '\\%', '#' => '\\#',);sub pretty { # adopted from Class::Classless # Not the most brilliant routine, but passable. # Don't give it a cyclic data structure! my @stuff = @_; # copy my $x; my $out = # join ",\n" . join ", ", map {; if(!defined($_)) { "undef"; } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { $x = "[ " . pretty(@$_) . " ]" ; $x; } elsif(ref($_) eq 'SCALAR') { $x = "\\" . pretty($$_) ; $x; } elsif(ref($_) eq 'HASH') { my $hr = $_; $x = "{" . join(", ", map(pretty($_) . '=>' . pretty($hr->{$_}), sort keys %$hr ) ) . "}" ; $x; } elsif(!length($_)) { q{''} # empty string } elsif( $_ eq '0' # very common case or( m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s and $_ ne '-0' # the strange case that that RE lets thru ) ) { $_; } else { if( chr(65) eq 'A' ) { s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; } else { # We're in some crazy non-ASCII world! s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; } qq{"$_"}; } } @stuff; # $out =~ s/\n */ /g if length($out) < 75; return $out;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@# A rather unsubtle method of blowing away all the state information# from a parser object so it can be reused. Provided as a utility for# backward compatibilty in Pod::Man, etc. but not recommended for# general use.sub reinit { my $self = shift; foreach (qw(source_dead source_filename doc_has_startedstart_of_pod_block content_seen last_was_blank paras curr_openline_count pod_para_count in_pod ~tried_gen_errata errata errors_seenTitle)) { delete $self->{$_}; }}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -