📄 blackbox.pm

📁 source of perl for linux application,
💻 PM
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
    # Make $formatting and the previous line be exactly the same length,    # with $formatting having a " " as the last character.     DEBUG > 4 and print "Formatting <$formatting>    on <", $p->[$i-1], ">\n";    my @new_line;    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {      #print "Format matches $1\n";      if($2) {        #print "SKIPPING <$2>\n";        push @new_line,          substr($p->[$i-1], pos($formatting)-length($1), length($1));      } else {        #print "SNARING $+\n";        push @new_line, [          (            $3 ? 'VerbatimB'  :            $4 ? 'VerbatimI'  :            $5 ? 'VerbatimBI' : die("Should never get called")          ), {},          substr($p->[$i-1], pos($formatting)-length($1), length($1))        ];        #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";      }    }    my @nixed =          splice @$p, $i-1, 2, @new_line; # replace myself and the next line    DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";        DEBUG > 6 and print "New version of the above line is these tokens (",      scalar(@new_line), "):",      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";    $i--; # So the next line we scrutinize is the line before the one          #  that we just went and formatted  }  $p->[0] = 'VerbatimFormatted';  # Collapse adjacent text nodes, just for kicks.  for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last    if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {      DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";      $p->[$i] .= splice @$p, $i+1, 1; # merge      --$i;  # and back up    }  }  # Now look for the last text token, and remove the terminal newline  for( my $i = $#$p; $i >= 2; $i-- ) {    # work backwards over the tokens, even the first    if( !ref($p->[$i]) ) {      if($p->[$i] =~ s/\n$//s) {        DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";      } else {        DEBUG > 5 and print         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";      }      last; # we only want the next one    }  }  return;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub _treelet_from_formatting_codes {  # Given a paragraph, returns a treelet.  Full of scary tokenizing code.  #  Like [ '~Top', {'start_line' => $start_line},  #            "I like ",  #            [ 'B', {}, "pie" ],  #            "!"  #       ]    my($self, $para, $start_line, $preserve_space) = @_;    my $treelet = ['~Top', {'start_line' => $start_line},];    unless ($preserve_space || $self->{'preserve_whitespace'}) {    $para =~ s/\.  /\.\xA0 /g if $self->{'fullstop_space_harden'};      $para =~ s/\s+/ /g; # collapse and trim all whitespace first.    $para =~ s/ $//;    $para =~ s/^ //;  }    # Only apparent problem the above code is that N<<  >> turns into  # N<< >>.  But then, word wrapping does that too!  So don't do that!    my @stack;  my @lineage = ($treelet);  DEBUG > 4 and print "Paragraph:\n$para\n\n";   # Here begins our frightening tokenizer RE.  The following regex matches  # text in four main parts:  #  #  * Start-codes.  The first alternative matches C< or C<<, the latter  #    followed by some whitespace.  $1 will hold the entire start code  #    (including any space following a multiple-angle-bracket delimiter),  #    and $2 will hold only the additional brackets past the first in a  #    multiple-bracket delimiter.  length($2) + 1 will be the number of  #    closing brackets we have to find.  #  #  * Closing brackets.  Match some amount of whitespace followed by  #    multiple close brackets.  The logic to see if this closes anything  #    is down below.  Note that in order to parse C<<  >> correctly, we  #    have to use look-behind (?<=\s\s), since the match of the starting  #    code will have consumed the whitespace.  #  #  * A single closing bracket, to close a simple code like C<>.  #  #  * Something that isn't a start or end code.  We have to be careful  #    about accepting whitespace, since perlpodspec says that any whitespace  #    before a multiple-bracket closing delimiter should be ignored.  #  while($para =~    m/\G      (?:        # Match starting codes, including the whitespace following a        # multiple-delimiter start code.  $1 gets the whole start code and        # $2 gets all but one of the <s in the multiple-bracket case.        ([A-Z]<(?:(<+)\s+)?)        |        # Match multiple-bracket end codes.  $3 gets the whitespace that        # should be discarded before an end bracket but kept in other cases        # and $4 gets the end brackets themselves.        (\s+|(?<=\s\s))(>{2,})        |        (\s?>)          # $5: simple end-codes        |        (               # $6: stuff containing no start-codes or end-codes          (?:            [^A-Z\s>]            |            (?:              [A-Z](?!<)            )            |            (?:              \s(?!\s*>)            )          )+        )      )    /xgo  ) {    DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";    if(defined $1) {      if(defined $2) {        DEBUG > 3 and print "Found complex start-text code \"$1\"\n";        push @stack, length($2) + 1;           # length of the necessary complex end-code string      } else {        DEBUG > 3 and print "Found simple start-text code \"$1\"\n";        push @stack, 0;  # signal that we're looking for simple      }      push @lineage, [ substr($1,0,1), {}, ];  # new node object      push @{ $lineage[-2] }, $lineage[-1];          } elsif(defined $4) {      DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";      # This is where it gets messy...      if(! @stack) {        # We saw " >>>>" but needed nothing.  This is ALL just stuff then.        DEBUG > 4 and print " But it's really just stuff.\n";        push @{ $lineage[-1] }, $3, $4;        next;      } elsif(!$stack[-1]) {        # We saw " >>>>" but needed only ">".  Back pos up.        DEBUG > 4 and print " And that's more than we needed to close simple.\n";        push @{ $lineage[-1] }, $3; # That was a for-real space, too.        pos($para) = pos($para) - length($4) + 1;      } elsif($stack[-1] == length($4)) {        # We found " >>>>", and it was exactly what we needed.  Commonest case.        DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";      } elsif($stack[-1] < length($4)) {        # We saw " >>>>" but needed only " >>".  Back pos up.        DEBUG > 4 and print " And that's more than we needed to close complex.\n";        pos($para) = pos($para) - length($4) + $stack[-1];      } else {        # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!        DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";        push @{ $lineage[-1] }, $3, $4;        next;      }      #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };      # Keep the element from being childless            pop @stack;      pop @lineage;          } elsif(defined $5) {      DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n";      if(@stack and ! $stack[-1]) {        # We're indeed expecting a simple end-code        DEBUG > 4 and print " It's indeed an end-code.\n";        if(length($5) == 2) { # There was a space there: " >"          push @{ $lineage[-1] }, ' ';        } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element          push @{ $lineage[-1] }, ''; # keep it from being really childless        }        pop @stack;        pop @lineage;      } else {        DEBUG > 4 and print " It's just stuff.\n";        push @{ $lineage[-1] }, $5;      }    } elsif(defined $6) {      DEBUG > 3 and print "Found stuff \"$6\"\n";      push @{ $lineage[-1] }, $6;          } else {      # should never ever ever ever happen      DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";      die "SPORK 512512!";    }  }  if(@stack) { # Uhoh, some sequences weren't closed.    my $x= "...";    while(@stack) {      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };      # Hmmmmm!      my $code         = (pop @lineage)->[0];      my $ender_length =  pop @stack;      if($ender_length) {        --$ender_length;        $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);      } else {        $x = $code . "<$x>";      }    }    DEBUG > 1 and print "Unterminated $x sequence\n";    $self->whine($start_line,      "Unterminated $x sequence",    );  }    return $treelet;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)  return stringify_lol($_[1]);}sub stringify_lol {  # function: stringify_lol($lol)  my $string_form = '';  _stringify_lol( $_[0] => \$string_form );  return $string_form;}sub _stringify_lol {  # the real recursor  my($lol, $to) = @_;  use UNIVERSAL ();  for(my $i = 2; $i < @$lol; ++$i) {    if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {      _stringify_lol( $lol->[$i], $to);  # recurse!    } else {      $$to .= $lol->[$i];    }  }  return;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub _dump_curr_open { # return a string representation of the stack  my $curr_open = $_[0]{'curr_open'};  return '[empty]' unless @$curr_open;  return join '; ',    map {;           ($_->[0] eq '=for')             ? ( ($_->[1]{'~really'} || '=over')               . ' ' . $_->[1]{'target'})             : $_->[0]        }    @$curr_open  ;}###########################################################################my %pretty_form = (  "\a" => '\a', # ding!  "\b" => '\b', # BS  "\e" => '\e', # ESC  "\f" => '\f', # FF  "\t" => '\t', # tab  "\cm" => '\cm',  "\cj" => '\cj',  "\n" => '\n', # probably overrides one of either \cm or \cj  '"' => '\"',  '\\' => '\\\\',  '$' => '\\$',  '@' => '\\@',  '%' => '\\%',  '#' => '\\#',);sub pretty { # adopted from Class::Classless  # Not the most brilliant routine, but passable.  # Don't give it a cyclic data structure!  my @stuff = @_; # copy  my $x;  my $out =    # join ",\n" .    join ", ",    map {;    if(!defined($_)) {      "undef";    } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {      $x = "[ " . pretty(@$_) . " ]" ;      $x;    } elsif(ref($_) eq 'SCALAR') {      $x = "\\" . pretty($$_) ;      $x;    } elsif(ref($_) eq 'HASH') {      my $hr = $_;      $x = "{" . join(", ",        map(pretty($_) . '=>' . pretty($hr->{$_}),            sort keys %$hr ) ) . "}" ;      $x;    } elsif(!length($_)) { q{''} # empty string    } elsif(      $_ eq '0' # very common case      or(         m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s         and $_ ne '-0' # the strange case that that RE lets thru      )    ) { $_;    } else {      if( chr(65) eq 'A' ) {        s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>         #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;      } else {        # We're in some crazy non-ASCII world!        s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>         #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;      }      qq{"$_"};    }  } @stuff;  # $out =~ s/\n */ /g if length($out) < 75;  return $out;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@# A rather unsubtle method of blowing away all the state information# from a parser object so it can be reused. Provided as a utility for# backward compatibilty in Pod::Man, etc. but not recommended for# general use.sub reinit {  my $self = shift;  foreach (qw(source_dead source_filename doc_has_startedstart_of_pod_block content_seen last_was_blank paras curr_openline_count pod_para_count in_pod ~tried_gen_errata errata errors_seenTitle)) {    delete $self->{$_};  }}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@1;
上一页 1 2 3 45
💿 文件大小 15230 K
👤 上传用户 gankai1983
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#application #source #linux #perl
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -