📄 blackbox.pm
字号:
package Pod::Simple::BlackBox;## "What's in the box?" "Pain."############################################################################## This is where all the scary things happen: parsing lines into# paragraphs; and then into directives, verbatims, and then also# turning formatting sequences into treelets.## Are you really sure you want to read this code?##-----------------------------------------------------------------------------## The basic work of this module Pod::Simple::BlackBox is doing the dirty work# of parsing Pod into treelets (generally one per non-verbatim paragraph), and# to call the proper callbacks on the treelets.## Every node in a treelet is a ['name', {attrhash}, ...children...]use integer; # vroom!use strict;use Carp ();BEGIN { require Pod::Simple; *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub parse_line { shift->parse_lines(@_) } # alias# - - - Turn back now! Run away! - - -sub parse_lines { # Usage: $parser->parse_lines(@lines) # an undef means end-of-stream my $self = shift; my $code_handler = $self->{'code_handler'}; my $cut_handler = $self->{'cut_handler'}; $self->{'line_count'} ||= 0; my $scratch; DEBUG > 4 and print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; DEBUG > 5 and print "# About to parse lines: ", join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; my $paras = ($self->{'paras'} ||= []); # paragraph buffer. Because we need to defer processing of =over # directives and verbatim paragraphs. We call _ponder_paragraph_buffer # to process this. $self->{'pod_para_count'} ||= 0; my $line; foreach my $source_line (@_) { if( $self->{'source_dead'} ) { DEBUG > 4 and print "# Source is dead.\n"; last; } unless( defined $source_line ) { DEBUG > 4 and print "# Undef-line seen.\n"; push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; push @$paras, $paras->[-1], $paras->[-1]; # So that it definitely fills the buffer. $self->{'source_dead'} = 1; $self->_ponder_paragraph_buffer; next; } if( $self->{'line_count'}++ ) { ($line = $source_line) =~ tr/\n\r//d; # If we don't have two vars, we'll end up with that there # tr/// modding the (potentially read-only) original source line! } else { DEBUG > 2 and print "First line: [$source_line]\n"; if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n"; $self->_handle_encoding_line( "=encode utf8" ); $line =~ tr/\n\r//d; } elsif( $line =~ s/^\xFE\xFF//s ) { DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; $self->scream( $self->{'line_count'}, "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." ); splice @_; push @_, undef; next; # TODO: implement somehow? } elsif( $line =~ s/^\xFF\xFE//s ) { DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; $self->scream( $self->{'line_count'}, "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." ); splice @_; push @_, undef; next; # TODO: implement somehow? } else { DEBUG > 2 and print "First line is BOM-less.\n"; ($line = $source_line) =~ tr/\n\r//d; } } DEBUG > 5 and print "# Parsing line: [$line]\n"; if(!$self->{'in_pod'}) { if($line =~ m/^=([a-zA-Z]+)/s) { if($1 eq 'cut') { $self->scream( $self->{'line_count'}, "=cut found outside a pod block. Skipping to next block." ); ## Before there were errata sections in the world, it was ## least-pessimal to abort processing the file. But now we can ## just barrel on thru (but still not start a pod block). #splice @_; #push @_, undef; next; } else { $self->{'in_pod'} = $self->{'start_of_pod_block'} = $self->{'last_was_blank'} = 1; # And fall thru to the pod-mode block further down } } else { DEBUG > 5 and print "# It's a code-line.\n"; $code_handler->(map $_, $line, $self->{'line_count'}, $self) if $code_handler; # Note: this may cause code to be processed out of order relative # to pods, but in order relative to cuts. # Note also that we haven't yet applied the transcoding to $line # by time we call $code_handler! if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { # That RE is from perlsyn, section "Plain Old Comments (Not!)", #$fname = $2 if defined $2; #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; DEBUG > 1 and print "# Setting nextline to $1\n"; $self->{'line_count'} = $1 - 1; } next; } } # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . # Else we're in pod mode: # Apply any necessary transcoding: $self->{'_transcoder'} && $self->{'_transcoder'}->($line); # HERE WE CATCH =encoding EARLY! if( $line =~ m/^=encoding\s+\S+\s*$/s ) { $line = $self->_handle_encoding_line( $line ); } if($line =~ m/^=cut/s) { # here ends the pod block, and therefore the previous pod para DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; $self->{'in_pod'} = 0; # ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. $cut_handler->(map $_, $line, $self->{'line_count'}, $self) if $cut_handler; # TODO: add to docs: Note: this may cause cuts to be processed out # of order relative to pods, but in order relative to code. } elsif($line =~ m/^\s*$/s) { # it's a blank line if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; push @{$paras->[-1]}, $line; } # otherwise it's not interesting if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; } $self->{'last_was_blank'} = 1; } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; # Note that in "=head1 foo", the WS is lost. # Example: ['=head1', {'start_line' => 123}, ' foo'] ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. push @$paras, $new; # the new incipient paragraph DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; } elsif($line =~ m/^\s/s) { if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; push @{$paras->[-1]}, $line; } else { ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; } } else { ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; } $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; } else { # It's a non-blank line /continuing/ the current para if(@$paras) { DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; push @{$paras->[-1]}, $line; } else { # Unexpected case! die "Continuing a paragraph but \@\$paras is empty?"; } $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; } } # ends the big while loop DEBUG > 1 and print(pretty(@$paras), "\n"); return $self;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@sub _handle_encoding_line { my($self, $line) = @_; # The point of this routine is to set $self->{'_transcoder'} as indicated. return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; my $e = $1; my $orig = $e; push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; my $enc_error; # Cf. perldoc Encode and perldoc Encode::Supported require Pod::Simple::Transcode; if( $self->{'encoding'} ) { my $norm_current = $self->{'encoding'}; my $norm_e = $e; foreach my $that ($norm_current, $norm_e) { $that = lc($that); $that =~ s/[-_]//g; } if($norm_current eq $norm_e) { DEBUG > 1 and print "The '=encoding $orig' line is ", "redundant. ($norm_current eq $norm_e). Ignoring.\n"; $enc_error = ''; # But that doesn't necessarily mean that the earlier one went okay } else { $enc_error = "Encoding is already set to " . $self->{'encoding'}; DEBUG > 1 and print $enc_error; } } elsif ( # OK, let's turn on the encoding do { DEBUG > 1 and print " Setting encoding to $e\n"; $self->{'encoding'} = $e; 1; } and $e eq 'HACKRAW' ) { DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { die($enc_error = "WHAT? _transcoder is already set?!") if $self->{'_transcoder'}; # should never happen require Pod::Simple::Transcode; $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); eval { my @x = ('', "abc", "123"); $self->{'_transcoder'}->(@x); }; $@ && die( $enc_error = "Really unexpected error setting up encoding $e: $@\nAborting" ); } else { my @supported = Pod::Simple::Transcode::->all_encodings; # Note unsupported, and complain DEBUG and print " Encoding [$e] is unsupported.", "\nSupporteds: @supported\n"; my $suggestion = ''; # Look for a near match: my $norm = lc($e); $norm =~ tr[-_][]d; my $n; foreach my $enc (@supported) { $n = lc($enc); $n =~ tr[-_][]d; next unless $n eq $norm; $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; last; } my $encmodver = Pod::Simple::Transcode::->encmodver; $enc_error = join '' => "This document probably does not appear as it should, because its ", "\"=encoding $e\" line calls for an unsupported encoding.", $suggestion, " [$encmodver\'s supported encodings are: @supported]" ; $self->scream( $self->{'line_count'}, $enc_error ); } push @{ $self->{'encoding_command_statuses'} }, $enc_error; return '=encoding ALREADYDONE';}# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -sub _handle_encoding_second_level { # By time this is called, the encoding (if well formed) will already # have been acted one. my($self, $para) = @_; my @x = @$para; my $content = join ' ', splice @x, 2; $content =~ s/^\s+//s; $content =~ s/\s+$//s; DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; if($content eq 'ALREADYDONE') { # It's already been handled. Check for errors. if(! $self->{'encoding_command_statuses'} ) { DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; } elsif( $self->{'encoding_command_statuses'}[-1] ) { $self->whine( $para->[1]{'start_line'}, sprintf "Couldn't do %s: %s", $self->{'encoding_command_reqs' }[-1], $self->{'encoding_command_statuses'}[-1], ); } else { DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; } } else { # Otherwise it's a syntax error $self->whine( $para->[1]{'start_line'}, "Invalid =encoding syntax: $content" ); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -