📄 pullparser.pm

📁 source of perl for linux application,
💻 PM
📖 第 1 页 / 共 2 页
字号:
12 下一页
require 5;package Pod::Simple::PullParser;$VERSION = '2.02';use Pod::Simple ();BEGIN {@ISA = ('Pod::Simple')}use strict;use Carp ();use Pod::Simple::PullParserStartToken;use Pod::Simple::PullParserEndToken;use Pod::Simple::PullParserTextToken;BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }__PACKAGE__->_accessorize(  'source_fh',         # the filehandle we're reading from  'source_scalar_ref', # the scalarref we're reading from  'source_arrayref',   # the arrayref we're reading from);#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@##  And here is how we implement a pull-parser on top of a push-parser...sub filter {  my($self, $source) = @_;  $self = $self->new unless ref $self;  $source = *STDIN{IO} unless defined $source;  $self->set_source($source);  $self->output_fh(*STDOUT{IO});  $self->run; # define run() in a subclass if you want to use filter()!  return $self;}# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -sub parse_string_document {  my $this = shift;  $this->set_source(\ $_[0]);  $this->run;}sub parse_file {  my($this, $filename) = @_;  $this->set_source($filename);  $this->run;}# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#  In case anyone tries to use them:sub run {  use Carp ();  if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!    Carp::croak "You can call run() only on subclasses of "     . __PACKAGE__;  } else {    Carp::croak join '',      "You can't call run() because ",      ref($_[0]) || $_[0], " didn't define a run() method";  }}sub parse_lines {  use Carp ();  Carp::croak "Use set_source with ", __PACKAGE__,    " and subclasses, not parse_lines";}sub parse_line {  use Carp ();  Carp::croak "Use set_source with ", __PACKAGE__,    " and subclasses, not parse_line";}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sub new {  my $class = shift;  my $self = $class->SUPER::new(@_);  die "Couldn't construct for $class" unless $self;  $self->{'token_buffer'} ||= [];  $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';  $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';  $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';  DEBUG > 1 and print "New pullparser object: $self\n";  return $self;}# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~sub get_token {  my $self = shift;  DEBUG > 1 and print "\nget_token starting up on $self.\n";  DEBUG > 2 and print " Items in token-buffer (",   scalar( @{ $self->{'token_buffer'} } ) ,   ") :\n", map(     "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }   ),   @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',   "\n"  ;  until( @{ $self->{'token_buffer'} } ) {    DEBUG > 3 and print "I need to get something into my empty token buffer...\n";    if($self->{'source_dead'}) {      DEBUG and print "$self 's source is dead.\n";      push @{ $self->{'token_buffer'} }, undef;    } elsif(exists $self->{'source_fh'}) {      my @lines;      my $fh = $self->{'source_fh'}       || Carp::croak('You have to call set_source before you can call get_token');             DEBUG and print "$self 's source is filehandle $fh.\n";      # Read those many lines at a time      for(my $i = Pod::Simple::MANY_LINES; $i--;) {        DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n";        local $/ = $Pod::Simple::NL;        push @lines, scalar(<$fh>); # readline        DEBUG > 3 and print "  Line is: ",          defined($lines[-1]) ? $lines[-1] : "<undef>\n";        unless( defined $lines[-1] ) {          DEBUG and print "That's it for that source fh!  Killing.\n";          delete $self->{'source_fh'}; # so it can be GC'd          last;        }         # but pass thru the undef, which will set source_dead to true        # TODO: look to see if $lines[-1] is =encoding, and if so,        # do horribly magic things      }            if(DEBUG > 8) {        print "* I've gotten ", scalar(@lines), " lines:\n";        foreach my $l (@lines) {          if(defined $l) {            print "  line {$l}\n";          } else {            print "  line undef\n";          }        }        print "* end of ", scalar(@lines), " lines\n";      }      $self->SUPER::parse_lines(@lines);          } elsif(exists $self->{'source_arrayref'}) {      DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ",       scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";      DEBUG > 3 and print "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";      $self->SUPER::parse_lines(        splice @{ $self->{'source_arrayref'} },        0,        Pod::Simple::MANY_LINES      );      unless( @{ $self->{'source_arrayref'} } ) {        DEBUG and print "That's it for that source arrayref!  Killing.\n";        $self->SUPER::parse_lines(undef);        delete $self->{'source_arrayref'}; # so it can be GC'd      }       # to make sure that an undef is always sent to signal end-of-stream    } elsif(exists $self->{'source_scalar_ref'}) {      DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",        length(${ $self->{'source_scalar_ref'} }) -        (pos(${ $self->{'source_scalar_ref'} }) || 0),        " characters left to parse.\n";      DEBUG > 3 and print " Fetching a line from source-string...\n";      if( ${ $self->{'source_scalar_ref'} } =~        m/([^\n\r]*)((?:\r?\n)?)/g      ) {        #print(">> $1\n"),        $self->SUPER::parse_lines($1)         if length($1) or length($2)          or pos(     ${ $self->{'source_scalar_ref'} })           != length( ${ $self->{'source_scalar_ref'} });         # I.e., unless it's a zero-length "empty line" at the very         #  end of "foo\nbar\n" (i.e., between the \n and the EOS).      } else { # that's the end.  Byebye        $self->SUPER::parse_lines(undef);        delete $self->{'source_scalar_ref'};        DEBUG and print "That's it for that source scalarref!  Killing.\n";      }          } else {      die "What source??";    }  }  DEBUG and print "get_token about to return ",   Pod::Simple::pretty( @{$self->{'token_buffer'}}     ? $self->{'token_buffer'}[-1] : undef   ), "\n";  return shift @{$self->{'token_buffer'}}; # that's an undef if empty}use UNIVERSAL ();sub unget_token {  my $self = shift;  DEBUG and print "Ungetting ", scalar(@_), " tokens: ",   @_ ? "@_\n" : "().\n";  foreach my $t (@_) {    Carp::croak "Can't unget that, because it's not a token -- it's undef!"     unless defined $t;    Carp::croak "Can't unget $t, because it's not a token -- it's a string!"     unless ref $t;    Carp::croak "Can't unget $t, because it's not a token object!"     unless UNIVERSAL::can($t, 'type');  }    unshift @{$self->{'token_buffer'}}, @_;  DEBUG > 1 and print "Token buffer now has ",   scalar(@{$self->{'token_buffer'}}), " items in it.\n";  return;}#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@# $self->{'source_filename'} = $source;sub set_source {  my $self = shift @_;  return $self->{'source_fh'} unless @_;  my $handle;  if(!defined $_[0]) {    Carp::croak("Can't use empty-string as a source for set_source");  } elsif(ref(\( $_[0] )) eq 'GLOB') {    $self->{'source_filename'} = '' . ($handle = $_[0]);    DEBUG and print "$self 's source is glob $_[0]\n";    # and fall thru     } elsif(ref( $_[0] ) eq 'SCALAR') {    $self->{'source_scalar_ref'} = $_[0];    DEBUG and print "$self 's source is scalar ref $_[0]\n";    return;  } elsif(ref( $_[0] ) eq 'ARRAY') {    $self->{'source_arrayref'} = $_[0];    DEBUG and print "$self 's source is array ref $_[0]\n";    return;  } elsif(ref $_[0]) {    $self->{'source_filename'} = '' . ($handle = $_[0]);    DEBUG and print "$self 's source is fh-obj $_[0]\n";  } elsif(!length $_[0]) {    Carp::croak("Can't use empty-string as a source for set_source");  } else {  # It's a filename!    DEBUG and print "$self 's source is filename $_[0]\n";    {      local *PODSOURCE;      open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";      $handle = *PODSOURCE{IO};    }    $self->{'source_filename'} = $_[0];    DEBUG and print "  Its name is $_[0].\n";    # TODO: file-discipline things here!  }  $self->{'source_fh'} = $handle;  DEBUG and print "  Its handle is $handle\n";  return 1;}# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~sub get_title_short {  shift->get_short_title(@_)  } # aliassub get_short_title {  my $title = shift->get_title(@_);  $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;    # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"  return $title;}sub get_title       { shift->_get_titled_section(  'NAME', max_token => 50, desperate => 1, @_)}sub get_version     { shift->_get_titled_section(   'VERSION',    max_token => 400,    accept_verbatim => 1,    max_content_length => 3_000,   @_,  );}sub get_description { shift->_get_titled_section(   'DESCRIPTION',    max_token => 400,    max_content_length => 3_000,   @_,) }sub get_authors     { shift->get_author(@_) }  # a harmless aliassub get_author      {  my $this = shift;  # Max_token is so high because these are  #  typically at the end of the document:  $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||  $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);}#--------------------------------------------------------------------------sub _get_titled_section {  # Based on a get_title originally contributed by Graham Barr  my($self, $titlename, %options) = (@_);    my $max_token            = delete $options{'max_token'};  my $desperate_for_title  = delete $options{'desperate'};  my $accept_verbatim      = delete $options{'accept_verbatim'};  my $max_content_length   = delete $options{'max_content_length'};  $max_content_length = 120 unless defined $max_content_length;  Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")    . join " ", map "[$_]", sort keys %options  )   if keys %options;  my %content_containers;  $content_containers{'Para'} = 1;  if($accept_verbatim) {    $content_containers{'Verbatim'} = 1;    $content_containers{'VerbatimFormatted'} = 1;  }  my $token_count = 0;  my $title;  my @to_unget;  my $state = 0;  my $depth = 0;  Carp::croak "What kind of titlename is \"$titlename\"?!" unless   defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity  my $titlename_re = quotemeta($titlename);  my $head1_text_content;  my $para_text_content;  while(    ++$token_count <= ($max_token || 1_000_000)    and defined(my $token = $self->get_token)  ) {    push @to_unget, $token;    if ($state == 0) { # seeking =head1      if( $token->is_start and $token->tagname eq 'head1' ) {        DEBUG and print "  Found head1.  Seeking content...\n";        ++$state;        $head1_text_content = '';      }    }    elsif($state == 1) { # accumulating text until end of head1      if( $token->is_text ) {        DEBUG and print "   Adding \"", $token->text, "\" to head1-content.\n";        $head1_text_content .= $token->text;      } elsif( $token->is_end and $token->tagname eq 'head1' ) {        DEBUG and print "  Found end of head1.  Considering content...\n";        if($head1_text_content eq $titlename          or $head1_text_content =~ m/\($titlename_re\)/s          # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n        ) {          DEBUG and print "  Yup, it was $titlename.  Seeking next para-content...\n";          ++$state;        } elsif(          $desperate_for_title           # if we're so desperate we'll take the first           #  =head1's content as a title          and $head1_text_content =~ m/\S/          and $head1_text_content !~ m/^[ A-Z]+$/s          and $head1_text_content !~            m/\((?:             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT            )\)/sx            # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)          and ($max_content_length            ? (length($head1_text_content) <= $max_content_length) # sanity            : 1)        ) {          DEBUG and print "  It looks titular: \"$head1_text_content\".\n",            "\n  Using that.\n";          $title = $head1_text_content;          last;        } else {          --$state;          DEBUG and print "  Didn't look titular ($head1_text_content).\n",
12 下一页
💿 文件大小 15230 K
👤 上传用户 gankai1983
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#application #source #linux #perl
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -