expat.pm

来自「ARM上的如果你对底层感兴趣」· PM 代码 · 共 600 行 · 第 1/2 页

PM
600
字号
package XML::Parser::Expat;

require 5.004;

use English;
use strict;
use vars qw($VERSION @ISA %Handler_Setters);
use Carp;

use IO::Handle;

require DynaLoader;

@ISA = qw(DynaLoader);

$VERSION = "2.16" ;

bootstrap XML::Parser::Expat $VERSION;

%Handler_Setters = (
		    Start => \&SetStartElementHandler,
		    End   => \&SetEndElementHandler,
		    Char  => \&SetCharacterDataHandler,
		    Proc  => \&SetProcessingInstructionHandler,
		    Comment => \&SetCommentHandler,
		    Default => \&SetDefaultHandler,
		    Unparsed => \&SetUnparsedEntityDeclHandler,
		    Notation => \&SetNotationDeclHandler,
		    ExternEnt => \&SetExternalEntityRefHandler,
		    Entity => \&SetEntityDeclHandler,
		    Element => \&SetElementDeclHandler,
		    Attlist => \&SetAttListDeclHandler,
		    Doctype => \&SetDoctypeHandler,
		    XMLDecl => \&SetXMLDeclHandler
		    );

sub new {
    my ($class, %args) = @_;
    my $self = bless \%args, $_[0];
    $args{Used} = 0;
    $args{Context} = [];
    $args{Namespaces} ||= 0;
    $args{Namespace_Table} = {};
    $args{Namespace_List} = [undef];
    $args{_Setters} = \%Handler_Setters;
    $args{Parser} = ParserCreate($self, $args{ProtocolEncoding},
				 $args{Namespaces});
    $self;
}

sub setHandlers
{
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
	if (int(@handler_pairs) & 1);

    while (@handler_pairs)
    {
	my $type = shift @handler_pairs;
	my $handler = shift @handler_pairs;
	my $hndl = $self->{_Setters}->{$type};

	unless (defined($hndl))
	{
	    my @types = sort keys %{$self->{_Setters}};

	    croak("Unknown Expat handler type: $type\n Valid types: @types");
	}

	&$hndl($self->{Parser}, $handler);
    }
}

sub default_current
{
    my $self = shift;
    DefaultCurrent($self->{Parser});
}

sub current_line
{
    GetCurrentLineNumber($_[0]->{Parser});
}

sub current_column
{
    GetCurrentColumnNumber($_[0]->{Parser});
}

sub current_byte
{
    GetCurrentByteIndex($_[0]->{Parser});
}

sub base
{
    my ($self, $newbase) = @_;
    my $p = $self->{Parser};
    my $oldbase = GetBase($p);
    SetBase($p, $newbase) if @_ > 1;
    $oldbase;
}

sub context
{
    my $ctx = $_[0]->{Context};
    @$ctx;
}

sub current_element
{
    my ($self) = @_;
    $self->{Context}->[-1];
}

sub in_element
{
    my ($self, $element) = @_;
    $self->{Context}->[-1] eq $element;
}

sub within_element
{
    my ($self, $element) = @_;
    my $cnt = 0;
    foreach (@{$self->{Context}})
    {
	$cnt++ if $_ eq $element;
    }
    return $cnt;
}

sub depth
{
    my ($self) = @_;
    int(@{$self->{Context}});
}

sub namespace
{
    my ($self, $name) = @_;
    local($WARNING) = 0;
    $self->{Namespace_List}->[$name];
}

sub eq_name
{
    my ($self, $nm1, $nm2) = @_;
    local($WARNING) = 0;

    $nm1 == $nm2 and $nm1 eq $nm2;
}

sub generate_ns_name
{
  my ($self, $name, $namespace) = @_;

  $namespace ?
    GenerateNSName($name, $namespace, $self->{Namespace_Table},
		   $self->{Namespace_List})
      : $name;
}

sub position_in_context
{
    my ($self, $lines) = @_;
    my $parser = $self->{Parser};
    my ($string, $linepos) = PositionContext($parser, $lines);
    my $col = GetCurrentColumnNumber($parser);
    my $ptr = ('=' x ($col - 1)) . '^' . "\n";
    my $ret;
    my $dosplit = $linepos < length($string);

    $string .= "\n" unless $string =~ /\n$/;

    if ($dosplit)
    {
	$ret = substr($string, 0, $linepos) . $ptr
	    . substr($string, $linepos);
    }
    else
    {
	$ret = $string . $ptr;
    }

    $ret;
}
    
sub DESTROY {
    my $self = shift;
    ParserFree($self->{Parser});
}

sub parse {
    my $self = shift;
    my $arg = shift;
    croak "Parser has already been used" if $self->{Used};
    $self->{Used} = 1;
    my $parser = $self->{Parser};
    my $ioref;
    my $result = 0;
    $self->{ErrorMessage} ||= '';

    if (defined $arg) {
      if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handler'))
	{
	  $ioref = $arg;
	}
      else {
	eval {
	  $ioref = *{$arg}{IO};
	};
      }
    }

    if (defined($ioref)) {
      my $delim = $self->{Stream_Delimiter};
      my $prev_rs;

      $prev_rs = $ioref->input_record_separator("\n$delim\n")
	if defined($delim);
	  
      $result = ParseStream($parser, $ioref, $delim);

      $ioref->input_record_separator($prev_rs)
	if defined($delim);
    }
    else {
      $result = ParseString($parser, $arg);
    }

    $result or croak $self->{ErrorMessage};
}

sub parsestring {
    my $self = shift;
    $self->parse(@_);
}

sub parsefile {
    my $self = shift;
    croak "Parser has already been used" if $self->{Used};
    local(*FILE);
    open(FILE, $_[0]) or  croak "Couldn't open $_[0]:\n$!";
    my $ret = $self->parse(*FILE);
    close(FILE);
    $ret;
}

1;

__END__

=head1 NAME

XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser

=head1 SYNOPSIS

 use XML::Parser::Expat;

 $parser = new XML::Parser::Expat;
 $parser->setHandlers('Start' => \&sh, 'End'   => \&eh,
                      'Default' => \&dh, 'Char' => \&ch);
 open(FOO, 'info.xml') or die "Couldn't open";
 $parser->parse(*FOO);
 close(FOO);
 # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');

 sub sh
 {
     my ($p, $el, %atts) = @_;
     if ($el eq 'special')
     {
	 $p->setHandlers('Default' => \&spec);
	 $p->default_current;
     }
     ...
 }

 sub spec
 {
     my ($p, $str) = @_;
     ...
     $p->setHandlers('Default' => \&dh);
 }

=head1 DESCRIPTION

This module provides an interface to James Clark's XML parser, expat. As in
expat, a single instance of the parser can only parse one document. Calls
to parsestring after the first for a given instance will die.

Expat (and XML::Parser::Expat) are event based. As the parser recognizes
parts of the document (say the start or end of an XML element), then any
handlers registered for that type of an event are called with suitable
parameters.

=head1 METHODS

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?