⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlparser.pm

📁 网页留言本,比一般的留言簿管用
💻 PM
字号:
# $Id: XMLParser.pm 4532 2004-05-11 05:15:40Z ezra $package XML::XPath::XMLParser;use strict;use XML::Parser;#use XML::XPath;use XML::XPath::Node;use XML::XPath::Node::Element;use XML::XPath::Node::Text;use XML::XPath::Node::Comment;use XML::XPath::Node::PI;use XML::XPath::Node::Attribute;use XML::XPath::Node::Namespace;my @options = qw(        filename        xml        parser        ioref        );my ($_current, $_namespaces_on);my %IdNames;use vars qw/$xmlns_ns $xml_ns/;$xmlns_ns = "http://www.w3.org/2000/xmlns/";$xml_ns = "http://www.w3.org/XML/1998/namespace";sub new {    my $proto = shift;    my $class = ref($proto) || $proto;    my %args = @_;    my %hash = map(( "_$_" => $args{$_} ), @options);    bless \%hash, $class;}sub parse {    my $self = shift;        $self->{IdNames} = {};    $self->{InScopeNamespaceStack} = [ {             '_Default' => undef,            'xmlns' => $xmlns_ns,            'xml' => $xml_ns,        } ];        $self->{NodeStack} = [ ];        $self->set_xml($_[0]) if $_[0];        my $parser = $self->get_parser || XML::Parser->new(            ErrorContext => 2,            ParseParamEnt => 1,            );        $parser->setHandlers(            Init => sub { $self->parse_init(@_) },            Char => sub { $self->parse_char(@_) },            Start => sub { $self->parse_start(@_) },            End => sub { $self->parse_end(@_) },            Final => sub { $self->parse_final(@_) },            Proc => sub { $self->parse_pi(@_) },            Comment => sub { $self->parse_comment(@_) },            Attlist => sub { $self->parse_attlist(@_) },            );        my $toparse;    if ($toparse = $self->get_filename) {        return $parser->parsefile($toparse);    }    else {        return $parser->parse($self->get_xml || $self->get_ioref);    }}sub parsefile {    my $self = shift;    my ($filename) = @_;    $self->set_filename($filename);    $self->parse;}sub parse_init {    my $self = shift;    my $e = shift;    my $document = XML::XPath::Node::Element->new();    my $newns = XML::XPath::Node::Namespace->new('xml', $xml_ns);    $document->appendNamespace($newns);    $self->{current} = $self->{DOC_Node} = $document;}sub parse_final {    my $self = shift;    return $self->{DOC_Node};}sub parse_char {    my $self = shift;    my $e = shift;    my $text = shift;        my $parent = $self->{current};        my $last = $parent->getLastChild;    if ($last && $last->isTextNode) {        # append to previous text node        $last->appendText($text);        return;    }        my $node = XML::XPath::Node::Text->new($text);    $parent->appendChild($node, 1);}sub parse_start {    my $self = shift;    my $e = shift;    my $tag = shift;        push @{ $self->{InScopeNamespaceStack} },         { %{ $self->{InScopeNamespaceStack}[-1] } };    $self->_scan_namespaces(@_);        my ($prefix, $namespace) = $self->_namespace($tag);        my $node = XML::XPath::Node::Element->new($tag, $prefix);        my @attributes;    for (my $ii = 0; $ii < $#_; $ii += 2) {	my ($name, $value) = ($_[$ii], $_[$ii+1]);        if ($name =~ /^xmlns(:(.*))?$/) {            # namespace node            my $prefix = $2 || '#default';#            warn "Creating NS node: $prefix = $value\n";            my $newns = XML::XPath::Node::Namespace->new($prefix, $value);            $node->appendNamespace($newns);        }        else {	    my ($prefix, $namespace) = $self->_namespace($name);            undef $namespace unless $prefix;            my $newattr = XML::XPath::Node::Attribute->new($name, $value, $prefix);            $node->appendAttribute($newattr, 1);            if (exists($self->{IdNames}{$tag}) && ($self->{IdNames}{$tag} eq $name)) {    #            warn "appending Id Element: $val for ", $node->getName, "\n";                $self->{DOC_Node}->appendIdElement($value, $node);            }        }    }            $self->{current}->appendChild($node, 1);    $self->{current} = $node;}sub parse_end {    my $self = shift;    my $e = shift;    $self->{current} = $self->{current}->getParentNode;}sub parse_pi {    my $self = shift;    my $e = shift;    my ($target, $data) = @_;    my $node = XML::XPath::Node::PI->new($target, $data);    $self->{current}->appendChild($node, 1);}sub parse_comment {    my $self = shift;    my $e = shift;    my ($data) = @_;    my $node = XML::XPath::Node::Comment->new($data);    $self->{current}->appendChild($node, 1);}sub parse_attlist {    my $self = shift;    my $e = shift;    my ($elname, $attname, $type, $default, $fixed) = @_;    if ($type eq 'ID') {        $self->{IdNames}{$elname} = $attname;    }}sub _scan_namespaces {    my ($self, %attributes) = @_;    while (my ($attr_name, $value) = each %attributes) {	if ($attr_name eq 'xmlns') {	    $self->{InScopeNamespaceStack}[-1]{'_Default'} = $value;	} elsif ($attr_name =~ /^xmlns:(.*)$/) {	    my $prefix = $1;	    $self->{InScopeNamespaceStack}[-1]{$prefix} = $value;	}    }}sub _namespace {    my ($self, $name) = @_;    my ($prefix, $localname) = split(/:/, $name);    if (!defined($localname)) {	if ($prefix eq 'xmlns') {	    return '', undef;	} else {	    return '', $self->{InScopeNamespaceStack}[-1]{'_Default'};	}    } else {	return $prefix, $self->{InScopeNamespaceStack}[-1]{$prefix};    }}sub as_string {    my $node = shift;    $node->toString;}sub get_parser { shift->{_parser}; }sub get_filename { shift->{_filename}; }sub get_xml { shift->{_xml}; }sub get_ioref { shift->{_ioref}; }sub set_parser { $_[0]->{_parser} = $_[1]; }sub set_filename { $_[0]->{_filename} = $_[1]; }sub set_xml { $_[0]->{_xml} = $_[1]; }sub set_ioref { $_[0]->{_ioref} = $_[1]; }1;__END__=head1 NAMEXML::XPath::XMLParser - The default XML parsing class that produces a node tree=head1 SYNOPSIS    my $parser = XML::XPath::XMLParser->new(                filename => $self->get_filename,                xml => $self->get_xml,                ioref => $self->get_ioref,                parser => $self->get_parser,            );    my $root_node = $parser->parse;=head1 DESCRIPTIONThis module generates a node tree for use as the context node for XPath processing.It aims to be a quick parser, nothing fancy, and yet has to store more informationthan most parsers. To achieve this I've used array refs everywhere - no hashes.I don't have any performance figures for the speedups achieved, so I make noappologies for anyone not used to using arrays instead of hashes. I think theymake good sense here where we know the attributes of each type of node.=head1 Node StructureAll nodes have the same first 2 entries in the array: node_parentand node_pos. The type of the node is determined using the ref() function.The node_parent always contains an entry for the parent of the currentnode - except for the root node which has undef in there. And node_pos is theposition of this node in the array that it is in (think: $node == $node->[node_parent]->[node_children]->[$node->[node_pos]] )Nodes are structured as follows:=head2 Root NodeThe root node is just an element node with no parent.    [      undef, # node_parent - check for undef to identify root node      undef, # node_pos      undef, # node_prefix      [ ... ], # node_children (see below)    ]=head2 Element Node    [      $parent, # node_parent      <position in current array>, # node_pos      'xxx', # node_prefix - namespace prefix on this element      [ ... ], # node_children      'yyy', # node_name - element tag name      [ ... ], # node_attribs - attributes on this element      [ ... ], # node_namespaces - namespaces currently in scope    ]=head2 Attribute Node    [      $parent, # node_parent - the element node      <position in current array>, # node_pos      'xxx', # node_prefix - namespace prefix on this element      'href', # node_key - attribute name      'ftp://ftp.com/', # node_value - value in the node    ]=head2 Namespace NodesEach element has an associated set of namespace nodes that are currentlyin scope. Each namespace node stores a prefix and the expanded name (retrievedfrom the xmlns:prefix="..." attribute).    [      $parent,      <pos>,      'a', # node_prefix - the namespace as it was written as a prefix      'http://my.namespace.com', # node_expanded - the expanded name.    ]=head2 Text Nodes    [      $parent,      <pos>,      'This is some text' # node_text - the text in the node    ]=head2 Comment Nodes    [      $parent,      <pos>,      'This is a comment' # node_comment    ]=head2 Processing Instruction Nodes    [      $parent,      <pos>,      'target', # node_target      'data', # node_data    ]=head1 UsageIf you feel the need to use this module outside of XML::XPath (for exampleyou might use this module directly so that you can cache parsed trees), youcan follow the following API:=head2 newThe new method takes either no parameters, or any of the following parameters:        filename        xml        parser        iorefThis uses the familiar hash syntax, so an example might be:    use XML::XPath::XMLParser;        my $parser = XML::XPath::XMLParser->new(filename => 'example.xml');The parameters represent a filename, a string containing XML, an XML::Parserinstance and an open filehandle ref respectively. You can also set or get allof these properties using the get_ and set_ functions that have the samename as the property: e.g. get_filename, set_ioref, etc.=head2 parseThe parse method generally takes no parameters, however you are free topass either an open filehandle reference or an XML string if you so require.The return value is a tree that XML::XPath can use. The parse method willdie if there is an error in your XML, so be sure to use perl's exceptionhandling mechanism (eval{};) if you want to avoid this.=head2 parsefileThe parsefile method is identical to parse() except it expects a singleparameter that is a string naming a file to open and parse. Again itreturns a tree and also dies if there are XML errors.=head1 NOTICESThis file is distributed as part of the XML::XPath module, and is copyright2000 Fastnet Software Ltd. Please see the documentation for the module as awhole for licencing information.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -