📄 lite.pm
字号:
my $element_list = []; my $current_element = $self->{tree}; # Call init handler if defined &{$self->{handlers}{init}}($self) if defined $self->{handlers}{init}; # Make a table of offsets to each element start and end point # Table is a hash of element names to lists of offsets: # [start_tag_start, start_tag_end, end_tag_start, end_tag_end] # where tags include the '<' and '>' # Also make a tree of linked lists. List contains root element # and other nodes. Each node consits of a list ref (the position list) # and a following list containing the child element. Text nodes are # a list ref (with just two positions). # Find the opening and closing of the XML, giving errors if not well-formed my $start_pos = index( $self->{doc}, '<' ); $self->_error( 'NO_ROOT' ) if $start_pos == -1; my $end_pos = index( $self->{doc}, '>', $start_pos + 1 ); $self->_error( 'ROOT_NOT_CLOSED', $start_pos + $self->{doc_offset} ) if $end_pos == -1; my $doc_end = rindex( $self->{doc}, '>' ); $self->_error( 'ROOT_NOT_CLOSED' ) if $doc_end == -1; # Now walk through the document, one tag at a time, building up our # lookup tables while( $end_pos <= $doc_end ) { # Get a tag my $tag = substr( $self->{doc}, $start_pos, $end_pos - $start_pos + 1 ); # Get the tag name and see if it's an end tag (starts with </) my( $end, $name ) = $tag =~ m{^<\s*(/?)\s*([^/>\s]+)}; if( $end ) { # If there is no start tag for this end tag then throw an error $self->_error( 'NO_START', $start_pos + $self->{doc_offset}, $tag ) unless defined $self->{elements}{$name}; # Otherwise, add the end point to the array for the last element in # the by-name lookup hash my( $x, $found ) = (@{$self->{elements}{$name}} - 1, 0); while( $x >= 0 ) { # Close the last open element (ignore elements already closed) if( @{$self->{elements}{$name}[$x]} < 4 ) { $self->{elements}{$name}[$x][2] = $start_pos; $self->{elements}{$name}[$x][3] = $end_pos; $found = 1; last; } # end if $x--; } # end while # If we didn't find an open element then throw an error $self->_error( 'NO_START', $start_pos + $self->{doc_offset}, $tag ) unless $found; # Call an end-tag handler if defined (not yet exposed) &{$self->{handlers}{end}}($self, $name) if defined $self->{handlers}{end}; # Close element in linked list (tree) $current_element = pop @$element_list; } else { # Make a new list in the by-name lookup hash if none found by this name yet $self->{elements}{$name} = [] unless defined $self->{elements}{$name}; # Add start points to the array of positions and push it on the hash my $pos_list = [$start_pos, $end_pos]; push @{$self->{elements}{$name}}, $pos_list; # Call start-tag handler if defined (not yet exposed) &{$self->{handlers}{start}}($self, $name) if defined $self->{handlers}{start}; # Now add the element to the linked list (tree) push @$element_list, $current_element; # If this is a single-tag element (e.g. <.../>) then close it immediately if( $tag =~ m{/\s*>$} ) { push @$current_element, $pos_list; $pos_list->[2] = undef; $pos_list->[3] = undef; # Call an end-tag handler now too &{$self->{handlers}{end}}($self, $name) if defined $self->{handlers}{end}; } else { # Otherwise, put this on the list and start a sublist for children my $new_element = []; push @$current_element, $pos_list, $new_element; $current_element = $new_element; } # end if } # end if # Move the start pointer to beginning of next element $start_pos = index( $self->{doc}, '<', $start_pos + 1 ); last if $start_pos == -1 || $end_pos == $doc_end; # Now $end_pos is end of old tag and $start_pos is start of new # So do things on the data between the tags as needed if( $start_pos - $end_pos > 1 ) { # Call any character data handler &{$self->{handlers}{char}}($self, substr($self->{doc}, $end_pos + 1, $start_pos - $end_pos - 1)) if defined $self->{handlers}{char}; # Inserting the text into the linked list as well# push @$current_element, [$end_pos + 1, $start_pos - 1]; } # end if # Now finish by incrementing the parser to the next element $end_pos = index( $self->{doc}, '>', $start_pos + 1 ); # If there is no next element, and we're not at the end of the document, # then throw an error $self->_error( 'ELM_NOT_CLOSED', $start_pos + $self->{doc_offset} ) if $end_pos == -1; } # end while # Call finalization handler if defined and return it's value return &{$self->{handlers}{final}}($self) if defined $self->{handlers}{final}; # Else return the tree pointer return $self->{tree};} # end _parse_it# ----------------------------------------------------------# Sub: _get_a_file## Args: $file## Returns: Scalar content of $file, undef on error## Description: Reads from $file and returns the content. # $file may be either a filename or an IO handle# ----------------------------------------------------------# Date Modification Author# ----------------------------------------------------------# 28Aug2001 Added scalar and IO handling JW# ----------------------------------------------------------sub _get_a_file { my $self = shift; my $file = shift; my $content = undef; # If it's a ref and a handle, then read that if( ref($file) ) { $content = join '', <$file>; } # If it's a scalar and the file exits then open it elsif( -e $file ) { open( XML, $file ) || return undef; $content = join '', <XML>; close XML || return undef; } # Don't know how to handle this type of parameter else { return undef; } # end if return $content;} # end _get_a_file# ----------------------------------------------------------# Sub: _error## Args: $code [, @args]# $code A code representing the message to send## Returns: Does not. Dies.## Description: Outputs an error message and dies# ----------------------------------------------------------# Date Modification Author# ----------------------------------------------------------# ----------------------------------------------------------sub _error { my $self = shift; my( $code, @args ) = @_; my $msg = $self->{ERRORS}{$code}; # Handle replacement codes $msg =~ s/\%(\d+)/$args[$1]/g; # Throw exception die ref($self) . ":$msg\n";} # end _error# ----------------------------------------------------------# Sub: _store_cdata## Args: $content## Returns: A reference to the CDATA element, padded to # original size.## Description: Stores the CDATA element in the internal# hash, and returns a reference plus padding to replace it# ----------------------------------------------------------# Date Modification Author# ----------------------------------------------------------# 28Aug2001 Added to support CDATA JW# ----------------------------------------------------------sub _store_cdata { my $self = shift; my( $content ) = @_; my $ref = @{$self->{_CDATA}}; $self->{_CDATA}[$ref] = $content; return $ref . ' ' x (length($content) - length($ref));} # end _store_cdata# ----------------------------------------------------------# Sub: _dump_tree## Args: $node# $node A starting node, or the root, if not given## Returns: The string to print## Description: Builds a printable tree in a debugging format# ----------------------------------------------------------# Date Modification Author# ----------------------------------------------------------# 06Nov2001 Added for debugging tree JW# ----------------------------------------------------------sub _dump_tree { my $self = shift; my $node = shift || $self->{tree}; my $tree = ''; for( my $i = 0; $i < scalar(@$node) && defined $node->[$i]; $i++ ) { if( (scalar(@{$node->[$i]}) == 4) && (defined $node->[$i][2]) ) { $tree .= '[' . join( ',', @{$node->[$i]} ) . "] " . substr($self->{doc}, $node->[$i][0], $node->[$i][1] - $node->[$i][0] + 1) . "..." . substr($self->{doc}, $node->[$i][2], $node->[$i][3] - $node->[$i][2] + 1) . "\n"; # Do child list $i++; $tree .= join( '', map( " $_\n", split( "\n", $self->_dump_tree( $node->[$i] ) ) ) ); } elsif( (scalar(@{$node->[$i]}) == 4) ) { $tree .= '[' . join( ',', $node->[$i][0], $node->[$i][1] ) . "] " . substr($self->{doc}, $node->[$i][0], $node->[$i][1] - $node->[$i][0] + 1) . "\n"; } else { $tree .= "ERROR! Invalid node: [" . join( ',', @{$node->[$i]} ) . "]\n"; } # end for } # end for return $tree;} # end _dump_tree# module clean-up code here (global destructor)END { }1; # so the require or use succeeds=back=head1 BUGS(None known)=head1 VERSION0.11=head1 AUTHORJeremy Wadsack for Wadsack-Allen Digital Group (dgsupport@wadsack-allen.com)=head1 COPYRIGHTCopyright 2001 Wadsack-Allen. All rights reserved.This library is free software; you can redistribute it and/ormodify it under the same terms as Perl itself.=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -