⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classparser.pm

📁 该软件可以方便的把HTML网页解析成一棵Tree
💻 PM
📖 第 1 页 / 共 3 页
字号:
##package Apache::HTML::ClassParser;#	ClassParser.pm##	Copyright (C) 1999  Paul J. Lucas##	This program is free software; you can redistribute it and/or modify#	it under the terms of the GNU General Public License as published by#	the Free Software Foundation; either version 2 of the License, or#	(at your option) any later version.##	This program is distributed in the hope that it will be useful,#	but WITHOUT ANY WARRANTY; without even the implied warranty of#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the#	GNU General Public License for more details.##	You should have received a copy of the GNU General Public License#	along with this program; if not, write to the Free Software#	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.##use 5.004;use strict 'vars';use vars qw( @EXPORT @EXPORT_OK @ISA $VERSION );use Apache::Constants qw( :common );use Apache::Request();use Exporter;use HTML::Tree;@EXPORT = qw( handler );@EXPORT_OK = qw();@ISA = qw( Exporter );$VERSION = '2.4.3';my %cache;sub visitor;################################################################################# SYNOPSIS#	sub handler## DESCRIPTION##	This is the entry point for the Apache mod_perl handler.  We find an#	HTML file's associated code file and parse an HTML file into a tree.#	We eval that code and cache it for multiple uses.## PARAMETERS##	r	A reference to the Apache request object.## RETURN VALUE##	Returns an Apache HTTP status code.################################################################################{	my $r = Apache::Request->new( shift );	##	# We are not interested in non-HTML files nor HEAD requests.	##	return DECLINED if $r->content_type ne 'text/html' || $r->header_only;	##	# Perform basic file sanity checks.	##	unless ( -e $r->finfo ) {		$r->log_error( "ClassParser: file not found: ", $r->filename );		return NOT_FOUND;	}	unless ( -r _ ) {		$r->log_error( "ClassParser: access denied: ", $r->filename );		return FORBIDDEN;	}	##	# Get the code file: if the pm_uri paramater was given, use it as the	# URI to the code file; otherwise, use the same URI as the HTML file,	# but replacing the suffix with ".pm".	##	my $code_filename;	if ( $r->param( 'pm_uri' ) ) {		$code_filename =			$r->lookup_uri( $r->param( 'pm_uri' ) )->filename;	} else {		( $code_filename = $r->filename ) =~ s/\.[a-z]?html?$/.pm/;	}	unless ( -r $code_filename && -f _ ) {		$r->log_error( "ClassParser: can not read $code_filename" );		return SERVER_ERROR;	}	my $mtime = -M _;	##	# Look in the cache first: if it's not in there or the source file was	# modified, read in and eval the code, determine the code's package	# name, create a new instance of the class that the package defines,	# and stuff the new object and the source file modification time into	# the cache.	##	my $object;	unless ( exists $cache{ $code_filename } &&		$cache{ $code_filename }{ mtime } == $mtime	) {		my $fh = Apache->gensym();		unless ( open( $fh, $code_filename ) ) {			$r->log_error(				"ClassParser: can not open $code_filename"			);			return SERVER_ERROR;		}		my $code = join( '', <$fh> );		close( $fh );		my $package = ($code =~ /^\s*package\s+([\w:]+);/m)[0];		unless ( $package ) {			$r->log_error(				"ClassParser: no package in $code_filename"			);			return SERVER_ERROR;		}		eval $code;		if ( $@ ) {			$r->log_error( $@ );			return SERVER_ERROR;		}		$object = $package->new();		$cache{ $code_filename } = {			object	=> $object,			mtime	=> $mtime,		};	} else {		##		# Just grab the previously cached object.		##		$object = $cache{ $code_filename }{ object };	}	##	# Inject a copy of the reference to the Apache::Request into the object	# so it can have access to it.	##	$object->{ r } = $r;	##	# Since we're generating dynamic pages, tell Apache to emit the headers	# that tell browsers not to cache pages.	##	$r->no_cache( 1 );	##	# If the object implements either a "get" or "post" method, call it.	# If it returns anything but OK, assume it handled the request and	# merely return its status.	##	my $status = OK;	if ( $r->method eq 'GET' ) {		goto clean_up if			$object->can( 'get' ) &&			( $status = $object->get() ) != OK;	} elsif ( $r->method eq 'POST' ) {		goto clean_up if			$object->can( 'post' ) &&			( $status = $object->post() ) != OK;	}	##	# Parse the HTML file into a tree.	##	my $html = HTML::Tree->from_file(		$r->filename, { Include_Comments => 1 }	);	unless ( $html ) {		$r->log_error( "ClassParser: can not parse ", $r->filename );		$status = SERVER_ERROR;		goto clean_up;	}	##	# If we're at the head of an Apache::Filter chain, call filter_input()	# as required even though we don't use the returned filehandle since	# HTML_Tree mmap's the file; and also do NOT emit the HTTP headers	# since that task must be done by Apache::Filter after the last filter	# in the chain.  Otherwise, just emit the HTTP headers.	##	if ( lc( $r->dir_config( 'Filter' ) ) eq 'on' ) {		$r->filter_register();		$r->filter_input();	} else {		$r->send_http_header();	}	##	# Finally, visit all nodes in the tree.	##	$html->visit( $object, \&visitor );clean_up:	##	# Blow away the object's copy of the reference to the Apache::Request	# so it will be deleted when handler() exits.	##	delete $object->{ r };	return $status;}#################################################################################	Built-in class_map functions################################################################################sub escape_text {	my @a = @_;	for ( @a ) {		s/&(\w+);/AMPER($1)/g;		s/&/&amp;/g;		s/"/&quot;/g;		s/</&lt;/g;		s/>/&gt;/g;		s/AMPER\((\w+)\)/&$1;/g;	}	return @a > 1 ? @a : $a[0];}sub append_att {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $att, $key ) = $class =~ /^append::(\w+)::(\w+)$/;	$node->atts->{ $att } .= escape_text( $this->{ $key } );	return 1;}sub check_key {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^check::(\w+)$/;	$node->att( 'checked', $this->{ $key } ? 'checked' : undef );	return 1;}sub check_key_value {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^check_value::(\w+)$/;	$node->att( 'checked',		$node->att( 'value' ) eq $this->{ $key } ? 'checked' : undef	);	return 1;}sub if_key {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^if::(\w+)$/;	return $this->{ $key };}sub select_key {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^select::(\w+)$/;	$node->att( 'selected',		$node->att( 'value' ) eq $this->{ $key } ? 'selected' : undef	);	return 1;}sub sub_att {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $att, $key ) = $class =~ /^sub::(\w+)::(\w+)$/;	$node->att( $att, escape_text( $this->{ $key } ) );	return 1;}sub sub_att_id {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $att, $key ) = $class =~ /^sub_id::(\w+)::(\w+)$/;	my $text = escape_text( $this->{ $key } );	$node->atts->{ $att } =~ s/\d+/$text/;	return 1;}sub sub_att_param {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $att, $key ) = $class =~ /^sub_param::(\w+)::(\w+)$/;	my $text = escape_text( $this->{ $key } );	$node->atts->{ $att } =~ s/\b$key=[^&=]+/$key=$text/;	return 1;}sub sub_href {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^href::(\w+)$/;	$node->att( 'href', escape_text( $this->{ $key } ) );	return 1;}sub sub_href_id {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^href_id::(\w+)$/;	my $text = escape_text( $this->{ $key } );	$node->atts->{ href } =~ s/\d+/$text/;	return 1;}sub sub_href_param {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^href_param::(\w+)$/;	my $text = escape_text( $this->{ $key } );	$node->atts->{ href } =~ s/\b$key=[^&=]+/$key=$text/;	return 1;}sub sub_text {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^text::(\w+)$/;	$node->children()->[0]->text( escape_text( $this->{ $key } ) );	return 1;}sub sub_value {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^value::(\w+)$/;	$node->att( 'value', escape_text( $this->{ $key } ) );	return 1;}sub sub_value_id {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^value_id::(\w+)$/;	my $text = escape_text( $this->{ $key } );	$node->atts->{ value } =~ s/\d+/$text/;	return 1;}sub sub_value_param {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^value_param::(\w+)$/;	my $text = escape_text( $this->{ $key } );	$node->atts->{ value } =~ s/\b$key=[^&=]+/$key=$text/;	return 1;}sub unless_key {	my( $this, $node, $class, $is_end_tag ) = @_;	return 0 if $is_end_tag;	my( $key ) = $class =~ /^unless::(\w+)$/;	return !$this->{ $key };}my %function_map = (	append      => \&append_att,	check       => \&check_key,	check_value => \&check_key_value,	href        => \&sub_href,	href_id     => \&sub_href_id,	href_param  => \&sub_href_param,	if          => \&if_key,	select      => \&select_key,	sub         => \&sub_att,	sub_id      => \&sub_att_id,	sub_param   => \&sub_att_param,	text        => \&sub_text,	unless      => \&unless_key,	value       => \&sub_value,	value_id    => \&sub_value_id,	value_param => \&sub_value_param);################################################################################# SYNOPSIS#	sub visitor## DESCRIPTION##	Visit nodes in the HTML tree and generally emit the HTML for them.  If#	an HTML element has a CLASS attribute, see if any of the class values#	are in either the built-in function map or the $object's class_map.  If#	so, call the functions.## PARAMETERS##	object		A reference to a hash that MUST contain a "class_map"#			key and whose value MUST be a hash for the object's#			class map.##	node		A reference to the node we are visiting.##	depth		How "deep" the node is in the tree.##	is_end_tag	True only if we're being called for the end tag of an#			element.## RETURN VALUE##	If the node is a text node, returns 1; otherwise, if a function was#	called, returns the value of the function.################################################################################{	my( $object, $node, $depth, $is_end_tag ) = @_;	if ( $node->is_text() ) {		##		# For text nodes, simply emit the text as-is and return.		##		print $node->text();		return 1;	}	##	# Default the result to false so that, unless changed by a function in	# the class_map, we will return false for end tags (meaning "do not	# loop").	##	my $result = 0;	##	# Do all CLASSes given by the value of the CLASS attribute.	##	my $class_map = $object->{ class_map };	for ( split( /\s+/, $node->att( 'class' ) ) ) {		my $func;		##

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -