⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tree.xs

📁 该软件可以方便的把HTML网页解析成一棵Tree
💻 XS
📖 第 1 页 / 共 2 页
字号:
/*
**	HTML::Tree
**	Tree.xs
**
**	Copyright (C) 1999  Paul J. Lucas
**
**	This program is free software; you can redistribute it and/or modify
**	it under the terms of the GNU General Public License as published by
**	the Free Software Foundation; either version 2 of the License, or
**	(at your option) any later version.
**
**	This program is distributed in the hope that it will be useful,
**	but WITHOUT ANY WARRANTY; without even the implied warranty of
**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**	GNU General Public License for more details.
**
**	You should have received a copy of the GNU General Public License
**	along with this program; if not, write to the Free Software
**	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "HTML_Tree.h"

// Perl
#define	PERL_NO_GET_CONTEXT
extern "C" {
#include <XSUB.h>
}

// standard
#include <algorithm>
#include <fstream>

// local
#include "array_visitor.h"
#include "blessed.h"
#include "container4perl.h"
#include "managed_ptr.h"
#include "mmap_file.h"
#include "perl_predicate.h"
#include "perl_visitor.h"
#include "string4perl.h"
#include "util.h"

#ifndef	PJL_NO_NAMESPACES
using namespace HTML_Tree;
using namespace PJL;
using namespace std;
#endif

//
// NODE_CAST: dynamic_cast the pointer returned by MANAGED_PTR->ptr() to T*.
// As with any pointer dynamic_cast, the result is null when the object is not
// a T.
//
#define	NODE_CAST(T,MANAGED_PTR)	dynamic_cast<T*>( (MANAGED_PTR)->ptr() )
//
// NODE_DECL: declare "T *const VAR" initialized by NODE_CAST(T,EXPR).  Below it
// is always used as an if-condition, so the guarded branch runs only when the
// cast succeeded.
//
#define NODE_DECL(T,VAR,EXPR)		T *const VAR = NODE_CAST(T,EXPR)

// Defined elsewhere; not called in the part of the file shown here.
extern void array2tree( SV*, Content_Node* );
extern void sv2tree( SV*, Content_Node* );

//##############################################################################

MODULE = HTML::Tree		PACKAGE = HTML::Tree
#
#	This is the "main" package, the one that the user sees.
#
################################################################################

################################################################################
#
# SYNOPSIS
#
SV*
managed_node::as_array(param_hash_ref = 0)
	SV* param_hash_ref
#
# DESCRIPTION
#
#	Build an array-of-hashes data structure starting from the current node.
#	The work is done by an array_visitor that is handed to the node's
#	visit() member.
#
# PARAMETERS
#
#	param_hash_ref	(Optional.)  A reference to a hash of parameters.  It
#			is passed unchecked to the array_visitor constructor.
#
# RETURN VALUE
#
#	Returns a reference to said array (whatever array_visitor::array()
#	yields).
#
################################################################################
CODE:
	array_visitor v( param_hash_ref );
	(*THIS)->visit( v );
	RETVAL = v.array();
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
SV*
managed_node::as_string(param_hash_ref = 0)
	SV* param_hash_ref
#
# DESCRIPTION
#
#	Construct the HTML string representation starting from the current
#	node.
#
# PARAMETERS
#
#	param_hash_ref	(Optional.)  A reference to a hash of parameters.  The
#			only parameter currently supported is Pretty_Print,
#			whose value is read as an integer.
#
# RETURN VALUE
#
#	Return the string as a new SV*.
#
# ERRORS
#
#	Croaks with a usage message when param_hash_ref is given but
#	is_hash_ref() rejects it.
#
################################################################################
CODE:
	// -1 is what reaches as_string() when the caller gives no Pretty_Print
	// (presumably meaning "use the default" -- TODO confirm in HTML_Node).
	int pretty_print = -1;
	if ( param_hash_ref ) {
		if ( !is_hash_ref( param_hash_ref ) )
			croak( "Usage: $node->as_string([hash_ref])" );
		HV *const hv = (HV*)SvRV( param_hash_ref );
		// 12 == strlen( "Pretty_Print" ); the final 0 means "don't create".
		if ( SV **const sv_ptr = hv_fetch( hv, "Pretty_Print", 12, 0 ) )
			pretty_print = SvIV( *sv_ptr );
	}
	// A length of 0 makes newSVpv() measure the string with strlen(); the
	// temporary string lives until the end of this full expression.
	RETVAL = newSVpv(
		PERL_CONST_CAST(char*)(
			(*THIS)->as_string( pretty_print ).c_str()
		), 0
	);
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
char const*
managed_node::att(key,new_value = 0)
	char const* key
	SV* new_value
#
# DESCRIPTION
#
#	Gets or sets the value of an attribute of an Element_Node.  If the new
#	value is undef, the attribute is deleted.  The new_value has to be an
#	SV* rather than simply a char const* so we can distinguish between no
#	new_value being given and an undef being given.
#
# PARAMETERS
#
#	key		The attribute key.
#
#	new_value	(Optional.)  The new attribute value, or undef to
#			delete the attribute.
#
# RETURN VALUE
#
#	Returns the value of an attribute of an Element_Node (after setting
#	it).  A null pointer is returned after a delete and when the
#	attribute is looked up but not present.
#
# ERRORS
#
#	Croaks when the node is not an Element_Node.
#
################################################################################
CODE:
	if ( NODE_DECL( Element_Node, e, THIS ) )
		if ( new_value )
			if ( !SvOK( new_value ) ) {	// undef: delete att.
				e->attributes.erase( key );
				RETVAL = 0;
			} else
				// set: store the string and return the stored copy
				RETVAL = (
					e->attributes[ key ] =
						SvPV( new_value, PL_na )
				).c_str();
		else {
			// get: use find() so that a lookup never inserts a key
			Element_Node::attribute_map::iterator const
				i = e->attributes.find( key );
			RETVAL = i != e->attributes.end() ?
				i->second.c_str() : 0;
		}
	else
		croak( "HTML::Tree::att(): object isn't an Element_Node" );
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
SV*
managed_node::atts()
#
# DESCRIPTION
#
#	Returns a reference to a tied hash of all of an Element_Node's
#	attribute key/value pairs.
#
# RETURN VALUE
#
#	(As above.)
#
# ERRORS
#
#	Croaks when the node is not an Element_Node.
#
################################################################################
CODE:
	if ( NODE_DECL( Element_Node, e, THIS ) ) {
		//
		// Create a new Perl hash, a new atts4perl wrapper wrapped
		// around the current Element_Node's attributes, and tie the
		// hash to a blessed reference to the wrapper using 'P' magic.
		//
		// NOTE(review): the wrapper is allocated with new here;
		// presumably the atts4perl package's DESTROY frees it --
		// TODO confirm.
		//
		HV *const hash = newHV();
		atts4perl *const atts = new atts4perl( e->attributes );
		hv_magic( hash, (GV*)blessed( "atts4perl", atts ), 'P' );
		RETVAL = newRV_noinc( (SV*)hash );
	} else
		croak( "HTML::Tree::atts(): object isn't an Element_Node" );
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
SV*
managed_node::children()
#
# DESCRIPTION
#
#	Returns a reference to a tied array of all of a Content_Node's child
#	nodes or an empty array if a Content_Node has no child nodes.
#
# RETURN VALUE
#
#	(As above.)
#
# ERRORS
#
#	Croaks when the node is not a Content_Node.
#
################################################################################
CODE:
	if ( NODE_DECL( Content_Node, c, THIS ) ) {
		//
		// Create a new Perl array and tie it to a blessed reference to
		// a Content_Node using 'P' magic.
		//
		AV *const av = newAV();
		sv_magic( (SV*)av, blessed( "Content_Node", c ), 'P', 0, 0 );
		RETVAL = newRV_noinc( (SV*)av );
	} else
		croak( "HTML::Tree::children(): object isn't a Content_Node" );
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
void
managed_node::delete()
#
# DESCRIPTION
#
#	Delete a node (and all of its child nodes, if any) from the tree.
#
################################################################################
CODE:
	// Setting "manage" on the node will cause it to delete the underlying
	// node when an assignment is done.
	//
	THIS->manage( true );
	//
	// We then assign 0 to cause the deletion to occur.  When the
	// managed_node itself goes away at some point in the future, the
	// deletion of a null pointer is guaranteed to be harmless by the C++
	// language definition.
	//
	*THIS = 0;

################################################################################
#
# SYNOPSIS
#
void
managed_node::DESTROY()
#
# DESCRIPTION
#
#	Destroys a managed node.  If the node is actually managed, the entire
#	underlying C++ (sub)tree starting at this node will be deleted.
#
# NOTE
#
#	No explicit code is needed since the xsubpp compiler is smart enough to
#	generate the correct code to destroy a C++ object.
#
################################################################################

################################################################################
#
# SYNOPSIS
#
managed_node*
managed_node::find_if(func_ref)
	SV* func_ref
#
# DESCRIPTION
#
#	Find a node for which the given predicate function is true starting at
#	this node.
#
# PARAMETERS
#
#	func_ref	A reference to a function.  It is wrapped in a
#			perl_predicate without being checked here.
#
# RETURN VALUE
#
#	If found, returns a pointer to a newly allocated managed_node that
#	refers to the node; returns null otherwise.
#
################################################################################
CODE:
	// CLASS is not referenced in this body; presumably the managed_node*
	// output typemap reads it to bless the result -- TODO confirm.
	static char PERL_CONST CLASS[] = "HTML::Tree";
	// "::find_if" is the algorithm, not this xsub of the same name.
	HTML_Node::iterator const it = ::find_if(
		(*THIS)->begin(), (*THIS)->end(), perl_predicate( func_ref )
	);
	RETVAL = it != (*THIS)->end() ? new managed_node( &*it ) : 0;
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
managed_node*
managed_node::find_name(name)
	char const* name
#
# DESCRIPTION
#
#	Find an Element_Node having the given name starting at this node.
#
# PARAMETERS
#
#	name	The name of the Element_Node to find.
#
# RETURN VALUE
#
#	If found, returns a pointer to a newly allocated managed_node that
#	refers to the node; returns null otherwise, including when the name is
#	not present in the element_map.
#
################################################################################
CODE:
	// CLASS is not referenced in this body; presumably the managed_node*
	// output typemap reads it to bless the result -- TODO confirm.
	static char PERL_CONST CLASS[] = "HTML::Tree";
	static element_map const &elements = element_map::instance();
	element_map::const_iterator const element = elements.find( name );
	if ( element == elements.end() )
		RETVAL = 0;
	else {
		// Search by comparing each node against a temporary
		// Element_Node built from the element_map entry.
		HTML_Node::iterator const it = ::find(
			(*THIS)->begin(), (*THIS)->end(),
			Element_Node( element->first, element->second )
		);
		RETVAL = it != (*THIS)->end() ? new managed_node( &*it ) : 0;
	}
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
int
managed_node::is_comment()
#
# DESCRIPTION
#
#	Determines if the current node is-a Comment_Node.
#
# RETURN VALUE
#
#	Returns true (1) only if the HTML node is-a Comment_Node; false (0)
#	otherwise.
#
################################################################################
CODE:
	RETVAL = !!NODE_CAST( Comment_Node, THIS );
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
int
managed_node::is_element()
#
# DESCRIPTION
#
#	Determines if the current node is-an Element_Node.
#
# RETURN VALUE
#
#	Returns true (1) only if the HTML node is-an Element_Node; false (0)
#	otherwise.
#
################################################################################
CODE:
	RETVAL = !!NODE_CAST( Element_Node, THIS );
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
int
managed_node::is_text()
#
# DESCRIPTION
#
#	Determines if the current node is-a Text_Node.
#
# RETURN VALUE
#
#	Returns true (1) only if the HTML node is-a Text_Node; false (0)
#	otherwise.
#
################################################################################
CODE:
	RETVAL = !!NODE_CAST( Text_Node, THIS );
OUTPUT:
	RETVAL

################################################################################
#
# SYNOPSIS
#
char const*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -