⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 array_visitor.c

📁 该软件可以方便的把HTML网页解析成一棵Tree
💻 C
字号:
/*
**	HTML::Tree
**	mod/XML/Tree/array_visitor.c
**
**	Copyright (C) 2001  Paul J. Lucas
**
**	This program is free software; you can redistribute it and/or modify
**	it under the terms of the GNU General Public License as published by
**	the Free Software Foundation; either version 2 of the License, or
**	(at your option) any later version.
**
**	This program is distributed in the hope that it will be useful,
**	but WITHOUT ANY WARRANTY; without even the implied warranty of
**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**	GNU General Public License for more details.
**
**	You should have received a copy of the GNU General Public License
**	along with this program; if not, write to the Free Software
**	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "HTML_Tree.h"

// local
#include "array_visitor.h"
#include "blessed.h"
#include "container4perl.h"
#include "string4perl.h"
#include "util.h"

#ifndef PJL_NO_NAMESPACES
using namespace HTML_Tree;
using namespace std;
#endif

//*****************************************************************************
//
// SYNOPSIS
//
	array_visitor::array_visitor( SV* param_hash_ref )
//
// DESCRIPTION
//
//	Construct (initialize) an array_visitor.  The root array is created
//	and made mortal; both option flags start out false and are then
//	overridden from the parameter hash, if one is given.
//
// PARAMETERS
//
//	param_hash_ref	A reference to a hash containing key/value parameters,
//			or null for "no parameters".  The recognized keys are
//			"Include_Comments" and "Include_WS_Text"; each value is
//			evaluated for Perl truth.  A non-null value that is not
//			a hash reference causes a croak() with a usage message.
//
//*****************************************************************************
:	av_root_( pjl_newAV() ), av_cur_( av_root_ ),
	include_comments_( false ),
	include_ws_text_( false )
{
	dTHX;
	//
	// We need to make the array mortal so Perl will garbage-collect it
	// once the last reference to it goes away.
	//
	sv_2mortal( (SV*)av_root_ );

	if ( !param_hash_ref )			// no parameters
		return;
	if ( !is_hash_ref( param_hash_ref ) )
		croak( "Usage: $node->as_array([hash_ref])" );

	//
	// Initialize parameters.  hv_fetch() yields a null pointer for a key
	// that is absent, in which case the default (false) is kept.
	//
	HV *const hv = (HV*)SvRV( param_hash_ref );
	SV **sv_ptr;

	sv_ptr = hv_fetch( hv, "Include_Comments", 16, 0 );
	if ( sv_ptr )
		include_comments_ = SvTRUE( *sv_ptr );

	sv_ptr = hv_fetch( hv, "Include_WS_Text", 15, 0 );
	if ( sv_ptr )
		include_ws_text_ = SvTRUE( *sv_ptr );
}

//*****************************************************************************
//
// SYNOPSIS
//
	/* virtual */ bool array_visitor::operator()(
		HTML_Node *node, int, bool is_end_tag
	)
//
// DESCRIPTION
//
//	Build an array-of-hashes data structure representing the entire HTML
//	tree.  (See the description in array_visitor.h.)
//
//	Text (and comment) nodes become scalars tied to a string4perl wrapper.
//	Element nodes become hashes having a "name" key, an "atts" key if the
//	element has attributes, and, for Content_Nodes, a "content" key whose
//	value is a reference to the array that receives the child nodes.
//
// PARAMETERS
//
//	node		The HTML node we're currently visiting.
//
//	is_end_tag	This is set to true only after visiting all of an HTML
//			node's child nodes, if any.
//
// RETURN VALUE
//
//	Per node, returns false if is_end_tag is true (so as not to loop), if
//	a comment is visited while comments are excluded, if a Text_Node is
//	entirely whitespace while such nodes are excluded, or if the node is
//	neither a Text_Node nor an Element_Node (including a null node);
//	otherwise, returns true.
//
//*****************************************************************************
{
	dTHX;
	static char const whitespace[] = " \f\n\r\t\v";

	if ( is_end_tag ) {
		//
		// Done with this node's children: resume appending to the
		// array that was current before we descended.  This relies on
		// every end-tag visit having been preceded by the matching
		// push below.
		//
		av_cur_ = stack_.top();
		stack_.pop();
		return false;			// don't loop
	}

	if ( Text_Node *const t = dynamic_cast< Text_Node* >( node ) ) {
		if ( dynamic_cast< Comment_Node* >( node ) ) {
			//
			// The Text_Node really is a Comment_Node: if requested
			// not to include comments, forget it.
			//
			if ( !include_comments_ )
				return false;
		} else {
			//
			// If requested not to include Text_Nodes that are
			// entirely whitespace and if the node is entirely
			// whitespace: forget it.
			//
			if ( !include_ws_text_ &&
				t->text.find_first_not_of( whitespace ) ==
					string::npos
			)
				return false;
		}

		//
		// Otherwise, create a new Perl scalar, a new string4perl
		// wrapper around the Text_Node's string, tie the scalar to a
		// blessed reference to the wrapper using 'q' magic, and push
		// the scalar onto the current array.
		//
		SV *const sv = newSViv( 0 );	// dummy value
		string4perl *const s = new string4perl( t->text );
		sv_magic( sv, blessed( "string4perl", s ), 'q', 0, 0 );
		av_push( av_cur_, sv );
		return true;
	}

	//
	// All else has failed, so it ought to be either an Empty_Node or a
	// Content_Node.  Check rather than assume: a failed dynamic_cast (or a
	// null node) yields a null pointer that must not be dereferenced.
	// Nothing is pushed onto the stack in that case.
	//
	Element_Node *const e = dynamic_cast< Element_Node* >( node );
	if ( !e )
		return false;

	//
	// Create a Perl hash to hold the "name" and possibly "atts" and
	// "content" keys.
	//
	HV *const hv = newHV();
	hv_store( hv, "name", 4,
		newSVpv( PERL_CONST_CAST(char*)( e->name ), 0 ), 0
	);

	if ( !e->attributes.empty() ) {
		//
		// The node has attributes: create a new Perl hash, a new
		// atts4perl wrapper wrapped around the current
		// Element_Node's attributes, and tie the hash to a blessed
		// reference to the wrapper using 'P' magic.
		//
		HV *const hash = newHV();
		atts4perl *const atts = new atts4perl( e->attributes );
		hv_magic( hash, (GV*)blessed( "atts4perl", atts ), 'P' );
		hv_store( hv, "atts", 4, newRV_noinc( (SV*)hash ), 0 );
	}

	//
	// Push a reference to the hash onto the current array.
	//
	av_push( av_cur_, newRV_noinc( (SV*)hv ) );

	if ( dynamic_cast< Content_Node* >( node ) ) {
		//
		// For Content_Nodes, create a new array for the nodes of the
		// content, make it the new "current" array, and add a
		// "content" key whose value is a reference to the array.
		// The previous current array is saved so the end-tag visit
		// can restore it.
		//
		stack_.push( av_cur_ );
		av_cur_ = newAV();
		hv_store( hv, "content", 7, newRV_noinc( (SV*)av_cur_ ), 0 );
	}
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -