⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgml.c

📁 www工具包. 这是W3C官方支持的www支撑库. 其中提供通用目的的客户端的WebAPI: complete HTTP/1.1 (with caching, pipelining, PUT, POS
💻 C
📖 第 1 页 / 共 2 页
字号:
/*									 SGML.c
**	GENERAL SGML PARSER CODE
**
**	(c) COPYRIGHT MIT 1995.
**	Please first read the full copyright statement in the file COPYRIGH.
**	@(#) $Id: SGML.c,v 1.53 1999/05/18 21:38:57 frystyk Exp $
**
**	This module implements an HTStream object. To parse an
**	SGML file, create this object which is a parser. The object
**	is (currently) created by being passed a DTD structure,
**	and a target HTStructured object at which to throw the parsed stuff.
**
**	 6 Feb 93  	Binary searches used. Interface modified.
**	 8 Jul 94  FM	Insulate free() from _free structure element.
**	Nov 1996   msa	Strip down the parser to minimal HTML tokenizer,
**			Stop allocating space for the attribute values,
**			use pointers to the string chunk instead.
*/

#include <assert.h>

/* Library include files */
#include "wwwsys.h"
#include "HTUtils.h"
#include "HTString.h"
#include "HTChunk.h"
#include "SGML.h"

/* Marker stored in current_attribute_number when no attribute is selected */
#define INVALID (-1)

/*	The State (context) of the parser
**
**	This is passed with each call to make the parser reentrant
**
**	The S_esc, S_dollar, S_paren and S_nonascii_text states exist only
**	when ISO_2022_JP is defined at compile time.
*/
typedef enum _sgml_state
    {
	S_text, S_literal, S_tag, S_tag_gap, 
	S_attr, S_attr_gap, S_equals, S_value, S_after_open,
	S_nl, S_nl_tago,
	S_ero, S_cro,
#ifdef ISO_2022_JP
	S_esc, S_dollar, S_paren, S_nonascii_text,
#endif
	S_squoted, S_dquoted, S_end, S_entity, S_junk_tag,
	S_md, S_md_sqs, S_md_dqs, S_com_1, S_com, S_com_2, S_com_2a
    } sgml_state;

/*	Internal Context Data Structure
**	-------------------------------
**	One instance holds the complete tokenizer state between calls.
*/
struct _HTStream
    {
	const HTStreamClass *isa;	/* inherited from HTStream */
	const SGML_dtd *dtd;		/* supplies the tag and entity tables */
	HTStructuredClass *actions;	/* target class  */
	HTStructured *target;		/* target object */
	HTTag *current_tag;		/* NULL while skipping an unknown tag */
	int current_attribute_number;	/* index into present[]/value[], or INVALID */
	SGMLContent contents;		/* current content mode */
	HTChunk *string;		/* chunk buffer holding the collected token text */
	int token;			/* ptr into string buffer */
	sgml_state state;		/* current tokenizer state */
	BOOL present[MAX_ATTRIBUTES];	/* Flags: attribute is present? 
*/
	int value[MAX_ATTRIBUTES];	/* Offset pointers to the string */
    };

/*	Forward a single character (PUTC) or a block of l bytes starting at b
**	(PUTB) to the target through its action table. Both macros expand to
**	an expression that uses a variable named `context`, so they can only
**	be used where such a variable is in scope.
*/
#define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
#define PUTB(b,l) ((*context->actions->put_block)(context->target, b, l))

/*	Find Attribute Number
**	---------------------
**	Binary search of tag->attributes for the name s, compared with
**	strcasecomp().
**
** On entry,
**	tag	tag whose attribute table is searched (dereferenced
**		unconditionally)
**	s	attribute name to look up
**
** On exit,
**	returns	index of the matching attribute, or -1 if there is none
**
**	NOTE(review): a halving search presumes the attribute table is sorted
**	by name in strcasecomp() order -- confirm against the DTD tables.
*/
PRIVATE int SGMLFindAttribute  (HTTag* tag, const char * s)
    {
	HTAttr* attributes = tag->attributes;
	int high, low, i, diff;		/* Binary search for attribute name */
	assert(tag->number_of_attributes <= MAX_ATTRIBUTES);
	/*
	** The search window is [low, high). diff is assigned in the loop
	** body before the iteration expression reads it, so it is never
	** used uninitialized.
	*/
	for(low=0, high=tag->number_of_attributes;
	    high > low ;
	    diff < 0 ? (low = i+1) : (high = i) )
	    {
		i = (low + (high-low)/2);
		diff = strcasecomp(attributes[i].name, s);
		if (diff==0)
			return i;	/* success: found it */
	    }
	return -1;
    }

/*	Handle Attribute
**	----------------
**	Select the attribute of the current tag that the name s refers to.
**
**	If the name is found, its index is stored in
**	context->current_attribute_number and present[index] is set to YES.
**	If there is no current tag, or the name is not one of its attributes,
**	current_attribute_number is set to INVALID.
*/
/* PUBLIC const char * SGML_default = "";   ?? */
PRIVATE void handle_attribute_name (HTStream * context, const char * s)
    {
	HTTag * tag = context->current_tag;
	/* Note: if tag==NULL, we are skipping unknown tag... */
	if (tag)
	    {
		int i = SGMLFindAttribute(tag, s);
		if (i >= 0)
		    {
			context->current_attribute_number = i;
			context->present[i] = YES;
			return;
		    }
		HTTRACE(SGML_TRACE, "Unknown attribute %s for tag %s\n" _
			s _ context->current_tag->name);
	    }
	context->current_attribute_number = INVALID;	/* Invalid */
    }

/*	Handle attribute value
**	----------------------
**	Record context->token as the value offset of the attribute currently
**	selected by current_attribute_number. Only the offset is stored, not
**	a pointer. The selection is cleared afterwards in every case.
*/
PRIVATE void handle_attribute_value (HTStream * context)
    {
	/* Deal with attributes only if tag is known,
	   ignore silently otherwise */
	if (context->current_tag)
	    {
		if (context->current_attribute_number != INVALID)
			context->value[context->current_attribute_number] =
				context->token;
		else {
		    /* No attribute selected: the value is only traced */
		    char * data = HTChunk_data(context->string);
		    HTTRACE(SGML_TRACE, "Attribute value %s ignored\n" _
			   data ? data+context->token : "<null>");
		}
	    }
	context->current_attribute_number = INVALID; /* can't have two assignments! 
*/
    }

/*	Handle entity
**	-------------
**	Look up the entity name collected in context->string in the DTD's
**	entity_names table using a case-sensitive (strcmp) binary search.
**
** On entry,
**	s	contains the entity name zero terminated
**		(s is the data pointer of context->string)
**
** On exit,
**	name found:	the target's put_entity action has been called with
**			the index of the entity in entity_names
**	not found:	the target's unparsed_entity action has been called
**			with the chunk's data and size
**
**	NOTE(review): a halving search presumes entity_names is sorted in
**	strcmp() order -- confirm against the DTD tables.
*/
PRIVATE void handle_entity (HTStream * context)
    {
	const char ** entities = context->dtd->entity_names;
	const char *s = HTChunk_data(context->string);
	int high, low, i, diff;
	/* Search window is [low, high); diff is set in the body before the
	   iteration expression reads it */
	for(low=0, high = context->dtd->number_of_entities;
	    high > low ;
	    diff < 0 ? (low = i+1) : (high = i))
	    {
		i = (low + (high-low)/2);
		diff = strcmp(entities[i], s);	/* Case sensitive! */
		if (diff==0)
		    {	/* success: found it */
			(*context->actions->put_entity)(context->target, i);
			return;
		    }
	    }
	/* If entity string not found */
	HTTRACE(SGML_TRACE, "Unknown entity %s\n" _ s);
	(*context->actions->unparsed_entity)
	    (context->target, HTChunk_data(context->string), HTChunk_size(context->string));
    }

/*	End element
**	-----------
**	Report the end of `tag` to the target. The element is identified by
**	the position of `tag` within context->dtd->tags (pointer difference).
*/
PRIVATE void end_element (HTStream * context, HTTag *tag)
    {
	HTTRACE(SGML_TRACE, "End   </%s>\n" _ tag->name);
	(*context->actions->end_element)
		(context->target, tag - context->dtd->tags);
    }

/*	Start an element
**	----------------
**	Report the start of context->current_tag to the target, together
**	with the present[] flags and the attribute value strings. Also
**	switches context->contents to the content mode of the tag.
**
**	context->current_tag is dereferenced without a check, so it must be
**	non-NULL when this is called.
*/
PRIVATE void start_element (HTStream * context)
    {
	int i;
	char *value[MAX_ATTRIBUTES];
	HTTag *tag = context->current_tag;
	HTTRACE(SGML_TRACE, "Start <%s>\n" _ tag->name);
	context->contents = tag->contents;
	/*
	** Build the actual pointers to the value strings stored in the
	** chunk buffer. (Must use offsets while collecting the values,
	** because the string chunk may get resized during the collection
	** and potentially relocated).
	** A negative offset means "no value" and becomes a NULL pointer.
	*/
	for (i = 0; i < MAX_ATTRIBUTES; ++i)
		value[i] = context->value[i] < 0 ? 
NULL :
			HTChunk_data(context->string) + context->value[i];
	/* The element is identified by the index of the tag in dtd->tags */
	(*context->actions->start_element)
		(context->target,
		 tag - context->dtd->tags,
		 context->present,
		 (const char**)value);  /* coerce type for think c */
    }

/*		Find Tag in DTD tag list
**		------------------------
**
** On entry,
**	dtd	points to dtd structure including valid tag list
**	string	points to name of tag in question
**
** On exit,
**	returns:
**		NULL		tag not found
**		else		address of tag structure in dtd
**
**	NOTE(review): a halving search presumes dtd->tags is sorted by name
**	in strcasecomp() order -- confirm against the DTD tables.
*/
PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
    {
	int high, low, i, diff;
	/* Search window is [low, high); diff is set in the body before the
	   iteration expression reads it */
	for(low=0, high=dtd->number_of_tags;
	    high > low ;
	    diff < 0 ? (low = i+1) : (high = i))
	    {  /* Binary search */
		i = (low + (high-low)/2);
		diff = strcasecomp(dtd->tags[i].name, string);	/* Case insensitive */
		if (diff==0)			/* success: found it */
			return &dtd->tags[i];
	    }
	return NULL;
    }

/*________________________________________________________________________
**			Public Methods
*/

/*	Could check that we are back to bottom of stack! @@  */

/*	Flush: hand the request on to the target and return its result. */
PRIVATE int SGML_flush  (HTStream * context)
    {
	return (*context->actions->flush)(context->target);
    }

/*	Free the target first, then the parser itself.
**
**	If the target's _free action returns anything other than HT_OK, that
**	status is returned and neither the string chunk nor the context is
**	released. Otherwise both are released and HT_OK is returned.
*/
PRIVATE int SGML_free  (HTStream * context)
    {
	int status;
	if ((status = (*context->actions->_free)(context->target)) != HT_OK)
		return status;
	HTChunk_delete(context->string);
	HT_FREE(context);
	return HT_OK;
    }

/*	Abort the target, then release the string chunk and the context.
**	The result of the target's abort action is not examined; HT_ERROR
**	is always returned.
*/
PRIVATE int SGML_abort  (HTStream * context, HTList * e)
    {
	(*context->actions->abort)(context->target, e);
	HTChunk_delete(context->string);
	HT_FREE(context);
	return HT_ERROR;
    }

PRIVATE int SGML_write (HTStream * context, const char * b, int l)
    {
	const SGML_dtd	*dtd = context->dtd;
	HTChunk	*string = context->string;
	const char *text = b;
	int count = 0;
	
	while (l-- > 0)
	    {
		char c = *b++;
		switch(context->state)
		    {
		    got_element_open:
			/*
			** The label is jumped when the '>' of a the element
			** start tag has been detected. 
This DOES NOT FALL TO			** THE CODE S_after_open, only processes the tag and			** sets the state (c should still contain the			** terminating character of the tag ('>'))			*/			if (context->current_tag && context->current_tag->name)				start_element(context);			context->state = S_after_open;			break;		    case S_after_open:			/*			** State S_after_open is entered only for single			** character after the element opening tag to test			** against newline. Strip one trainling newline only			** after opening nonempty element.  - SGML: Ugh!			*/			text = b;			count = 0;			if (c == '\n' && (context->contents != SGML_EMPTY))			    {				context->state = S_text;				break;			    }			--text;			goto S_text;		    S_text:			context->state = S_text;		    case S_text:#ifdef ISO_2022_JP			if (c == '\033')			    {				context->state = S_esc;				++count;				break;			    }#endif /* ISO_2022_JP */			if (c == '&')			    {				if (count > 0)					PUTB(text, count);				count = 0;				HTChunk_clear(string);				context->state = S_ero;			    }			else if (c == '<')			    {				if (count > 0)					PUTB(text, count);				count = 0;				HTChunk_clear(string);				/* should scrap LITERAL, and use CDATA and				   RCDATA -- msa */				context->state =					(context->contents == SGML_LITERAL) ?						S_literal : S_tag;			    }			else if (c == '\n')			    	/* Newline - ignore if before end tag! */				context->state = S_nl;			else				++count;			break;		    case S_nl:			if (c == '<')			    {				if (count > 0)					PUTB(text, count);				count = 0;				HTChunk_clear(string);				context->state =					(context->contents == SGML_LITERAL) ?						
S_literal : S_nl_tago;			    }			else			    {				++count;				goto S_text;			    }			break;		    case S_nl_tago:	/* Had newline and tag opener */			if (c != '/')				PUTC('\n'); /* Only ignore newline before </ */			context->state = S_tag;			goto handle_S_tag;#ifdef ISO_2022_JP		    case S_esc:			if (c=='$')				context->state = S_dollar;			else if (c=='(')				context->state = S_paren;			else				context->state = S_text;			++count;			break;		    case S_dollar:			if (c=='@' || c=='B')				context->state = S_nonascii_text;			else				context->state = S_text;			++count;			break;		    case S_paren:			if (c=='B' || c=='J')				context->state = S_text;			else				context->state = S_text;			++count;			break;		    case S_nonascii_text:			if (c == '\033')				context->state = S_esc;			++count;			break;#endif /* ISO_2022_JP */			/* In literal mode, waits only for specific end tag!			** Only foir compatibility with old servers.			*/		    case S_literal:			HTChunk_putc(string, c);			if ( TOUPPER(c) !=			    ((HTChunk_size(string) == 1) ? '/'			     : context->current_tag->name[HTChunk_size(string)-2]))			    {				/* If complete match, end literal */				if ((c == '>') &&				    (!context->current_tag->name[HTChunk_size(string)-2]))				    {					end_element						(context,context->current_tag);					/*					  ...setting SGML_MIXED below is a					  bit of kludge, but a good guess that					  currently works, anything other than					  SGML_LITERAL would work... -- msa */					context->contents = SGML_MIXED;				    }				else				    {					/* If Mismatch: recover string. */					PUTC( '<');					PUTB(HTChunk_data(string), HTChunk_size(string));				    }				context->state = S_text;				text = b;				count = 0;			    }			break;			/*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -