⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 html.c

📁 www工具包
💻 C
📖 第 1 页 / 共 2 页
字号:
/*                                                                   HTML.c
**  SIMPLE HTML PARSER WITHOUT ANY PRESENTATION CODE
**
**  (c) COPYRIGHT MIT 1995.
**  Please first read the full copyright statement in the file COPYRIGH.
**  @(#) $Id: HTML.c,v 1.78 1999/02/22 22:10:11 frystyk Exp $
**
**  This generates a hypertext object.  It converts from the
**  structured stream interface for HTML events into the style-
**  oriented interface of the HText interface.
**
** HISTORY:
**   8 Jul 94  FM   Insulate free() from _free structure element.
*/

/* Library include files */
#include "wwwsys.h"
#include "WWWUtil.h"
#include "WWWCore.h"
#include "WWWHTML.h"
#include "HTML.h"
#include "HTextImp.h"

/*
**  Convenience wrappers that dispatch through the method table (isa) of the
**  target stream owned by the structured object `t'.
**
**  PUTB previously named its first parameter `s' while its expansion used
**  `t', so the argument was ignored and an unrelated `t' was captured from
**  the call site.  The parameter is now `t', like the sibling macros.
**
**  ABORT_TARGET takes the error argument `e' from the scope in which it is
**  expanded; a variable of that name must be visible at the call site.
*/
#define PUTC(t,c)       (*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)       (*(t)->target->isa->put_string)((t)->target, (s))
#define PUTB(t,b,l)     (*(t)->target->isa->put_block)((t)->target, (b), (l))
#define FLUSH_TARGET(t) (*(t)->target->isa->flush)((t)->target)
#define FREE_TARGET(t)  (*(t)->target->isa->_free)((t)->target)
#define ABORT_TARGET(t) (*(t)->target->isa->abort)((t)->target, e)

/* Capacity of the per-parser stack in struct _HTStructured */
#define MAX_NESTING 40

/*
**  Generic stream: only the leading class pointer is declared here, which is
**  all the target macros above dereference.
*/
struct _HTStream {
    const HTStreamClass *       isa;
    /* .... */
};

/*
**  State of one HTML structured-stream parser instance.
*/
struct _HTStructured {
    const HTStructuredClass *   isa;
    HTRequest *                 request;
    HTParentAnchor *            node_anchor;    /* parent for anchors found in the document */
    HTextImp *                  text;           /* receives build events, text and links */
    HTStream *                  target;         /* stream used by the PUTx/FLUSH/FREE/ABORT macros */
    HTChunk *                   title;          /* accumulates text seen inside TITLE */
    BOOL                        in_word;
    SGML_dtd *                  dtd;
    char *                      comment_start;  /* for literate programming */
    char *                      comment_end;
    BOOL                        started;        /* YES once HTEXT_BEGIN has been sent to `text' */
    int                         overflow;       /* presumably counts pushes beyond MAX_NESTING -- TODO confirm */
    int *                       sp;             /* sp[0] is read as the current element number */
    int                         stack[MAX_NESTING];
};

/*
**  Entity values -- for ISO Latin 1 local representation
**
**  This MUST match exactly the table referred to in the DTD!
*/
#define ENTITY_SIZE     67

static char * ISO_Latin1[ENTITY_SIZE] = {
    "\306",     /* capital AE diphthong (ligature) */
    "\301",     /* capital A, acute accent */
    "\302",     /* capital A, circumflex accent */
    "\300",     /* capital A, grave accent */
    "\305",     /* capital A, ring */
    "\303",     /* capital A, tilde */
    "\304",     /* capital A, dieresis or umlaut mark */
    "\307",     /* capital C, cedilla */
    "\320",     /* capital Eth, Icelandic */
    "\311",     /* capital E, acute accent */
    "\312",     /* capital E, circumflex accent */
    "\310",     /* capital E, grave accent */
    "\313",     /* capital E, dieresis or umlaut mark */
    "\315",     /* capital I, acute accent */
    "\316",     /* capital I, circumflex accent */
    "\314",     /* capital I, grave accent */
    "\317",     /* capital I, dieresis or umlaut mark */
    "\321",     /* capital N, tilde */
    "\323",     /* capital O, acute accent */
    "\324",     /* capital O, circumflex accent */
    "\322",     /* capital O, grave accent */
    "\330",     /* capital O, slash */
    "\325",     /* capital O, tilde */
    "\326",     /* capital O, dieresis or umlaut mark */
    "\336",     /* capital THORN, Icelandic */
    "\332",     /* capital U, acute accent */
    "\333",     /* capital U, circumflex accent */
    "\331",     /* capital U, grave accent */
    "\334",     /* capital U, dieresis or umlaut mark */
    "\335",     /* capital Y, acute accent */
    "\341",     /* small a, acute accent */
    "\342",     /* small a, circumflex accent */
    "\346",     /* small ae diphthong (ligature) */
    "\340",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\345",     /* small a, ring */
    "\343",     /* small a, tilde */
    "\344",     /* small a, dieresis or umlaut mark */
    "\347",     /* small c, cedilla */
    "\351",     /* small e, acute accent */
    "\352",     /* small e, circumflex accent */
    "\350",     /* small e, grave accent */
    "\360",     /* small eth, Icelandic */
    "\353",     /* small e, dieresis or umlaut mark */
    "\076",     /* greater than */
    "\355",     /* small i, acute accent */
    "\356",     /* small i, circumflex accent */
    "\354",     /* small i, grave accent */
    "\357",     /* small i, dieresis or umlaut mark */
    "\074",     /* less than */
    "\040",     /* non-breaking space */
    "\361",     /* small n, tilde */
    "\363",     /* small o, acute accent */
    "\364",     /* small o, circumflex accent */
    "\362",     /* small o, grave accent */
    "\370",     /* small o, slash */
    "\365",     /* small o, tilde */
    "\366",     /* small o, dieresis or umlaut mark */
    "\042",     /* double quote sign - June 94 */
    "\337",     /* small sharp s, German (sz ligature) */
    "\376",     /* small thorn, Icelandic */
    "\372",     /* small u, acute accent */
    "\373",     /* small u, circumflex accent */
    "\371",     /* small u, grave accent */
    "\374",     /* small u, dieresis or umlaut mark */
    "\375",     /* small y, acute accent */
    "\377",     /* small y, dieresis or umlaut mark */
};

/* Entity value table currently in effect; starts out as ISO Latin 1 */
PRIVATE char ** CurrentEntityValues = ISO_Latin1;

/*
**  Select the character set used for entity values.
**
**  Only HTML_ISO_LATIN1 is supported: it (re)installs the ISO_Latin1 table
**  and returns YES.  Any other value emits a trace message, leaves the
**  current table untouched and returns NO.
*/
PUBLIC BOOL HTMLUseCharacterSet (HTMLCharacterSet i)
{
    if (i == HTML_ISO_LATIN1) {
        CurrentEntityValues = ISO_Latin1;
        return YES;
    } else {
        HTTRACE(SGML_TRACE, "HTML Parser. Doesn't support this character set\n");
        return NO;
    }
}

/*
**  Handle a block of `l' bytes of character data starting at `b'.
**
**  The first data or element seen triggers a single HTEXT_BEGIN build event.
**  The block is then routed according to the current element (me->sp[0]):
**  comment content is dropped, TITLE content is appended to me->title and
**  also passed on as text, and everything else is passed to
**  HTextImp_addText().  Always returns HT_OK.
*/
PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
{
    if (!me->started) {
        HTextImp_build(me->text, HTEXT_BEGIN);
        me->started = YES;
    }

    /* Look at what we got */
    switch (me->sp[0]) {

    case HTML_COMMENT:
        break;                                  /* Do Nothing */

    case HTML_TITLE:
        HTChunk_putb(me->title, b, l);
        /* Fall through */

    default:
        HTextImp_addText(me->text, b, l);
    }
    return HT_OK;
}

/*
**  Handle a single character by forwarding it to HTML_write() as a
**  one-byte block.
*/
PRIVATE int HTML_put_character (HTStructured * me, char c)
{
    return HTML_write(me, &c, sizeof(char));
}

/*
**  Handle a NUL-terminated string by forwarding it to HTML_write(); the
**  length is strlen(s) converted to int.
*/
PRIVATE int HTML_put_string (HTStructured * me, const char* s)
{
    return HTML_write(me, s, (int) strlen(s));
}

/*
**  Handle the start tag of element `element_number'.
**
**  `present' and `value' are indexed by the attribute constants of the
**  element (HTML_A_HREF, HTML_IMG_SRC, ...).  As in HTML_write(), the first
**  call sends HTEXT_BEGIN.  For elements that carry a URI-valued attribute
**  the URI is passed to HTAnchor_findChildAndLink() together with
**  me->node_anchor, and the resulting child anchor is reported through
**  HTextImp_foundLink().
**
**  NOTE: this excerpt ends before the end of the function.
*/
PRIVATE void HTML_start_element (HTStructured * me,
                                 int            element_number,
                                 const BOOL *   present,
                                 const char **  value)
{
    HTChildAnchor * address = NULL;
    if (!me->started) {
        HTextImp_build(me->text, HTEXT_BEGIN);
        me->started = YES;
    }

    /* Look at what element was started */
    switch (element_number) {
    case HTML_A:
        if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
            address = HTAnchor_findChildAndLink(
                me->node_anchor,                                        /* parent */
                present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,       /* Tag */
                value[HTML_A_HREF],                                     /* Address */
                present[HTML_A_REL] && value[HTML_A_REL] ?
                (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);

            /*
            **  Use the TITLE attribute as the destination's title, but only
            **  when the destination does not already have one.
            **  NOTE(review): `address' is used here without a NULL check --
            **  confirm that HTAnchor_findChildAndLink() cannot fail.
            */
            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
                HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
                if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
            }
            HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]);
        }
        break;

    case HTML_AREA:
        if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_AREA_HREF], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]);
        }
        break;

    case HTML_BASE:
        /* BASE only updates the base address of the document anchor */
        if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
            HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
            HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]);
        }
        break;

    case HTML_BODY:
        if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_BODY_BACKGROUND], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]);
        }
        break;

    case HTML_FORM:
        if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_FORM_ACTION], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
                               address, present, value);
        }
        break;

    case HTML_FRAME:
        if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_FRAME_SRC], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]);
        }
        break;

    case HTML_INPUT:
        if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_INPUT_SRC], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
                               address, present, value);
        }
        break;

    case HTML_IMG:
        if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_IMG_SRC], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
                               address, present, value);
        }
        break;

    case HTML_ISINDEX:
        HTAnchor_setIndex(me->node_anchor);
        break;

    case HTML_LINK:
        if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
            HTParentAnchor * dest = NULL;
            /*
            **  NOTE(review): the guard above tests HTML_LINK_HREF, but the
            **  tag and address below are indexed with HTML_A_NAME and
            **  HTML_A_HREF.  This looks like a copy from the HTML_A case;
            **  verify against the attribute tables in the DTD header
            **  whether HTML_LINK_HREF was intended.
            */
            address = HTAnchor_findChildAndLink(
                me->node_anchor,                                        /* parent */
                present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,       /* Tag */
                present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL,       /* Address */
                NULL);                                                  /* Rels */
            dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));

            /* If forward reference */
            if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
                char * strval = NULL;
                char * ptr = NULL;
                char * relation = NULL;
                /* Tokenize a private copy of REL; one link is added per token */
                StrAllocCopy(strval, value[HTML_LINK_REL]);
                ptr = strval;
                while ((relation = HTNextLWSToken(&ptr)) != NULL) {
                    HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
                               (HTLinkType) HTAtom_caseFor(relation),
                               METHOD_INVALID);
                }
                HT_FREE(strval);
            }

            /* If reverse reference */
            if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
                char * strval = NULL;
                char * ptr = NULL;
                char * relation = NULL;
                StrAllocCopy(strval, value[HTML_LINK_REV]);
                ptr = strval;
                while ((relation = HTNextLWSToken(&ptr)) != NULL) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -