⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 html.c

📁 firtext搜索引擎源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/*									 HTML.c
**	SIMPLE HTML PARSER WITHOUT ANY PRESENTATION CODE
**
**	(c) COPYRIGHT MIT 1995.
**	Please first read the full copyright statement in the file COPYRIGH.
**	@(#) $Id: HTML.c,v 1.81 2000/08/09 10:43:08 kahan Exp $
**
**	This generates a hypertext object.  It converts from the
**	structured stream interface of HTML events into the style-
**	oriented interface of the HText interface.
**
** HISTORY:
**	 8 Jul 94  FM	Insulate free() from _free structure element.
*/

/* Library include files */
#include "wwwsys.h"
#include "WWWUtil.h"
#include "WWWCore.h"
#include "WWWHTML.h"
#include "HTML.h"
#include "HTextImp.h"

/*
**	Shorthands for calling through the method table of the target stream
**	of a structured object `t'.
**
**	PUTB used to declare its first parameter as `s' while expanding `(t)',
**	so the argument was ignored and whatever `t' happened to be in scope at
**	the call site was used instead.  The parameter is now named `t' like
**	its siblings.
**
**	ABORT_TARGET expands a free identifier `e': the call site must have a
**	variable of that name in scope.
*/
#define PUTC(t,c)	(*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)	(*(t)->target->isa->put_string)((t)->target, (s))
#define PUTB(t,b,l)	(*(t)->target->isa->put_block)((t)->target, (b), (l))
#define FLUSH_TARGET(t)	(*(t)->target->isa->flush)((t)->target)
#define FREE_TARGET(t)	(*(t)->target->isa->_free)((t)->target)
#define ABORT_TARGET(t)	(*(t)->target->isa->abort)((t)->target, e)

/* Number of slots in the element stack of an HTStructured object */
#define MAX_NESTING 40

/*
**	Generic stream: only the leading method table pointer is declared here.
*/
struct _HTStream {
    const HTStreamClass *	isa;
    /* .... */
};

/*
**	State of one HTML structured stream.
*/
struct _HTStructured {
    const HTStructuredClass * 	isa;		/* method table */
    HTRequest *			request;
    HTParentAnchor * 		node_anchor;	/* parent of all anchors created for links */
    HTextImp * 			text;		/* receives build, text and link events */
    HTStream *			target;		/* stream addressed by the PUTC/PUTS/... macros */
    HTChunk * 			title;		/* collects text seen while in HTML_TITLE */
    BOOL			in_word;
    SGML_dtd *			dtd;
    char *			comment_start;	/* for literate programming */
    char *			comment_end;
    BOOL			started;	/* YES once HTEXT_BEGIN has been sent to text */
    int				overflow;
    int * 			sp;		/* sp[0] is read as the current element number */
    int	 			stack[MAX_NESTING];
};

/*
** 	Entity values -- for ISO Latin 1 local representation
**	This MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[HTML_ENTITIES] = {
/* 00 */
    "\306",	/* capital AE diphthong (ligature) */
    "\301",	/* capital A, acute accent */
    "\302",	/* capital A, circumflex accent */
    "\300",	/* capital A, grave accent */
    "\305",	/* capital A, ring */
    "\303",	/* capital A, tilde */
    "\304",	/* capital A, dieresis or umlaut mark */
    "\307",	/* capital C, cedilla */
    "\320",	/* capital Eth, Icelandic */
    "\311",	/* capital E, acute accent */
/* 10 */
    "\312",	/* capital E, circumflex accent */
    "\310",	/* capital E, grave accent */
    "\313",	/* capital E, dieresis or umlaut mark */
    "\315",	/* capital I, acute accent */
    "\316",	/* capital I, circumflex accent */
    "\314",	/* capital I, grave accent */
    "\317",	/* capital I, dieresis or umlaut mark */
    "\321",	/* capital N, tilde */
    "\323",	/* capital O, acute accent */
    "\324",	/* capital O, circumflex accent */
/* 20 */
    "\322",	/* capital O, grave accent */
    "\330",	/* capital O, slash */
    "\325",	/* capital O, tilde */
    "\326",	/* capital O, dieresis or umlaut mark */
    "\336",	/* capital THORN, Icelandic */
    "\332",	/* capital U, acute accent */
    "\333",	/* capital U, circumflex accent */
    "\331",	/* capital U, grave accent */
    "\334",	/* capital U, dieresis or umlaut mark */
    "\335",	/* capital Y, acute accent */
/* 30 */
    "\341",	/* small a, acute accent */
    "\342",	/* small a, circumflex accent */
    "\264",	/* acute accent */
    "\346",	/* small ae diphthong (ligature) */
    "\340",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\345",	/* small a, ring */
    "\343",	/* small a, tilde */
    "\344",	/* small a, dieresis or umlaut mark */
    "\246",	/* broken vertical bar */
/* 40 */
    "\347",	/* small c, cedilla */
    "\270",	/* cedilla */
    "\242",	/* cent sign */
    "\251",	/* copyright */
    "\244",	/* general currency sign */
    "\260",	/* degree sign */
    "\367",	/* division sign */
    "\351",	/* small e, acute accent */
    "\352",	/* small e, circumflex accent */
    "\350",	/* small e, grave accent */
/* 50 */
    "\360",	/* small eth, Icelandic */
    "\353",	/* small e, dieresis or umlaut mark */
    "\275",	/* fraction one-half */
    "\274",	/* fraction one-fourth */
    "\276",	/* fraction three-fourth */
    "\076",	/* greater than */
    "\355",	/* small i, acute accent */
    "\356",	/* small i, circumflex accent */
    "\241",	/* inverted exclamation */
    "\354",	/* small i, grave accent */
/* 60 */
    "\277",	/* inverted question mark */
    "\357",	/* small i, dieresis or umlaut mark */
    "\253",	/* left angle quote */
    "\074",	/* less than */
    "\257",	/* macron accent */
    "\265",	/* micro sign (greek mu) */
    "\267",	/* middle dot */
    /* NOTE(review): octal 040 is the plain ASCII space, not Latin-1 NBSP
       (octal 240) -- presumably deliberate; confirm before changing. */
    "\040",	/* non-breaking space */
    "\254",	/* not sign */
    "\361",	/* small n, tilde */
/* 70 */
    "\363",	/* small o, acute accent */
    "\364",	/* small o, circumflex accent */
    "\362",	/* small o, grave accent */
    "\252",	/* feminine ordinal */
    "\272",	/* masculine ordinal */
    "\370",	/* small o, slash */
    "\365",	/* small o, tilde */
    "\366",	/* small o, dieresis or umlaut mark */
    "\266",	/* paragraph sign */
    "\261",	/* plus or minus */
/* 80 */
    "\243",	/* pound sign */
    "\042",	/* double quote sign - June 94 */
    "\273",	/* right angle quote */
    "\256",	/* registered trademark */
    "\247",	/* section sign */
    "\255",	/* soft hyphen */
    "\271",	/* superscript 1 */
    "\262",	/* superscript 2 */
    "\263",	/* superscript 3 */
    "\337",	/* small sharp s, German (sz ligature) */
/* 90 */
    "\376",	/* small thorn, Icelandic */
    "\327",	/* multiply sign */
    "\372",	/* small u, acute accent */
    "\373",	/* small u, circumflex accent */
    "\371",	/* small u, grave accent */
    "\250",	/* dieresis or umlaut mark */
    "\374",	/* small u, dieresis or umlaut mark */
    "\375",	/* small y, acute accent */
    "\245",	/* yen sign */
    "\377"	/* small y, dieresis or umlaut mark */
/* 100 */
};

/* Entity table currently selected; ISO_Latin1 is the only one defined here */
PRIVATE char ** CurrentEntityValues = ISO_Latin1;

/*
**	Select the character set used for entity values.
**
**	Returns YES and selects the ISO_Latin1 table when `i' is
**	HTML_ISO_LATIN1; for any other value it emits a trace message, leaves
**	the current selection untouched and returns NO.
*/
PUBLIC BOOL HTMLUseCharacterSet (HTMLCharacterSet i)
{
    if (i == HTML_ISO_LATIN1) {
	CurrentEntityValues = ISO_Latin1;
	return YES;
    } else {
	HTTRACE(SGML_TRACE, "HTML Parser. Doesn't support this character set\n");
	return NO;
    }
}

/*
**	Pass a block of `l' bytes starting at `b' on to the text object.
**
**	Sends HTEXT_BEGIN to the text object first if that has not happened
**	yet.  While the current element (me->sp[0]) is HTML_TITLE the bytes are
**	also appended to the title chunk.  Always returns HT_OK.
*/
PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
{
    if (!me->started) {
	HTextImp_build(me->text, HTEXT_BEGIN);
	me->started = YES;
    }

    /* Look at what we got */
    switch (me->sp[0]) {

    case HTML_TITLE:
	HTChunk_putb(me->title, b, l);
	/* Fall through */

    default:
	HTextImp_addText(me->text, b, l);
    }
    return HT_OK;
}

/*
**	Single-character variant: forwards `c' to HTML_write as a one-byte block.
*/
PRIVATE int HTML_put_character (HTStructured * me, char c)
{
    return HTML_write(me, &c, sizeof(char));
}

/*
**	String variant: forwards the strlen(s) bytes of `s' to HTML_write.
**	`s' must not be NULL, since it is handed straight to strlen().
*/
PRIVATE int HTML_put_string (HTStructured * me, const char* s)
{
    return HTML_write(me, s, (int) strlen(s));
}

/*
**	Handle the start tag of element `element_number'.
**
**	`present' and `value' are indexed by the per-element attribute
**	constants (HTML_A_HREF, HTML_IMG_SRC, ...).  Sends HTEXT_BEGIN to the
**	text object first if that has not happened yet.  For the elements
**	handled below that carry a URI attribute, a child anchor is created
**	under me->node_anchor with HTAnchor_findChildAndLink() and, except
**	for BASE, reported to the text object with HTextImp_foundLink().
*/
PRIVATE void HTML_start_element (HTStructured *	me,
				 int		element_number,
				 const BOOL * 	present,
				 const char **	value)
{
    HTChildAnchor * address = NULL;
    if (!me->started) {
	HTextImp_build(me->text, HTEXT_BEGIN);
	me->started = YES;
    }

    /* Look at what element was started */
    switch (element_number) {
    case HTML_A:
	if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
	    address = HTAnchor_findChildAndLink(
		me->node_anchor,					/* parent */
		present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,	/* Tag */
		value[HTML_A_HREF],					/* Address */
		present[HTML_A_REL] && value[HTML_A_REL] ?
		(HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);

	    /* A TITLE attribute names the destination unless it already has a title */
	    if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
		HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
		HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
		if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
	    }
	    HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]);
	}
	break;

    case HTML_AREA:
	if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_AREA_HREF], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]);
	}
	break;

    case HTML_BASE:
	if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
	    HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
	    HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]);
	}
	break;

    case HTML_BODY:
	if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_BODY_BACKGROUND], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]);
	}
	break;

    case HTML_FORM:
	if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_FORM_ACTION], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
			       address, present, value);
	}
	break;

    case HTML_FRAME:
	if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_FRAME_SRC], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]);
	}
	break;

    case HTML_INPUT:
	if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_INPUT_SRC], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
			       address, present, value);
	}
	break;

    case HTML_IMG:
	if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_IMG_SRC], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
			       address, present, value);
	}
	break;

    case HTML_ISINDEX:
   	HTAnchor_setIndex(me->node_anchor);
	break;

    case HTML_LINK:
	if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
	    HTParentAnchor * dest = NULL;
	    /* NOTE(review): the guard above tests HTML_LINK_HREF, but the two
	       arguments below index with the <A> constants HTML_A_NAME and
	       HTML_A_HREF -- looks like a copy/paste slip; confirm against
	       the attribute tables before changing. */
	    address = HTAnchor_findChildAndLink(
		me->node_anchor,					/* parent */
		present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,	/* Tag */
		present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL,	/* Address */
		NULL);							/* Rels */
	    dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));

	    /* If forward reference */
	    if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
		char * strval = NULL;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -