⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgml.c

📁 elinks和lynx是最重要的两个文本浏览器，在Linux下非常实用。lynx比elinks早得多，目前好像已停止开发。这是lynx的源代码。
💻 C
📖 第 1 页 / 共 5 页
字号:
/*			General SGML Parser code		SGML.c
**			========================
**
**	This module implements an HTStream object.  To parse an
**	SGML file, create this object which is a parser.  The object
**	is (currently) created by being passed a DTD structure,
**	and a target HTStructured object at which to throw the parsed stuff.
**
**	 6 Feb 93  Binary searches used. Interface modified.
*/

#include <HTUtils.h>

/* Remove the following to disable the experimental HTML DTD parsing.
   Currently only used in this source file. - kw */

#ifndef NO_EXTENDED_HTMLDTD
#define EXTENDED_HTMLDTD
#endif

#include <SGML.h>
#include <HTMLDTD.h>
#include <HTCJK.h>
#include <UCMap.h>
#include <UCDefs.h>
#include <UCAux.h>

#include <HTChunk.h>

#include <LYCharSets.h>
#include <LYCharVals.h>	 /* S/390 -- gil -- 0635 */
#include <LYGlobalDefs.h>
#include <LYStrings.h>
#include <LYLeaks.h>

#ifdef USE_COLOR_STYLE
# include <LYStyle.h>
#endif
#ifdef USE_PRETTYSRC
# include <LYPrettySrc.h>
#endif

#define INVALID (-1)

#ifdef USE_PRETTYSRC
char* entity_string; /* this is used for printing entity name.
    Unconditionally added since redundant assignments don't hurt much*/

/*
**  Character sink that accepts a target and a character and discards
**  both; the body is intentionally empty.
*/
PRIVATE void fake_put_character ARGS2(
		    void*, p GCC_UNUSED,
		    char,  c GCC_UNUSED)
{
}

/* Values for the 'start' argument of HTMLSRC_apply_markup(). */
#define START TRUE
#define STOP FALSE

/*
**  Set psrc_convert_string and then emit the string via PUTS().
**  Written as a single comma expression (not two statements) so that
**  both steps stay together when the macro is the body of an unbraced
**  if/else/loop.
*/
#define PUTS_TR(x) (psrc_convert_string = TRUE, PUTS(x))

#endif

/* my_casecomp() - optimized by the first character, NOT_ASCII ok */
/*
**  If the upper-cased first characters are equal, defer to AS_casecomp()
**  for the full comparison; otherwise the result is the difference of
**  the (TOASCII'd, upper-cased) first characters.  Both arguments are
**  evaluated more than once, so pass side-effect-free expressions.
*/
#define my_casecomp(a,b)  ((TOUPPER(*a) == TOUPPER(*b)) ? \
			AS_casecomp(a,b) : \
			(TOASCII(TOUPPER(*a)) - TOASCII(TOUPPER(*b))))

#if ANSI_PREPRO /* will use partially inlined version */
#define orig_HTChunkPutUtf8Char HTChunkPutUtf8Char
#undef HTChunkPutUtf8Char

/* ...used for comments and attributes value like href... */
/*
**  Fast path: when x is below 128 (after TOASCII) and the chunk still
**  has room (size < allocated), the byte is stored directly in
**  ch->data; in every other case the original HTChunkPutUtf8Char is
**  called.  The expansion is a bare { } block and evaluates 'ch' and
**  'x' several times.
*/
#define HTChunkPutUtf8Char(ch,x) \
    { \
    if ((TOASCII(x) < 128)  && (ch->size < ch->allocated)) \
	ch->data[ch->size++] = (char)x; \
    else \
	orig_HTChunkPutUtf8Char(ch,x); \
    }

/* The HTChunkPutc / HTChunkTerminate overrides below are compiled out. */
#if 0
#define orig_HTChunkPutc HTChunkPutc
#undef HTChunkPutc

#define HTChunkPutc(ch,x) \
    { \
    if (ch->size < ch->allocated) \
	ch->data[ch->size++] = x; \
    else \
	orig_HTChunkPutc(ch,x); \
    }

#undef HTChunkTerminate
#define HTChunkTerminate(ch) \
    HTChunkPutc(ch, (char)0)

#endif /* */

#endif	/* ANSI_PREPRO */

/*
**  Output helpers.  All three expect a variable named 'context' (the
**  parser's HTStream) to be in scope at the point of use and forward
**  to the put_string / put_character actions of context->target.
*/
#define PUTS(str) ((*context->actions->put_string)(context->target, str))
#define PUTC(ch)  ((*context->actions->put_character)(context->target, ch))
#define PUTUTF8(code) (UCPutUtf8_charstring((HTStream *)context->target, \
		      (putc_func_t*)(context->actions->put_character), code))

#define OPT 1

/*the following macros are used for pretty source view. */
#define IS_C(attr) (attr.type == HTMLA_CLASS)

PUBLIC HTCJKlang HTCJK = NOCJK;		/* CJK enum value.		*/
PUBLIC BOOL HTPassEightBitRaw = FALSE;	/* Pass 161-172,174-255 raw.	*/
PUBLIC BOOL HTPassEightBitNum = FALSE;	/* Pass ^ numeric entities raw. */
PUBLIC BOOL HTPassHighCtrlRaw = FALSE;	/* Pass 127-160,173,&#127; raw. */
PUBLIC BOOL HTPassHighCtrlNum = FALSE;	/* Pass &#128;-&#159; raw.	
*//*	The State (context) of the parser****	This is passed with each call to make the parser reentrant***/#define MAX_ATTRIBUTES 36	/* Max number of attributes per element *//*		Element Stack**		-------------**	This allows us to return down the stack reselecting styles.**	As we return, attribute values will be garbage in general.*/typedef struct _HTElement HTElement;struct _HTElement {	HTElement *	next;	/* Previously nested element or 0 */	HTTag*		tag;	/* The tag at this level  */};typedef enum {    S_text = 0    ,S_attr    ,S_attr_gap    ,S_comment    ,S_cro    ,S_doctype    ,S_dollar    ,S_dollar_dq    ,S_dollar_paren    ,S_dollar_paren_dq    ,S_dollar_paren_sq    ,S_dollar_sq    ,S_dquoted    ,S_end    ,S_entity    ,S_equals    ,S_ero    ,S_esc    ,S_esc_dq    ,S_esc_sq    ,S_exclamation    ,S_in_kanji    ,S_incro    ,S_junk_pi    ,S_junk_tag    ,S_litteral    ,S_marked    ,S_nonascii_text    ,S_nonascii_text_dq    ,S_nonascii_text_sq    ,S_paren    ,S_paren_dq    ,S_paren_sq    ,S_pcdata    ,S_script    ,S_sgmlatt    ,S_sgmlele    ,S_sgmlent    ,S_squoted    ,S_tag    ,S_tag_gap    ,S_tagname_slash    ,S_value} sgml_state;/*	Internal Context Data Structure**	-------------------------------*/struct _HTStream {    CONST HTStreamClass *	isa;		/* inherited from HTStream */    CONST SGML_dtd		*dtd;    CONST HTStructuredClass	*actions;	/* target class	 */    HTStructured		*target;	/* target object */    HTTag			*current_tag;    HTTag			*slashedtag;    CONST HTTag			*unknown_tag;    BOOL			inSELECT;    BOOL			no_lynx_specialcodes;    int				current_attribute_number;    HTChunk			*string;    int				leading_spaces;    int				trailing_spaces;    HTElement			*element_stack;    sgml_state			state;    unsigned char kanji_buf;#ifdef CALLERDATA    void *			callerData;#endif /* CALLERDATA */    BOOL present[MAX_ATTRIBUTES];	/* Flags: attribute is present? 
*/    char * value[MAX_ATTRIBUTES];	/* NULL, or strings alloc'd with StrAllocCopy_extra() */    BOOL			lead_exclamation;    BOOL			first_dash;    BOOL			end_comment;    BOOL			doctype_bracket;    BOOL			first_bracket;    BOOL			second_bracket;    BOOL			isHex;    HTParentAnchor *		node_anchor;    LYUCcharset *		inUCI;		/* pointer to anchor UCInfo */    int				inUCLYhndl;	/* charset we are fed	    */    LYUCcharset *		outUCI;		/* anchor UCInfo for target */    int				outUCLYhndl;	/* charset for target	    */    char			utf_count;    UCode_t			utf_char;    char			utf_buf[8];    char *			utf_buf_p;    UCTransParams		T;    int				current_tag_charset; /* charset to pass attributes */    char *			recover;    int				recover_index;    char *			include;    char *			active_include;    int				include_index;    char *			url;    char *			csi;    int				csi_index;#ifdef USE_PRETTYSRC    BOOL			cur_attr_is_href;    BOOL			cur_attr_is_name;    BOOL			seen_nonwhite_in_junk_tag;#endif};#ifndef NO_LYNX_TRACEPRIVATE char *state_name ARGS1(sgml_state, n){    char *result = "?";    switch (n) {    case S_attr:                result = "S_attr";              break;    case S_attr_gap:            result = "S_attr_gap";          break;    case S_comment:             result = "S_comment";           break;    case S_cro:                 result = "S_cro";               break;    case S_doctype:             result = "S_doctype";           break;    case S_dollar:              result = "S_dollar";            break;    case S_dollar_dq:           result = "S_dollar_dq";         break;    case S_dollar_paren:        result = "S_dollar_paren";      break;    case S_dollar_paren_dq:     result = "S_dollar_paren_dq";   break;    case S_dollar_paren_sq:     result = "S_dollar_paren_sq";   break;    case S_dollar_sq:           result = "S_dollar_sq";         break;    case S_dquoted:             result = "S_dquoted";           break;    case S_end:                 result = "S_end";               break;  
  case S_entity:              result = "S_entity";            break;    case S_equals:              result = "S_equals";            break;    case S_ero:                 result = "S_ero";               break;    case S_esc:                 result = "S_esc";               break;    case S_esc_dq:              result = "S_esc_dq";            break;    case S_esc_sq:              result = "S_esc_sq";            break;    case S_exclamation:         result = "S_exclamation";       break;    case S_in_kanji:            result = "S_in_kanji";          break;    case S_incro:               result = "S_incro";             break;    case S_junk_pi:             result = "S_junk_pi";           break;    case S_junk_tag:            result = "S_junk_tag";          break;    case S_litteral:            result = "S_litteral";          break;    case S_marked:              result = "S_marked";            break;    case S_nonascii_text:       result = "S_nonascii_text";     break;    case S_nonascii_text_dq:    result = "S_nonascii_text_dq";  break;    case S_nonascii_text_sq:    result = "S_nonascii_text_sq";  break;    case S_paren:               result = "S_paren";             break;    case S_paren_dq:            result = "S_paren_dq";          break;    case S_paren_sq:            result = "S_paren_sq";          break;    case S_pcdata:              result = "S_pcdata";            break;    case S_script:              result = "S_script";            break;    case S_sgmlatt:             result = "S_sgmlatt";           break;    case S_sgmlele:             result = "S_sgmlele";           break;    case S_sgmlent:             result = "S_sgmlent";           break;    case S_squoted:             result = "S_squoted";           break;    case S_tag:                 result = "S_tag";               break;    case S_tag_gap:             result = "S_tag_gap";           break;    case S_tagname_slash:       result = "S_tagname_slash";     break;    case S_text:                result = 
"S_text";              break;    case S_value:               result = "S_value";             break;    }    return result;}#endif/* storage for Element Stack */#define DEPTH 10static HTElement pool[DEPTH];static int depth = 0;PRIVATE HTElement* pool_alloc NOARGS{    depth++;    if (depth > DEPTH)	return (HTElement*) malloc(sizeof(HTElement));    return (pool + depth - 1);}PRIVATE void pool_free ARGS1(HTElement*, e){    if (depth > DEPTH)	FREE(e);    depth--;    return;}#ifdef USE_PRETTYSRCPRIVATE void HTMLSRC_apply_markup ARGS3(	    HTStream *,	      context,	    HTlexeme,	      lexeme,	    BOOL,	      start){    HT_tagspec* ts = *( ( start ? lexeme_start : lexeme_end ) + lexeme);    while (ts) {#ifdef USE_COLOR_STYLE	if (ts->start) {	    current_tag_style = ts->style;	    force_current_tag_style = TRUE;	    forced_classname = ts->class_name;	    force_classname = TRUE;	}#endif	CTRACE((tfp,ts->start ? "SRCSTART %d\n" : "SRCSTOP %d\n",(int)lexeme));	if (ts->start)	    (*context->actions->start_element)(		context->target,		ts->element,		ts->present,		(CONST char **)ts->value,		context->current_tag_charset,		(char **)&context->include);	else	    (*context->actions->end_element)(		context->target,		ts->element,		(char **)&context->include);	ts = ts->next;    }}#if ANSI_PREPRO#  define PSRCSTART(x)	HTMLSRC_apply_markup(context,HTL_##x,START)#  define PSRCSTOP(x)   HTMLSRC_apply_markup(context,HTL_##x,STOP)#else#  define PSRCSTART(x)	HTMLSRC_apply_markup(context,HTL_/**/x,START)#  define PSRCSTOP(x)   HTMLSRC_apply_markup(context,HTL_/**/x,STOP)#endif#define attr_is_href context->cur_attr_is_href#define attr_is_name context->cur_attr_is_name#endifPRIVATE void set_chartrans_handling ARGS3(	HTStream *,		context,	HTParentAnchor *,	anchor,	int,			chndl){    if (chndl < 0) {	/*	**  Nothing was set for the parser in earlier stages,	**  so the HTML parser's UCLYhndl should still be its	**  default. 
- FM	*/	chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_STRUCTURED);	if (chndl < 0)	    /*	    **	That wasn't set either, so seek the HText default. - FM	    */	    chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);	if (chndl < 0)	    /*	    **	That wasn't set either, so assume the current display	    **	character set. - FM	    */	    chndl = current_char_set;	/*	**  Try to set the HText and HTML stages' chartrans info	**  with the default lock level (will not be changed if	**  it was set previously with a higher lock level). - FM	*/	HTAnchor_setUCInfoStage(anchor, chndl,				UCT_STAGE_HTEXT,				UCT_SETBY_DEFAULT);	HTAnchor_setUCInfoStage(anchor, chndl,				UCT_STAGE_STRUCTURED,				UCT_SETBY_DEFAULT);	/*	**  Get the chartrans info for output to the HTML parser. - FM	*/	context->outUCI = HTAnchor_getUCInfoStage(anchor,						  UCT_STAGE_STRUCTURED);	context->outUCLYhndl = HTAnchor_getUCLYhndl(context->node_anchor,						    UCT_STAGE_STRUCTURED);    }    /*    **	Set the in->out transformation parameters. - FM    */    UCSetTransParams(&context->T,		     context->inUCLYhndl, context->inUCI,		     context->outUCLYhndl, context->outUCI);    /*    **	This is intended for passing the SGML parser's input    **	charset as an argument in each call to the HTML    **	parser's start tag function, but it would be better    **	to call a Lynx_HTML_parser function to set an element    **	in its HTStructured object, itself, if this were    **	needed. 
- FM    */    if (HTCJK != NOCJK) {	context->current_tag_charset = -1;    } else if (context->T.transp) {	context->current_tag_charset = context->inUCLYhndl;    } else if (context->T.decode_utf8) {	context->current_tag_charset = context->inUCLYhndl;    } else if (context->T.do_8bitraw ||	       context->T.use_raw_char_in) {	context->current_tag_charset = context->inUCLYhndl;    } else if (context->T.output_utf8 ||	       context->T.trans_from_uni) {	context->current_tag_charset = UCGetLYhndl_byMIME("utf-8");    } else {	context->current_tag_charset = LATIN1;    }}PRIVATE void change_chartrans_handling ARGS1(	HTStream *,		context){    int new_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor,					  UCT_STAGE_PARSER);    if (new_LYhndl != context->inUCLYhndl &&	new_LYhndl >= 0) {	/*	 *  Something changed. but ignore if a META wants an unknown charset.	 */	LYUCcharset * new_UCI = HTAnchor_getUCInfoStage(context->node_anchor,							UCT_STAGE_PARSER);	if (new_UCI) {	    LYUCcharset * next_UCI = HTAnchor_getUCInfoStage(				    context->node_anchor, UCT_STAGE_STRUCTURED							    );	    int next_LYhndl = HTAnchor_getUCLYhndl(				    context->node_anchor, UCT_STAGE_STRUCTURED						  );	    context->inUCI = new_UCI;	    context->inUCLYhndl = new_LYhndl;	    context->outUCI = next_UCI;	    context->outUCLYhndl = next_LYhndl;	    set_chartrans_handling(context,				   context->node_anchor, next_LYhndl);	}    }}#ifdef USE_COLOR_STYLE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -