📄 sgml.c

📁 elinks和lynx是最重要的两个文本浏览器, 在Linux下非常实用, lynx比elinks早得多, 目前好像已停止开发, 这是lynx源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
	*/	return &HTTag_unrecognized;    }    return NULL;}/*________________________________________________________________________**			Public Methods*//*	Could check that we are back to bottom of stack! @@  *//*	Do check! - FM					     *//*							     */PRIVATE void SGML_free ARGS1(	HTStream *,	context){    int i;    HTElement * cur;    HTTag * t;    /*    **	Free the buffers. - FM    */    FREE(context->recover);    FREE(context->url);    FREE(context->csi);    FREE(context->include);    FREE(context->active_include);    /*    **	Wind down stack if any elements are open. - FM    */    while (context->element_stack) {	cur = context->element_stack;	t = cur->tag;	context->element_stack = cur->next;	/* Remove from stack */	pool_free(cur);#ifdef USE_PRETTYSRC	if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */#endif	    (*context->actions->end_element)(context->target,		    NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)),		    (char **)&context->include);	FREE(context->include);    }    /*    **	Finish off the target. - FM    */    (*context->actions->_free)(context->target);    /*    **	Free the strings and context structure. - FM    */    HTChunkFree(context->string);    for (i = 0; i < MAX_ATTRIBUTES; i++)	FREE_extra(context->value[i]);    FREE(context);#ifdef USE_PRETTYSRC    sgml_in_psrc_was_initialized = FALSE;#endif}PRIVATE void SGML_abort ARGS2(	HTStream *,	context,	HTError,	e){    int i;    HTElement * cur;    /*    **	Abort the target. - FM    */    (*context->actions->_abort)(context->target, e);    /*    **	Free the buffers. - FM    */    FREE(context->recover);    FREE(context->include);    FREE(context->active_include);    FREE(context->url);    FREE(context->csi);    /*    **	Free stack memory if any elements were left open. - KW    */    while (context->element_stack) {	cur = context->element_stack;	context->element_stack = cur->next;	/* Remove from stack */	pool_free(cur);    }    /*    **	Free the strings and context structure. 
- FM    */    HTChunkFree(context->string);    for (i = 0; i < MAX_ATTRIBUTES; i++)	FREE_extra(context->value[i]);    FREE(context);#ifdef USE_PRETTYSRC    sgml_in_psrc_was_initialized = FALSE;#endif}/*	Read and write user callback handle**	-----------------------------------****   The callbacks from the SGML parser have an SGML context parameter.**   These calls allow the caller to associate his own context with a**   particular SGML context.*/#ifdef CALLERDATAPUBLIC void* SGML_callerData ARGS1(	HTStream *,	context){    return context->callerData;}PUBLIC void SGML_setCallerData ARGS2(	HTStream *,	context,	void*,		data){    context->callerData = data;}#endif /* CALLERDATA */PRIVATE void SGML_character ARGS2(	HTStream *,	context,	char,		c_in){    CONST SGML_dtd *dtd =	context->dtd;    HTChunk	*string =	context->string;    CONST char * EntityName;    HTTag * testtag = NULL;    BOOLEAN chk;	/* Helps (?) walk through all the else ifs... */    UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */    int testlast;#ifdef CJK_EX    unsigned char c;#else    char c;#endif    char saved_char_in = '\0';    /*    **	Now some fun with the preprocessor.    **	Use copies for c and unsign_c == clong, so that    **	we can revert back to the unchanged c_in. - KW    */#define unsign_c clong    c = c_in;    clong = UCH(c);	/* a.k.a. unsign_c */    if (context->T.decode_utf8) {	/*	**  Combine UTF-8 into Unicode.	**  Incomplete characters silently ignored.	**  From Linux kernel's console.c. - KW	*/	if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */	    /*	    **	We have an octet from a multibyte character. - FM	    */	    if (context->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) {		context->utf_char = (context->utf_char << 6) | (TOASCII(c) & 0x3f);		context->utf_count--;		*(context->utf_buf_p) = c;		(context->utf_buf_p)++;		if (context->utf_count == 0) {		    /*		    **	We have all of the bytes, so terminate		    **	the buffer and set 'clong' to the UCode_t		    **	value. 
- FM		    */		    *(context->utf_buf_p) = '\0';		    clong = context->utf_char;		    if (clong < 256) {			c = ((char)(clong & 0xff));		    }		    goto top1;		} else {		    /*		    **	Wait for more. - KW		    */		    return;		}	    } else {		/*		**  Start handling a new multibyte character. - FM		*/		context->utf_buf_p = context->utf_buf;		*(context->utf_buf_p) = c;		(context->utf_buf_p)++;		if ((c & 0xe0) == 0xc0) {		    context->utf_count = 1;		    context->utf_char = (c & 0x1f);		} else if ((c & 0xf0) == 0xe0) {		    context->utf_count = 2;		    context->utf_char = (c & 0x0f);		} else if ((c & 0xf8) == 0xf0) {		    context->utf_count = 3;		    context->utf_char = (c & 0x07);		} else if ((c & 0xfc) == 0xf8) {		    context->utf_count = 4;		    context->utf_char = (c & 0x03);		} else if ((c & 0xfe) == 0xfc) {		    context->utf_count = 5;		    context->utf_char = (c & 0x01);		} else {		    /*		    **	Garbage. - KW		    */		    context->utf_count = 0;		    context->utf_buf_p = context->utf_buf;		    *(context->utf_buf_p) = '\0';		}		/*		**  Wait for more. - KW		*/		return;	    }	} else {	    /*	    **	Got an ASCII char. - KW	    */	    context->utf_count = 0;	    context->utf_buf_p = context->utf_buf;	    *(context->utf_buf_p) = '\0';		    /*	goto top;  */	}    } /* end of context->T.decode_utf8	S/390 -- gil -- 0726 */#ifdef NOTDEFINED    /*    **	If we have a koi8-r input and do not have    **	koi8-r as the output, save the raw input    **	in saved_char_in before we potentially    **	convert it to Unicode. - FM    */    if (context->T.strip_raw_char_in)	saved_char_in = c;#endif /* NOTDEFINED */    /*    **	If we want the raw input converted    **	to Unicode, try that now. - FM    */    if (context->T.trans_to_uni &&	((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) ||  /* S/390 -- gil -- 0744 */	 (unsign_c < ' ' && unsign_c != 0 &&	  context->T.trans_C0_to_uni))) {	/*	**  Convert the octet to Unicode. 
- FM	*/	clong = UCTransToUni(c, context->inUCLYhndl);	if (clong > 0) {	    saved_char_in = c;	    if (clong < 256) {		c = FROMASCII((char)clong);	    }	}	goto top1;    } else if (unsign_c < ' ' && unsign_c != 0 &&  /* S/390 -- gil -- 0768 */	       context->T.trans_C0_to_uni) {	/*	**  This else if may be too ugly to keep. - KW	*/	if (context->T.trans_from_uni &&	    (((clong = UCTransToUni(c, context->inUCLYhndl)) >= ' ') ||	     (context->T.transp &&	      (clong = UCTransToUni(c, context->inUCLYhndl)) > 0))) {	    saved_char_in = c;	    if (clong < 256) {		c = FROMASCII((char)clong);	    }	    goto top1;	} else {	    uck = -1;	    if (context->T.transp) {		uck = UCTransCharStr(replace_buf, 60, c,				     context->inUCLYhndl,				     context->inUCLYhndl, NO);	    }	    if (!context->T.transp || uck < 0) {		uck = UCTransCharStr(replace_buf, 60, c,				     context->inUCLYhndl,				     context->outUCLYhndl, YES);	    }	    if (uck == 0) {		return;	    } else if (uck < 0) {		goto top0a;	    }	    c = replace_buf[0];	    if (c && replace_buf[1]) {		if (context->state == S_text) {		    PUTS(replace_buf);		    return;		}		StrAllocCat(context->recover, replace_buf + 1);	    }	    goto top0a;	} /*  Next line end of ugly stuff for C0. - KW */    } else {  /* end of context->T.trans_to_uni	 S/390 -- gil -- 0791 */	goto top0a;    }    /*    **	At this point we have either unsign_c a.k.a. clong in    **	Unicode (and c in latin1 if clong is in the latin1 range),    **	or unsign_c and c will have to be passed raw. - KW    *//***  We jump up to here from below if we have**  stuff in the recover, insert, or csi buffers**  to process.	 We zero saved_char_in, in effect**  as a flag that the octet in not that of the**  actual call to this function.  This may be OK**  for now, for the stuff this function adds to**  its recover buffer, but it might not be for**  stuff other functions added to the insert or**  csi buffer, so bear that in mind. 
- FM**  Stuff from the recover buffer is now handled**  as UTF-8 if we can expect that's what it is,**  and in that case we don't come back up here. - kw*/top:    saved_char_in = '\0';/***  We jump to here from above when we don't have**  UTF-8 input, haven't converted to Unicode, and**  want clong set to the input octet (unsigned)**  without zeroing its saved_char_in copy (which**  is signed). - FM*/top0a:    *(context->utf_buf) = '\0';    clong = UCH(c);/***  We jump to here from above if we have converted**  the input, or a multibyte sequence across calls,**  to a Unicode value and loaded it into clong (to**  which unsign_c has been defined), and from below**  when we are recycling a character (e.g., because**  it terminated an entity but is not the standard**  semi-colon).  The character will already have**  been put through the Unicode conversions. - FM*/top1:    /*    **	Ignore low ISO 646 7-bit control characters    **	if HTCJK is not set. - FM    */    /*    ** Works for both ASCII and EBCDIC. -- gil    */	/* S/390 -- gil -- 0811 */    if (TOASCII(unsign_c) < 32 &&	c != '\t' && c != '\n' && c != '\r' &&	HTCJK == NOCJK)	goto after_switch;    /*    **	Ignore 127 if we don't have HTPassHighCtrlRaw    **	or HTCJK set. - FM    */#define PASSHICTRL (context->T.transp || \		    unsign_c >= LYlowest_eightbit[context->inUCLYhndl])    if (TOASCII(c) == 127 &&  /* S/390 -- gil -- 0830 */	!(PASSHICTRL || HTCJK != NOCJK))	goto after_switch;    /*    **	Ignore 8-bit control characters 128 - 159 if    **	neither HTPassHighCtrlRaw nor HTCJK is set. - FM    */    if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 &&  /* S/390 -- gil -- 0847 */	!(PASSHICTRL || HTCJK != NOCJK))	goto after_switch;    /* Almost all CJK characters are double byte but only Japanese     * JIS X0201 Kana is single byte. To prevent to fail SGML parsing     * we have to care them here. 
-- TH     */    if ((HTCJK==JAPANESE) && (context->state==S_in_kanji) &&	!IS_JAPANESE_2BYTE(context->kanji_buf, UCH(c))) {#ifdef CONV_JISX0201KANA_JISX0208KANA	if (IS_SJIS_X0201KANA(context->kanji_buf)) {	    unsigned char sjis_hi, sjis_lo;	    JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo);	    PUTC(sjis_hi);	    PUTC(sjis_lo);	}	else#endif	    PUTC(context->kanji_buf);	context->state = S_text;    }    /*    **	Handle character based on context->state.    */    CTRACE2(TRACE_SGML, (tfp, "SGML before %s|%.*s|%c|\n",	    state_name(context->state),	    string->size,	    NonNull(string->data),	    UCH(c)));    switch(context->state) {    case S_in_kanji:	/*	**  Note that if we don't have a CJK input, then this	**  is not the second byte of a CJK di-byte, and we're	**  trashing the input.	 That's why 8-bit characters	**  followed by, for example, '<' can cause the tag to	**  be treated as text, not markup.  We could try to deal	**  with it by holding each first byte and then checking	**  byte pairs, but that doesn't seem worth the overhead	**  (see below). - FM	*/	context->state = S_text;	PUTC(context->kanji_buf);	PUTC(c);	break;    case S_tagname_slash:	/*	 *  We had something link "<name/" so far, set state to S_text	 *  but keep context->slashedtag as as a flag; except if we get	 *  '>' directly after the "<name/", and really have a tag for	 *  that name in context->slashedtag, in which case keep state as	 *  is and let code below deal with it. - kw	 */	if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) {	    context->state = S_text;	} /* fall through in any case! */    case S_text:	if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) {  /* S/390 -- gil -- 0864 */	    /*	    **	Setting up for Kanji multibyte handling (based on	    **	Takuya ASADA's (asada@three-a.co.jp) CJK Lynx).	    **	Note that if the input is not in fact CJK, the	    **	next byte also will be mishandled, as explained	    **	above.	
Toggle raw mode off in such cases, or	    **	select the "7 bit approximations" display	    **	character set, which is largely equivalent	    **	to having raw mode off with CJK. - FM	    */	    context->state = S_in_kanji;	    context->kanji_buf = c;	    break;	} else if (HTCJK != NOCJK && TOASCII(c) == '\033') {  /* S/390 -- gil -- 0881 */	    /*	    **	Setting up for CJK escape sequence handling (based on	    **	Takuya ASADA's (asada@three-a.co.jp) CJK Lynx). - FM	    */	    context->state = S_esc;	    PUTC(c);	    break;	}	if (c == '&' || c == '<') {#ifdef USE_PRETTYSRC	    if (psrc_view) { /*there is nothing useful in the element_stack*/		testtag = context->current_tag;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -