📄 sgml.c

📁 elinks 和 lynx 是最重要的两个文本浏览器, 在 Linux 下非常实用; lynx 比 elinks 早得多, 目前好像已停止开发。这是 lynx 的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
	    } else#endif	    {		testtag = context->element_stack ?		     context->element_stack->tag : NULL;	    }	}	if (c == '&' && TOASCII(unsign_c) < 127	 &&  /* S/390 -- gil -- 0898 */	    (!testtag ||	     (testtag->contents == SGML_MIXED ||	      testtag->contents == SGML_ELEMENT ||	      testtag->contents == SGML_PCDATA ||#ifdef USE_PRETTYSRC	      testtag->contents == SGML_EMPTY ||#endif	      testtag->contents == SGML_RCDATA))) {	    /*	    **	Setting up for possible entity, without the leading '&'. - FM	    */	    string->size = 0;	    context->state = S_ero;	} else if (c == '<' && TOASCII(unsign_c) < 127) {  /* S/390 -- gil -- 0915 */	    /*	    **	Setting up for possible tag. - FM	    */	    string->size = 0;	    if (testtag && testtag->contents == SGML_PCDATA) {		context->state = S_pcdata;	    } else if (testtag && (testtag->contents == SGML_LITTERAL				|| testtag->contents == SGML_CDATA)) {		context->state = S_litteral;	    } else if (testtag && (testtag->contents == SGML_SCRIPT)) {		context->state = S_script;	    } else {		context->state = S_tag;	    }	    context->slashedtag = NULL;	} else if (context->slashedtag &&		   (c == '/' ||		    (c == '>' && context->state == S_tagname_slash)) &&		   TOASCII(unsign_c) < 127) {	    /*	    **	We got either the second slash of a pending "<NAME/blah blah/"	    **  shortref construct, or the '>' of a mere "<NAME/>".  In both	    **  cases generate a "</NAME>" end tag in the recover buffer for	    **  reparsing unless NAME is really an empty element. 
- kw	    */#ifdef USE_PRETTYSRC	    if (psrc_view) {		PSRCSTART(abracket);		PUTC(c);		PSRCSTOP(abracket);	    } else#endif	    if (context->slashedtag != context->unknown_tag &&		!ReallyEmptyTag(context->slashedtag)) {		if (context->recover == NULL) {		    StrAllocCopy(context->recover, "</");		    context->recover_index = 0;		} else {		    StrAllocCat(context->recover, "</");		}		StrAllocCat(context->recover, context->slashedtag->name);		StrAllocCat(context->recover, ">");	    }	    context->slashedtag = NULL;	} else if (context->element_stack &&		   (context->element_stack->tag->flags & Tgf_frecyc)) {	    /*	     *  The element stack says we are within the contents of an	     *  element that the next stage (HTML.c) may want to feed	     *  us back again (via the *include string).  So try to output	     *  text in UTF-8 if possible, using the same logic as for	     *  attribute values (which should be in line with what	     *  context->current_tag_charset indicates). - kw	     */	    if (context->T.decode_utf8 &&		*context->utf_buf) {		PUTS(context->utf_buf);		context->utf_buf_p = context->utf_buf;		*(context->utf_buf_p) = '\0';	    } else if (HTCJK == NOCJK &&		       (context->T.output_utf8 ||			context->T.trans_from_uni)) {		if (LYIsASCII(clong)) {		    PUTC(c);		} else if (clong == 0xfffd && saved_char_in &&		    HTPassEightBitRaw &&		    UCH(saved_char_in) >=		    LYlowest_eightbit[context->outUCLYhndl]) {		    PUTUTF8((0xf000 | UCH(saved_char_in)));		} else {		    PUTUTF8(clong);		}	    } else if (saved_char_in && context->T.use_raw_char_in) {		PUTC(saved_char_in);	    } else {		PUTC(c);	    }#define PASS8859SPECL context->T.pass_160_173_raw	/*	**  Convert 160 (nbsp) to Lynx special character if	**  neither HTPassHighCtrlRaw nor HTCJK is set. 
- FM	*/	} else if (unsign_c == CH_NBSP &&  /* S/390 -- gil -- 0932 */		   !context->no_lynx_specialcodes &&		   !(PASS8859SPECL || HTCJK != NOCJK)) {	    PUTC(HT_NON_BREAK_SPACE);	/*	**  Convert 173 (shy) to Lynx special character if	**  neither HTPassHighCtrlRaw nor HTCJK is set. - FM	*/	} else if (unsign_c == CH_SHY &&  /* S/390 -- gil -- 0949 */		   !context->no_lynx_specialcodes &&		   !(PASS8859SPECL || HTCJK != NOCJK)) {	    PUTC(LY_SOFT_HYPHEN);	/*	**  Handle the case in which we think we have a character	**  which doesn't need further processing (e.g., a koi8-r	**  input for a koi8-r output). - FM	*/	} else if (context->T.use_raw_char_in && saved_char_in) {	    /*	    **	Only if the original character is still in saved_char_in,	    **	otherwise we may be iterating from a goto top. - KW	    */	    PUTC(saved_char_in);	    saved_char_in = '\0';/****************************************************************** *   I. LATIN-1 OR UCS2	 TO  DISPLAY CHARSET ******************************************************************/	} else if ((chk = (BOOL) (context->T.trans_from_uni && TOASCII(unsign_c) >= 160)) &&  /* S/390 -- gil -- 0968 */		   (uck = UCTransUniChar(unsign_c,					 context->outUCLYhndl)) >= ' ' &&		   uck < 256) {	    CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n",			uck, FROMASCII((char)uck)));	    /*	    **	We got one octet from the conversions, so use it. - FM	    */	    PUTC(FROMASCII((char)uck));	} else if ((chk &&		   (uck == -4 ||		    (context->T.repl_translated_C0 &&		     uck > 0 && uck < 32))) &&		   /*		   **  Not found; look for replacement string. - KW		   */		   (uck = UCTransUniCharStr(replace_buf, 60, clong,					    context->outUCLYhndl,					    0) >= 0)) {	    /*	    **	Got a replacement string.	    **	No further tests for validity - assume that whoever	    **	defined replacement strings knew what she was doing. - KW	    */	    PUTS(replace_buf);	/*	**  If we're displaying UTF-8, try that now. 
- FM	*/	} else if (context->T.output_utf8 && PUTUTF8(clong)) {	    ; /* do nothing more */	/*	**  If it's any other (> 160) 8-bit character, and	**  we have not set HTPassEightBitRaw nor HTCJK, nor	**  have the "ISO Latin 1" character set selected,	**  back translate for our character set. - FM	*/#define IncludesLatin1Enc \		(context->outUCLYhndl == LATIN1 || \		 (context->outUCI && \		  (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))#define PASSHI8BIT (HTPassEightBitRaw || \		    (context->T.do_8bitraw && !context->T.trans_from_uni))	} else if (unsign_c > 160 && unsign_c < 256 &&		   !(PASSHI8BIT || HTCJK != NOCJK) &&		   !IncludesLatin1Enc) {#ifdef USE_PRETTYSRC	    int psrc_view_backup = 0;#endif	    string->size = 0;	    EntityName = HTMLGetEntityName((int)(unsign_c - 160));	    HTChunkPuts(string, EntityName);	    HTChunkTerminate(string);#ifdef USE_PRETTYSRC	    /* we need to disable it temporary*/	    if (psrc_view) {		psrc_view_backup =1; psrc_view =0;	    }#endif	    handle_entity(context, '\0');#ifdef USE_PRETTYSRC	    /* we need to disable it temporary*/	    if (psrc_view_backup)		psrc_view = TRUE;#endif	    string->size = 0;	    if (!FoundEntity)		PUTC(';');	/*	**  If we get to here and have an ASCII char,	**  pass the character. - KW	*/	} else if (TOASCII(unsign_c) < 127 && unsign_c > 0) {  /* S/390 -- gil -- 0987 */	    PUTC(c);	/*	**  If we get to here, and should have translated,	**  translation has failed so far. - KW	**	**  We should have sent UTF-8 output to the parser	**  already, but what the heck, try again. - FM	*/	} else if (context->T.output_utf8 && *context->utf_buf) {	    PUTS(context->utf_buf);	    context->utf_buf_p = context->utf_buf;	    *(context->utf_buf_p) = '\0';#ifdef NOTDEFINED	/*	**  Check for a strippable koi8-r 8-bit character. 
- FM	*/	} else if (context->T.strip_raw_char_in && saved_char_in &&		   (UCH(saved_char_in) >= 0xc0) &&		   (UCH(saved_char_in) < 255)) {	    /*	    **	KOI8 special: strip high bit, gives (somewhat) readable	    **	ASCII or KOI7 - it was constructed that way! - KW	    */	    PUTC(((char)(saved_char_in & 0x7f)));	    saved_char_in = '\0';#endif /* NOTDEFINED */	/*	**  If we don't actually want the character,	**  make it safe and output that now. - FM	*/	} else if (TOASCII(UCH(c)) <	 /* S/390 -- gil -- 0997 */			LYlowest_eightbit[context->outUCLYhndl] ||		   (context->T.trans_from_uni && !HTPassEightBitRaw)) {	/*	**  If we get to here, pass the character. - FM	*/	} else {	    PUTC(c);	}	break;    /*    **	Found '<' in SGML_PCDATA content; treat this mode nearly like    **  S_litteral, but recognize '<!' and '<?' to filter out comments    **  and processing instructions. - kw    */    case S_pcdata:	if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */	    if (c == '!') { /* <! */		/*		**	Terminate and set up for possible comment,		**	identifier, declaration, or marked section		**  as under S_tag. - kw		*/		context->state = S_exclamation;		context->lead_exclamation = TRUE;		context->doctype_bracket = FALSE;		context->first_bracket = FALSE;		HTChunkPutc(string, c);		break;	    } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */		CTRACE((tfp,			"SGML: Found PI in PCDATA, junking it until '>'\n"));#ifdef USE_PRETTYSRC		if (psrc_view) {		    PSRCSTART(abracket);		    PUTS("<?");		    PSRCSTOP(abracket);		    context->seen_nonwhite_in_junk_tag = TRUE; /* show all */		}#endif		context->state = S_junk_pi;		break;	    }	}	goto case_S_litteral;    /*    **  Found '<' in SGML_SCRIPT content; treat this mode nearly like    **  S_litteral, but recognize '<!' to allow the content to be treated    **  as a comment by lynx.    */    case S_script:	if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */	    if (c == '!') { /* <! 
*/		/*		**	Terminate and set up for possible comment,		**	identifier, declaration, or marked section		**  as under S_tag. - kw		*/		context->state = S_exclamation;		context->lead_exclamation = TRUE;		context->doctype_bracket = FALSE;		context->first_bracket = FALSE;		HTChunkPutc(string, c);		break;	    }	}	goto case_S_litteral;    /*    **	In litteral mode, waits only for specific end tag (for    **	compatibility with old servers, and for Lynx). - FM    */    case_S_litteral:    case S_litteral: /*PSRC:this case not understood completely by HV, not done*/	HTChunkPutc(string, c);#ifdef USE_PRETTYSRC	if (psrc_view) { /*there is nothing useful in the element_stack*/	    testtag = context->current_tag;	} else#endif	    testtag = context->element_stack ?		context->element_stack->tag : NULL;	if (testtag == NULL) {	    string->size--;	    context->state = S_text;	    goto top1;	}	/*	 * Normally when we get the closing ">",	 *	testtag contains something like "TITLE"	 *	string contains something like "/title>"	 * so we decrement by 2 to compare the final character of each.	 */	testlast = string->size - 2 - context->trailing_spaces - context->leading_spaces;	if (TOUPPER(c) != ((testlast < 0)			    ? '/'			    : testtag->name[testlast])) {	    int i;	    /*	    **	If complete match, end litteral.	    
*/	    if ((c == '>') &&		testlast >= 0 && !testtag->name[testlast]) {#ifdef USE_PRETTYSRC		if (psrc_view) {		    PSRCSTART(abracket);		    PUTS("</");		    PSRCSTOP(abracket);		    PSRCSTART(tag);		    strcpy(string->data,context->current_tag->name);		    if (tagname_transform != 1) {			if (tagname_transform == 0)			    LYLowerCase(string->data);			else			    LYUpperCase(string->data);		    }		    PUTS(string->data);		    PSRCSTOP(tag);		    PSRCSTART(abracket);		    PUTC('>');		    PSRCSTOP(abracket);		    context->current_tag = NULL;		} else#endif		    end_element(context, context->element_stack->tag);		string->size = 0;		context->current_attribute_number = INVALID;		context->state = S_text;		context->leading_spaces = 0;		context->trailing_spaces = 0;		break;	    }	    /*	     * Allow whitespace between the "<" or ">" and the keyword, for	     * error-recovery.	     */	    if (isspace(UCH(c))) {		if (testlast == -1) {		    context->leading_spaces += 1;		    CTRACE2(TRACE_SGML, (tfp, "leading spaces: %d\n", context->leading_spaces));		    break;		} else if (testlast > 0) {		    context->trailing_spaces += 1;		    CTRACE2(TRACE_SGML, (tfp, "trailing spaces: %d\n", context->trailing_spaces));		    break;		}	    }	    /*	     * Mismatch - recover.	     */	    context->leading_spaces = 0;	    context->trailing_spaces = 0;	    if (((testtag->contents != SGML_LITTERAL &&		  (testtag->flags & Tgf_strict)) ||		 (context->state == S_pcdata &&		  (testtag->flags & (Tgf_strict|Tgf_endO)))) &&		(testlast > -1 &&		 (c == '>' || testlast > 0 || IsNmStart(c)))) {		context->state = S_end;		string->size--;		for (i = 0; i < string->size; i++)  /* remove '/' */		    string->data[i] = string->data[i+1];		if ((string->size == 1) ? 
IsNmStart(c) : IsNmChar(c))		    break;		string->size--;		goto top1;	    }	    if (context->state == S_pcdata &&		(testtag->flags & (Tgf_strict|Tgf_endO)) &&		(testlast < 0 && IsNmStart(c))) {		context->state = S_tag;		break;	    }	    /*	    **	If Mismatch: recover string literally.	    */	    PUTC('<');	    for (i = 0; i < string->size-1; i++)  /* recover, except last c */	       PUTC(string->data[i]);	    string->size = 0;	    context->state = S_text;	    goto top1;		/* to recover last c */	}	break;    /*    **	Character reference (numeric entity) or named entity.    */    case S_ero:	if (c == '#') {	    /*	    **	Setting up for possible numeric entity.	    */	    context->state = S_cro;  /* &# is Char Ref Open */	    break;	}	context->state = S_entity;   /* Fall through! */    /*    **	Handle possible named entity.    */    case S_entity:	if (TOASCII(unsign_c) < 127 && (string->size ?	/* S/390 -- gil -- 1029 */		  isalnum(UCH(c)) : isalpha(UCH(c)))) {	    /* Should probably use IsNmStart/IsNmChar above (is that right?),	       but the world is not ready for that - there's &nbsp: (note	       colon!) and stuff around. */	    /*	    **	Accept valid ASCII character. - FM	    */	    HTChunkPutc(string, c);	} else if (string->size == 0) {	    /*	    **	It was an ampersand that's just text, so output	    **	the ampersand and recycle this character. - FM	    */#ifdef USE_PRETTYSRC	    if (p
上一页 1 2 3 4 5
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -