📄 sgml.c

📁 用于linux和其他unix下面的
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
	if (c == '&' && TOASCII(unsign_c) < 127	 &&  /* S/390 -- gil -- 0898 */	    (!testtag ||	     (testtag->contents == SGML_MIXED ||	      testtag->contents == SGML_ELEMENT ||	      testtag->contents == SGML_PCDATA ||#ifdef USE_PRETTYSRC	      testtag->contents == SGML_EMPTY ||#endif	      testtag->contents == SGML_RCDATA))) {	    /*	    **	Setting up for possible entity, without the leading '&'. - FM	    */	    string->size = 0;	    context->state = S_ero;	} else if (c == '<' && TOASCII(unsign_c) < 127) {  /* S/390 -- gil -- 0915 */	    /*	    **	Setting up for possible tag. - FM	    */	    string->size = 0;	    if (testtag && testtag->contents == SGML_PCDATA) {		context->state = S_pcdata;	    } else if (testtag && (testtag->contents == SGML_LITTERAL	    			|| testtag->contents == SGML_CDATA)) {		context->state = S_litteral;	    } else if (testtag && (testtag->contents == SGML_SCRIPT)) {		context->state = S_script;	    } else {		context->state = S_tag;	    }	    context->slashedtag = NULL;	} else if (context->slashedtag &&		   (c == '/' ||		    (c == '>' && context->state == S_tagname_slash)) &&		   TOASCII(unsign_c) < 127) {	    /*	    **	We got either the second slash of a pending "<NAME/blah blah/"	    **  shortref construct, or the '>' of a mere "<NAME/>".  In both	    **  cases generate a "</NAME>" end tag in the recover buffer for	    **  reparsing unless NAME is really an empty element. 
- kw	    */#ifdef USE_PRETTYSRC	    if (psrc_view) {		PSRCSTART(abracket);		PUTC(c);		PSRCSTOP(abracket);	    } else#endif	    if (context->slashedtag != context->unknown_tag &&		!ReallyEmptyTag(context->slashedtag)) {		if (context->recover == NULL) {		    StrAllocCopy(context->recover, "</");		    context->recover_index = 0;		} else {		    StrAllocCat(context->recover, "</");		}		StrAllocCat(context->recover, context->slashedtag->name);		StrAllocCat(context->recover, ">");	    }	    context->slashedtag = NULL;	} else if (context->element_stack &&		   (context->element_stack->tag->flags & Tgf_frecyc)) {	    /*	     *  The element stack says we are within the contents of an	     *  element that the next stage (HTML.c) may want to feed	     *  us back again (via the *include string).  So try to output	     *  text in UTF-8 if possible, using the same logic as for	     *  attribute values (which should be in line with what	     *  context->current_tag_charset indicates). - kw	     */	    if (context->T.decode_utf8 &&		*context->utf_buf) {		PUTS(context->utf_buf);		context->utf_buf_p = context->utf_buf;		*(context->utf_buf_p) = '\0';	    } else if (HTCJK == NOCJK &&		       (context->T.output_utf8 ||			context->T.trans_from_uni)) {		if (LYIsASCII(clong)) {		    PUTC(c);		} else if (clong == 0xfffd && saved_char_in &&		    HTPassEightBitRaw &&		    UCH(saved_char_in) >=		    LYlowest_eightbit[context->outUCLYhndl]) {		    PUTUTF8((0xf000 | UCH(saved_char_in)));		} else {		    PUTUTF8(clong);		}	    } else if (saved_char_in && context->T.use_raw_char_in) {		PUTC(saved_char_in);	    } else {		PUTC(c);	    }#define PASS8859SPECL context->T.pass_160_173_raw	/*	**  Convert 160 (nbsp) to Lynx special character if	**  neither HTPassHighCtrlRaw nor HTCJK is set. 
- FM	*/	} else if (unsign_c == CH_NBSP &&  /* S/390 -- gil -- 0932 */		   !context->no_lynx_specialcodes &&		   !(PASS8859SPECL || HTCJK != NOCJK)) {	    PUTC(HT_NON_BREAK_SPACE);	/*	**  Convert 173 (shy) to Lynx special character if	**  neither HTPassHighCtrlRaw nor HTCJK is set. - FM	*/	} else if (unsign_c == CH_SHY &&  /* S/390 -- gil -- 0949 */		   !context->no_lynx_specialcodes &&		   !(PASS8859SPECL || HTCJK != NOCJK)) {	    PUTC(LY_SOFT_HYPHEN);	/*	**  Handle the case in which we think we have a character	**  which doesn't need further processing (e.g., a koi8-r	**  input for a koi8-r output). - FM	*/	} else if (context->T.use_raw_char_in && saved_char_in) {	    /*	    **	Only if the original character is still in saved_char_in,	    **	otherwise we may be iterating from a goto top. - KW	    */	    PUTC(saved_char_in);	    saved_char_in = '\0';/****************************************************************** *   I. LATIN-1 OR UCS2	 TO  DISPLAY CHARSET ******************************************************************/	} else if ((chk = (BOOL) (context->T.trans_from_uni && TOASCII(unsign_c) >= 160)) &&  /* S/390 -- gil -- 0968 */		   (uck = UCTransUniChar(unsign_c,					 context->outUCLYhndl)) >= ' ' &&		   uck < 256) {	    CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n",			uck, FROMASCII((char)uck)));	    /*	    **	We got one octet from the conversions, so use it. - FM	    */	    PUTC(FROMASCII((char)uck));	} else if ((chk &&		   (uck == -4 ||		    (context->T.repl_translated_C0 &&		     uck > 0 && uck < 32))) &&		   /*		   **  Not found; look for replacement string. - KW		   */		   (uck = UCTransUniCharStr(replace_buf, 60, clong,					    context->outUCLYhndl,					    0) >= 0)) {	    /*	    **	Got a replacement string.	    **	No further tests for validity - assume that whoever	    **	defined replacement strings knew what she was doing. - KW	    */	    for (p = replace_buf; *p; p++)		PUTC(*p);	/*	**  If we're displaying UTF-8, try that now. 
- FM	*/	} else if (context->T.output_utf8 && PUTUTF8(clong)) {	    ; /* do nothing more */	/*	**  If it's any other (> 160) 8-bit character, and	**  we have not set HTPassEightBitRaw nor HTCJK, nor	**  have the "ISO Latin 1" character set selected,	**  back translate for our character set. - FM	*/#define IncludesLatin1Enc \		(context->outUCLYhndl == LATIN1 || \		 (context->outUCI && \		  (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))#define PASSHI8BIT (HTPassEightBitRaw || \		    (context->T.do_8bitraw && !context->T.trans_from_uni))	} else if (unsign_c > 160 && unsign_c < 256 &&		   !(PASSHI8BIT || HTCJK != NOCJK) &&		   !IncludesLatin1Enc) {	    int i;#ifdef USE_PRETTYSRC	    int psrc_view_backup = 0;#endif	    string->size = 0;	    EntityName = HTMLGetEntityName((int)(unsign_c - 160));	    for (i = 0; EntityName[i]; i++)		HTChunkPutc(string, EntityName[i]);	    HTChunkTerminate(string);#ifdef USE_PRETTYSRC	    /* we need to disable it temporary*/	    if (psrc_view) {		psrc_view_backup =1; psrc_view =0;	    }#endif	    handle_entity(context, '\0');#ifdef USE_PRETTYSRC	    /* we need to disable it temporary*/	    if (psrc_view_backup)		psrc_view = TRUE;#endif	    string->size = 0;	    if (!FoundEntity)		PUTC(';');	/*	**  If we get to here and have an ASCII char,	**  pass the character. - KW	*/	} else if (TOASCII(unsign_c) < 127 && unsign_c > 0) {  /* S/390 -- gil -- 0987 */	    PUTC(c);	/*	**  If we get to here, and should have translated,	**  translation has failed so far. - KW	**	**  We should have sent UTF-8 output to the parser	**  already, but what the heck, try again. - FM	*/	} else if (context->T.output_utf8 && *context->utf_buf) {	    for (p = context->utf_buf; *p; p++)		PUTC(*p);	    context->utf_buf_p = context->utf_buf;	    *(context->utf_buf_p) = '\0';#ifdef NOTDEFINED	/*	**  Check for a strippable koi8-r 8-bit character. 
- FM	*/	} else if (context->T.strip_raw_char_in && saved_char_in &&		   (UCH(saved_char_in) >= 0xc0) &&		   (UCH(saved_char_in) < 255)) {	    /*	    **	KOI8 special: strip high bit, gives (somewhat) readable	    **	ASCII or KOI7 - it was constructed that way! - KW	    */	    PUTC(((char)(saved_char_in & 0x7f)));	    saved_char_in = '\0';#endif /* NOTDEFINED */	/*	**  If we don't actually want the character,	**  make it safe and output that now. - FM	*/	} else if (TOASCII(UCH(c)) <	 /* S/390 -- gil -- 0997 */			LYlowest_eightbit[context->outUCLYhndl] ||		   (context->T.trans_from_uni && !HTPassEightBitRaw)) {#ifdef NOTUSED_FOTEMODS	    /*	    **	If we do not have the "7-bit approximations" as our	    **	output character set (in which case we did it already)	    **	seek a translation for that.  Otherwise, or if the	    **	translation fails, use UHHH notation. - FM	    */	    if ((chk = (context->outUCLYhndl !=			UCGetLYhndl_byMIME("us-ascii"))) &&		(uck = UCTransUniChar(unsign_c,				      UCGetLYhndl_byMIME("us-ascii")))				      >= ' ' && TOASCII(uck) < 127) {  /* S/390 -- gil -- 1008 */		/*		**  Got an ASCII character (yippey). - FM		*/		PUTC(((char)FROMASCII(TOASCII(uck) & 0xff)));	    } else if ((chk && uck == -4) &&		       (uck = UCTransUniCharStr(replace_buf,						60, clong,						UCGetLYhndl_byMIME("us-ascii"),						0) >= 0)) {		/*		**  Got a replacement string (yippey). - FM		*/		for (p = replace_buf; *p; p++)		    PUTC(*p);	    } else {#endif /* NOTUSED_FOTEMODS */		/*		**  Out of luck, so use the UHHH notation (ugh). - FM		*/			/* S/390 -- gil -- 1018 */			/* do not print UHHH for now		sprintf(replace_buf, "U%.2lX", TOASCII(unsign_c));		for (p = replace_buf; *p; p++) {		    PUTC(*p);		}			 */#ifdef NOTUSED_FOTEMODS	    }#endif /* NOTUSED_FOTEMODS */	/*	**  If we get to here, pass the character. - FM	*/	} else {	    PUTC(c);	}	break;    /*    **	Found '<' in SGML_PCDATA content; treat this mode nearly like    **  S_litteral, but recognize '<!' and '<?' 
to filter out comments    **  and processing instructions. - kw    */    case S_pcdata:	if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */	    if (c == '!') { /* <! */		/*		**	Terminate and set up for possible comment,		**	identifier, declaration, or marked section		**  as under S_tag. - kw		*/		context->state = S_exclamation;		context->lead_exclamation = TRUE;		context->doctype_bracket = FALSE;		context->first_bracket = FALSE;		HTChunkPutc(string, c);		break;	    } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */		CTRACE((tfp,			"SGML: Found PI in PCDATA, junking it until '>'\n"));#ifdef USE_PRETTYSRC		if (psrc_view) {		    PSRCSTART(abracket);PUTS("<?");PSRCSTOP(abracket);		    context->seen_nonwhite_in_junk_tag = TRUE; /* show all */		}#endif		context->state = S_junk_pi;		break;	    }	}	goto case_S_litteral;    /*    **  Found '<' in SGML_SCRIPT content; treat this mode nearly like    **  S_litteral, but recognize '<!' to allow the content to be treated    **  as a comment by lynx.    */    case S_script:	if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */	    if (c == '!') { /* <! */		/*		**	Terminate and set up for possible comment,		**	identifier, declaration, or marked section		**  as under S_tag. - kw		*/		context->state = S_exclamation;		context->lead_exclamation = TRUE;		context->doctype_bracket = FALSE;		context->first_bracket = FALSE;		HTChunkPutc(string, c);		break;	    }	}	goto case_S_litteral;    /*    **	In litteral mode, waits only for specific end tag (for    **	compatibility with old servers, and for Lynx). - FM    */    case_S_litteral:    case S_litteral: /*PSRC:this case not understood completely by HV, not done*/	HTChunkPutc(string, c);#ifdef USE_PRETTYSRC	if (psrc_view) { /*there is nothing useful in the element_stack*/	    testtag = context->current_tag;	} else#endif	    testtag = context->element_stack ?		context->element_stack->tag : NULL;	if (TOUPPER(c) != ((string->size == 1) ?					   
'/' :			   testtag->name[string->size-2])) {	    int i;	    /*	    **	If complete match, end litteral.	    */	    if ((c == '>') && testtag &&		string->size > 1 && !testtag->name[string->size-2]) {#ifdef USE_PRETTYSRC		if (psrc_view) {		    PSRCSTART(abracket);PUTC('<');PUTC('/');PSRCSTOP(abracket);		    PSRCSTART(tag);		    strcpy(string->data,context->current_tag->name);		    if (tagname_transform != 1) {			if (tagname_transform == 0)			    LYLowerCase(string->data);			else			    LYUpperCase(string->data);		    }		    PUTS(string->data);		    PSRCSTOP(tag);		    PSRCSTART(abracket);PUTC('>');PSRCSTOP(abracket);		    context->current_tag = NULL;		    string->size = 0;		    context->current_attribute_number = INVALID;		    context->state = S_text;		    break;		}#endif		end_element(context, context->element_stack->tag);		string->size = 0;		context->current_attribute_number = INVALID;		context->state = S_text;		break;	    }	    if (((testtag->contents != SGML_LITTERAL &&		  (testtag->flags & Tgf_strict)) ||		 (context->state == S_pcdata &&		  (testtag->flags & (Tgf_strict|Tgf_endO)))) &&		(string->size > 1 &&		 (c == '>' || string->size > 2 || IsNmStart(c)))) {		context->state = S_end;		string->size--;		for (i = 0; i < string->size; i++)  /* remove '/' */		    string->data[i] = string->data[i+1];		if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c))		    break;		string->size--;		goto top1;	    }	    if (context->state == S_pcdata &&		(testtag->flags & (Tgf_strict|Tgf_endO)) &&		(string->size == 1 && IsNmStart(c))) {		context->state = S_tag;		break;	    }	    /*	    **	If Mismatch: recover string literally.	    */	    PUTC('<');	    for (i = 0; i < string->size-1; i++)  /* recover, except last c */	       PUTC(string->data[i]);	    string->size = 0;	    context->state = S_text;	    goto top1;		/* to recover last c */	}	break;    /*    **	Character reference (numeric entity) or named entity.    
*/    case S_ero:	if (c == '#') {	    /*	    **	Setting up for possible numeric entity.	    */	    context->state = S_cro;  /* &# is Char Ref Open */	    break;	}	context->state = S_entity;   /* Fall through! */    /*    **	Handle possible named entity.    */    case S_entity:	if (TOASCII(unsign_c) < 127 && (string->size ?	/* S/390 -- gil -- 1029 */		  isalnum(UCH(c)) : isalpha(UCH(c)))) {	    /* Should probably use IsNmStart/IsNmChar above (is that right?),	       but the world is not ready for that - there's &nbsp: (note	       colon!) and stuff around. */	    /*	    **	Accept valid ASCII character. - FM	    */	    HTChunkPutc(string, c);	} else if (string->size == 0) {	    /*	    **	It was an ampersand that's just text, so output	    **	the ampersand and recycle this character. - FM	    */#ifdef USE_
上一页 1 2 3 4 5
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -