📄 sgml.c
字号:
if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */ (!testtag || (testtag->contents == SGML_MIXED || testtag->contents == SGML_ELEMENT || testtag->contents == SGML_PCDATA ||#ifdef USE_PRETTYSRC testtag->contents == SGML_EMPTY ||#endif testtag->contents == SGML_RCDATA))) { /* ** Setting up for possible entity, without the leading '&'. - FM */ string->size = 0; context->state = S_ero; } else if (c == '<' && TOASCII(unsign_c) < 127) { /* S/390 -- gil -- 0915 */ /* ** Setting up for possible tag. - FM */ string->size = 0; if (testtag && testtag->contents == SGML_PCDATA) { context->state = S_pcdata; } else if (testtag && (testtag->contents == SGML_LITTERAL || testtag->contents == SGML_CDATA)) { context->state = S_litteral; } else if (testtag && (testtag->contents == SGML_SCRIPT)) { context->state = S_script; } else { context->state = S_tag; } context->slashedtag = NULL; } else if (context->slashedtag && (c == '/' || (c == '>' && context->state == S_tagname_slash)) && TOASCII(unsign_c) < 127) { /* ** We got either the second slash of a pending "<NAME/blah blah/" ** shortref construct, or the '>' of a mere "<NAME/>". In both ** cases generate a "</NAME>" end tag in the recover buffer for ** reparsing unless NAME is really an empty element. - kw */#ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket); PUTC(c); PSRCSTOP(abracket); } else#endif if (context->slashedtag != context->unknown_tag && !ReallyEmptyTag(context->slashedtag)) { if (context->recover == NULL) { StrAllocCopy(context->recover, "</"); context->recover_index = 0; } else { StrAllocCat(context->recover, "</"); } StrAllocCat(context->recover, context->slashedtag->name); StrAllocCat(context->recover, ">"); } context->slashedtag = NULL; } else if (context->element_stack && (context->element_stack->tag->flags & Tgf_frecyc)) { /* * The element stack says we are within the contents of an * element that the next stage (HTML.c) may want to feed * us back again (via the *include string). So try to output * text in UTF-8 if possible, using the same logic as for * attribute values (which should be in line with what * context->current_tag_charset indicates). - kw */ if (context->T.decode_utf8 && *context->utf_buf) { PUTS(context->utf_buf); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0'; } else if (HTCJK == NOCJK && (context->T.output_utf8 || context->T.trans_from_uni)) { if (LYIsASCII(clong)) { PUTC(c); } else if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && UCH(saved_char_in) >= LYlowest_eightbit[context->outUCLYhndl]) { PUTUTF8((0xf000 | UCH(saved_char_in))); } else { PUTUTF8(clong); } } else if (saved_char_in && context->T.use_raw_char_in) { PUTC(saved_char_in); } else { PUTC(c); }#define PASS8859SPECL context->T.pass_160_173_raw /* ** Convert 160 (nbsp) to Lynx special character if ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM */ } else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */ !context->no_lynx_specialcodes && !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(HT_NON_BREAK_SPACE); /* ** Convert 173 (shy) to Lynx special character if ** neither HTPassHighCtrlRaw nor HTCJK is set. - FM */ } else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */ !context->no_lynx_specialcodes && !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(LY_SOFT_HYPHEN); /* ** Handle the case in which we think we have a character ** which doesn't need further processing (e.g., a koi8-r ** input for a koi8-r output). - FM */ } else if (context->T.use_raw_char_in && saved_char_in) { /* ** Only if the original character is still in saved_char_in, ** otherwise we may be iterating from a goto top. - KW */ PUTC(saved_char_in); saved_char_in = '\0';/****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ } else if ((chk = (BOOL) (context->T.trans_from_uni && TOASCII(unsign_c) >= 160)) && /* S/390 -- gil -- 0968 */ (uck = UCTransUniChar(unsign_c, context->outUCLYhndl)) >= ' ' && uck < 256) { CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n", uck, FROMASCII((char)uck))); /* ** We got one octet from the conversions, so use it. - FM */ PUTC(FROMASCII((char)uck)); } else if ((chk && (uck == -4 || (context->T.repl_translated_C0 && uck > 0 && uck < 32))) && /* ** Not found; look for replacement string. - KW */ (uck = UCTransUniCharStr(replace_buf, 60, clong, context->outUCLYhndl, 0) >= 0)) { /* ** Got a replacement string. ** No further tests for validity - assume that whoever ** defined replacement strings knew what she was doing. - KW */ for (p = replace_buf; *p; p++) PUTC(*p); /* ** If we're displaying UTF-8, try that now. - FM */ } else if (context->T.output_utf8 && PUTUTF8(clong)) { ; /* do nothing more */ /* ** If it's any other (> 160) 8-bit character, and ** we have not set HTPassEightBitRaw nor HTCJK, nor ** have the "ISO Latin 1" character set selected, ** back translate for our character set. - FM */#define IncludesLatin1Enc \ (context->outUCLYhndl == LATIN1 || \ (context->outUCI && \ (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))#define PASSHI8BIT (HTPassEightBitRaw || \ (context->T.do_8bitraw && !context->T.trans_from_uni)) } else if (unsign_c > 160 && unsign_c < 256 && !(PASSHI8BIT || HTCJK != NOCJK) && !IncludesLatin1Enc) { int i;#ifdef USE_PRETTYSRC int psrc_view_backup = 0;#endif string->size = 0; EntityName = HTMLGetEntityName((int)(unsign_c - 160)); for (i = 0; EntityName[i]; i++) HTChunkPutc(string, EntityName[i]); HTChunkTerminate(string);#ifdef USE_PRETTYSRC /* we need to disable it temporary*/ if (psrc_view) { psrc_view_backup =1; psrc_view =0; }#endif handle_entity(context, '\0');#ifdef USE_PRETTYSRC /* we need to disable it temporary*/ if (psrc_view_backup) psrc_view = TRUE;#endif string->size = 0; if (!FoundEntity) PUTC(';'); /* ** If we get to here and have an ASCII char, ** pass the character. - KW */ } else if (TOASCII(unsign_c) < 127 && unsign_c > 0) { /* S/390 -- gil -- 0987 */ PUTC(c); /* ** If we get to here, and should have translated, ** translation has failed so far. - KW ** ** We should have sent UTF-8 output to the parser ** already, but what the heck, try again. - FM */ } else if (context->T.output_utf8 && *context->utf_buf) { for (p = context->utf_buf; *p; p++) PUTC(*p); context->utf_buf_p = context->utf_buf; *(context->utf_buf_p) = '\0';#ifdef NOTDEFINED /* ** Check for a strippable koi8-r 8-bit character. - FM */ } else if (context->T.strip_raw_char_in && saved_char_in && (UCH(saved_char_in) >= 0xc0) && (UCH(saved_char_in) < 255)) { /* ** KOI8 special: strip high bit, gives (somewhat) readable ** ASCII or KOI7 - it was constructed that way! - KW */ PUTC(((char)(saved_char_in & 0x7f))); saved_char_in = '\0';#endif /* NOTDEFINED */ /* ** If we don't actually want the character, ** make it safe and output that now. - FM */ } else if (TOASCII(UCH(c)) < /* S/390 -- gil -- 0997 */ LYlowest_eightbit[context->outUCLYhndl] || (context->T.trans_from_uni && !HTPassEightBitRaw)) {#ifdef NOTUSED_FOTEMODS /* ** If we do not have the "7-bit approximations" as our ** output character set (in which case we did it already) ** seek a translation for that. Otherwise, or if the ** translation fails, use UHHH notation. - FM */ if ((chk = (context->outUCLYhndl != UCGetLYhndl_byMIME("us-ascii"))) && (uck = UCTransUniChar(unsign_c, UCGetLYhndl_byMIME("us-ascii"))) >= ' ' && TOASCII(uck) < 127) { /* S/390 -- gil -- 1008 */ /* ** Got an ASCII character (yippey). - FM */ PUTC(((char)FROMASCII(TOASCII(uck) & 0xff))); } else if ((chk && uck == -4) && (uck = UCTransUniCharStr(replace_buf, 60, clong, UCGetLYhndl_byMIME("us-ascii"), 0) >= 0)) { /* ** Got a replacement string (yippey). - FM */ for (p = replace_buf; *p; p++) PUTC(*p); } else {#endif /* NOTUSED_FOTEMODS */ /* ** Out of luck, so use the UHHH notation (ugh). - FM */ /* S/390 -- gil -- 1018 */ /* do not print UHHH for now sprintf(replace_buf, "U%.2lX", TOASCII(unsign_c)); for (p = replace_buf; *p; p++) { PUTC(*p); } */#ifdef NOTUSED_FOTEMODS }#endif /* NOTUSED_FOTEMODS */ /* ** If we get to here, pass the character. - FM */ } else { PUTC(c); } break; /* ** Found '<' in SGML_PCDATA content; treat this mode nearly like ** S_litteral, but recognize '<!' and '<?' to filter out comments ** and processing instructions. - kw */ case S_pcdata: if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ if (c == '!') { /* <! */ /* ** Terminate and set up for possible comment, ** identifier, declaration, or marked section ** as under S_tag. - kw */ context->state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; context->first_bracket = FALSE; HTChunkPutc(string, c); break; } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */ CTRACE((tfp, "SGML: Found PI in PCDATA, junking it until '>'\n"));#ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket);PUTS("<?");PSRCSTOP(abracket); context->seen_nonwhite_in_junk_tag = TRUE; /* show all */ }#endif context->state = S_junk_pi; break; } } goto case_S_litteral; /* ** Found '<' in SGML_SCRIPT content; treat this mode nearly like ** S_litteral, but recognize '<!' to allow the content to be treated ** as a comment by lynx. */ case S_script: if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ if (c == '!') { /* <! */ /* ** Terminate and set up for possible comment, ** identifier, declaration, or marked section ** as under S_tag. - kw */ context->state = S_exclamation; context->lead_exclamation = TRUE; context->doctype_bracket = FALSE; context->first_bracket = FALSE; HTChunkPutc(string, c); break; } } goto case_S_litteral; /* ** In litteral mode, waits only for specific end tag (for ** compatibility with old servers, and for Lynx). - FM */ case_S_litteral: case S_litteral: /*PSRC:this case not understood completely by HV, not done*/ HTChunkPutc(string, c);#ifdef USE_PRETTYSRC if (psrc_view) { /*there is nothing useful in the element_stack*/ testtag = context->current_tag; } else#endif testtag = context->element_stack ? context->element_stack->tag : NULL; if (TOUPPER(c) != ((string->size == 1) ? '/' : testtag->name[string->size-2])) { int i; /* ** If complete match, end litteral. */ if ((c == '>') && testtag && string->size > 1 && !testtag->name[string->size-2]) {#ifdef USE_PRETTYSRC if (psrc_view) { PSRCSTART(abracket);PUTC('<');PUTC('/');PSRCSTOP(abracket); PSRCSTART(tag); strcpy(string->data,context->current_tag->name); if (tagname_transform != 1) { if (tagname_transform == 0) LYLowerCase(string->data); else LYUpperCase(string->data); } PUTS(string->data); PSRCSTOP(tag); PSRCSTART(abracket);PUTC('>');PSRCSTOP(abracket); context->current_tag = NULL; string->size = 0; context->current_attribute_number = INVALID; context->state = S_text; break; }#endif end_element(context, context->element_stack->tag); string->size = 0; context->current_attribute_number = INVALID; context->state = S_text; break; } if (((testtag->contents != SGML_LITTERAL && (testtag->flags & Tgf_strict)) || (context->state == S_pcdata && (testtag->flags & (Tgf_strict|Tgf_endO)))) && (string->size > 1 && (c == '>' || string->size > 2 || IsNmStart(c)))) { context->state = S_end; string->size--; for (i = 0; i < string->size; i++) /* remove '/' */ string->data[i] = string->data[i+1]; if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c)) break; string->size--; goto top1; } if (context->state == S_pcdata && (testtag->flags & (Tgf_strict|Tgf_endO)) && (string->size == 1 && IsNmStart(c))) { context->state = S_tag; break; } /* ** If Mismatch: recover string literally. */ PUTC('<'); for (i = 0; i < string->size-1; i++) /* recover, except last c */ PUTC(string->data[i]); string->size = 0; context->state = S_text; goto top1; /* to recover last c */ } break; /* ** Character reference (numeric entity) or named entity. */ case S_ero: if (c == '#') { /* ** Setting up for possible numeric entity. */ context->state = S_cro; /* &# is Char Ref Open */ break; } context->state = S_entity; /* Fall through! */ /* ** Handle possible named entity. */ case S_entity: if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */ isalnum(UCH(c)) : isalpha(UCH(c)))) { /* Should probably use IsNmStart/IsNmChar above (is that right?), but the world is not ready for that - there's  : (note colon!) and stuff around. */ /* ** Accept valid ASCII character. - FM */ HTChunkPutc(string, c); } else if (string->size == 0) { /* ** It was an ampersand that's just text, so output ** the ampersand and recycle this character. - FM */#ifdef USE_
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -