📄 htplain.c
字号:
} else if ((*p & 0xf8) == 0xf0) { me->utf_count = 3; me->utf_char = (c & 0x07); } else if ((*p & 0xfc) == 0xf8) { me->utf_count = 4; me->utf_char = (c & 0x03); } else if ((*p & 0xfe) == 0xfc) { me->utf_count = 5; me->utf_char = (c & 0x01); } else { /* * We got garbage, so ignore it. - FM */ me->utf_count = 0; me->utf_buf_p[0] = '\0'; me->utf_buf_p = me->utf_buf; } /* ** Get the next byte. - FM */ continue; } } else if (me->utf_count > 0) { /* ** Got an ASCII character when expecting ** UTF-8 multibytes, so ignore the buffered ** multibye characters and fall through with ** the current ASCII character. - FM */ me->utf_count = 0; me->utf_buf[0] = '\0'; me->utf_buf_p = me->utf_buf; code = (UCode_t)c_unsign; } else { /* ** Got a valid ASCII character, so fall ** through with it. - FM */ code = (UCode_t)c_unsign; } } /* ** Convert characters from non-UTF-8 charsets ** to Unicode (if appropriate). - FM */ if (!(me->T.decode_utf8 && UCH(*p) > 127)) {#ifdef NOTDEFINED if (me->T.strip_raw_char_in) saved_char_in = c;#endif /* NOTDEFINED */ if (me->T.trans_to_uni && (TOASCII(code) >= LYlowest_eightbit[me->inUCLYhndl] || /* S/390 -- gil -- 0389 */ (code < ' ' && code != 0 && me->T.trans_C0_to_uni))) { /* ** Convert the octet to Unicode. - FM */ code = (UCode_t)UCTransToUni(c, me->inUCLYhndl); if (code > 0) { saved_char_in = c; if (code < 256) { c = FROMASCII((char)code); c_unsign = UCH(c); } } } else if (code < 32 && code != 0 && me->T.trans_C0_to_uni) { /* ** Quote from SGML.c: ** "This else if may be too ugly to keep. - KW" */ if (me->T.trans_from_uni && (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) || (me->T.transp && (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) { saved_char_in = c; if (code < 256) { c = FROMASCII((char)code); c_unsign = UCH(c); } } else { uck = -1; if (me->T.transp) { uck = UCTransCharStr(replace_buf, 60, c, me->inUCLYhndl, me->inUCLYhndl, NO); } if (!me->T.transp || uck < 0) { uck = UCTransCharStr(replace_buf, 60, c, me->inUCLYhndl, me->outUCLYhndl, YES); } if (uck == 0) { continue; } else if (uck < 0) { me->utf_buf[0] = '\0'; code = UCH(c); } else { c = replace_buf[0]; if (c && replace_buf[1]) { HText_appendText(me->text, replace_buf); continue; } } me->utf_buf[0] = '\0'; code = UCH(c); } /* Next line end of ugly stuff for C0. - KW */ } else { me->utf_buf[0] = '\0'; code = UCH(c); } } /* ** At this point we have either code in Unicode ** (and c in latin1 if code is in the latin1 range), ** or code and c will have to be passed raw. */ /* ** If CJK mode is on, we'll assume the document matches ** the user's display character set, and if not, the ** user should toggle off raw/CJK mode to reload. - FM */ if (HTCJK != NOCJK) { HText_appendCharacter(me->text, c);#define PASSHICTRL (me->T.transp || \ code >= LYlowest_eightbit[me->inUCLYhndl])#define PASS8859SPECL me->T.pass_160_173_raw#define PASSHI8BIT (HTPassEightBitRaw || \ (me->T.do_8bitraw && !me->T.trans_from_uni)) /* ** If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the ** document matches and pass 127-160 8-bit characters. If it ** doesn't match, the user should toggle raw/CJK mode off. - FM */ } else if (TOASCII(code) >= 127 && TOASCII(code) < 161 && /* S/390 -- gil -- 0427 */ PASSHICTRL && PASS8859SPECL) { HText_appendCharacter(me->text, c); } else if (code == CH_SHY && PASS8859SPECL) { HText_appendCharacter(me->text, c); /* ** If neither HTPassHighCtrlRaw nor CJK is set, play it safe ** and treat 160 (nbsp) as an ASCII space (32). - FM */ } else if (code == CH_NBSP) { HText_appendCharacter(me->text, ' '); /* ** If neither HTPassHighCtrlRaw nor CJK is set, play it safe ** and ignore 173 (shy). - FM ** Now only ignore it for color style, which doesn't handle it anyway. ** Otherwise pass it on as LY_SOFT_HYPHEN and let HText deal with it. ** It should be either ignored, or displayed as a hyphen if it was ** indeed at the end of a line. Well it should. - kw */ } else if (code == CH_SHY) {#ifndef USE_COLOR_STYLE HText_appendCharacter(me->text, LY_SOFT_HYPHEN);#endif continue; /* ** If we get to here, pass the displayable ASCII characters. - FM */ } else if ((code >= ' ' && TOASCII(code) < 127) || (PASSHI8BIT && c >= LYlowest_eightbit[me->outUCLYhndl]) || *p == '\n' || *p == '\t') { HText_appendCharacter(me->text, c); /* ** Use an ASCII space (32) for ensp, emsp or thinsp. - FM */ } else if (code == 8194 || code == 8195 || code == 8201) { HText_appendCharacter(me->text, ' '); /* ** If we want the raw character, pass it now. - FM */ } else if (me->T.use_raw_char_in && saved_char_in) { HText_appendCharacter(me->text, saved_char_in);/****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ } else if ((chk = (BOOL) (me->T.trans_from_uni && code >= 160)) && (uck = UCTransUniChar(code, me->outUCLYhndl)) >= ' ' && /* S/390 -- gil -- 0464 */ uck < 256) { CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n", uck, FROMASCII((char)uck))); HText_appendCharacter(me->text, ((char)(uck & 0xff))); } else if (chk && (uck == -4 || (me->T.repl_translated_C0 && uck > 0 && uck < ' ')) && /* S/390 -- gil -- 0481 */ /* ** Not found; look for replacement string. */ (uck = UCTransUniCharStr(replace_buf, 60, code, me->outUCLYhndl, 0) >= 0)) { /* ** No further tests for valididy - assume that whoever ** defined replacement strings knew what she was doing. */ HText_appendText(me->text, replace_buf); /* ** If we get to here, and should have translated, ** translation has failed so far. */ } else if (chk && TOASCII(code) > 127 && me->T.output_utf8) { /* S/390 -- gil -- 0498 */ /* ** We want UTF-8 output, so do it now. - FM */ if (*me->utf_buf) { HText_appendText(me->text, me->utf_buf); me->utf_buf[0] = '\0'; me->utf_buf_p = me->utf_buf; } else if (UCConvertUniToUtf8(code, replace_buf)) { HText_appendText(me->text, replace_buf); } else { /* ** Out of luck, so use the UHHH notation (ugh). - gil */ /* S/390 -- gil -- 0517 */ sprintf(replace_buf, "U%.2lX", TOASCII(code)); HText_appendText(me->text, replace_buf); }#ifdef NOTDEFINED } else if (me->T.strip_raw_char_in && UCH(*p) >= 192 && UCH(*p) < 255) { /* ** KOI special: strip high bit, gives ** (somewhat) readable ASCII. */ HText_appendCharacter(me->text, (char)(*p & 0x7f));#endif /* NOTDEFINED */ /* ** If we don't actually want the character, ** make it safe and output that now. - FM */ } else if ((c_unsign > 0 && (int) c_unsign < LYlowest_eightbit[me->outUCLYhndl]) || (me->T.trans_from_uni && !HTPassEightBitRaw)) { /* ** If we do not have the "7-bit approximations" as our ** output character set (in which case we did it already) ** seek a translation for that. Otherwise, or if the ** translation fails, use UHHH notation. - FM */ if ((chk = (BOOL) (me->outUCLYhndl != UCGetLYhndl_byMIME("us-ascii"))) && (uck = UCTransUniChar(code, UCGetLYhndl_byMIME("us-ascii"))) >= ' ' && TOASCII(uck) < 127) { /* S/390 -- gil -- 0535 */ /* ** Got an ASCII character (yippey). - FM */ c = FROMASCII((char)uck); HText_appendCharacter(me->text, c); } else if ((chk && uck == -4) && (uck = UCTransUniCharStr(replace_buf, 60, code, UCGetLYhndl_byMIME("us-ascii"), 0) >= 0)) { /* ** Got a repacement string (yippey). - FM */ HText_appendText(me->text, replace_buf); } else if (code == 8204 || code == 8205) { /* ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM */ CTRACE((tfp, "HTPlain_write: Ignoring '%ld'.\n", code)); } else if (code == 8206 || code == 8207) { /* ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM */ CTRACE((tfp, "HTPlain_write: Ignoring '%ld'.\n", code)); } else { /* ** Out of luck, so use the UHHH notation (ugh). - FM */ /* do not print UHHH for now sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); */ } /* ** If we get to here and have a monobyte character, ** pass it. - FM */ } else if (c_unsign != 0 && c_unsign < 256) { HText_appendCharacter(me->text, c); }#endif /* REMOVE_CR_ONLY */ }}/* Free an HTML object** -------------------**** Note that the SGML parsing context is freed, but the created object is** not, as it takes on an existence of its own unless explicitly freed.*/PRIVATE void HTPlain_free ARGS1( HTStream *, me){ if (HTPlain_bs_pending >= 2) HText_appendCharacter(me->text, '_'); FREE(me);}/* End writing*/PRIVATE void HTPlain_abort ARGS2( HTStream *, me, HTError, e GCC_UNUSED){ HTPlain_free(me);}/* Structured Object Class** -----------------------*/PUBLIC CONST HTStreamClass HTPlain ={ "PlainPresenter", HTPlain_free, HTPlain_abort, HTPlain_put_character, HTPlain_put_string, HTPlain_write,};/* New object** ----------*/PUBLIC HTStream* HTPlainPresent ARGS3( HTPresentation *, pres GCC_UNUSED, HTParentAnchor *, anchor, HTStream *, sink GCC_UNUSED){ HTStream* me = (HTStream*)malloc(sizeof(*me)); if (me == NULL) outofmem(__FILE__, "HTPlain_new"); me->isa = &HTPlain; HTPlain_lastraw = -1; me->utf_count = 0; me->utf_char = 0; me->utf_buf[0] = me->utf_buf[6] =me->utf_buf[7] = '\0'; me->utf_buf_p = me->utf_buf; me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_HTEXT); me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_PARSER); HTPlain_getChartransInfo(me, anchor); UCSetTransParams(&me->T, me->inUCLYhndl, me->inUCI, me->outUCLYhndl, HTAnchor_getUCInfoStage(anchor,UCT_STAGE_HTEXT)); me->text = HText_new(anchor); HText_setStyle(me->text, LYstyles(HTML_XMP) ); HText_beginAppend(me->text); return (HTStream*) me;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -