📄 htplain.c

📁 用于linux和其他unix下面的
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
		    } else if ((*p & 0xf8) == 0xf0) {			me->utf_count = 3;			me->utf_char = (c & 0x07);		    } else if ((*p & 0xfc) == 0xf8) {			me->utf_count = 4;			me->utf_char = (c & 0x03);		    } else if ((*p & 0xfe) == 0xfc) {			me->utf_count = 5;			me->utf_char = (c & 0x01);		    } else {			/*			 *  We got garbage, so ignore it. - FM			 */			me->utf_count = 0;			me->utf_buf_p[0] = '\0';			me->utf_buf_p = me->utf_buf;		    }		    /*		    **	Get the next byte. - FM		    */		    continue;		}	    } else if (me->utf_count > 0) {		/*		**  Got an ASCII character when expecting		**  UTF-8 multibytes, so ignore the buffered		**  multibye characters and fall through with		**  the current ASCII character. - FM		*/		me->utf_count = 0;		me->utf_buf[0] = '\0';		me->utf_buf_p = me->utf_buf;		code = (UCode_t)c_unsign;	    } else {		/*		**  Got a valid ASCII character, so fall		**  through with it. - FM		*/		code = (UCode_t)c_unsign;	    }	}	/*	**  Convert characters from non-UTF-8 charsets	**  to Unicode (if appropriate). - FM	*/	if (!(me->T.decode_utf8 &&	      UCH(*p) > 127)) {#ifdef NOTDEFINED	    if (me->T.strip_raw_char_in)		saved_char_in = c;#endif /* NOTDEFINED */	if (me->T.trans_to_uni &&	    (TOASCII(code) >= LYlowest_eightbit[me->inUCLYhndl] ||  /* S/390 -- gil -- 0389 */	     (code < ' ' && code != 0 &&	     me->T.trans_C0_to_uni))) {		/*		**  Convert the octet to Unicode. - FM		*/	    code = (UCode_t)UCTransToUni(c, me->inUCLYhndl);	    if (code > 0) {		    saved_char_in = c;		if (code < 256) {			c = FROMASCII((char)code);			c_unsign = UCH(c);		}	    }	    } else if (code < 32 && code != 0 &&		       me->T.trans_C0_to_uni) {		/*		**  Quote from SGML.c:		**  	"This else if may be too ugly to keep. 
- KW"		*/		if (me->T.trans_from_uni &&		    (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) ||		     (me->T.transp &&		      (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) {		    saved_char_in = c;		    if (code < 256) {			c = FROMASCII((char)code);			c_unsign = UCH(c);		    }		} else {		    uck = -1;		    if (me->T.transp) {			uck = UCTransCharStr(replace_buf, 60, c,					     me->inUCLYhndl,					     me->inUCLYhndl, NO);		    }		    if (!me->T.transp || uck < 0) {			uck = UCTransCharStr(replace_buf, 60, c,					     me->inUCLYhndl,					     me->outUCLYhndl, YES);		    }		    if (uck == 0) {			continue;		    } else if (uck < 0) {			me->utf_buf[0] = '\0';			code = UCH(c);		    } else {			c = replace_buf[0];			if (c && replace_buf[1]) {			    HText_appendText(me->text, replace_buf);			    continue;			}		    }		    me->utf_buf[0] = '\0';		    code = UCH(c);		} /*  Next line end of ugly stuff for C0. - KW */	    } else {		me->utf_buf[0] = '\0';		code = UCH(c);	    }	}	/*	**  At this point we have either code in Unicode	**  (and c in latin1 if code is in the latin1 range),	**  or code and c will have to be passed raw.	*/	/*	**  If CJK mode is on, we'll assume the document matches	**  the user's display character set, and if not, the	**  user should toggle off raw/CJK mode to reload. - FM	*/	if (HTCJK != NOCJK) {	    HText_appendCharacter(me->text, c);#define PASSHICTRL (me->T.transp || \		    code >= LYlowest_eightbit[me->inUCLYhndl])#define PASS8859SPECL me->T.pass_160_173_raw#define PASSHI8BIT (HTPassEightBitRaw || \		    (me->T.do_8bitraw && !me->T.trans_from_uni))	/*	**  If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the	**  document matches and pass 127-160 8-bit characters.  If it	**  doesn't match, the user should toggle raw/CJK mode off. 
- FM	*/	} else if (TOASCII(code) >= 127 && TOASCII(code) < 161 &&  /* S/390 -- gil -- 0427 */		   PASSHICTRL && PASS8859SPECL) {	    HText_appendCharacter(me->text, c);	} else if (code == CH_SHY && PASS8859SPECL) {	    HText_appendCharacter(me->text, c);	/*	**  If neither HTPassHighCtrlRaw nor CJK is set, play it safe	**  and treat 160 (nbsp) as an ASCII space (32). - FM	*/	} else if (code == CH_NBSP) {	    HText_appendCharacter(me->text, ' ');	/*	**  If neither HTPassHighCtrlRaw nor CJK is set, play it safe	**  and ignore 173 (shy). - FM	**  Now only ignore it for color style, which doesn't handle it anyway.	**  Otherwise pass it on as LY_SOFT_HYPHEN and let HText deal with it.	**  It should be either ignored, or displayed as a hyphen if it was	**  indeed at the end of a line.  Well it should. - kw	*/	} else if (code == CH_SHY) {#ifndef USE_COLOR_STYLE	    HText_appendCharacter(me->text, LY_SOFT_HYPHEN);#endif	    continue;	/*	**  If we get to here, pass the displayable ASCII characters. - FM	*/	} else if ((code >= ' ' && TOASCII(code) < 127) ||		   (PASSHI8BIT &&		    c >= LYlowest_eightbit[me->outUCLYhndl]) ||		   *p == '\n' || *p == '\t') {	    HText_appendCharacter(me->text, c);	/*	**  Use an ASCII space (32) for ensp, emsp or thinsp. - FM	*/	} else if (code == 8194 || code == 8195 || code == 8201) {	    HText_appendCharacter(me->text, ' ');	/*	**  If we want the raw character, pass it now. - FM	*/	} else if (me->T.use_raw_char_in && saved_char_in) {	    HText_appendCharacter(me->text, saved_char_in);/****************************************************************** *   I. 
LATIN-1 OR UCS2  TO  DISPLAY CHARSET ******************************************************************/	} else if ((chk = (BOOL) (me->T.trans_from_uni && code >= 160)) &&		   (uck = UCTransUniChar(code,					 me->outUCLYhndl)) >= ' ' &&  /* S/390 -- gil -- 0464 */		   uck < 256) {	    CTRACE((tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n",			uck, FROMASCII((char)uck)));	    HText_appendCharacter(me->text, ((char)(uck & 0xff)));	} else if (chk &&		   (uck == -4 ||		    (me->T.repl_translated_C0 && uck > 0 && uck < ' ')) &&  /* S/390 -- gil -- 0481 */		   /*		   **  Not found; look for replacement string.		   */		   (uck = UCTransUniCharStr(replace_buf, 60, code,					    me->outUCLYhndl, 0) >= 0)) {	    /*	    **	No further tests for valididy - assume that whoever	    **	defined replacement strings knew what she was doing.	    */	    HText_appendText(me->text, replace_buf);	/*	**  If we get to here, and should have translated,	**  translation has failed so far.	*/	} else if (chk && TOASCII(code) > 127 && me->T.output_utf8) {  /* S/390 -- gil -- 0498 */	    /*	    **	We want UTF-8 output, so do it now. - FM	    */	    if (*me->utf_buf) {		HText_appendText(me->text, me->utf_buf);		me->utf_buf[0] = '\0';		me->utf_buf_p = me->utf_buf;	    } else if (UCConvertUniToUtf8(code, replace_buf)) {		HText_appendText(me->text, replace_buf);	    } else {		/*		**  Out of luck, so use the UHHH notation (ugh). - gil		*/  /* S/390 -- gil -- 0517 */		sprintf(replace_buf, "U%.2lX", TOASCII(code));		HText_appendText(me->text, replace_buf);	    }#ifdef NOTDEFINED	} else if (me->T.strip_raw_char_in &&		   UCH(*p) >= 192 &&		   UCH(*p) < 255) {	    /*	    **	KOI special: strip high bit, gives	    **	(somewhat) readable ASCII.	    */	    HText_appendCharacter(me->text, (char)(*p & 0x7f));#endif /* NOTDEFINED */	    /*	    **  If we don't actually want the character,	    **  make it safe and output that now. 
- FM	    */	} else if ((c_unsign > 0 &&		      (int) c_unsign < LYlowest_eightbit[me->outUCLYhndl]) ||		      (me->T.trans_from_uni && !HTPassEightBitRaw)) {	    /*	    **	If we do not have the "7-bit approximations" as our	    **	output character set (in which case we did it already)	    **	seek a translation for that.  Otherwise, or if the	    **	translation fails, use UHHH notation. - FM	    */	    if ((chk = (BOOL) (me->outUCLYhndl !=			UCGetLYhndl_byMIME("us-ascii"))) &&		   (uck = UCTransUniChar(code,					 UCGetLYhndl_byMIME("us-ascii")))				      >= ' ' && TOASCII(uck) < 127) {  /* S/390 -- gil -- 0535 */		/*		**  Got an ASCII character (yippey). - FM		*/		c = FROMASCII((char)uck);		HText_appendCharacter(me->text, c);	    } else if ((chk && uck == -4) &&		       (uck = UCTransUniCharStr(replace_buf,						60, code,						UCGetLYhndl_byMIME("us-ascii"),						0) >= 0)) {		/*		**  Got a repacement string (yippey). - FM		*/		HText_appendText(me->text, replace_buf);	    } else if (code == 8204 || code == 8205) {		/*		**	Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM		*/		CTRACE((tfp, "HTPlain_write: Ignoring '%ld'.\n", code));	    } else if (code == 8206 || code == 8207) {		/*		**	Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM		*/		CTRACE((tfp, "HTPlain_write: Ignoring '%ld'.\n", code));	    } else {		/*		**  Out of luck, so use the UHHH notation (ugh). - FM		*/			/* do not print UHHH for now			sprintf(replace_buf, "U%.2lX", code);			HText_appendText(me->text, replace_buf);			*/		}		/*		**  If we get to here and have a monobyte character,		**  pass it. 
- FM
		*/
	} else if (c_unsign != 0 && c_unsign < 256) {
	    HText_appendCharacter(me->text, c);
	}
#endif /* REMOVE_CR_ONLY */
    }
}

/*	Free a plain-text presenter stream
**	----------------------------------
**
**	If HTPlain_bs_pending is 2 or more, a single '_' is appended to the
**	stream's text object first.  Then the stream structure itself is
**	released with FREE().
**
**	me->text is not released here; only the HTStream wrapper is freed.
**
**	NOTE(review): HTPlain_bs_pending is maintained outside this block;
**	presumably it tracks an unfinished underscore/backspace sequence --
**	confirm against the code that sets it.
*/
PRIVATE void HTPlain_free ARGS1(
	HTStream *,	me)
{
    if (HTPlain_bs_pending >= 2)
	HText_appendCharacter(me->text, '_');
    FREE(me);
}

/*	Abort writing
**	-------------
**
**	Delegates to HTPlain_free(); the error argument e is not used.
*/
PRIVATE void HTPlain_abort ARGS2(
	HTStream *,	me,
	HTError,	e GCC_UNUSED)
{
    HTPlain_free(me);
}

/*		Structured Object Class
**		-----------------------
**
**	Stream class table for the plain-text presenter.  The initializers
**	are, in order: the class name string, then HTPlain_free,
**	HTPlain_abort, HTPlain_put_character, HTPlain_put_string and
**	HTPlain_write.
**
**	NOTE(review): the member names of HTStreamClass are declared
**	elsewhere; verify that this order matches that declaration.
*/
PUBLIC CONST HTStreamClass HTPlain =
{
	"PlainPresenter",
	HTPlain_free,
	HTPlain_abort,
	HTPlain_put_character,
	HTPlain_put_string, HTPlain_write,
};

/*		New object
**		----------
**
**	Allocates and initializes a plain-text presenter stream for anchor.
**
**	On entry,
**		pres	is not used.
**		anchor	supplies the character set handles and is passed
**			to HText_new().
**		sink	is not used.
**
**	On exit,
**		returns	the newly allocated stream, with its text object
**			created, styled with LYstyles(HTML_XMP) and opened
**			for appending.
*/
PUBLIC HTStream* HTPlainPresent ARGS3(
	HTPresentation *,	pres GCC_UNUSED,
	HTParentAnchor *,	anchor,
	HTStream *,		sink GCC_UNUSED)
{
    HTStream* me = (HTStream*)malloc(sizeof(*me));
    /*
    **  NOTE(review): me is dereferenced unconditionally below, so
    **  outofmem() is presumably expected not to return -- confirm.
    **  The name passed to it ("HTPlain_new") differs from this
    **  function's name.
    */
    if (me == NULL)
	outofmem(__FILE__, "HTPlain_new");
    me->isa = &HTPlain;

    /*
    **  Reset the file-level raw-character state and this stream's
    **  UTF-8 decoding state.  Only elements 0, 6 and 7 of utf_buf
    **  are set to NUL here.
    */
    HTPlain_lastraw = -1;

    me->utf_count = 0;
    me->utf_char = 0;
    me->utf_buf[0] = me->utf_buf[6] =me->utf_buf[7] = '\0';
    me->utf_buf_p = me->utf_buf;

    /*
    **  Fetch the charset handles recorded on the anchor: the HTEXT
    **  stage is stored as the output handle, the PARSER stage as the
    **  input handle.  Then set up the translation parameters in me->T.
    */
    me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_HTEXT);
    me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_PARSER);
    HTPlain_getChartransInfo(me, anchor);
    UCSetTransParams(&me->T,
		     me->inUCLYhndl, me->inUCI,
		     me->outUCLYhndl,
		     HTAnchor_getUCInfoStage(anchor,UCT_STAGE_HTEXT));

    /*
    **  Create the text object for this anchor, select the XMP style
    **  and begin appending to it.
    */
    me->text = HText_new(anchor);
    HText_setStyle(me->text, LYstyles(HTML_XMP) );
    HText_beginAppend(me->text);

    return (HTStream*) me;
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -