📄 htplain.c

📁 用于linux和其他unix下面的
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*		Plain text object		HTWrite.c
**		=================
**
**	This version of the stream object just writes to a socket.
**	The socket is assumed open and left open.
**
**	Bugs:
**		strings written must be less than buffer size.
**
**	NOTE(review): the description above looks inherited from another
**	module; the routines visible in this file append to an HText object
**	(HText_appendCharacter / HText_appendText), not to a socket - confirm
**	and refresh the header.
*/
#include <HTUtils.h>
#include <LYCharVals.h>  /* S/390 -- gil -- 0288 */
#include <HTPlain.h>
#include <HTChunk.h>
#include <HText.h>
#include <HTStyle.h>
#define Lynx_HTML_Handler
#include <HTML.h>		/* styles[] */
/*
**	No trailing ';' here: a semicolon inside the macro body would be
**	pasted into every expression or array bound that uses BUFFER_SIZE.
*/
#define BUFFER_SIZE 4096	/* Tradeoff */
#include <HTMLDTD.h>
#include <HTCJK.h>
#include <UCMap.h>
#include <UCDefs.h>
#include <UCAux.h>
#include <LYCharSets.h>
#include <LYLeaks.h>

extern BOOL HTPassEightBitRaw;
extern BOOL HTPassHighCtrlRaw;

/*
**	Most recent raw input character recorded by the routines below
**	(stored as UCH(c)); reset to -1 once the LF of a CR LF pair has
**	been swallowed.
*/
PUBLIC int HTPlain_lastraw = -1;
PRIVATE int HTPlain_bs_pending = 0; /* 1:bs 2:underline 3:underline+bs - kw */

/*		HTML Object
**		-----------
*/
struct _HTStream {
    CONST HTStreamClass *	isa;
    HText *			text;	/* target of all append calls */
    /*
    **	The node_anchor UCInfo and handle for the input (PARSER) stage. - FM
    */
    LYUCcharset *		inUCI;
    int				inUCLYhndl;
    /*
    **	The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM
    */
    LYUCcharset *		outUCI;
    int				outUCLYhndl;
    /*
    **	Counter, value, buffer and pointer for UTF-8 handling. - FM
    */
    char			utf_count;	/* continuation octets still expected */
    UCode_t			utf_char;	/* code value accumulated so far */
    char			utf_buf[8];	/* raw octets of the current sequence */
    char *			utf_buf_p;	/* next write position in utf_buf */
    /*
    **	The charset transformation structure. - FM
    */
    UCTransParams		T;
};

PRIVATE char replace_buf [64];	      /* buffer for replacement strings */

/*	Fill in the charset handles and info pointers of a stream
**	---------------------------------------------------------
**
**	On entry,
**		me	stream whose inUCLYhndl / outUCLYhndl may still be
**			negative (i.e. not yet determined).
**		anchor	parent anchor that holds the per-stage charset info.
**
**	On exit,
**		me->inUCLYhndl and me->outUCLYhndl have been looked up from
**		the anchor if they were negative; when the anchor has no
**		HTEXT handle either, current_char_set is installed as the
**		default.  me->inUCI and me->outUCI are always refreshed.
*/
PRIVATE void HTPlain_getChartransInfo ARGS2(
	HTStream *,		me,
	HTParentAnchor *,	anchor)
{
    if (me->inUCLYhndl < 0) {
	HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME,
					 UCT_SETBY_PARSER);
	me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    }
    if (me->outUCLYhndl < 0) {
	int chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);

	if (chndl < 0) {
	    chndl = current_char_set;
	    HTAnchor_setUCInfoStage(anchor, chndl,
				    UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	}
	/*
	**  NOTE(review): when chndl was negative this repeats the call just
	**  made above with identical arguments; it looks redundant but is
	**  kept as is - confirm HTAnchor_setUCInfoStage is idempotent
	**  before removing either call.
	*/
	HTAnchor_setUCInfoStage(anchor, chndl,
				UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    }
    me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
    me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
}

/*	Write the buffer out to the socket
**	----------------------------------
*/

/*_________________________________________________________________________
**
**			A C T I O N	R O U T I N E S
*/

/*
**	Forward declaration: HTPlain_put_character() below delegates to
**	HTPlain_write() before the latter is defined.
*/
PRIVATE void HTPlain_write PARAMS((
	HTStream *		me,
	CONST char *		s,
	int			l));

/*	Character handling
**	------------------
**
**	Append one character to me->text.
**
**	With REMOVE_CR_ONLY defined, every character except '\r' is
**	appended unchanged.  Otherwise:
**	  - a '\n' directly following a '\r' is dropped;
**	  - '\b', '_', any character arriving while an overstrike is
**	    pending, and any character whose TOASCII value is >= 127 are
**	    handed to HTPlain_write() as a one-byte buffer;
**	  - '\r' is appended as '\n';
**	  - the remaining cases are handled inline below.
*/
PRIVATE void HTPlain_put_character ARGS2(
	HTStream *,		me,
	char,			c)
{
#ifdef REMOVE_CR_ONLY
    /*
    **	Throw away \r's.
    */
    if (c != '\r') {
       HText_appendCharacter(me->text, c);
    }
#else
    /*
    **	See HTPlain_write() for explanations of the following code
    **	(we've been called via HTPlain_put_string() to do for each
    **	character of a terminated string what HTPlain_write() does
    **	via a while loop for each character in a stream of given
    **	length). - FM
    */
    if ((HTPlain_lastraw == '\r') && c == '\n') {
	HTPlain_lastraw = -1;
	return;
    }
    if (c == '\b' || c == '_' || HTPlain_bs_pending) {
	/* Overstrike bookkeeping lives in HTPlain_write(). */
	HTPlain_write(me, &c, 1);
	return;
    }
    HTPlain_lastraw = UCH(c);
    if (c == '\r') {
	HText_appendCharacter(me->text, '\n');
    } else if (TOASCII(UCH(c)) >= 127) {  /* S/390 -- gil -- 0305 */
	/*
	**  For now, don't repeat everything here
	**  that has been done below - KW
	*/
	HTPlain_write(me, &c, 1);
    } else if (HTCJK != NOCJK) {
	HText_appendCharacter(me->text, c);
    } else if (TOASCII(UCH(c)) >= 127 && TOASCII(UCH(c)) < 161 &&
	       HTPassHighCtrlRaw) {
	/*
	**  NOTE(review): this test, like the "> 160" branch further down,
	**  is subsumed by the ">= 127" branch above and so appears
	**  unreachable; left in place, as the earlier comment suggests the
	**  delegation is meant to be temporary.
	*/
	HText_appendCharacter(me->text, c);
    } else if (UCH(c) == CH_NBSP) { /* S/390 -- gil -- 0341 */
	HText_appendCharacter(me->text, ' ');
    } else if (UCH(c) == CH_SHY) {
	return;
    } else if ((UCH(c) >= ' ' && TOASCII(UCH(c)) < 127) ||
	       c == '\n' || c == '\t') {
	HText_appendCharacter(me->text, c);
    } else if (TOASCII(UCH(c)) > 160) {
	if (!HTPassEightBitRaw &&
	    !((me->outUCLYhndl == LATIN1) ||
	      (me->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) {
	    int len, high, low, i, diff = 1;
	    CONST char * name;
	    UCode_t value = (UCode_t)FROMASCII((TOASCII(UCH(c)) - 160));

	    /*
	    **	Look the entity name up in HTML_dtd.entity_names and, on a
	    **	match, append the string at the same index of the output
	    **	charset's LYCharSets table.
	    */
	    name = HTMLGetEntityName(value);
	    len =  strlen(name);
	    for (low = 0, high = HTML_dtd.number_of_entities;
		high > low;
		diff < 0 ? (low = i+1) : (high = i)) {
		/* Binary search */
		i = (low + (high-low)/2);
		diff = AS_ncmp(HTML_dtd.entity_names[i], name, len);
		if (diff == 0) {
		    HText_appendText(me->text,
				     LYCharSets[me->outUCLYhndl][i]);
		    break;
		}
	    }
	    if (diff) {
		/* No table entry matched: pass the character through. */
		HText_appendCharacter(me->text, c);
	    }
	} else {
	    HText_appendCharacter(me->text, c);
	}
    }
#endif /* REMOVE_CR_ONLY */
}

/*	String handling
**	---------------
**
**	Append a NUL-terminated string to me->text.  With REMOVE_CR_ONLY
**	defined the string is appended in one call; otherwise a NULL s is
**	ignored and every character goes through HTPlain_put_character().
*/
PRIVATE void HTPlain_put_string ARGS2(HTStream *, me, CONST char*, s)
{
#ifdef REMOVE_CR_ONLY
    HText_appendText(me->text, s);
#else
    CONST char * p;

    if (s == NULL)
	return;
    for (p = s; *p; p++) {
	HTPlain_put_character(me, *p);
    }
#endif /* REMOVE_CR_ONLY */
}

/*
**	Entry function for displayed text/plain and WWW_SOURCE strings. - FM
**	---------------------------------------------------------------
**
**	Processes the l bytes starting at s one at a time.
*/
PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
{
    CONST char * p;
    CONST char * e = s+l;
    char c;
    unsigned c_unsign;
    BOOL chk;
    UCode_t code, uck = -1;
    char saved_char_in = '\0';

    for (p = s; p < e; p++) {
#ifdef REMOVE_CR_ONLY
	/*
	**  Append the whole string, but remove any \r's. - FM
	*/
	if (*p != '\r') {
	    HText_appendCharacter(me->text, *p);
	}
#else
	/*
	**  Overstrike tracking: HTPlain_bs_pending records whether a
	**  backspace and/or an underscore has been seen and is still
	**  waiting for the character that completes the sequence
	**  (1:bs 2:underline 3:underline+bs).
	*/
	if (*p == '\b') {
	    if (HTPlain_lastraw >= UCH(' ') &&
		HTPlain_lastraw != '\r' && HTPlain_lastraw != '\n') {
		if (!HTPlain_bs_pending) {
		    HTPlain_bs_pending = 1;
		    continue;
		} else if (HTPlain_bs_pending == 2) {
		    HTPlain_bs_pending = 3;
		    continue;
		}
	    }
	    if (HTPlain_bs_pending >= 2)
		HText_appendCharacter(me->text, '_');
	    HTPlain_bs_pending = 0;
	} else if (*p == '_') {
		if (!HTPlain_bs_pending) {
		    HTPlain_bs_pending = 2;
		    HTPlain_lastraw = UCH(*p);
		    continue;
#if 0
		} else if (HTPlain_bs_pending != 2) {
		    HTPlain_bs_pending--; /* 1 -> 0, 3 -> 2 */
		    HTPlain_lastraw = UCH(*p);
		    continue;
#endif
		}
	}
	/*
	**  Try to handle lone LFs, CRLFs and lone CRs
	**  as newline, and to deal with control, ASCII,
	**  and 8-bit characters based on best guesses
	**  of what's appropriate. - FM
	*/
	if ((HTPlain_lastraw == '\r') && *p == '\n') {
	    HTPlain_lastraw = -1;
	    continue;
	}
	if (HTPlain_bs_pending &&
	    !(UCH(*p) >= ' ' && *p != '\r' && *p != '\n' &&
	      (HTPlain_lastraw == UCH(*p) ||
	       HTPlain_lastraw == UCH('_') ||
	       *p == '_'))) {
	    /* Sequence broken: flush a held-back underscore, if any. */
	    if (HTPlain_bs_pending >= 2)
		HText_appendCharacter(me->text, '_');
	    HTPlain_bs_pending = 0;
	} else if (HTPlain_bs_pending == 1) {
	    HTPlain_bs_pending = 0;
	    continue;	/* ignore last two of "X\bX" or "X\b_" - kw */
	} else if (HTPlain_bs_pending == 3) {
	    if (*p == '_') {
		HTPlain_bs_pending = 2;
		continue;	/* ignore last two of "_\b_" - kw */
	    } else {
		HTPlain_bs_pending = 0;
				/* ignore first two of "_\bX" - kw */
	    }
	} else if (HTPlain_bs_pending == 2) {
	    HText_appendCharacter(me->text, '_');
	    if (*p == '_')
		continue;	/* keep second of "__" pending - kw */
	    HTPlain_bs_pending = 0;
	} else {
	    HTPlain_bs_pending = 0;
	}
	HTPlain_lastraw = UCH(*p);
	if (*p == '\r') {
	    HText_appendCharacter(me->text, '\n');
	    continue;
	}
	/*
	**  Make sure the character is handled as Unicode
	**  whenever that's appropriate.  - FM
	*/
	c = *p;
	c_unsign = UCH(c);
	code = (UCode_t)c_unsign;
	saved_char_in = '\0';
	/*
	**  Combine any UTF-8 multibytes into Unicode
	**  to check for special characters. - FM
	*/
	if (me->T.decode_utf8) {
	    /*
	    **	Combine UTF-8 into Unicode.
	    **	Incomplete characters silently ignored.
	    **	from Linux kernel's console.c - KW
	    */
	    if (TOASCII(c_unsign) > 127) {  /* S/390 -- gil -- 0371 */
		/*
		**  We have an octet from a multibyte character. - FM
		*/
		if (me->utf_count > 0 && (c & 0xc0) == 0x80) {
		    /*
		    **	Adjust the UCode_t value, add the octet
		    **	to the buffer, and decrement the byte
		    **	count. - FM
		    */
		    me->utf_char = (me->utf_char << 6) | (c & 0x3f);
		    me->utf_count--;
		    *(me->utf_buf_p) = c;
		    (me->utf_buf_p)++;
		    if (me->utf_count == 0) {
			/*
			**  Got a complete multibyte character.
			*/
			*(me->utf_buf_p) = '\0';
			code = me->utf_char;
			if (code > 0 && code < 256) {
			    c = FROMASCII((char)code);
			    c_unsign = UCH(c);
			}
		    } else {
			/*
			**  Get the next byte. - FM
			*/
			continue;
		    }
		} else {
		    /*
		    **	Start handling a new multibyte character. - FM
		    **
		    **	NOTE(review): the lead octet is stored through
		    **	utf_buf_p, which is only then reset to &utf_buf[1].
		    **	If utf_buf_p is not at utf_buf[0] here (e.g. after
		    **	an interrupted sequence) the octet lands elsewhere
		    **	in the buffer - presumably utf_buf[0] was intended;
		    **	confirm against the rest of this function.
		    */
		    me->utf_buf_p[0] = c;
		    me->utf_buf_p = &me->utf_buf[1];
		    if ((*p & 0xe0) == 0xc0) {
			me->utf_count = 1;
			me->utf_char = (c & 0x1f);
		    } else if ((*p & 0xf0) == 0xe0) {
			me->utf_count = 2;
			me->utf_char = (c & 0x0f);
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -