📄 htplain.c
字号:
/*		Plain text object		HTWrite.c
**		=================
**
**	This version of the stream object just writes to a socket.
**	The socket is assumed open and left open.
**
**	Bugs:
**		strings written must be less than buffer size.
*/

#include <HTUtils.h>
#include <LYCharVals.h>  /* S/390 -- gil -- 0288 */

#include <HTPlain.h>

#include <HTChunk.h>
#include <HText.h>
#include <HTStyle.h>
#define Lynx_HTML_Handler
#include <HTML.h>		/* styles[] */

/*
**  No trailing semicolon here: a semicolon inside the macro body would be
**  pasted into every expression that uses BUFFER_SIZE.
*/
#define BUFFER_SIZE 4096	/* Tradeoff */

#include <HTMLDTD.h>
#include <HTCJK.h>
#include <UCMap.h>
#include <UCDefs.h>
#include <UCAux.h>

#include <LYCharSets.h>
#include <LYLeaks.h>

extern BOOL HTPassEightBitRaw;
extern BOOL HTPassHighCtrlRaw;

/*
**  Last raw input character seen by the put/write routines, or -1.
**  It is reset to -1 when the '\n' of a "\r\n" pair is swallowed.
*/
PUBLIC int HTPlain_lastraw = -1;

PRIVATE int HTPlain_bs_pending = 0; /* 1:bs 2:underline 3:underline+bs - kw */

/*		HTML Object
**		-----------
*/
struct _HTStream {
    CONST HTStreamClass *	isa;

    HText *			text;
    /*
    **  The node_anchor UCInfo and handle for the input (PARSER) stage. - FM
    */
    LYUCcharset *		inUCI;
    int				inUCLYhndl;
    /*
    **  The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM
    */
    LYUCcharset *		outUCI;
    int				outUCLYhndl;
    /*
    **  Counter, value, buffer and pointer for UTF-8 handling. - FM
    */
    char			utf_count;
    UCode_t			utf_char;
    char			utf_buf[8];
    char *			utf_buf_p;
    /*
    **  The charset transformation structure. - FM
    */
    UCTransParams		T;
};

PRIVATE char replace_buf [64];	/* buffer for replacement strings */

/*
**  Fill in the stream's charset handles and UCInfo pointers from the anchor.
**
**  me		stream whose inUCLYhndl/outUCLYhndl/inUCI/outUCI are updated.
**  anchor	parent anchor queried (and, for missing stages, updated).
**
**  A handle is only (re)computed while it is still negative.  The two
**  UCInfo pointers are refreshed on every call.
*/
PRIVATE void HTPlain_getChartransInfo ARGS2(
	HTStream *,		me,
	HTParentAnchor *,	anchor)
{
    if (me->inUCLYhndl < 0) {
	/*
	**  No input handle yet: derive the PARSER stage from the MIME stage.
	*/
	HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME,
				 UCT_SETBY_PARSER);
	me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    }
    if (me->outUCLYhndl < 0) {
	int chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);

	if (chndl < 0) {
	    /*
	    **  Nothing recorded for the HTEXT stage: fall back to the
	    **  current display character set.
	    */
	    chndl = current_char_set;
	}
	/*
	**  A single call is enough; issuing the identical call twice in a
	**  row on the fallback path was redundant.
	*/
	HTAnchor_setUCInfoStage(anchor, chndl,
				UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    }
    me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
    me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
}

/*	Write the buffer out to the socket
**	----------------------------------
*/

/*_________________________________________________________________________
**
**			A C T I O N	R O U T I N E S
*/

PRIVATE void HTPlain_write PARAMS((
	HTStream *		me,
	CONST char *		s,
	int			l));

/*	Character handling
**	------------------
**
**  Append one character to the stream's text object.
**
**  me		target stream.
**  c		character to append.
**
**  With REMOVE_CR_ONLY defined, every character except '\r' is appended
**  unchanged.  Otherwise backspace/underscore sequences and every character
**  whose TOASCII() value is >= 127 are forwarded to HTPlain_write(), and the
**  remaining characters are handled here.
*/
PRIVATE void HTPlain_put_character ARGS2(
	HTStream *,		me,
	char,			c)
{
#ifdef REMOVE_CR_ONLY
    /*
    **  Throw away \r's.
    */
    if (c != '\r') {
	HText_appendCharacter(me->text, c);
    }
#else
    /*
    **  See HTPlain_write() for explanations of the following code
    **  (we've been called via HTPlain_put_string() to do for each
    **  character of a terminated string what HTPlain_write() does
    **  via a while loop for each character in a stream of given
    **  length). - FM
    */
    if ((HTPlain_lastraw == '\r') && c == '\n') {
	/*
	**  Second half of a CRLF pair; the CR already produced a newline.
	*/
	HTPlain_lastraw = -1;
	return;
    }
    if (c == '\b' || c == '_' || HTPlain_bs_pending) {
	/*
	**  Backspace/underline state is tracked in HTPlain_write().
	*/
	HTPlain_write(me, &c, 1);
	return;
    }
    HTPlain_lastraw = UCH(c);
    if (c == '\r') {
	HText_appendCharacter(me->text, '\n');
    } else if (TOASCII(UCH(c)) >= 127) {  /* S/390 -- gil -- 0305 */
	/*
	**  For now, don't repeat everything here
	**  that has been done below - KW
	**
	**  Every character with TOASCII() >= 127 leaves through this
	**  branch, so the former "127..160 raw" and "> 160 entity lookup"
	**  branches further down could never execute and were removed.
	*/
	HTPlain_write(me, &c, 1);
    } else if (HTCJK != NOCJK) {
	HText_appendCharacter(me->text, c);
    } else if (UCH(c) == CH_NBSP) {  /* S/390 -- gil -- 0341 */
	/*
	**  NOTE(review): if TOASCII(CH_NBSP) and TOASCII(CH_SHY) are >= 127
	**  these two branches are unreachable as well -- confirm against
	**  LYCharVals.h before removing them.
	*/
	HText_appendCharacter(me->text, ' ');
    } else if (UCH(c) == CH_SHY) {
	return;
    } else if ((UCH(c) >= ' ' && TOASCII(UCH(c)) < 127) ||
	       c == '\n' || c == '\t') {
	HText_appendCharacter(me->text, c);
    }
#endif /* REMOVE_CR_ONLY */
}

/*	String handling
**	---------------
**
**  Append a NUL-terminated string to the stream's text object.
**
**  me		target stream.
**  s		string to append; a NULL pointer is ignored.
*/
PRIVATE void HTPlain_put_string ARGS2(HTStream *, me, CONST char*, s)
{
    /*
    **  Guard both build variants against a NULL string; the check used to
    **  be made only when REMOVE_CR_ONLY was not defined.
    */
    if (s == NULL)
	return;
#ifdef REMOVE_CR_ONLY
    HText_appendText(me->text, s);
#else
    {
	CONST char * p;

	for (p = s; *p; p++) {
	    HTPlain_put_character(me, *p);
	}
    }
#endif /* REMOVE_CR_ONLY */
}

/*
**	Entry function for displayed text/plain and WWW_SOURCE strings.
- FM** ---------------------------------------------------------------*/PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l){ CONST char * p; CONST char * e = s+l; char c; unsigned c_unsign; BOOL chk; UCode_t code, uck = -1; char saved_char_in = '\0'; for (p = s; p < e; p++) {#ifdef REMOVE_CR_ONLY /* ** Append the whole string, but remove any \r's. - FM */ if (*p != '\r') { HText_appendCharacter(me->text, *p); }#else if (*p == '\b') { if (HTPlain_lastraw >= UCH(' ') && HTPlain_lastraw != '\r' && HTPlain_lastraw != '\n') { if (!HTPlain_bs_pending) { HTPlain_bs_pending = 1; continue; } else if (HTPlain_bs_pending == 2) { HTPlain_bs_pending = 3; continue; } } if (HTPlain_bs_pending >= 2) HText_appendCharacter(me->text, '_'); HTPlain_bs_pending = 0; } else if (*p == '_') { if (!HTPlain_bs_pending) { HTPlain_bs_pending = 2; HTPlain_lastraw = UCH(*p); continue;#if 0 } else if (HTPlain_bs_pending != 2) { HTPlain_bs_pending--; /* 1 -> 0, 3 -> 2 */ HTPlain_lastraw = UCH(*p); continue;#endif } } /* ** Try to handle lone LFs, CRLFs and lone CRs ** as newline, and to deal with control, ASCII, ** and 8-bit characters based on best guesses ** of what's appropriate. 
- FM */ if ((HTPlain_lastraw == '\r') && *p == '\n') { HTPlain_lastraw = -1; continue; } if (HTPlain_bs_pending && !(UCH(*p) >= ' ' && *p != '\r' && *p != '\n' && (HTPlain_lastraw == UCH(*p) || HTPlain_lastraw == UCH('_') || *p == '_'))) { if (HTPlain_bs_pending >= 2) HText_appendCharacter(me->text, '_'); HTPlain_bs_pending = 0; } else if (HTPlain_bs_pending == 1) { HTPlain_bs_pending = 0; continue; /* ignore last two of "X\bX" or "X\b_" - kw */ } else if (HTPlain_bs_pending == 3) { if (*p == '_') { HTPlain_bs_pending = 2; continue; /* ignore last two of "_\b_" - kw */ } else { HTPlain_bs_pending = 0; /* ignore first two of "_\bX" - kw */ } } else if (HTPlain_bs_pending == 2) { HText_appendCharacter(me->text, '_'); if (*p == '_') continue; /* keep second of "__" pending - kw */ HTPlain_bs_pending = 0; } else { HTPlain_bs_pending = 0; } HTPlain_lastraw = UCH(*p); if (*p == '\r') { HText_appendCharacter(me->text, '\n'); continue; } /* ** Make sure the character is handled as Unicode ** whenever that's appropriate. - FM */ c = *p; c_unsign = UCH(c); code = (UCode_t)c_unsign; saved_char_in = '\0'; /* ** Combine any UTF-8 multibytes into Unicode ** to check for special characters. - FM */ if (me->T.decode_utf8) { /* ** Combine UTF-8 into Unicode. ** Incomplete characters silently ignored. ** from Linux kernel's console.c - KW */ if (TOASCII(c_unsign) > 127) { /* S/390 -- gil -- 0371 */ /* ** We have an octet from a multibyte character. - FM */ if (me->utf_count > 0 && (c & 0xc0) == 0x80) { /* ** Adjust the UCode_t value, add the octet ** to the buffer, and decrement the byte ** count. - FM */ me->utf_char = (me->utf_char << 6) | (c & 0x3f); me->utf_count--; *(me->utf_buf_p) = c; (me->utf_buf_p)++; if (me->utf_count == 0) { /* ** Got a complete multibyte character. */ *(me->utf_buf_p) = '\0'; code = me->utf_char; if (code > 0 && code < 256) { c = FROMASCII((char)code); c_unsign = UCH(c); } } else { /* ** Get the next byte. 
- FM */ continue; } } else { /* ** Start handling a new multibyte character. - FM */ me->utf_buf_p[0] = c; me->utf_buf_p = &me->utf_buf[1]; if ((*p & 0xe0) == 0xc0) { me->utf_count = 1; me->utf_char = (c & 0x1f); } else if ((*p & 0xf0) == 0xe0) { me->utf_count = 2; me->utf_char = (c & 0x0f);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -