📄 ucaux.c

📁 基于rtos开发的浏览器！
💻 C
字号:
#include "HTUtils.h"
#include "tcp.h"
//#include "HTCJK.h"
//#include "UCDefs.h"
#include "HTStream.h"
#include "UCAux.h"

extern HTCJKlang HTCJK;
extern LYUCcharset LYCharSet_UC[];

/*
**  Returns YES if the charset with index `from` can be translated
**  to Unicode: its encoding is 7-bit or UTF-8, it is flagged as a
**  subset of Latin-1, or it has a non-negative UChndl.  A negative
**  index yields NO.
*/
PUBLIC BOOL UCCanUniTranslateFrom ARGS1(
        int,            from)
{
    if (from < 0)
        return NO;
    if (LYCharSet_UC[from].enc == UCT_ENC_7BIT ||
        LYCharSet_UC[from].enc == UCT_ENC_UTF8)
        return YES;
    if (LYCharSet_UC[from].codepoints & (UCT_CP_SUBSETOF_LAT1))
        return YES;
    return (LYCharSet_UC[from].UChndl >= 0);
}

/*
**  Returns YES if Unicode can be translated to the charset with
**  index `to`.  Any non-negative index is accepted.
*/
PUBLIC BOOL UCCanTranslateUniTo ARGS1(
        int,            to)
{
    if (to < 0)
        return NO;
    return YES;                 /* well at least some characters... */
}

/*
**  Returns YES if text in charset `from` can be translated to
**  charset `to`.  Identical indices always yield YES (even when
**  negative, because that test comes first); otherwise a negative
**  index yields NO.
*/
PUBLIC BOOL UCCanTranslateFromTo ARGS2(
        int,            from,
        int,            to)
{
    if (from == to)
        return YES;
    if (from < 0 || to < 0)
        return NO;
    if (from == 0)
        return UCCanTranslateUniTo(to);
    if (to == 0 || LYCharSet_UC[to].enc == UCT_ENC_UTF8)
        return UCCanUniTranslateFrom(from);
    {
        CONST char * fromname = LYCharSet_UC[from].MIMEname;
        CONST char * toname = LYCharSet_UC[to].MIMEname;

        if (!strcmp(fromname, "x-transparent") ||
            !strcmp(toname, "x-transparent")) {
            return YES;
        } else if (!strcmp(fromname, "us-ascii")) {
            return YES;
        }
        if (LYCharSet_UC[from].enc == UCT_ENC_CJK) {
            /*
            **  CJK mode may be off (i.e., HTCJK == NOCJK) because
            **  the current document is not CJK, but the check may
            **  be for capability in relation to another document,
            **  for which CJK mode might be turned on when retrieved.
            **  Thus, when the from charset is CJK, check if the to
            **  charset is CJK, and return NO or YES in relation to
            **  that. - FM
            */
            if (LYCharSet_UC[to].enc != UCT_ENC_CJK)
                return NO;
            if ((!strcmp(toname, "euc-jp") ||
                 !strcmp(toname, "shift_jis")) &&
                (!strcmp(fromname, "euc-jp") ||
                 !strcmp(fromname, "shift_jis")))
                return YES;
            /*
            **  The euc-cn and euc-kr charsets were handled
            **  by the (from == to) above, so we need not
            **  check those. - FM
            **/
            return NO;
        }
    }
    return (LYCharSet_UC[from].UChndl >= 0);
}

/*
**  Returns YES if no translation necessary (because
**  charsets are equal, are equivalent, etc.).
*/
PUBLIC BOOL UCNeedNotTranslate ARGS2(
        int,            from,
        int,            to)
{
    CONST char *fromname;
    CONST char *toname;

    if (from == to)
        return YES;
    if (from < 0)
        return NO;              /* ??? */
    if (LYCharSet_UC[from].enc == UCT_ENC_7BIT) {
        return YES;             /* Only 7bit chars. */
    }
    fromname = LYCharSet_UC[from].MIMEname;
    if (!strcmp(fromname, "x-transparent") ||
        !strcmp(fromname, "us-ascii")) {
        return YES;
    }
    if (to < 0)
        return NO;              /* ??? */
    if (to == 0) {
        if (LYCharSet_UC[from].codepoints & (UCT_CP_SUBSETOF_LAT1))
            return YES;
    }
    toname = LYCharSet_UC[to].MIMEname;
    if (!strcmp(toname, "x-transparent")) {
        return YES;
    }
    if (LYCharSet_UC[to].enc == UCT_ENC_UTF8) {
        return NO;
    }
    if (from == 0) {
        /*
        **  NOTE(review): this tests the flags of charset `from`
        **  (index 0 here), not of `to`.  Possibly `to` was meant
        **  (output is a superset of Latin-1) - confirm before
        **  changing; behavior is left as it was.
        */
        if (LYCharSet_UC[from].codepoints & (UCT_CP_SUPERSETOF_LAT1))
            return YES;
    }
    if (LYCharSet_UC[from].enc == UCT_ENC_CJK) {
        if (HTCJK == NOCJK)     /* Use that global flag, for now. */
            return NO;
        /*
        **  Always strip the "x-" from "x-euc-jp",
        **  or convert "x-shift-jis" to "shift_jis",
        **  before calling this function, and so
        **  don't check for them here. - FM
        */
        if (HTCJK == JAPANESE &&
            (!strcmp(fromname, "euc-jp") ||
             !strncmp(fromname, "iso-2022-jp",11) ||
             !strcmp(fromname, "shift_jis")))
            return YES; /* ??? */
        return NO;      /* If not handled by (from == to) above. */
    }
    return NO;
}

/*
**  The idea here is that any stage of the stream pipe which is interested
**  in some charset dependent processing will call this function.
**  Given input and output charsets, this function will set various flags
**  in a UCTransParams structure that _suggest_ to the caller what to do.
**
**  Should be called once when a stage starts processing text (and the
**  input and output charsets are known), or whenever one of input or
**  output charsets has changed (e.g. by SGML.c stage after HTML.c stage
**  has processed a META tag).
**  The global flags (LYRawMode, HTPassEightBitRaw etc.) are currently
**  not taken into account here (except for HTCJK, somewhat), it's still
**  up to the caller to do something about them. - KW
**
**  pT, p_in and p_out are dereferenced without a NULL check.
**  Not every element of *pT is written on every path: the
**  "transparent" path leaves trans_to_uni and trans_from_uni alone,
**  and the CJK path leaves strip_raw_char_in alone.
*/
PUBLIC void UCSetTransParams ARGS5(
    UCTransParams *,    pT,
    int,                cs_in,
    CONST LYUCcharset*, p_in,
    int,                cs_out,
    CONST LYUCcharset*, p_out)
{
    /*
    **  Initialize this element to FALSE, and set it TRUE
    **  below if we're dealing with VISCII. - FM
    */
    pT->trans_C0_to_uni = FALSE;

    /*
    **  The "transparent" display character set is a
    **  "super raw mode". - FM
    */
    pT->transp = (!strcmp(p_in->MIMEname, "x-transparent") ||
                  !strcmp(p_out->MIMEname, "x-transparent"));

    if (pT->transp) {
        /*
        **  Set up the structure for "transparent". - FM
        */
        pT->do_cjk = FALSE;
        pT->decode_utf8 = FALSE;
        pT->output_utf8 = FALSE;  /* We may, but won't know about it. - KW */
        pT->do_8bitraw = TRUE;
        pT->use_raw_char_in = TRUE;
        pT->strip_raw_char_in = FALSE;
        pT->pass_160_173_raw = TRUE;
        pT->repl_translated_C0 = (p_out->enc == UCT_ENC_8BIT_C0);
        pT->trans_C0_to_uni = (p_in->enc == UCT_ENC_8BIT_C0 ||
                               p_out->enc == UCT_ENC_8BIT_C0);
    } else {
        /*
        **  Initialize local flags. - FM
        */
        BOOL intm_ucs = FALSE;
        BOOL use_ucs = FALSE;

        /*
        **  Set this element if we want to treat
        **  the input as CJK. - FM
        */
        pT->do_cjk = ((p_in->enc == UCT_ENC_CJK) && (HTCJK != NOCJK));
        /*
        **  Set these elements based on whether
        **  we are dealing with UTF-8. - FM
        */
        pT->decode_utf8 = (p_in->enc == UCT_ENC_UTF8);
        pT->output_utf8 = (p_out->enc == UCT_ENC_UTF8);
        if (pT->do_cjk) {
            /*
            **  Set up the structure for a CJK input with
            **  a CJK output (HTCJK != NOCJK). - FM
            */
            intm_ucs = FALSE;
            pT->trans_to_uni = FALSE;
            use_ucs = FALSE;
            pT->do_8bitraw = FALSE;
            pT->pass_160_173_raw = TRUE;
            pT->use_raw_char_in = FALSE; /* Not used for CJK. - KW */
            pT->repl_translated_C0 = FALSE;
            pT->trans_from_uni = FALSE;  /* Not used for CJK. - KW */
        } else {
            /*
            **  Set up for all other charset combinations.
            **  The intm_ucs flag is set TRUE if the input
            **  charset is iso-8859-1 or UTF-8, or largely
            **  equivalent to them, i.e. if we have UCS without
            **  having to do a table translation.
            */
            intm_ucs = (cs_in == 0 || pT->decode_utf8 ||
                        (p_in->codepoints &
                         (UCT_CP_SUBSETOF_LAT1|UCT_CP_SUBSETOF_UCS2)));
            /*
            **  pT->trans_to_uni is set TRUE if we do not have that as
            **  input already, and we can translate to Unicode.  Note
            **  that UTF-8 always is converted to Unicode in functions
            **  that use the transformation structure, so it is
            **  treated as already Unicode here.
            */
            pT->trans_to_uni = (!intm_ucs &&
                                UCCanUniTranslateFrom(cs_in));
            /*
            **  We set this if we are translating to Unicode and
            **  what normally are low value control characters in
            **  fact are encoding octets for the input charset
            **  (presently, this applies to VISCII). - FM
            */
            pT->trans_C0_to_uni = (pT->trans_to_uni &&
                                   p_in->enc == UCT_ENC_8BIT_C0);
            /*
            **  We set this, presently, for VISCII. - FM
            */
            pT->repl_translated_C0 = (p_out->enc == UCT_ENC_8BIT_C0);
            /*
            **  Currently unused for any charset combination.
            **  Should always be FALSE
            */
            pT->strip_raw_char_in = FALSE;
            /*
            **  use_ucs should be set TRUE if we have or will create
            **  Unicode values for input octets or UTF multibytes. - FM
            */
            use_ucs = (intm_ucs || pT->trans_to_uni);
            /*
            **  This is set TRUE if use_ucs was set FALSE.  It is
            **  complementary to the HTPassEightBitRaw flag, which
            **  is set TRUE or FALSE elsewhere based on the raw mode
            **  setting in relation to the current Display Character
            **  Set. - FM
            */
            pT->do_8bitraw = (!use_ucs);
            /*
            **  This is set TRUE when 160 and 173 should not be
            **  treated as nbsp and shy, respectively. - FM
            */
            pT->pass_160_173_raw = (!use_ucs &&
                                    !(p_in->like8859 & UCT_R_8859SPECL));
            /*
            **  This is set when the input and output charsets match,
            **  and they are not ones which should go through a Unicode
            **  translation process anyway. - FM
            */
            pT->use_raw_char_in = (!pT->output_utf8 &&
                                   cs_in == cs_out &&
                                   !pT->trans_C0_to_uni);
            /*
            **  This should be set TRUE when we expect to have
            **  done translation to Unicode or had the equivalent
            **  as input, can translate it to our output charset,
            **  and normally want to do so.  The latter depends on
            **  the pT->do_8bitraw and pT->use_raw_char_in values set
            **  above, but also on HTPassEightBitRaw in any functions
            **  which use the transformation structure.. - FM
            */
            pT->trans_from_uni = (use_ucs && !pT->do_8bitraw &&
                                  !pT->use_raw_char_in &&
                                  UCCanTranslateUniTo(cs_out));
        }
    }
}

/*
**  This function initializes the transformation
**  structure by setting all its elements to
**  FALSE. - KW
*/
PUBLIC void UCTransParams_clear ARGS1(
    UCTransParams *,    pT)
{
    pT->transp = FALSE;
    pT->do_cjk = FALSE;
    pT->decode_utf8 = FALSE;
    pT->output_utf8 = FALSE;
    pT->do_8bitraw = FALSE;
    pT->use_raw_char_in = FALSE;
    pT->strip_raw_char_in = FALSE;
    pT->pass_160_173_raw = FALSE;
    pT->trans_to_uni = FALSE;
    pT->trans_C0_to_uni = FALSE;
    pT->repl_translated_C0 = FALSE;
    pT->trans_from_uni = FALSE;
}

/*
**  If terminal is in UTF-8 mode, it probably cannot understand
**  box drawing chars as (n)curses handles them.  (This may also
**  be true for other display character sets, but isn't currently
**  checked.)  In that case set the chars for hori and vert drawing
**  chars to displayable ASCII chars if '0' was requested.  They'll
**  stay as they are otherwise. - kw
**
**  A negative cset is never used as an index into LYCharSet_UC[];
**  the input chars are passed through unchanged in that case.
*/
PUBLIC void UCSetBoxChars ARGS5(
    int,        cset,
    int *,      pvert_out,
    int *,      phori_out,
    int,        vert_in,
    int,        hori_in)
{
    /* Was "cset >= -1", which read LYCharSet_UC[-1] for cset == -1. */
    if (cset >= 0 && LYCharSet_UC[cset].enc == UCT_ENC_UTF8) {
        *pvert_out = (vert_in ? vert_in : '|');
        *phori_out = (hori_in ? hori_in : '-');
    } else {
        *pvert_out = vert_in;
        *phori_out = hori_in;
    }
}

/*
**  Given an output target HTStream* (can also be a HTStructured* via
**  typecast), the target stream's put_character method, and a Unicode
**  character,  UCPutUtf8_charstring() will either output the UTF8
**  encoding of the Unicode and return YES, or do nothing and return
**  NO (if conversion would be unnecessary or the Unicode character is
**  considered invalid).
**
**  [Could be used more generally, but is currently only used for &#nnnnn
**  stuff - generation of UTF8 from 8-bit encoded charsets not yet done
**  by SGML.c etc.]
*/
#define PUTC(ch) ((*myPutc)(target, (char)(ch)))
#define PUTC2(ch) ((*myPutc)(target,(char)(0x80|(0x3f &(ch)))))

PUBLIC BOOL UCPutUtf8_charstring ARGS3(
        HTStream *,     target,
        putc_func_t *,  myPutc,
        long,           code)
{
    if (code < 128)
        return NO;              /* indicate to caller we didn't handle it */
    else if   (code < 0x800L) {
        PUTC(0xc0 | (code>>6));
        PUTC2(code);
    } else if (code < 0x10000L) {
        PUTC(0xe0 | (code>>12));
        PUTC2(code>>6);
        PUTC2(code);
    } else if (code < 0x200000L) {
        PUTC(0xf0 | (code>>18));
        PUTC2(code>>12);
        PUTC2(code>>6);
        PUTC2(code);
    } else if (code < 0x4000000L) {
        PUTC(0xf8 | (code>>24));
        PUTC2(code>>18);
        PUTC2(code>>12);
        PUTC2(code>>6);
        PUTC2(code);
    } else if (code <= 0x7fffffffL) {
        PUTC(0xfc | (code>>30));
        PUTC2(code>>24);
        PUTC2(code>>18);
        PUTC2(code>>12);
        PUTC2(code>>6);
        PUTC2(code);
    } else
        return NO;
    return YES;
}

/*
**  This function converts a Unicode (UCode_t) value
**  to a multibyte UTF-8 character, which is loaded
**  into the buffer received as an argument.  The
**  buffer should be large enough to hold at least
**  seven characters (but should be declared as 8
**  to minimize byte alignment problems with some
**  compilers). - FM
**
**  Returns NO if buffer is NULL, or if code is <= 0 or greater
**  than 0x7fffffff (the buffer is then set to an empty string);
**  returns YES otherwise, with the buffer NUL-terminated.
**  Codes below 0x80 are stored as a single byte; they used to be
**  written as an overlong two-byte sequence, which is not valid
**  UTF-8.
*/
PUBLIC BOOL UCConvertUniToUtf8 ARGS2(
        UCode_t,        code,
        char *,         buffer)
{
    char *ch = buffer;

    if (!ch)
        return NO;

    if (code <= 0 || code > 0x7fffffffL) {
        *ch = '\0';
        return NO;
    }

    if (code < 0x80L) {
        *ch++ = (char)(code);
        *ch = '\0';
    } else if (code < 0x800L) {
        *ch++ = (char)(0xc0 | (code>>6));
        *ch++ = (char)(0x80 | (0x3f & (code)));
        *ch = '\0';
    } else if (code < 0x10000L) {
        *ch++ = (char)(0xe0 | (code>>12));
        *ch++ = (char)(0x80 | (0x3f & (code>>6)));
        *ch++ = (char)(0x80 | (0x3f & (code)));
        *ch = '\0';
    } else if (code < 0x200000L) {
        *ch++ = (char)(0xf0 | (code>>18));
        *ch++ = (char)(0x80 | (0x3f & (code>>12)));
        *ch++ = (char)(0x80 | (0x3f & (code>>6)));
        *ch++ = (char)(0x80 | (0x3f & (code)));
        *ch = '\0';
    } else if (code < 0x4000000L) {
        *ch++ = (char)(0xf8 | (code>>24));
        *ch++ = (char)(0x80 | (0x3f & (code>>18)));
        *ch++ = (char)(0x80 | (0x3f & (code>>12)));
        *ch++ = (char)(0x80 | (0x3f & (code>>6)));
        *ch++ = (char)(0x80 | (0x3f & (code)));
        *ch = '\0';
    } else {
        *ch++ = (char)(0xfc | (code>>30));
        *ch++ = (char)(0x80 | (0x3f & (code>>24)));
        *ch++ = (char)(0x80 | (0x3f & (code>>18)));
        *ch++ = (char)(0x80 | (0x3f & (code>>12)));
        *ch++ = (char)(0x80 | (0x3f & (code>>6)));
        *ch++ = (char)(0x80 | (0x3f & (code)));
        *ch = '\0';
    }
    return YES;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -