📄 utf8.c
字号:
else { /* CS2 not set up */ c = BOGON; /* swallow byte, say bogon */ if (i < text->size) i++; } break; case EUC_CS3: /* CS3 */ if (p3->base_ku) { /* CS3 set up? */ if (p3->base_ten) /* yes, multibyte? */ c = ((i < text->size) && ((c = text->data[i++]) & BIT8) && ((ku = (c1 & BITS7) - p3->base_ku) < p3->max_ku) && ((ten = (c & BITS7) - p3->base_ten) < p3->max_ten)) ? t3[(ku*p3->max_ten) + ten] : BOGON; else c = ((c1 >= p3->base_ku) && (c1 <= p3->max_ku)) ? c1 + ((unsigned int) p3->tab) : BOGON; } else { /* CS3 not set up */ c = BOGON; /* swallow byte, say bogon */ if (i < text->size) i++; } break; default: if (((ku = (c & BITS7) - p1->base_ku) < p1->max_ku) && ((ten = (c1 & BITS7) - p1->base_ten) < p1->max_ten)) { if (!(c = t1[(ku*p1->max_ten) + ten]) && ku && (ku < 10) && t3 && p3->base_ten) /* special hack for JIS X 0212: merge rows less than 10 */ c = t3[((ku - (p3->base_ku - p1->base_ku))*p3->max_ten) + ten]; } else c = BOGON; } } if (pass) UTF8_PUT (s,c) else ret->size += UTF8_SIZE (c); } if (!pass) s = ret->data = (unsigned char *) fs_get (ret->size + 1); }}/* Convert ASCII + double-byte sized text to UTF-8 * Accepts: source sized text * pointer to return sized text * conversion table */void utf8_text_dbyte (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab){ unsigned long i; unsigned char *s; unsigned int c,c1,ku,ten; struct utf8_eucparam *p1 = (struct utf8_eucparam *) tab; unsigned short *t1 = (unsigned short *) p1->tab; for (ret->size = i = 0; i < text->size; ret->size += UTF8_SIZE (c)) if ((c = text->data[i++]) & BIT8) c = ((i < text->size) && (c1 = text->data[i++]) && ((ku = c - p1->base_ku) < p1->max_ku) && ((ten = c1 - p1->base_ten) < p1->max_ten)) ? t1[(ku*p1->max_ten) + ten] : BOGON; s = ret->data = (unsigned char *) fs_get (ret->size + 1); for (i = 0; i < text->size;) { if ((c = text->data[i++]) & BIT8) c = ((i < text->size) && (c1 = text->data[i++]) && ((ku = c - p1->base_ku) < p1->max_ku) && ((ten = c1 - p1->base_ten) < p1->max_ten)) ? t1[(ku*p1->max_ten) + ten] : BOGON; UTF8_PUT (s,c) /* convert Unicode to UTF-8 */ }}/* Convert ASCII + double byte 2 plane sized text to UTF-8 * Accepts: source sized text * pointer to return sized text * conversion table */void utf8_text_dbyte2 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab){ unsigned long i,j; unsigned char *s; unsigned int c,c1,ku,ten; struct utf8_eucparam *p1 = (struct utf8_eucparam *) tab; struct utf8_eucparam *p2 = p1 + 1; unsigned short *t = (unsigned short *) p1->tab; for (ret->size = i = 0; i < text->size; ret->size += UTF8_SIZE (c)) if ((c = text->data[i++]) & BIT8) { if ((i >= text->size) || !(c1 = text->data[i++])) c = BOGON; /* out of space or bogon */ else if (c1 & BIT8) /* high vs. low plane */ c = ((ku = c - p2->base_ku) < p2->max_ku && ((ten = c1 - p2->base_ten) < p2->max_ten)) ? t[(ku*(p1->max_ten + p2->max_ten)) + p1->max_ten + ten] : BOGON; else c = ((ku = c - p1->base_ku) < p1->max_ku && ((ten = c1 - p1->base_ten) < p1->max_ten)) ? t[(ku*(p1->max_ten + p2->max_ten)) + ten] : BOGON; } s = ret->data = (unsigned char *) fs_get (ret->size + 1); for (i = j = 0; i < text->size;) { if ((c = text->data[i++]) & BIT8) { if ((i >= text->size) || !(c1 = text->data[i++])) c = BOGON; /* out of space or bogon */ else if (c1 & BIT8) /* high vs. low plane */ c = ((ku = c - p2->base_ku) < p2->max_ku && ((ten = c1 - p2->base_ten) < p2->max_ten)) ? t[(ku*(p1->max_ten + p2->max_ten)) + p1->max_ten + ten] : BOGON; else c = ((ku = c - p1->base_ku) < p1->max_ku && ((ten = c1 - p1->base_ten) < p1->max_ten)) ? t[(ku*(p1->max_ten + p2->max_ten)) + ten] : BOGON; } UTF8_PUT (s,c) /* convert Unicode to UTF-8 */ }}#ifdef JISTOUNICODE /* Japanese *//* Convert Shift JIS sized text to UTF-8 * Accepts: source sized text * pointer to return sized text * conversion table */void utf8_text_sjis (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab){ unsigned long i; unsigned char *s; unsigned int c,c1,ku,ten; for (ret->size = i = 0; i < text->size; ret->size += UTF8_SIZE (c)) if ((c = text->data[i++]) & BIT8) { /* half-width katakana */ if ((c >= MIN_KANA_8) && (c <= MAX_KANA_8)) c += KANA_8; else if (i >= text->size) c = BOGON; else { /* Shift-JIS */ c1 = text->data[i++]; SJISTOJIS (c,c1); c = JISTOUNICODE (c,c1,ku,ten); } } s = ret->data = (unsigned char *) fs_get (ret->size + 1); for (i = 0; i < text->size;) { if ((c = text->data[i++]) & BIT8) { /* half-width katakana */ if ((c >= MIN_KANA_8) && (c <= MAX_KANA_8)) c += KANA_8; else { /* Shift-JIS */ c1 = text->data[i++]; SJISTOJIS (c,c1); c = JISTOUNICODE (c,c1,ku,ten); } } UTF8_PUT (s,c) /* convert Unicode to UTF-8 */ }}#endif/* Convert ISO-2022 sized text to UTF-8 * Accepts: source sized text * pointer to returned sized text * conversion table */void utf8_text_2022 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab){ unsigned long i; unsigned char *s; unsigned int pass,state,c,co,gi,gl,gr,g[4],ku,ten; for (pass = 0,s = NIL,ret->size = 0; pass <= 1; pass++) { gi = 0; /* quell compiler warnings */ state = I2S_CHAR; /* initialize engine */ g[0]= g[2] = I2CS_ASCII; /* G0 and G2 are ASCII */ g[1]= g[3] = I2CS_ISO8859_1;/* G1 and G3 are ISO-8850-1 */ gl = I2C_G0; gr = I2C_G1; /* left is G0, right is G1 */ for (i = 0; i < text->size;) { c = text->data[i++]; switch (state) { /* dispatch based upon engine state */ case I2S_ESC: /* ESC seen */ switch (c) { /* process intermediate character */ case I2C_MULTI: /* multibyte character? */ state = I2S_MUL; /* mark multibyte flag seen */ break; case I2C_SS2: /* single shift GL to G2 */ case I2C_SS2_ALT: /* Taiwan SeedNet */ gl |= I2C_SG2; break; case I2C_SS3: /* single shift GL to G3 */ case I2C_SS3_ALT: /* Taiwan SeedNet */ gl |= I2C_SG3; break; case I2C_LS2: /* shift GL to G2 */ gl = I2C_G2; break; case I2C_LS3: /* shift GL to G3 */ gl = I2C_G3; break; case I2C_LS1R: /* shift GR to G1 */ gr = I2C_G1; break; case I2C_LS2R: /* shift GR to G2 */ gr = I2C_G2; break; case I2C_LS3R: /* shift GR to G3 */ gr = I2C_G3; break; case I2C_G0_94: case I2C_G1_94: case I2C_G2_94: case I2C_G3_94: g[gi = c - I2C_G0_94] = (state == I2S_MUL) ? I2CS_94x94 : I2CS_94; state = I2S_INT; /* ready for character set */ break; case I2C_G0_96: case I2C_G1_96: case I2C_G2_96: case I2C_G3_96: g[gi = c - I2C_G0_96] = (state == I2S_MUL) ? I2CS_96x96 : I2CS_96; state = I2S_INT; /* ready for character set */ break; default: /* bogon */ if (pass) *s++ = I2C_ESC,*s++ = c; else ret->size += 2; state = I2S_CHAR; /* return to previous state */ } break; case I2S_MUL: /* ESC $ */ switch (c) { /* process multibyte intermediate character */ case I2C_G0_94: case I2C_G1_94: case I2C_G2_94: case I2C_G3_94: g[gi = c - I2C_G0_94] = I2CS_94x94; state = I2S_INT; /* ready for character set */ break; case I2C_G0_96: case I2C_G1_96: case I2C_G2_96: case I2C_G3_96: g[gi = c - I2C_G0_96] = I2CS_96x96; state = I2S_INT; /* ready for character set */ break; default: /* probably omitted I2CS_94x94 */ g[gi = I2C_G0] = I2CS_94x94 | c; state = I2S_CHAR; /* return to character state */ } break; case I2S_INT: state = I2S_CHAR; /* return to character state */ g[gi] |= c; /* set character set */ break; case I2S_CHAR: /* character data */ switch (c) { case I2C_ESC: /* ESC character */ state = I2S_ESC; /* see if ISO-2022 prefix */ break; case I2C_SI: /* shift GL to G0 */ gl = I2C_G0; break; case I2C_SO: /* shift GL to G1 */ gl = I2C_G1; break; case I2C_SS2_ALT: /* single shift GL to G2 */ case I2C_SS2_ALT_7: gl |= I2C_SG2; break; case I2C_SS3_ALT: /* single shift GL to G3 */ case I2C_SS3_ALT_7: gl |= I2C_SG3; break; default: /* ordinary character */ co = c; /* note original character */ if (gl & (3 << 2)) { /* single shifted? */ gi = g[gl >> 2]; /* get shifted character set */ gl &= 0x3; /* cancel shift */ } /* select left or right half */ else gi = (c & BIT8) ? g[gr] : g[gl]; c &= BITS7; /* make 7-bit */ switch (gi) { /* interpret in character set */ case I2CS_ASCII: /* ASCII */ break; /* easy! */ case I2CS_BRITISH: /* British ASCII */ /* Pound sterling sign */ if (c == 0x23) c = UCS2_POUNDSTERLING; break; case I2CS_JIS_ROMAN: /* JIS Roman */ case I2CS_JIS_BUGROM: /* old bugs */ switch (c) { /* two exceptions to ASCII */ case 0x5c: /* Yen sign */ c = UCS2_YEN; break; case 0x7e: /* overline */ c = UCS2_OVERLINE; break; } break; case I2CS_JIS_KANA: /* JIS katakana */ if ((c >= MIN_KANA_7) && (c <= MAX_KANA_7)) c += KANA_7; break; case I2CS_ISO8859_1: /* Latin-1 (West European) */ c |= BIT8; /* just turn on high bit */ break; case I2CS_ISO8859_2: /* Latin-2 (Czech, Slovak) */ c = iso8859_2tab[c]; break; case I2CS_ISO8859_3: /* Latin-3 (Dutch, Turkish) */ c = iso8859_3tab[c]; break; case I2CS_ISO8859_4: /* Latin-4 (Scandinavian) */ c = iso8859_4tab[c]; break; case I2CS_ISO8859_5: /* Cyrillic */ c = iso8859_5tab[c]; break; case I2CS_ISO8859_6: /* Arabic */ c = iso8859_6tab[c]; break; case I2CS_ISO8859_7: /* Greek */ c = iso8859_7tab[c]; break; case I2CS_ISO8859_8: /* Hebrew */ c = iso8859_8tab[c]; break; case I2CS_ISO8859_9: /* Latin-5 (Finnish, Portuguese) */ c = iso8859_9tab[c]; break; case I2CS_TIS620: /* Thai */ c = tis620tab[c]; break; case I2CS_ISO8859_10: /* Latin-6 (Northern Europe) */ c = iso8859_10tab[c]; break; case I2CS_ISO8859_13: /* Latin-7 (Baltic) */ c = iso8859_13tab[c]; break; case I2CS_VSCII: /* Vietnamese */ c = visciitab[c]; break; case I2CS_ISO8859_14: /* Latin-8 (Celtic) */ c = iso8859_14tab[c]; break; case I2CS_ISO8859_15: /* Euro */ c = iso8859_15tab[c]; break; default: /* all other character sets */ /* multibyte character set */ if ((gi & I2CS_MUL) && !(c & BIT8) && isgraph (c)) { c = (i < text->size) ? text->data[i++] : 0; switch (gi) {#ifdef GBTOUNICODE case I2CS_GB: /* GB 2312 */ c = GBTOUNICODE (co,c,ku,ten); break;#endif#ifdef JISTOUNICODE case I2CS_JIS_OLD:/* JIS X 0208-1978 */ case I2CS_JIS_NEW:/* JIS X 0208-1983 */ c = JISTOUNICODE (co,c,ku,ten); break;#endif#ifdef JIS0212TOUNICODE case I2CS_JIS_EXT:/* JIS X 0212-1990 */ c = JIS0212TOUNICODE (co,c,ku,ten); break;#endif#ifdef KSCTOUNICODE case I2CS_KSC: /* KSC 5601 */ co |= BIT8; /* make into EUC */ c |= BIT8; c = KSCTOUNICODE (co,c,ku,ten);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -