📄 utf8.c
字号:
break;#endif#ifdef CNS1TOUNICODE case I2CS_CNS1: /* CNS 11643 plane 1 */ c = CNS1TOUNICODE (co,c,ku,ten); break;#endif#ifdef CNS2TOUNICODE case I2CS_CNS2: /* CNS 11643 plane 2 */ c = CNS2TOUNICODE (co,c,ku,ten); break;#endif#ifdef CNS3TOUNICODE case I2CS_CNS3: /* CNS 11643 plane 3 */ c = CNS3TOUNICODE (co,c,ku,ten); break;#endif#ifdef CNS4TOUNICODE case I2CS_CNS4: /* CNS 11643 plane 4 */ c = CNS4TOUNICODE (co,c,ku,ten); break;#endif#ifdef CNS5TOUNICODE case I2CS_CNS5: /* CNS 11643 plane 5 */ c = CNS5TOUNICODE (co,c,ku,ten); break;#endif#ifdef CNS6TOUNICODE case I2CS_CNS6: /* CNS 11643 plane 6 */ c = CNS6TOUNICODE (co,c,ku,ten); break;#endif#ifdef CNS7TOUNICODE case I2CS_CNS7: /* CNS 11643 plane 7 */ c = CNS7TOUNICODE (co,c,ku,ten); break;#endif default: /* unknown multibyte, treat as UCS-2 */ c |= (co << 8); /* wrong, but nothing else to do */ break; } } else c = co; /* unknown single byte, treat as 8859-1 */ } if (pass) UTF8_PUT (s,c) else ret->size += UTF8_SIZE (c); } } } if (!pass) s = ret->data = (unsigned char *) fs_get (ret->size + 1); else if (((unsigned long) (s - ret->data)) != ret->size) fatal ("ISO-2022 to UTF-8 botch"); }}/* Convert UTF-7 sized text to UTF-8 * Accepts: source sized text * pointer to returned sized text * conversion table */void utf8_text_utf7 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab){ unsigned long i; unsigned char *s; unsigned int c,c1,d,uc,pass,e,e1,state; for (pass = 0,s = NIL,ret->size = 0; pass <= 1; pass++) { c1 = d = uc = e = e1 = 0; for (i = 0,state = NIL; i < text->size;) { c = text->data[i++]; /* get next byte */ switch (state) { case U7_PLUS: /* previous character was + */ if (c == '-') { /* +- means textual + */ c = '+'; state = U7_ASCII; /* revert to ASCII */ break; } state = U7_UNICODE; /* enter Unicode state */ e = e1 = 0; /* initialize Unicode quantum position */ case U7_UNICODE: /* Unicode state */ if (c == '-') state = U7_MINUS; else { /* decode Unicode */ if (isupper (c)) c -= 'A'; else if (islower (c)) c -= 'a' - 26; else if (isdigit (c)) c -= '0' - 52; else if (c == '+') c = 62; else if (c == '/') c = 63; else state = U7_ASCII;/* end of modified BASE64 */ } break; case U7_MINUS: /* previous character was absorbed - */ state = U7_ASCII; /* revert to ASCII */ case U7_ASCII: /* ASCII state */ if (c == '+') state = U7_PLUS; break; } switch (state) { /* store character if in character mode */ case U7_UNICODE: /* Unicode */ switch (e++) { /* install based on BASE64 state */ case 0: c1 = c << 2; /* byte 1: high 6 bits */ break; case 1: d = c1 | (c >> 4); /* byte 1: low 2 bits */ c1 = c << 4; /* byte 2: high 4 bits */ break; case 2: d = c1 | (c >> 2); /* byte 2: low 4 bits */ c1 = c << 6; /* byte 3: high 2 bits */ break; case 3: d = c | c1; /* byte 3: low 6 bits */ e = 0; /* reinitialize mechanism */ break; } if (e == 1) break; /* done if first BASE64 state */ if (!e1) { /* first byte of UCS-2 character */ uc = (d & 0xff) << 8; /* note first byte */ e1 = T; /* enter second UCS-2 state */ break; /* done */ } c = uc | (d & 0xff); /* build UCS-2 character */ e1 = NIL; /* back to first UCS-2 state, drop in */ case U7_ASCII: /* just install if ASCII */ if (pass) UTF8_PUT (s,c) else ret->size += UTF8_SIZE (c); } } if (!pass) s = ret->data = (unsigned char *) fs_get (ret->size + 1); else if (((unsigned long) (s - ret->data)) != ret->size) fatal ("UTF-7 to UTF-8 botch"); }}/* Convert charset labelled searchpgm to UTF-8 in place * Accepts: search program * charset */void utf8_searchpgm (SEARCHPGM *pgm,char *charset){ SIZEDTEXT txt; SEARCHHEADER *hl; SEARCHOR *ol; SEARCHPGMLIST *pl; if (pgm) { /* must have a search program */ utf8_stringlist (pgm->bcc,charset); utf8_stringlist (pgm->cc,charset); utf8_stringlist (pgm->from,charset); utf8_stringlist (pgm->to,charset); utf8_stringlist (pgm->subject,charset); for (hl = pgm->header; hl; hl = hl->next) { if (utf8_text (&hl->line,charset,&txt,NIL)) { fs_give ((void **) &hl->line.data); hl->line.data = txt.data; hl->line.size = txt.size; } if (utf8_text (&hl->text,charset,&txt,NIL)) { fs_give ((void **) &hl->text.data); hl->text.data = txt.data; hl->text.size = txt.size; } } utf8_stringlist (pgm->body,charset); utf8_stringlist (pgm->text,charset); for (ol = pgm->search_or; ol; ol = ol->next) { utf8_searchpgm (ol->first,charset); utf8_searchpgm (ol->second,charset); } for (pl = pgm->search_not; pl; pl = pl->next) utf8_searchpgm (pl->pgm,charset); }}/* Convert charset labelled stringlist to UTF-8 in place * Accepts: string list * charset */void utf8_stringlist (STRINGLIST *st,char *charset){ SIZEDTEXT txt; /* convert entire stringstruct */ if (st) do if (utf8_text (&st->text,charset,&txt,NIL)) { fs_give ((void **) &st->text.data); st->text.data = txt.data; /* transfer this text */ st->text.size = txt.size; } while (st = st->next);}/* Convert MIME-2 sized text to UTF-8 * Accepts: source sized text * charset * Returns: T if successful, NIL if failure */#define MINENCWORD 9long utf8_mime2text (SIZEDTEXT *src,SIZEDTEXT *dst){ unsigned char *s,*se,*e,*ee,*t,*te; char *cs,*ce,*ls; SIZEDTEXT txt,rtxt; unsigned long i; dst->data = NIL; /* default is no encoded words */ /* look for encoded words */ for (s = src->data, se = src->data + src->size; s < se; s++) { if (((se - s) > MINENCWORD) && (*s == '=') && (s[1] == '?') && (cs = (char *) mime2_token (s+2,se,(unsigned char **) &ce)) && (e = mime2_token ((unsigned char *) ce+1,se,&ee)) && (t = mime2_text (e+2,se,&te)) && (ee == e + 1)) { if (mime2_decode (e,t,te,&txt)) { *ce = '\0'; /* temporarily tie off charset */ if (ls = strchr (cs,'*')) *ls = '\0'; if (utf8_text (&txt,cs,&rtxt,NIL)) { if (!dst->data) { /* need to create buffer now? */ /* allocate for worst case */ dst->data = (unsigned char *) fs_get ((size_t) ((src->size / 8) + 1) * 9); memcpy (dst->data,src->data,(size_t) (dst->size = s - src->data)); } for (i=0; i < rtxt.size; i++) dst->data[dst->size++] = rtxt.data[i]; /* all done with converted text */ if (rtxt.data != txt.data) fs_give ((void **) &rtxt.data); } if (ls) *ls = '*'; /* restore language tag delimiter */ *ce = '?'; /* restore charset delimiter */ /* all done with decoded text */ fs_give ((void **) &txt.data); s = te+1; /* continue scan after encoded word */ /* skip leading whitespace */ for (t = s + 1; (t < se) && ((*t == ' ') || (*t == '\t')); t++); /* see if likely continuation encoded word */ if (t < (se - MINENCWORD)) switch (*t) { case '=': /* possible encoded word? */ if (t[1] == '?') s = t - 1; break; case '\015': /* CR, eat a following LF */ if (t[1] == '\012') t++; case '\012': /* possible end of logical line */ if ((t[1] == ' ') || (t[1] == '\t')) { do t++; while ((t < (se - MINENCWORD)) && ((t[1] == ' ')||(t[1] == '\t'))); if ((t < (se - MINENCWORD)) && (t[1] == '=') && (t[2] == '?')) s = t; /* definitely looks like continuation */ } } } else { /* restore original text */ if (dst->data) fs_give ((void **) &dst->data); dst->data = src->data; dst->size = src->size; return NIL; /* syntax error: MIME-2 decoding failure */ } } /* stash ordinary character */ else if (dst->data) dst->data[dst->size++] = *s; } if (dst->data) dst->data[dst->size] = '\0'; else { /* nothing converted, return identity */ dst->data = src->data; dst->size = src->size; } return T; /* success */}/* Decode MIME-2 text * Accepts: Encoding * text * text end * destination sized text * Returns: T if successful, else NIL */long mime2_decode (unsigned char *e,unsigned char *t,unsigned char *te, SIZEDTEXT *txt){ unsigned char *q; txt->data = NIL; /* initially no returned data */ switch (*e) { /* dispatch based upon encoding */ case 'Q': case 'q': /* sort-of QUOTED-PRINTABLE */ txt->data = (unsigned char *) fs_get ((size_t) (te - t) + 1); for (q = t,txt->size = 0; q < te; q++) switch (*q) { case '=': /* quoted character */ /* both must be hex */ if (!isxdigit (q[1]) || !isxdigit (q[2])) { fs_give ((void **) &txt->data); return NIL; /* syntax error: bad quoted character */ } txt->data[txt->size++] = /* assemble character */ ((q[1] - (isdigit (q[1]) ? '0' : ((isupper (q[1]) ? 'A' : 'a') - 10))) << 4) + (q[2] - (isdigit (q[2]) ? '0' : ((isupper (q[2]) ? 'A' : 'a') - 10))); q += 2; /* advance past quoted character */ break; case '_': /* convert to space */ txt->data[txt->size++] = ' '; break; default: /* ordinary character */ txt->data[txt->size++] = *q; break; } txt->data[txt->size] = '\0'; break; case 'B': case 'b': /* BASE64 */ if (txt->data = (unsigned char *) rfc822_base64 (t,te - t,&txt->size)) break; default: /* any other encoding is unknown */ return NIL; /* syntax error: unknown encoding */ } return T;}/* Get MIME-2 token from encoded word * Accepts: current text pointer * text limit pointer * pointer to returned end pointer * Returns: current text pointer & end pointer if success, else NIL */unsigned char *mime2_token (unsigned char *s,unsigned char *se, unsigned char **t){ for (*t = s; **t != '?'; ++*t) { if ((*t < se) && isgraph (**t)) switch (**t) { case '(': case ')': case '<': case '>': case '@': case ',': case ';': case ':': case '\\': case '"': case '/': case '[': case ']': case '.': case '=': return NIL; /* none of these are valid in tokens */ } else return NIL; /* out of text or CTL or space */ } return s;}/* Get MIME-2 text from encoded word * Accepts: current text pointer * text limit pointer * pointer to returned end pointer * Returns: current text pointer & end pointer if success, else NIL */unsigned char *mime2_text (unsigned char *s,unsigned char *se, unsigned char **t){ /* make sure valid, search for closing ? */ for (*t = s; **t != '?'; ++*t) if ((*t >= se) || !isgraph (**t)) return NIL; /* make sure terminated properly */ if ((*t)[1] != '=') return NIL; return s;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -