📄 charsets.c
字号:
if (/*from == to ||*/ from == -1 || to == -1) return NULL; if (codepages[to].table == table_utf_8) return get_translation_table_to_utf_8(from); if (table_init) memset(table, 0, sizeof(struct conv_table) * 256), table_init = 0; if (from == lfr && to == lto) return table; lfr = from; lto = to; new_translation_table(table); if (codepages[from].table == table_utf_8) { int j; for (j = 0; codepages[to].table[j].c; j++) add_utf_8(table, codepages[to].table[j].u, codepages[to].table[j].u == 0xa0 ? "\001" : codepages[to].table[j].u == 0xad ? "" : strings[codepages[to].table[j].c]); for (i = 0; unicode_7b[i].x != -1; i++) if (unicode_7b[i].x >= 0x80) add_utf_8(table, unicode_7b[i].x, unicode_7b[i].s); } else for (i = 128; i < 256; i++) { int j; char *u; for (j = 0; codepages[from].table[j].c; j++) { if (codepages[from].table[j].c == i) goto f; } continue; f: u = u2cp(codepages[from].table[j].u, to, 1); if (u) table[i].u.str = u; } return table;}static inline int xxstrcmp(unsigned char *s1, unsigned char *s2, int l2){ while (l2) { if (*s1 > *s2) return 1; if (!*s1 || *s1 < *s2) return -1; s1++, s2++, l2--; } return !!*s1;}int get_entity_number(unsigned char *st, int l){ int n = 0; if (upcase(st[0]) == 'X') { st++, l--; if (!l || l > 4) return -1; do { char c = upcase(*(st++)); if (c >= '0' && c <= '9') n = n * 16 + c - '0'; else if (c >= 'A' && c <= 'F') n = n * 16 + c - 'A' + 10; else return -1; if (n >= 0x10000) return -1; } while (--l); } else { if (!l || l > 5) return -1; do { char c = *(st++); if (c >= '0' && c <= '9') n = n * 10 + c - '0'; else return -1; if (n >= 0x10000) return -1; } while (--l); } return n;}unsigned char *get_entity_string(unsigned char *st, int l, int encoding){ int n; if (l <= 0) return NULL; if (st[0] == '#') { if (l == 1) return NULL; if ((n = get_entity_number(st + 1, l - 1)) == -1) return NULL; if (n < 32 && get_attr_val_nl != 2) n = 32; } else { int s = 0, e = N_ENTITIES - 1; while (s <= e) { int c; int m = (s + e) / 2; c = xxstrcmp(entities[m].s, st, l); if (!c) { n = entities[m].c; goto f; } if (c > 0) e = m - 1; else s = m + 1; } return NULL; f:; } return u2cp(n, encoding, 1);}unsigned char *convert_string(struct conv_table *ct, unsigned char *c, int l, struct document_options *dopt){ unsigned char *buffer; int bp = 0; int pp = 0; if (!ct) { int i; for (i = 0; i < l; i++) if (c[i] == '&') goto xx; return memacpy(c, l); xx:; } buffer = mem_alloc(ALLOC_GR); while (pp < l) { unsigned char *e; if (c[pp] < 128 && c[pp] != '&') { putc: buffer[bp++] = c[pp++]; if (!(bp & (ALLOC_GR - 1))) { if ((unsigned)bp > MAXINT - ALLOC_GR) overalloc(); buffer = mem_realloc(buffer, bp + ALLOC_GR); } continue; } if (c[pp] != '&') { struct conv_table *t; int i; if (!ct) goto putc; t = ct; i = pp; decode: if (!t[c[i]].t) { e = t[c[i]].u.str; } else { t = t[c[i++]].u.tbl; if (i >= l) goto putc; goto decode; } pp = i + 1; } else { int i = pp + 1; if (!dopt || dopt->plain) goto putc; while (i < l && c[i] != ';' && c[i] != '&' && c[i] > ' ') i++; if (!(e = get_entity_string(&c[pp + 1], i - pp - 1, dopt->cp))) goto putc; pp = i + (i < l && c[i] == ';'); } if (!e[0]) continue; if (!e[1]) { buffer[bp++] = e[0]; if (!(bp & (ALLOC_GR - 1))) { if ((unsigned)bp > MAXINT - ALLOC_GR) overalloc(); buffer = mem_realloc(buffer, bp + ALLOC_GR); } continue; } while (*e) { buffer[bp++] = *(e++); if (!(bp & (ALLOC_GR - 1))) { if ((unsigned)bp > MAXINT - ALLOC_GR) overalloc(); buffer = mem_realloc(buffer, bp + ALLOC_GR); } } } buffer[bp] = 0; return buffer;}/* 1 match 0 mismatch */static inline int compare_names(unsigned char *one, unsigned char *two){ while(1){ if (!*one) return !*two; if (!*two||(upcase(*one)!=upcase(*two))) return 0; one++; two++; }}/*int get_cp_index(unsigned char *n){ int i, a; for (i = 0; codepages[i].name; i++) { if (compare_names(n,codepages[i].name)) return i; for (a = 0; codepages[i].aliases[a]; a++) if (compare_names(n,codepages[i].aliases[a])) return i; } return -1;}*/int get_cp_index(unsigned char *n){ int i, a, p, q; int ii = -1, ll = 0; for (i = 0; codepages[i].name; i++) { for (a = 0; codepages[i].aliases[a]; a++) { for (p = 0; n[p]; p++) { if (upcase(n[p]) == upcase(codepages[i].aliases[a][0])) { for (q = 1; codepages[i].aliases[a][q]; q++) { if (upcase(n[p+q]) != upcase(codepages[i].aliases[a][q])) goto fail; } if (strlen(codepages[i].aliases[a]) > (size_t)ll) { ll = strlen(codepages[i].aliases[a]); ii = i; } } fail:; } } } return ii;}unsigned char *get_cp_name(int index){ if (index < 0) return "none"; return codepages[index].name;}unsigned char *get_cp_mime_name(int index){ if (index < 0) return "none"; if (!codepages[index].aliases) return NULL; return codepages[index].aliases[0];}int is_cp_special(int index){ return codepages[index].table == table_utf_8;}#define UP_EQUAL(a, b) unicode_upcase[a].lo == (b)#define UP_ABOVE(a, b) unicode_upcase[a].lo > (b)unsigned char charset_upcase(unsigned char ch, int cp){ unsigned u; int res; unsigned char *str; if (ch < 0x80) return upcase(ch); u = cp2u(ch, cp); BIN_SEARCH(sizeof(unicode_upcase) / sizeof(*unicode_upcase), UP_EQUAL, UP_ABOVE, u, res); if (res == -1) return ch; str = u2cp(unicode_upcase[res].up, cp, 0); if (!str || !str[0] || str[1]) return ch; return str[0];}void charset_upcase_string(unsigned char **chp, int cp){ unsigned char *ch = *chp; int i; if (codepages[cp].table == table_utf_8) { ch = unicode_upcase_string(ch); mem_free(*chp); *chp = ch; } else { for (i = 0; ch[i]; i++) ch[i] = charset_upcase(ch[i], cp); }}unsigned char *unicode_upcase_string(unsigned char *ch){ unsigned char *r = init_str(); int rl = 0; while (1) { unsigned c; int res; GET_UTF_8(ch, c); if (!c) break; BIN_SEARCH(sizeof(unicode_upcase) / sizeof(*unicode_upcase), UP_EQUAL, UP_ABOVE, c, res); if (res != -1) c = unicode_upcase[res].up; add_to_str(&r, &rl, encode_utf_8(c)); } return r;}unsigned char *to_utf8_upcase(unsigned char *str, int cp){ unsigned char *str1, *str2; struct conv_table *ct = get_translation_table(cp, get_cp_index("utf-8")); str1 = convert_string(ct, str, strlen(str), NULL); str2 = unicode_upcase_string(str1); mem_free(str1); return str2;}int compare_case_utf8(unsigned char *u1, unsigned char *u2){ unsigned char *uu1 = u1; unsigned c1, c2; int cc1; while (1) { GET_UTF_8(u2, c2); if (!c2) return u1 - uu1; skip_discr: GET_UTF_8(u1, c1); BIN_SEARCH(sizeof(unicode_upcase) / sizeof(*unicode_upcase), UP_EQUAL, UP_ABOVE, c1, cc1); if (cc1 != -1) c1 = unicode_upcase[cc1].up; if (c1 == 0xad) goto skip_discr; if (c1 != c2) return 0; if (c1 == ' ') { unsigned char *x1; do { x1 = u1; GET_UTF_8(u1, c1); BIN_SEARCH(sizeof(unicode_upcase) / sizeof(*unicode_upcase), UP_EQUAL, UP_ABOVE, c1, cc1); if (cc1 != -1) c1 = unicode_upcase[cc1].up; } while (c1 == ' '); u1 = x1; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -