📄 gen_locale.c
字号:
#define _GNU_SOURCE#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <limits.h>#include <assert.h>#include <locale.h>#include <langinfo.h>#include <nl_types.h>#include <stdint.h>#include "c8tables.h"#define __LOCALE_DATA_CATEGORIES 6/* must agree with ordering of gen_mmap! */static const unsigned char *lc_names[] = { "LC_CTYPE", "LC_NUMERIC", "LC_MONETARY", "LC_TIME", "LC_COLLATE", "LC_MESSAGES",#if __LOCALE_DATA_CATEGORIES == 12 "LC_PAPER", "LC_NAME", "LC_ADDRESS", "LC_TELEPHONE", "LC_MEASUREMENT", "LC_IDENTIFICATION",#elif __LOCALE_DATA_CATEGORIES != 6#error unsupported __LOCALE_DATA_CATEGORIES value!#endif};typedef struct { char *glibc_name; char name[5]; char dot_cs; /* 0 if no codeset specified */ char cs; unsigned char idx_name; unsigned char lc_time_row; unsigned char lc_numeric_row; unsigned char lc_monetary_row; unsigned char lc_messages_row; unsigned char lc_ctype_row;#if __LOCALE_DATA_CATEGORIES != 6#error unsupported __LOCALE_DATA_CATEGORIES value#endif} locale_entry;static void read_at_mappings(void);static void read_enable_disable(void);static void read_locale_list(void);static int find_codeset_num(const char *cs);static int find_at_string_num(const char *as);static int le_cmp(const void *, const void *);static void dump_table8(const char *name, const char *tbl, int len);static void dump_table8c(const char *name, const char *tbl, int len);static void dump_table16(const char *name, const int *tbl, int len);static void do_lc_time(void);static void do_lc_numeric(void);static void do_lc_monetary(void);static void do_lc_messages(void);static void do_lc_ctype(void);static FILE *fp;static FILE *ofp;static char line_buf[80];static char at_mappings[256];static char at_mapto[256];static char at_strings[1024];static char *at_strings_end;static locale_entry locales[700];static char glibc_locale_names[60000];static int num_locales;static int default_utf8;static int default_8bit;static int total_size;static int null_count;static void do_locale_names(void){ /* "C" locale name is handled specially by the setlocale code. */ int uniq = 0; int i; if (num_locales <= 1) {/* printf("error - only C locale?\n"); *//* exit(EXIT_FAILURE); */ fprintf(ofp, "static const unsigned char __locales[%d];\n", (3 + __LOCALE_DATA_CATEGORIES)); fprintf(ofp, "static const unsigned char __locale_names5[5];\n"); } else { if (default_utf8) { fprintf(ofp, "#define __CTYPE_HAS_UTF_8_LOCALES\t\t\t1\n"); } fprintf(ofp, "#define __LOCALE_DATA_CATEGORIES\t\t\t%d\n", __LOCALE_DATA_CATEGORIES); fprintf(ofp, "#define __LOCALE_DATA_WIDTH_LOCALES\t\t\t%d\n", 3+__LOCALE_DATA_CATEGORIES); fprintf(ofp, "#define __LOCALE_DATA_NUM_LOCALES\t\t\t%d\n", num_locales); fprintf(ofp, "static const unsigned char __locales[%d] = {\n", (num_locales) * (3 + __LOCALE_DATA_CATEGORIES)); for (i=0 ; i < num_locales ; i++) { if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) { locales[i].idx_name = uniq; ++uniq; } else { locales[i].idx_name = uniq - 1; } fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].idx_name)); fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].dot_cs)); fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].cs)); /* lc_ctype would store translit flags and turkish up/low flag. */ fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_ctype_row)); fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_numeric_row)); fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_monetary_row)); fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_time_row));#if 1 /* lc_collate */ if (strlen(locales[i].glibc_name) >= 5) { fprintf(ofp, "COL_IDX_%.2s_%.2s, ", locales[i].glibc_name, locales[i].glibc_name+3); } else if (!strcmp(locales[i].glibc_name, "C")) { fprintf(ofp, "COL_IDX_C , "); } else { printf("don't know how to handle COL_IDX_ for %s\n", locales[i].glibc_name); exit(EXIT_FAILURE); }#else fprintf(ofp, "%#4x, ", 0); /* place holder for lc_collate */#endif fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_messages_row)); fprintf(ofp, "\t/* %s */\n", locales[i].glibc_name); } fprintf(ofp, "};\n\n"); fprintf(ofp, "#define __LOCALE_DATA_NUM_LOCALE_NAMES\t\t%d\n", uniq ); fprintf(ofp, "static const unsigned char __locale_names5[%d] = \n\t", uniq * 5); uniq = 0; for (i=1 ; i < num_locales ; i++) { if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) { fprintf(ofp, "\"%5.5s\" ", locales[i].name); ++uniq; if ((uniq % 8) == 0) { fprintf(ofp, "\n\t"); } } } fprintf(ofp,";\n\n"); if (at_strings_end > at_strings) { int i, j; char *p; i = 0; p = at_strings; while (*p) { ++i; p += 1 + (unsigned char) *p; } /* len, char, string\0 */ fprintf(ofp, "#define __LOCALE_DATA_AT_MODIFIERS_LENGTH\t\t%d\n", i + (at_strings_end - at_strings)); fprintf(ofp, "static const unsigned char __locale_at_modifiers[%d] = {", i + (at_strings_end - at_strings)); i = 0; p = at_strings; while (*p) { fprintf(ofp, "\n\t%4d, '%c',", (unsigned char) *p, /* len of string\0 */ at_mapto[i]); for (j=1 ; j < ((unsigned char) *p) ; j++) { fprintf(ofp, " '%c',", p[j]); } fprintf(ofp, " 0,"); ++i; p += 1 + (unsigned char) *p; } fprintf(ofp, "\n};\n\n"); } { int pos[__LOCALE_DATA_CATEGORIES]; pos[0] = __LOCALE_DATA_CATEGORIES; for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) { fprintf(ofp, "#define __%s\t\t%d\n", lc_names[i], i); if (i + 1 < __LOCALE_DATA_CATEGORIES) { pos[i+1] = 1 + strlen(lc_names[i]) + pos[i]; } } if (pos[__LOCALE_DATA_CATEGORIES-1] > 255) { printf("error - lc_names is too big (%d)\n", pos[__LOCALE_DATA_CATEGORIES-1]); exit(EXIT_FAILURE); } fprintf(ofp, "#define __LC_ALL\t\t%d\n\n", i); fprintf(ofp, "#define __lc_names_LEN\t\t%d\n", pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1); total_size += pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1; fprintf(ofp, "static unsigned const char lc_names[%d] =\n", pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1); fprintf(ofp, "\t\""); for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) { fprintf(ofp, "\\x%02x", (unsigned char) pos[i]); } fprintf(ofp, "\""); for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) { fprintf(ofp, "\n\t\"%s\\0\"", lc_names[i]); } fprintf(ofp, ";\n\n"); } printf("locale data = %d name data = %d for %d uniq\n", num_locales * (3 + __LOCALE_DATA_CATEGORIES), uniq * 5, uniq); total_size += num_locales * (3 + __LOCALE_DATA_CATEGORIES) + uniq * 5; }}static void read_at_mappings(void){ char *p; char *m; int mc = 0; do { if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) { if (!fgets(line_buf, sizeof(line_buf), fp)) { if (ferror(fp)) { printf("error reading file\n"); exit(EXIT_FAILURE); } return; /* EOF */ } if ((*line_buf == '#') && (line_buf[1] == '-')) { break; } continue; } if (*p == '@') { if (p[1] == 0) { printf("error: missing @modifier name\n"); exit(EXIT_FAILURE); } m = p; /* save the modifier name */ if (!(p = strtok(NULL, " \t\n")) || p[1] || (((unsigned char) *p) > 0x7f)) { printf("error: missing or illegal @modifier mapping char\n"); exit(EXIT_FAILURE); } if (at_mappings[(int)((unsigned char) *p)]) { printf("error: reused @modifier mapping char\n"); exit(EXIT_FAILURE); } at_mappings[(int)((unsigned char) *p)] = 1; at_mapto[mc] = *p; ++mc; *at_strings_end = (char)( (unsigned char) (strlen(m)) ); strcpy(++at_strings_end, m+1); at_strings_end += (unsigned char) at_strings_end[-1]; printf("@mapping: \"%s\" to '%c'\n", m, *p); if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) { printf("ignoring trailing text: %s...\n", p); } *line_buf = 0; continue; } break; } while (1);#if 0 { p = at_strings; if (!*p) { printf("no @ strings\n"); return; } do { printf("%s\n", p+1); p += 1 + (unsigned char) *p; } while (*p); }#endif}static void read_enable_disable(void){ char *p; do { if (!(p = strtok(line_buf, " =\t\n")) || (*p == '#')) { if (!fgets(line_buf, sizeof(line_buf), fp)) { if (ferror(fp)) { printf("error reading file\n"); exit(EXIT_FAILURE); } return; /* EOF */ } if ((*line_buf == '#') && (line_buf[1] == '-')) { break; } continue; } if (!strcmp(p, "UTF-8")) { if (!(p = strtok(NULL, " =\t\n")) || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) { printf("error: missing or illegal UTF-8 setting\n"); exit(EXIT_FAILURE); } default_utf8 = (toupper(*p) == 'Y'); printf("UTF-8 locales are %sabled\n", "dis\0en"+ (default_utf8 << 2)); } else if (!strcmp(p, "8-BIT")) { if (!(p = strtok(NULL, " =\t\n")) || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) { printf("error: missing or illegal 8-BIT setting\n"); exit(EXIT_FAILURE); } default_8bit = (toupper(*p) == 'Y'); printf("8-BIT locales are %sabled\n", "dis\0en" + (default_8bit << 2)); } else { break; } if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) { printf("ignoring trailing text: %s...\n", p); } *line_buf = 0; continue; } while (1);}#ifdef __LOCALE_DATA_CODESET_LISTstatic int find_codeset_num(const char *cs){ int r = 2; char *s = __LOCALE_DATA_CODESET_LIST; /* 7-bit is 1, UTF-8 is 2, 8-bits are > 2 */ if (strcmp(cs, "UTF-8") != 0) { ++r; while (*s && strcmp(__LOCALE_DATA_CODESET_LIST+ ((unsigned char) *s), cs)) {/* printf("tried %s\n", __LOCALE_DATA_CODESET_LIST + ((unsigned char) *s)); */ ++r; ++s; } if (!*s) { printf("error: unsupported codeset %s\n", cs); exit(EXIT_FAILURE); } } return r;}#elsestatic int find_codeset_num(const char *cs){ int r = 2; /* 7-bit is 1, UTF-8 is 2, 8-bits are > 2 */ if (strcmp(cs, "UTF-8") != 0) { printf("error: unsupported codeset %s\n", cs); exit(EXIT_FAILURE); } return r;}#endifstatic int find_at_string_num(const char *as){ int i = 0; char *p = at_strings; while (*p) { if (!strcmp(p+1, as)) { return i; } ++i; p += 1 + (unsigned char) *p; } printf("error: unmapped @string %s\n", as); exit(EXIT_FAILURE);}static void read_locale_list(void){ char *p; char *s; char *ln; /* locale name */ char *ls; /* locale name ll_CC */ char *as; /* at string */ char *ds; /* dot string */ char *cs; /* codeset */ int i; typedef struct { char *glibc_name; char name[5]; char dot_cs; /* 0 if no codeset specified */ char cs; } locale_entry; /* First the C locale. */ locales[0].glibc_name = locales[0].name; strncpy(locales[0].name,"C",5); locales[0].dot_cs = 0; locales[0].cs = 1; /* 7-bit encoding */ ++num_locales; do { if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) { if (!fgets(line_buf, sizeof(line_buf), fp)) { if (ferror(fp)) { printf("error reading file\n"); exit(EXIT_FAILURE); } return; /* EOF */ } if ((*line_buf == '#') && (line_buf[1] == '-')) { break; } continue; } s = glibc_locale_names; for (i=0 ; i < num_locales ; i++) { if (!strcmp(s+1, p)) { break; } s += 1 + ((unsigned char) *s); } if (i < num_locales) { printf("ignoring dulplicate locale name: %s", p); *line_buf = 0; continue; } /* New locale, but don't increment num until codeset verified! */ *s = (char)((unsigned char) (strlen(p) + 1)); strcpy(s+1, p); locales[num_locales].glibc_name = s+1; ln = p; /* save locale name */ if (!(p = strtok(NULL, " \t\n"))) { printf("error: missing codeset for locale %s\n", ln); exit(EXIT_FAILURE); } cs = p; i = find_codeset_num(p); if ((i == 2) && !default_utf8) { printf("ignoring UTF-8 locale %s\n", ln); *line_buf = 0; continue; } else if ((i > 2) && !default_8bit) { printf("ignoring 8-bit codeset locale %s\n", ln); *line_buf = 0; continue; } locales[num_locales].cs = (char)((unsigned char) i); if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) { printf("ignoring trailing text: %s...\n", p); } /* Now go back to locale string for .codeset and @modifier */ as = strtok(ln, "@"); if (as) { as = strtok(NULL, "@"); } ds = strtok(ln, "."); if (ds) { ds = strtok(NULL, "."); } ls = ln; if ((strlen(ls) != 5) || (ls[2] != '_')) { printf("error: illegal locale name %s\n", ls); exit(EXIT_FAILURE); } i = 0; /* value for unspecified codeset */ if (ds) { i = find_codeset_num(ds); if ((i == 2) && !default_utf8) { printf("ignoring UTF-8 locale %s\n", ln); *line_buf = 0; continue; } else if ((i > 2) && !default_8bit) { printf("ignoring 8-bit codeset locale %s\n", ln); *line_buf = 0; continue; } } locales[num_locales].dot_cs = (char)((unsigned char) i); if (as) { i = find_at_string_num(as); ls[2] = at_mapto[i]; } memcpy(locales[num_locales].name, ls, 5);/* printf("locale: %5.5s %2d %2d %s\n", *//* locales[num_locales].name, *//* locales[num_locales].cs, *//* locales[num_locales].dot_cs, *//* locales[num_locales].glibc_name *//* ); */ ++num_locales; *line_buf = 0; } while (1);}static int le_cmp(const void *a, const void *b){ const locale_entry *p; const locale_entry *q; int r; p = (const locale_entry *) a; q = (const locale_entry *) b; if (!(r = p->name[0] - q->name[0]) && !(r = p->name[1] - q->name[1]) && !(r = p->name[3] - q->name[3]) && !(r = p->name[4] - q->name[4]) && !(r = p->name[2] - q->name[2]) && !(r = -(p->cs - q->cs)) ) { r = -(p->dot_cs - q->dot_cs); /* Reverse the ordering of the codesets so UTF-8 comes last. * Work-around (hopefully) for glibc bug affecting at least * the euro currency symbol. */ } return r;}int main(int argc, char **argv){ if ((argc != 2) || (!(fp = fopen(*++argv, "r")))) { printf("error: missing filename or file!\n"); return EXIT_FAILURE; } at_strings_end = at_strings; read_at_mappings(); read_enable_disable(); read_locale_list(); fclose(fp); /* handle C locale specially */ qsort(locales+1, num_locales-1, sizeof(locale_entry), le_cmp);#if 0 for (i=0 ; i < num_locales ; i++) { printf("locale: %5.5s %2d %2d %s\n", locales[i].name, locales[i].cs, locales[i].dot_cs, locales[i].glibc_name ); }#endif if (!(ofp = fopen("locale_tables.h", "w"))) { printf("error: can not open locale_tables.h for writing!\n"); return EXIT_FAILURE; } do_lc_time(); do_lc_numeric(); do_lc_monetary(); do_lc_messages(); do_lc_ctype(); do_locale_names(); fclose(ofp); printf("total data size = %d\n", total_size); printf("null count = %d\n", null_count); return EXIT_SUCCESS;}static char *idx[10000];static char buf[100000];static char *last;static int uniq;static int addblock(const char *s, size_t n) /* l includes nul terminator */{ int j; if (!s) { ++null_count; return 0; } for (j=0 ; (j < uniq) && (idx[j] + n < last) ; j++) { if (!memcmp(s, idx[j], n)) { return idx[j] - buf; } } if (uniq >= sizeof(idx)) { printf("too many uniq strings!\n"); exit(EXIT_FAILURE); } if (last + n >= buf + sizeof(buf)) { printf("need to increase size of buf!\n"); exit(EXIT_FAILURE); } idx[uniq] = last; ++uniq; memcpy(last, s, n); last += n; return idx[uniq - 1] - buf;}static int addstring(const char *s){ int j; size_t l; if (!s) { ++null_count; return 0; } for (j=0 ; j < uniq ; j++) { if (!strcmp(s, idx[j])) { return idx[j] - buf; } } if (uniq >= sizeof(idx)) { printf("too many uniq strings!\n"); exit(EXIT_FAILURE); } l = strlen(s) + 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -