📄 8bit_tab_to_h.c
字号:
/* Copyright (C) 1999-2002 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Tools.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/*
 * Generates an 8-bit character set table from a .TXT table as found on
 * ftp.unicode.org or from a table containing the 256 Unicode values as
 * hexadecimal integers.
 * Examples:
 *
 *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1
 *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2
 *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3
 *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4
 *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5
 *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6
 *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7
 *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8
 *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9
 *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10
 *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14
 *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15
 *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201
 *   ./8bit_tab_to_h TIS620.2533-1 tis620 < tabtis620
 *   ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r
 *   ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u
 *   ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8
 *   ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133
 *   ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1
 *   ./8bit_tab_to_h VISCII1.1-1 viscii1 < tabviscii
 *   ./8bit_tab_to_h TCVN-5712 tcvn <
tabtcvn * ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy * ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps * * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT * ./8bit_tab_to_h ISO-8859-6 iso8859_6 < 8859-6.TXT * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT * ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT */#include <stdio.h>#include <stdlib.h>#include <stdbool.h>#include <string.h>int main (int argc, char *argv[]){ const char* charsetname; const char* c_charsetname; const char* filename; const char* directory; int charset2uni[0x100]; if (argc != 3 && argc != 4 && argc != 5) exit(1); charsetname = argv[1]; c_charsetname = argv[2]; if (argc > 3) { filename = argv[3]; } else { char* s = (char*) malloc(strlen(c_charsetname)+strlen(".h")+1); strcpy(s,c_charsetname); strcat(s,".h"); filename = s; } directory = (argc > 4 ? argv[4] : ""); fprintf(stderr, "Creating %s%s\n", directory, filename); { int i, c; c = getc(stdin); ungetc(c,stdin); if (c == '#') { /* Read a unicode.org style .TXT file. 
*/ for (i = 0; i < 0x100; i++) charset2uni[i] = 0xfffd; for (;;) { c = getc(stdin); if (c == EOF) break; if (c == '\n' || c == ' ' || c == '\t') continue; if (c == '#') { do { c = getc(stdin); } while (!(c == EOF || c == '\n')); continue; } ungetc(c,stdin); if (scanf("0x%x", &i) != 1 || !(i >= 0 && i < 0x100)) exit(1); do { c = getc(stdin); } while (c == ' ' || c == '\t'); if (c != EOF) ungetc(c,stdin); if (c == '\n' || c == '#') continue; if (scanf("0x%x", &charset2uni[i]) != 1) exit(1); } } else { /* Read a table of hexadecimal Unicode values. */ for (i = 0; i < 0x100; i++) { if (scanf("%x", &charset2uni[i]) != 1) exit(1); if (charset2uni[i] < 0 || charset2uni[i] == 0xffff) charset2uni[i] = 0xfffd; } if (scanf("%x", &i) != EOF) exit(1); } } /* Write the output file. */ { FILE* f; { char* fname = malloc(strlen(directory)+strlen(filename)+1); strcpy(fname,directory); strcat(fname,filename); f = fopen(fname,"w"); if (f == NULL) exit(1); } fprintf(f, "/*\n"); fprintf(f, " * Copyright (C) 1999-2002 Free Software Foundation, Inc.\n"); fprintf(f, " * This file is part of the GNU LIBICONV Library.\n"); fprintf(f, " *\n"); fprintf(f, " * The GNU LIBICONV Library is free software; you can redistribute it\n"); fprintf(f, " * and/or modify it under the terms of the GNU Library General Public\n"); fprintf(f, " * License as published by the Free Software Foundation; either version 2\n"); fprintf(f, " * of the License, or (at your option) any later version.\n"); fprintf(f, " *\n"); fprintf(f, " * The GNU LIBICONV Library is distributed in the hope that it will be\n"); fprintf(f, " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); fprintf(f, " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n"); fprintf(f, " * Library General Public License for more details.\n"); fprintf(f, " *\n"); fprintf(f, " * You should have received a copy of the GNU Library General Public\n"); fprintf(f, " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n"); fprintf(f, " * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n"); fprintf(f, " * Suite 330, Boston, MA 02111-1307, USA.\n"); fprintf(f, " */\n"); fprintf(f, "\n"); fprintf(f, "/*\n"); fprintf(f, " * %s\n", charsetname); fprintf(f, " */\n"); fprintf(f, "\n"); { int i, i1, i2, i3; int line[16]; int tableno; struct { int minline; int maxline; } tables[16]; bool some_invalid; bool final_ret_reached; for (i1 = 0; i1 < 16; i1++) { bool all_invalid = true; bool all_identity = true; for (i2 = 0; i2 < 16; i2++) { i = 16*i1+i2; if (charset2uni[i] != 0xfffd) all_invalid = false; if (charset2uni[i] != i) all_identity = false; } if (all_invalid) line[i1] = -2; else if (all_identity) line[i1] = -1; else line[i1] = 0; } tableno = 0; for (i1 = 0; i1 < 16; i1++) { if (line[i1] >= 0) { if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) { line[i1] = tableno-1; tables[tableno-1].maxline = i1; } else { tableno++; line[i1] = tableno-1; tables[tableno-1].minline = tables[tableno-1].maxline = i1; } } } some_invalid = false; for (i = 0; i < 0x100; i++) if (charset2uni[i] == 0xfffd) some_invalid = true; if (tableno > 0) { int t; for (t = 0; t < tableno; t++) { fprintf(f, "static const unsigned short %s_2uni", c_charsetname); if (tableno > 1) fprintf(f, "_%d", t+1); fprintf(f, "[%d] = {\n", 16*(tables[t].maxline-tables[t].minline+1)); for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) { fprintf(f, " /* 0x%02x */\n", 16*i1); for (i2 = 0; i2 < 2; i2++) { fprintf(f, " "); for (i3 = 0; i3 < 8; i3++) { i = 16*i1+8*i2+i3; fprintf(f, " 0x%04x,", charset2uni[i]); } fprintf(f, "\n"); } } fprintf(f, "};\n"); } fprintf(f, "\n"); } final_ret_reached = false; fprintf(f, "static 
int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname); fprintf(f, "{\n"); fprintf(f, " unsigned char c = *s;\n"); if (some_invalid) { for (i1 = 0; i1 < 16;) { int t = line[i1]; const char* indent; for (i2 = i1; i2 < 16 && line[i2] == t; i2++); indent = (i1 == 0 && i2 == 16 ? " " : " "); if (i1 == 0) { if (i2 == 16) { } else { fprintf(f, " if (c < 0x%02x) {\n", 16*i2); } } else { if (i2 == 16) { fprintf(f, " else {\n"); } else { fprintf(f, " else if (c < 0x%02x) {\n", 16*i2); } } if (t == -2) { final_ret_reached = true; } else if (t == -1) { fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent); fprintf(f, "%sreturn 1;\n", indent); } else { fprintf(f, "%s", indent); some_invalid = false; for (i = 16*i1; i < 16*i2; i++) if (charset2uni[i] == 0xfffd) some_invalid = true; if (some_invalid) fprintf(f, "unsigned short wc = "); else fprintf(f, "*pwc = (ucs4_t) "); fprintf(f, "%s_2uni", c_charsetname); if (tableno > 1) fprintf(f, "_%d", t+1); fprintf(f, "[c"); if (tables[t].minline > 0) fprintf(f, "-0x%02x", 16*tables[t].minline); fprintf(f, "];\n"); if (some_invalid) { fprintf(f, "%sif (wc != 0xfffd) {\n", indent); fprintf(f, "%s *pwc = (ucs4_t) wc;\n", indent); fprintf(f, "%s return 1;\n", indent); fprintf(f, "%s}\n", indent); final_ret_reached = true; } else { fprintf(f, "%sreturn 1;\n", indent);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -