⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 8bit_tab_to_h.c

📁 libiconv是一个很不错的字符集转换库，程序接口也很简单。
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Copyright (C) 1999-2002 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Tools.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/*
 * Generates an 8-bit character set table from a .TXT table as found on
 * ftp.unicode.org or from a table containing the 256 Unicode values as
 * hexadecimal integers.
 * Examples:
 *
 *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1
 *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2
 *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3
 *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4
 *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5
 *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6
 *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7
 *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8
 *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9
 *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10
 *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14
 *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15
 *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201
 *   ./8bit_tab_to_h TIS620.2533-1 tis620 < tabtis620
 *   ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r
 *   ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u
 *   ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8
 *   ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133
 *   ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1
 *   ./8bit_tab_to_h 
VISCII1.1-1 viscii1 < tabviscii *   ./8bit_tab_to_h TCVN-5712 tcvn < tabtcvn *   ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy *   ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps * *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < 8859-6.TXT *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT *   ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT */#include <stdio.h>#include <stdlib.h>#include <stdbool.h>#include <string.h>int main (int argc, char *argv[]){  const char* charsetname;  const char* c_charsetname;  const char* filename;  const char* directory;  int charset2uni[0x100];  if (argc != 3 && argc != 4 && argc != 5)    exit(1);  charsetname = argv[1];  c_charsetname = argv[2];  if (argc > 3) {    filename = argv[3];  } else {    char* s = (char*) malloc(strlen(c_charsetname)+strlen(".h")+1);    strcpy(s,c_charsetname); strcat(s,".h");    filename = s;  }  directory = (argc > 4 ? argv[4] : "");  fprintf(stderr, "Creating %s%s\n", directory, filename);  {    int i, c;    c = getc(stdin);    ungetc(c,stdin);    if (c == '#') {      /* Read a unicode.org style .TXT file. 
*/      for (i = 0; i < 0x100; i++)        charset2uni[i] = 0xfffd;      for (;;) {        c = getc(stdin);        if (c == EOF)          break;        if (c == '\n' || c == ' ' || c == '\t')          continue;        if (c == '#') {          do { c = getc(stdin); } while (!(c == EOF || c == '\n'));          continue;        }        ungetc(c,stdin);        if (scanf("0x%x", &i) != 1 || !(i >= 0 && i < 0x100))          exit(1);        do { c = getc(stdin); } while (c == ' ' || c == '\t');        if (c != EOF)          ungetc(c,stdin);        if (c == '\n' || c == '#')          continue;        if (scanf("0x%x", &charset2uni[i]) != 1)          exit(1);      }    } else {      /* Read a table of hexadecimal Unicode values. */      for (i = 0; i < 0x100; i++) {        if (scanf("%x", &charset2uni[i]) != 1)          exit(1);        if (charset2uni[i] < 0 || charset2uni[i] == 0xffff)          charset2uni[i] = 0xfffd;      }      if (scanf("%x", &i) != EOF)        exit(1);    }  }  /* Write the output file. 
*/  {    FILE* f;    {      char* fname = malloc(strlen(directory)+strlen(filename)+1);      strcpy(fname,directory); strcat(fname,filename);      f = fopen(fname,"w");      if (f == NULL)        exit(1);    }    fprintf(f, "/*\n");    fprintf(f, " * Copyright (C) 1999-2002 Free Software Foundation, Inc.\n");    fprintf(f, " * This file is part of the GNU LIBICONV Library.\n");    fprintf(f, " *\n");    fprintf(f, " * The GNU LIBICONV Library is free software; you can redistribute it\n");    fprintf(f, " * and/or modify it under the terms of the GNU Library General Public\n");    fprintf(f, " * License as published by the Free Software Foundation; either version 2\n");    fprintf(f, " * of the License, or (at your option) any later version.\n");    fprintf(f, " *\n");    fprintf(f, " * The GNU LIBICONV Library is distributed in the hope that it will be\n");    fprintf(f, " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");    fprintf(f, " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU\n");    fprintf(f, " * Library General Public License for more details.\n");    fprintf(f, " *\n");    fprintf(f, " * You should have received a copy of the GNU Library General Public\n");    fprintf(f, " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");    fprintf(f, " * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n");    fprintf(f, " * Suite 330, Boston, MA 02111-1307, USA.\n");    fprintf(f, " */\n");    fprintf(f, "\n");    fprintf(f, "/*\n");    fprintf(f, " * %s\n", charsetname);    fprintf(f, " */\n");    fprintf(f, "\n");    {      int i, i1, i2, i3;      int line[16];      int tableno;      struct { int minline; int maxline; } tables[16];      bool some_invalid;      bool final_ret_reached;      for (i1 = 0; i1 < 16; i1++) {        bool all_invalid = true;        bool all_identity = true;        for (i2 = 0; i2 < 16; i2++) {          i = 16*i1+i2;          if (charset2uni[i] != 0xfffd)            all_invalid = false;          if (charset2uni[i] != i)            all_identity = false;        }        if (all_invalid)          line[i1] = -2;        else if (all_identity)          line[i1] = -1;        else          line[i1] = 0;      }      tableno = 0;      for (i1 = 0; i1 < 16; i1++) {        if (line[i1] >= 0) {          if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) {            line[i1] = tableno-1;            tables[tableno-1].maxline = i1;          } else {            tableno++;            line[i1] = tableno-1;            tables[tableno-1].minline = tables[tableno-1].maxline = i1;          }        }      }      some_invalid = false;      for (i = 0; i < 0x100; i++)        if (charset2uni[i] == 0xfffd)          some_invalid = true;      if (tableno > 0) {        int t;        for (t = 0; t < tableno; t++) {          fprintf(f, "static const unsigned short %s_2uni", c_charsetname);          if (tableno > 1)            fprintf(f, "_%d", t+1);          fprintf(f, "[%d] = {\n", 
16*(tables[t].maxline-tables[t].minline+1));          for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) {            fprintf(f, "  /* 0x%02x */\n", 16*i1);            for (i2 = 0; i2 < 2; i2++) {              fprintf(f, " ");              for (i3 = 0; i3 < 8; i3++) {                i = 16*i1+8*i2+i3;                fprintf(f, " 0x%04x,", charset2uni[i]);              }              fprintf(f, "\n");            }          }          fprintf(f, "};\n");        }        fprintf(f, "\n");      }      final_ret_reached = false;      fprintf(f, "static int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname);      fprintf(f, "{\n");      fprintf(f, "  unsigned char c = *s;\n");      if (some_invalid) {        for (i1 = 0; i1 < 16;) {          int t = line[i1];          const char* indent;          for (i2 = i1; i2 < 16 && line[i2] == t; i2++);          indent = (i1 == 0 && i2 == 16 ? "  " : "    ");          if (i1 == 0) {            if (i2 == 16) {            } else {              fprintf(f, "  if (c < 0x%02x) {\n", 16*i2);            }          } else {            if (i2 == 16) {              fprintf(f, "  else {\n");            } else {              fprintf(f, "  else if (c < 0x%02x) {\n", 16*i2);            }          }          if (t == -2) {            final_ret_reached = true;          } else if (t == -1) {            fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent);            fprintf(f, "%sreturn 1;\n", indent);          } else {            fprintf(f, "%s", indent);            some_invalid = false;            for (i = 16*i1; i < 16*i2; i++)              if (charset2uni[i] == 0xfffd)                some_invalid = true;            if (some_invalid)              fprintf(f, "unsigned short wc = ");            else              fprintf(f, "*pwc = (ucs4_t) ");            fprintf(f, "%s_2uni", c_charsetname);            if (tableno > 1)              fprintf(f, "_%d", t+1);            fprintf(f, "[c");            if 
(tables[t].minline > 0)              fprintf(f, "-0x%02x", 16*tables[t].minline);            fprintf(f, "];\n");            if (some_invalid) {              fprintf(f, "%sif (wc != 0xfffd) {\n", indent);              fprintf(f, "%s  *pwc = (ucs4_t) wc;\n", indent);              fprintf(f, "%s  return 1;\n", indent);              fprintf(f, "%s}\n", indent);              final_ret_reached = true;            } else {              fprintf(f, "%sreturn 1;\n", indent);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -