⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gentranslit.c

📁 libiconv是一个很不错的字符集转换库。程序接口也很简单
💻 C
字号:
/* Copyright (C) 1999-2003 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, write to the Free Software Foundation, Inc., 59 Temple Place -
   Suite 330, Boston, MA 02111-1307, USA.  */

/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 *   Unicode <tab> utf-8 replacement <tab> # comment
 *
 * The table is read from standard input and the generated C source is
 * written to standard output.  Any malformed input terminates the program
 * with exit status 1 and a diagnostic on standard error.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

enum {
  DATA_CAPACITY   = 0x100000,  /* maximum number of words in translit_data */
  CODEPOINT_COUNT = 0x110000,  /* size of the code point -> index map */
  LINE_WIDTH      = 8,         /* code points per "line" of a page table */
  LINE_COUNT      = CODEPOINT_COUNT / LINE_WIDTH,  /* 0x22000 */
  LINES_PER_PAGE  = 0x20,      /* 32 lines = 256 code points */
  MAX_LINE_GAP    = 8,         /* lines this close on one page share a table */
  TABLE_CAPACITY  = 0x2000,    /* maximum number of page tables */
  SUFFIX_SIZE     = 16         /* room for "%02x_%d" with any page/serial */
};

/* One run of lines that is emitted either as a translit_pageXX array
   (usecount > 1) or as a single "wc == ..." test (usecount == 1). */
struct table {
  int minline;
  int maxline;
  int usecount;               /* number of mapped code points in the run */
  char suffix[SUFFIX_SIZE];   /* array name suffix; empty if usecount <= 1 */
};

/* Prints a diagnostic to stderr and terminates with exit status 1. */
static void die (const char *msg)
{
  fprintf(stderr, "gentranslit: %s\n", msg);
  exit(1);
}

/* Returns zero-filled storage for COUNT objects of SIZE bytes, or dies. */
static void *xcalloc (size_t count, size_t size)
{
  void *mem = calloc(count, size);
  if (mem == NULL)
    die("out of memory");
  return mem;
}

/* Consumes stdin up to and including the next newline, or up to EOF. */
static void skip_rest_of_line (void)
{
  int c;
  do {
    c = getc(stdin);
  } while (!(c == EOF || c == '\n'));
}

/* Appends WORD to DATA, refusing to write past DATA_CAPACITY. */
static void append_word (unsigned int *data, int *count, unsigned int word)
{
  if (*count >= DATA_CAPACITY)
    die("too much replacement data");
  data[(*count)++] = word;
}

/* Writes the licence banner and title comment of the generated file. */
static void print_banner (void)
{
  printf("/*\n");
  printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n");
  printf(" * Library General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Library General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  printf(" * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n");
  printf(" * Suite 330, Boston, MA 02111-1307, USA.\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * Transliteration table\n");
  printf(" */\n");
  printf("\n");
}

/* Parses the input table from stdin.
   For every code point with a non-empty replacement, uni2index[code] is set
   to the position in DATA of a length word, which is followed by that many
   replacement characters.  Unmapped code points keep the value -1.
   Returns the number of words stored in DATA. */
static int read_table (unsigned int *data, int *uni2index)
{
  int count = 0;

  for (int k = 0; k < CODEPOINT_COUNT; k++)
    uni2index[k] = -1;

  for (;;) {
    unsigned int code;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      /* Comment line. */
      skip_rest_of_line();
      continue;
    }
    ungetc(c, stdin);

    /* %x stores through an unsigned int pointer; range-check the result
       before it is used as an array index. */
    if (scanf("%x", &code) != 1)
      die("expected a hexadecimal code point");
    if (code >= (unsigned int) CODEPOINT_COUNT)
      die("code point out of range");
    if (getc(stdin) != '\t')
      die("expected a tab after the code point");

    int j = (int) code;

    /* Read the replacement, one UTF-8 character at a time, up to the tab. */
    for (;;) {
      c = getc(stdin);
      if (c == EOF || c == '\n')
        die("replacement is not terminated by a tab");
      if (c == '\t')
        break;

      if (uni2index[j] < 0) {
        /* First character for this code point: reserve the length word. */
        uni2index[j] = count;
        append_word(data, &count, 0);
      }

      unsigned int wc = (unsigned int) c;
      if (c >= 0x80) {
        /* Finish reading an UTF-8 character. */
        if (c < 0xc0)
          die("unexpected UTF-8 continuation byte");
        unsigned int len =
          (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
        /* Accumulate in unsigned arithmetic so long forms cannot overflow
           a signed int while shifting. */
        wc &= (1u << (8 - len)) - 1;
        while (--len > 0) {
          int cc = getc(stdin);
          if (!(cc >= 0x80 && cc < 0xc0))
            die("truncated UTF-8 sequence");
          wc = (wc << 6) | (unsigned int) (cc & 0x3f);
        }
      }
      append_word(data, &count, wc);
    }

    /* Patch the length word now that the replacement is complete. */
    if (uni2index[j] >= 0)
      data[uni2index[j]] = (unsigned int) (count - uni2index[j] - 1);

    /* Ignore the trailing comment. */
    skip_rest_of_line();
  }
  return count;
}

/* Emits the translit_data[] array holding COUNT words from DATA.
   Words below 32 (in practice the length words) start a new output line. */
static void print_data (const unsigned int *data, int count)
{
  printf("static const unsigned int translit_data[%d] = {", count);
  for (int i = 0; i < count; i++) {
    unsigned int w = data[i];
    if (w < 32)
      printf("\n %3u,", w);
    else if (w == '\'')
      printf("'\\'',");
    else if (w == '\\')
      printf("'\\\\',");
    else if (w < 127)
      printf(" '%c',", (int) w);
    else if (w < 256)
      printf("0x%02X,", w);
    else
      printf("0x%04X,", w);
  }
  printf("\n};\n");
  printf("\n");
}

/* Emits the translit_pageXX[] arrays and the translit_index(wc) macro that
   map a code point to its position in translit_data, or to -1.
   NOTE(review): page entries are emitted as `short`; an input producing
   more data words than a short can hold would not fit the generated type. */
static void print_index (const int *uni2index)
{
  int *line = xcalloc(LINE_COUNT, sizeof *line);
  struct table *tables = xcalloc(TABLE_CAPACITY, sizeof *tables);
  int tableno = 0;
  int t;

  /* line[n] is -1 when none of its 8 code points is mapped, else >= 0. */
  for (int ln = 0; ln < LINE_COUNT; ln++) {
    bool all_invalid = true;
    for (int k = 0; k < LINE_WIDTH; k++)
      if (uni2index[LINE_WIDTH * ln + k] >= 0)
        all_invalid = false;
    line[ln] = all_invalid ? -1 : 0;
  }

  /* Group used lines into tables.  A line joins the previous table when it
     is directly adjacent to it, or when it lies on the same 32-line page
     and at most MAX_LINE_GAP lines after the table's last line. */
  for (int ln = 0; ln < LINE_COUNT; ln++) {
    if (line[ln] < 0)
      continue;
    if (tableno > 0
        && ((ln > 0 && line[ln - 1] == tableno - 1)
            || (tables[tableno - 1].maxline / LINES_PER_PAGE
                  == ln / LINES_PER_PAGE
                && ln - tables[tableno - 1].maxline <= MAX_LINE_GAP))) {
      line[ln] = tableno - 1;
      tables[tableno - 1].maxline = ln;
    } else {
      if (tableno >= TABLE_CAPACITY)
        die("too many page tables");
      tables[tableno].minline = tables[tableno].maxline = ln;
      line[ln] = tableno;
      tableno++;
    }
  }

  /* Count the mapped code points covered by each table. */
  for (t = 0; t < tableno; t++) {
    int first = LINE_WIDTH * tables[t].minline;
    int limit = LINE_WIDTH * (tables[t].maxline + 1);
    tables[t].usecount = 0;
    for (int j = first; j < limit; j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* Name the tables that become arrays: "<page>" for the first one on a
     page, "<page>_<serial>" for further ones.  The serial number keeps
     counting across pages. */
  {
    int page = -1;
    int serial = 0;
    for (t = 0; t < tableno; t++) {
      int len;
      if (tables[t].usecount <= 1) {
        tables[t].suffix[0] = '\0';
        continue;
      }
      if (page == tables[t].minline / LINES_PER_PAGE) {
        len = snprintf(tables[t].suffix, sizeof tables[t].suffix,
                       "%02x_%d", (unsigned int) page, ++serial);
      } else {
        page = tables[t].minline / LINES_PER_PAGE;
        len = snprintf(tables[t].suffix, sizeof tables[t].suffix,
                       "%02x", (unsigned int) page);
      }
      if (len < 0 || (size_t) len >= sizeof tables[t].suffix)
        die("page table name too long");
    }
  }

  /* Emit one array per table holding more than one mapped code point. */
  {
    bool any_page = false;
    for (t = 0; t < tableno; t++) {
      if (tables[t].usecount <= 1)
        continue;
      any_page = true;
      printf("static const short translit_page%s[%d] = {\n",
             tables[t].suffix,
             LINE_WIDTH * (tables[t].maxline - tables[t].minline + 1));
      for (int ln = tables[t].minline; ln <= tables[t].maxline; ln++) {
        int in_page = ln % LINES_PER_PAGE;
        if (in_page == 0 && ln > tables[t].minline)
          printf("  /* 0x%04x */\n", (unsigned int) (LINE_WIDTH * ln));
        printf(" ");
        for (int k = 0; k < LINE_WIDTH; k++)
          printf(" %4d,", uni2index[LINE_WIDTH * ln + k]);
        printf(" /* 0x%02x-0x%02x */\n",
               (unsigned int) (LINE_WIDTH * in_page),
               (unsigned int) (LINE_WIDTH * in_page + 7));
      }
      printf("};\n");
    }
    if (any_page)
      printf("\n");
  }

  /* Emit the lookup macro as a chain of conditional expressions. */
  printf("#define translit_index(wc) \\\n  (");
  for (int ln = 0; ln < LINE_COUNT;) {
    int end = ln;
    t = line[ln];
    while (end < LINE_COUNT && line[end] == t)
      end++;

    if (t >= 0) {
      /* Consistency checks on the grouping computed above. */
      if (ln != tables[t].minline) abort();
      if (end > tables[t].maxline + 1) abort();
      /* A table may contain unused gap lines; skip to its real end. */
      end = tables[t].maxline + 1;
      if (tables[t].usecount == 0) abort();

      if (tables[t].usecount == 1) {
        /* A single mapped code point: test for it directly. */
        if (end != ln + 1) abort();
        for (int j = LINE_WIDTH * ln; j < LINE_WIDTH * end; j++)
          if (uni2index[j] >= 0) {
            printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
            break;
          }
      } else {
        if (ln == 0) {
          printf("wc < 0x%04x", (unsigned int) (LINE_WIDTH * end));
        } else {
          printf("wc >= 0x%04x && wc < 0x%04x",
                 (unsigned int) (LINE_WIDTH * ln),
                 (unsigned int) (LINE_WIDTH * end));
        }
        printf(" ? translit_page%s[wc", tables[t].suffix);
        if (tables[t].minline > 0)
          printf("-0x%04x", (unsigned int) (LINE_WIDTH * ln));
        printf("]");
      }
      printf(" : \\\n   ");
    }
    ln = end;
  }
  printf("-1)\n");

  free(tables);
  free(line);
}

/* Entry point.  Takes no command-line arguments; reads the table from
   stdin and writes the generated source to stdout.  Exits with status 0 on
   success and 1 on bad usage, malformed input, or an output error. */
int main (int argc, char *argv[])
{
  (void) argv;

  if (argc != 1)
    die("usage: gentranslit < input-table > output-file");

  /* These tables are several megabytes each: keep them off the stack. */
  unsigned int *data = xcalloc(DATA_CAPACITY, sizeof *data);
  int *uni2index = xcalloc(CODEPOINT_COUNT, sizeof *uni2index);

  print_banner();
  int count = read_table(data, uni2index);
  print_data(data, count);
  print_index(uni2index);

  free(uni2index);
  free(data);

  fflush(stdout);
  if (ferror(stdout))
    exit(1);
  exit(0);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -