📄 cjk_tab_to_h.c

📁 libiconv是一个很不错的字符集转换库。程序接口也很简单
💻 C
📖 第 1 页 / 共 5 页
字号:
  printf("        /* Keep in `used' only the bits 0..i-1. */\n");  printf("        used &= ((unsigned short) 1 << i) - 1;\n");  printf("        /* Add `summary->indx' and the number of bits set in `used'. */\n");  printf("        used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");  printf("        used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");  printf("        used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");  printf("        used = (used & 0x00ff) + (used >> 8);\n");  if (monotonic) {    printf("        used += summary->indx;\n");    printf("        c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name, log2_stepsize, name);    printf("        r[0] = (c >> 8); r[1] = (c & 0xff);\n");    printf("        return 2;\n");  } else {    if (is_large) {      printf("        used += summary->indx;\n");      printf("        r[0] = %s_2charset[3*used];\n", name);      printf("        r[1] = %s_2charset[3*used+1];\n", name);      printf("        r[2] = %s_2charset[3*used+2];\n", name);      printf("        return 3;\n");    } else {      printf("        c = %s_2charset[summary->indx + used];\n", name);      printf("        r[0] = (c >> 8); r[1] = (c & 0xff);\n");      printf("        return 2;\n");    }  }  printf("      }\n");  printf("    }\n");  printf("    return RET_ILUNI;\n");  printf("  }\n");  printf("  return RET_TOOSMALL;\n");  printf("}\n");}/* ISO-2022/EUC specifics */static int row_byte_normal (int row) { return 0x21+row; }static int col_byte_normal (int col) { return 0x21+col; }static int byte_row_normal (int byte) { return byte-0x21; }static int byte_col_normal (int byte) { return byte-0x21; }static void do_normal (const char* name){  Encoding enc;  enc.rows = 94;  enc.cols = 94;  enc.row_byte = row_byte_normal;  enc.col_byte = col_byte_normal;  enc.byte_row = byte_row_normal;  enc.byte_col = byte_col_normal;  enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.byte_row_expr = "%1$s - 0x21";  enc.byte_col_expr = "%1$s - 0x21";  read_table(&enc);  output_charset2uni(name,&enc);  invert(&enc); output_uni2charset_sparse(name,&enc,false);}/* Note: On first sight, the jisx0212_2charset[] table seems to be in order,   starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in   order. There are 75 out-of-order values, scattered all throughout the table. */static void do_normal_only_charset2uni (const char* name){  Encoding enc;  enc.rows = 94;  enc.cols = 94;  enc.row_byte = row_byte_normal;  enc.col_byte = col_byte_normal;  enc.byte_row = byte_row_normal;  enc.byte_col = byte_col_normal;  enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.byte_row_expr = "%1$s - 0x21";  enc.byte_col_expr = "%1$s - 0x21";  read_table(&enc);  output_charset2uni(name,&enc);}/* CNS 11643 specifics - trick to put two tables into one */static int row_byte_cns11643 (int row) {  return 0x100 * (row / 94) + (row % 94) + 0x21;}static int byte_row_cns11643 (int byte) {  return (byte >> 8) * 94 + (byte & 0xff) - 0x21;}static void do_cns11643_only_uni2charset (const char* name){  Encoding enc;  enc.rows = 16*94;  enc.cols = 94;  enc.row_byte = row_byte_cns11643;  enc.col_byte = col_byte_normal;  enc.byte_row = byte_row_cns11643;  enc.byte_col = byte_col_normal;  enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.byte_row_expr = "%1$s - 0x21";  enc.byte_col_expr = "%1$s - 0x21";  read_table(&enc);  invert(&enc);  output_uni2charset_sparse(name,&enc,false);}/* GBK specifics */static int row_byte_gbk1 (int row) {  return 0x81+row;}static int col_byte_gbk1 (int col) {  return (col >= 0x3f ? 0x41 : 0x40) + col;}static int byte_row_gbk1 (int byte) {  if (byte >= 0x81 && byte < 0xff)    return byte-0x81;  else    return -1;}static int byte_col_gbk1 (int byte) {  if (byte >= 0x40 && byte < 0x7f)    return byte-0x40;  else if (byte >= 0x80 && byte < 0xff)    return byte-0x41;  else    return -1;}static void do_gbk1 (const char* name){  Encoding enc;  enc.rows = 126;  enc.cols = 190;  enc.row_byte = row_byte_gbk1;  enc.col_byte = col_byte_gbk1;  enc.byte_row = byte_row_gbk1;  enc.byte_col = byte_col_gbk1;  enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";  enc.byte_row_expr = "%1$s - 0x81";  enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";  read_table(&enc);  output_charset2uni(name,&enc);  invert(&enc); output_uni2charset_dense(name,&enc);}static void do_gbk1_only_charset2uni (const char* name){  Encoding enc;  enc.rows = 126;  enc.cols = 190;  enc.row_byte = row_byte_gbk1;  enc.col_byte = col_byte_gbk1;  enc.byte_row = byte_row_gbk1;  enc.byte_col = byte_col_gbk1;  enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";  enc.byte_row_expr = "%1$s - 0x81";  enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";  read_table(&enc);  output_charset2uni(name,&enc);}static int row_byte_gbk2 (int row) {  return 0x81+row;}static int col_byte_gbk2 (int col) {  return (col >= 0x3f ? 0x41 : 0x40) + col;}static int byte_row_gbk2 (int byte) {  if (byte >= 0x81 && byte < 0xff)    return byte-0x81;  else    return -1;}static int byte_col_gbk2 (int byte) {  if (byte >= 0x40 && byte < 0x7f)    return byte-0x40;  else if (byte >= 0x80 && byte < 0xa1)    return byte-0x41;  else    return -1;}static void do_gbk2_only_charset2uni (const char* name){  Encoding enc;  enc.rows = 126;  enc.cols = 96;  enc.row_byte = row_byte_gbk2;  enc.col_byte = col_byte_gbk2;  enc.byte_row = byte_row_gbk2;  enc.byte_col = byte_col_gbk2;  enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";  enc.byte_row_expr = "%1$s - 0x81";  enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";  read_table(&enc);  output_charset2uni(name,&enc);}static void do_gbk1_only_uni2charset (const char* name){  Encoding enc;  enc.rows = 126;  enc.cols = 190;  enc.row_byte = row_byte_gbk1;  enc.col_byte = col_byte_gbk1;  enc.byte_row = byte_row_gbk1;  enc.byte_col = byte_col_gbk1;  enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";  enc.byte_row_expr = "%1$s - 0x81";  enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";  read_table(&enc);  invert(&enc); output_uni2charset_sparse(name,&enc,false);}/* KSC 5601 specifics *//* * Reads the charset2uni table from standard input. */static void read_table_ksc5601 (Encoding* enc){  int row, col, i, i1, i2, c, j;  enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));  for (row = 0; row < enc->rows; row++)    enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));  for (row = 0; row < enc->rows; row++)    for (col = 0; col < enc->cols; col++)      enc->charset2uni[row][col] = 0xfffd;  c = getc(stdin);  ungetc(c,stdin);  if (c == '#') {    /* Read a unicode.org style .TXT file. */    for (;;) {      c = getc(stdin);      if (c == EOF)        break;      if (c == '\n' || c == ' ' || c == '\t')        continue;      if (c == '#') {        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));        continue;      }      ungetc(c,stdin);      if (scanf("0x%x", &j) != 1)        exit(1);      i1 = j >> 8;      i2 = j & 0xff;      if (scanf(" 0x%x", &j) != 1)        exit(1);      /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0         = KS X 1001.1992, ignore the rest. */      if (!(i1 >= 128+33 && i1 < 128+127 && i2 >= 128+33 && i2 < 128+127))        continue;  /* KSC5601 specific */      i1 &= 0x7f;  /* KSC5601 specific */      i2 &= 0x7f;  /* KSC5601 specific */      row = enc->byte_row(i1);      col = enc->byte_col(i2);      if (row < 0 || col < 0) {        fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);        exit(1);      }      enc->charset2uni[row][col] = j;    }  } else {    /* Read a table of hexadecimal Unicode values. */    for (i1 = 33; i1 < 127; i1++)      for (i2 = 33; i2 < 127; i2++) {        i = scanf("%x", &j);        if (i == EOF)          goto read_done;        if (i != 1)          exit(1);        if (j < 0 || j == 0xffff)          j = 0xfffd;        if (j != 0xfffd) {          if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {            fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);            exit (1);          }          enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;        }      }   read_done: ;  }}static void do_ksc5601 (const char* name){  Encoding enc;  enc.rows = 94;  enc.cols = 94;  enc.row_byte = row_byte_normal;  enc.col_byte = col_byte_normal;  enc.byte_row = byte_row_normal;  enc.byte_col = byte_col_normal;  enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";  enc.byte_row_expr = "%1$s - 0x21";  enc.byte_col_expr = "%1$s - 0x21";  read_table_ksc5601(&enc);  output_charset2uni(name,&enc);  invert(&enc); output_uni2charset_sparse(name,&enc,false);}/* UHC specifics *//* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */static int row_byte_uhc_1 (int row) {  return 0x81 + row;}static int col_byte_uhc_1 (int col) {  return (col >= 0x34 ? 0x4d : col >= 0x1a ? 0x47 : 0x41) + col;}static int byte_row_uhc_1 (int byte) {  if (byte >= 0x81 && byte < 0xa1)    return byte-0x81;  else    return -1;}static int byte_col_uhc_1 (int byte) {  if (byte >= 0x41 && byte < 0x5b)    return byte-0x41;  else if (byte >= 0x61 && byte < 0x7b)    return byte-0x47;  else if (byte >= 0x81 && byte < 0xff)    return byte-0x4d;  else    return -1;}static void do_uhc_1 (const char* name){  Encoding enc;  enc.rows = 32;  enc.cols = 178;  enc.row_byte = row_byte_uhc_1;  enc.col_byte = col_byte_uhc_1;  enc.byte_row = byte_row_uhc_1;  enc.byte_col = byte_col_uhc_1;  enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa1)";  enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";  enc.byte_row_expr = "%1$s - 0x81";  enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";  read_table(&enc);  output_charset2uni_noholes_monotonic(name,&enc);  invert(&enc); output_uni2charset_sparse(name,&enc,true);}/* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */static int row_byte_uhc_2 (int row) {  return 0xa1 + row;}static int col_byte_uhc_2 (int col) {  return (col >= 0x34 ? 0x4d : col >= 0x1a ? 0x47 : 0x41) + col;}static int byte_row_uhc_2 (int byte) {  if (byte >= 0xa1 && byte < 0xff)    return byte-0xa1;  else    return -1;}static int byte_col_uhc_2 (int byte) {  if (byte >= 0x41 && byte < 0x5b)    return byte-0x41;  else if (byte >= 0x61 && byte < 0x7b)    return byte-0x47;  else if (byte >= 0x81 && byte < 0xa1)    return byte-0x4d;  else    return -1;}static void do_uhc_2 (const char* name){  Encoding enc;  enc.rows = 94;  enc.cols = 84;  enc.row_byte = row_byte_uhc_2;  enc.col_byte = col_byte_uhc_2;  enc.byte_row = byte_row_uhc_2;  enc.byte_col = byte_col_uhc_2;  enc.check_row_expr = "(%1$s >= 0xa1 && %1$s < 0xff)";
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -