⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cjk_tab_to_h.c

📁 libiconv是一个很不错的字符集转换库。程序接口也很简单。
💻 C
📖 第 1 页 / 共 5 页
字号:
  int steps_per_row = 2;  int stepsize = (enc->cols + steps_per_row-1) / steps_per_row;  find_charset2uni_pages(enc);  find_charset2uni_blocks(enc);  for (row = 0; row < enc->rows; row++)    if (enc->charsetpage[row] > 0) {      if (row == 0 || enc->charsetpage[row-1] == 0) {        /* Start a new block. */        for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);        printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",               name, enc->row_byte(row),               steps_per_row*(lastrow-row+1));        for (r = row; r <= lastrow; r++) {          for (i = 0; i < steps_per_row; i++)            printf(" 0x%04x,", enc->charset2uni[r][i*stepsize]);          if (((r-row) % 4) == 3 && (r < lastrow)) printf("\n ");        }        printf("\n");        printf("};\n");        printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",               name, enc->row_byte(row),               (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);      }      printf("  /""* 0x%02x *""/\n ", enc->row_byte(row));      col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);      for (col = 0; col < col_max; col++) {        printf(" 0x%02x,", enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]);        if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");      }      printf("\n");      if (enc->charsetpage[row+1] == 0) {        /* End a block. 
*/        printf("};\n");      }    }  printf("\n");  printf("static int\n");  printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);  printf("{\n");  printf("  unsigned char c1 = s[0];\n");  printf("  if (");  for (i = 0; i < enc->ncharsetblocks; i++) {    i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);    i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);    if (i > 0)      printf(" || ");    if (i1_min == i1_max)      printf("(c1 == 0x%02x)", i1_min);    else      printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);  }  printf(") {\n");  printf("    if (n >= 2) {\n");  printf("      unsigned char c2 = s[1];\n");  printf("      if (");  printf(enc->check_col_expr, "c2");  printf(") {\n");  printf("        unsigned int row = ");  printf(enc->byte_row_expr, "c1");  printf(";\n");  printf("        unsigned int col = ");  printf(enc->byte_col_expr, "c2");  printf(";\n");  printf("        unsigned int i = %d * row + col;\n", enc->cols);  printf("        unsigned short wc = 0xfffd;\n");  for (i = 0; i < enc->ncharsetblocks; i++) {    printf("        ");    if (i > 0)      printf("} else ");    if (i < enc->ncharsetblocks-1)      printf("if (i < %d) ", enc->charsetblocks[i+1].start);    printf("{\n");    printf("          if (i < %d)\n", enc->charsetblocks[i].end);    printf("            wc = %s_2uni_main_page%02x[%d*", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols), steps_per_row);    if (enc->charsetblocks[i].start > 0)      printf("(row-%d)", enc->charsetblocks[i].start / enc->cols);    else      printf("row");    printf("+");    if (steps_per_row == 2)      printf("(col>=%d?1:0)", stepsize);    else      printf("col/%d", stepsize);    printf("] + %s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));    if (enc->charsetblocks[i].start > 0)      printf("-%d", enc->charsetblocks[i].start);    printf("];\n");  }  printf("        }\n");  printf("        if 
(wc != 0xfffd) {\n");  printf("          *pwc = (ucs4_t) wc;\n");  printf("          return 2;\n");  printf("        }\n");  printf("      }\n");  printf("      return RET_ILSEQ;\n");  printf("    }\n");  printf("    return RET_TOOFEW(0);\n");  printf("  }\n");  printf("  return RET_ILSEQ;\n");  printf("}\n");  printf("\n");}/* * Computes the uni2charset[0x0000..0x2ffff] array. */static void invert (Encoding* enc){  int row, col, j;  enc->uni2charset = (int*) malloc(0x30000*sizeof(int));  for (j = 0; j < 0x30000; j++)    enc->uni2charset[j] = 0;  for (row = 0; row < enc->rows; row++)    for (col = 0; col < enc->cols; col++) {      j = enc->charset2uni[row][col];      if (j != 0xfffd)        enc->uni2charset[j] = 0x100 * enc->row_byte(row) + enc->col_byte(col);    }}/* * Outputs the unicode to charset table and function, using a linear array. * (Suitable if the table is dense.) */static void output_uni2charset_dense (const char* name, Encoding* enc){  /* Like in 8bit_tab_to_h.c */  bool pages[0x300];  int line[0x6000];  int tableno;  struct { int minline; int maxline; int usecount; } tables[0x6000];  bool first;  int row, col, j, p, j1, j2, t;  for (p = 0; p < 0x300; p++)    pages[p] = false;  for (row = 0; row < enc->rows; row++)    for (col = 0; col < enc->cols; col++) {      j = enc->charset2uni[row][col];      if (j != 0xfffd)        pages[j>>8] = true;    }  for (j1 = 0; j1 < 0x6000; j1++) {    bool all_invalid = true;    for (j2 = 0; j2 < 8; j2++) {      j = 8*j1+j2;      if (enc->uni2charset[j] != 0)        all_invalid = false;    }    if (all_invalid)      line[j1] = -1;    else      line[j1] = 0;  }  tableno = 0;  for (j1 = 0; j1 < 0x6000; j1++) {    if (line[j1] >= 0) {      if (tableno > 0          && ((j1 > 0 && line[j1-1] == tableno-1)              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)                  && j1 - tables[tableno-1].maxline <= 8))) {        line[j1] = tableno-1;        tables[tableno-1].maxline = j1;      } else {        
tableno++;        line[j1] = tableno-1;        tables[tableno-1].minline = tables[tableno-1].maxline = j1;      }    }  }  for (t = 0; t < tableno; t++) {    tables[t].usecount = 0;    j1 = 8*tables[t].minline;    j2 = 8*(tables[t].maxline+1);    for (j = j1; j < j2; j++)      if (enc->uni2charset[j] != 0)        tables[t].usecount++;  }  {    p = -1;    for (t = 0; t < tableno; t++)      if (tables[t].usecount > 1) {        p = tables[t].minline >> 5;        printf("static const unsigned short %s_page%02x[%d] = {\n", name, p, 8*(tables[t].maxline-tables[t].minline+1));        for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {          if ((j1 % 0x20) == 0 && j1 > tables[t].minline)            printf("  /* 0x%04x */\n", 8*j1);          printf(" ");          for (j2 = 0; j2 < 8; j2++) {            j = 8*j1+j2;            printf(" 0x%04x,", enc->uni2charset[j]);          }          printf(" /*0x%02x-0x%02x*/\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);        }        printf("};\n");      }    if (p >= 0)      printf("\n");  }  printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);  printf("{\n");  printf("  if (n >= 2) {\n");  printf("    unsigned short c = 0;\n");  first = true;  for (j1 = 0; j1 < 0x6000;) {    t = line[j1];    for (j2 = j1; j2 < 0x6000 && line[j2] == t; j2++);    if (t >= 0) {      if (j1 != tables[t].minline) abort();      if (j2 > tables[t].maxline+1) abort();      j2 = tables[t].maxline+1;      if (first)        printf("    ");      else        printf("    else ");      first = false;      if (tables[t].usecount == 0) abort();      if (tables[t].usecount == 1) {        if (j2 != j1+1) abort();        for (j = 8*j1; j < 8*j2; j++)          if (enc->uni2charset[j] != 0) {            printf("if (wc == 0x%04x)\n      c = 0x%02x;\n", j, enc->uni2charset[j]);            break;          }      } else {        if (j1 == 0) {          printf("if (wc < 0x%04x)", 8*j2);        } else {          printf("if (wc >= 
0x%04x && wc < 0x%04x)", 8*j1, 8*j2);        }        printf("\n      c = %s_page%02x[wc", name, j1 >> 5);        if (tables[t].minline > 0)          printf("-0x%04x", 8*j1);        printf("];\n");      }    }    j1 = j2;  }  printf("    if (c != 0) {\n");  printf("      r[0] = (c >> 8); r[1] = (c & 0xff);\n");  printf("      return 2;\n");  printf("    }\n");  printf("    return RET_ILUNI;\n");  printf("  }\n");  printf("  return RET_TOOSMALL;\n");  printf("}\n");}/* * Outputs the unicode to charset table and function, using a packed array. * (Suitable if the table is sparse.) * The argument 'monotonic' may be set to true if the mapping is monotonically * increasing with small gaps only. */static void output_uni2charset_sparse (const char* name, Encoding* enc, bool monotonic){  bool pages[0x300];  Block pageblocks[0x300]; int npageblocks;  int indx2charset[0x30000];  int summary_indx[0x3000];  int summary_used[0x3000];  int i, row, col, j, p, j1, j2, indx;  bool is_large;  /* for monotonic: */  int log2_stepsize = (!strcmp(name,"uhc_2") ? 6 : 7);  int stepsize = 1 << log2_stepsize;  int indxsteps;  /* Fill pages[0x300]. */  for (p = 0; p < 0x300; p++)    pages[p] = false;  for (row = 0; row < enc->rows; row++)    for (col = 0; col < enc->cols; col++) {      j = enc->charset2uni[row][col];      if (j != 0xfffd)        pages[j>>8] = true;    }  /* Determine whether two or three bytes are needed for each character. */  is_large = false;  for (j = 0; j < 0x30000; j++)    if (enc->uni2charset[j] >= 0x10000)      is_large = true;#if 0  for (p = 0; p < 0x300; p++)    if (pages[p]) {      printf("static const unsigned short %s_page%02x[256] = {\n", name, p);      for (j1 = 0; j1 < 32; j1++) {        printf("  ");        for (j2 = 0; j2 < 8; j2++)          printf("0x%04x, ", enc->uni2charset[256*p+8*j1+j2]);        printf("/""*0x%02x-0x%02x*""/\n", 8*j1, 8*j1+7);      }      printf("};\n");    }  printf("\n");#endif  /* Fill summary_indx[] and summary_used[]. 
*/  indx = 0;  for (j1 = 0; j1 < 0x3000; j1++) {    summary_indx[j1] = indx;    summary_used[j1] = 0;    for (j2 = 0; j2 < 16; j2++) {      j = 16*j1+j2;      if (enc->uni2charset[j] != 0) {        indx2charset[indx++] = enc->uni2charset[j];        summary_used[j1] |= (1 << j2);      }    }  }  /* Fill npageblocks and pageblocks[]. */  npageblocks = 0;  for (p = 0; p < 0x300; ) {    if (pages[p] && (p == 0 || !pages[p-1])) {      pageblocks[npageblocks].start = 16*p;      do p++; while (p < 0x300 && pages[p]);      j1 = 16*p;      while (summary_used[j1-1] == 0) j1--;      pageblocks[npageblocks].end = j1;      npageblocks++;    } else      p++;  }  if (monotonic) {    indxsteps = (indx + stepsize-1) / stepsize;    printf("static const unsigned short %s_2charset_main[%d] = {\n", name, indxsteps);    for (i = 0; i < indxsteps; ) {      if ((i % 8) == 0) printf(" ");      printf(" 0x%04x,", indx2charset[i*stepsize]);      i++;      if ((i % 8) == 0 || i == indxsteps) printf("\n");    }    printf("};\n");    printf("static const unsigned char %s_2charset[%d] = {\n", name, indx);    for (i = 0; i < indx; ) {      if ((i % 8) == 0) printf(" ");      printf(" 0x%02x,", indx2charset[i] - indx2charset[i/stepsize*stepsize]);      i++;      if ((i % 8) == 0 || i == indx) printf("\n");    }    printf("};\n");  } else {    if (is_large) {      printf("static const unsigned char %s_2charset[3*%d] = {\n", name, indx);      for (i = 0; i < indx; ) {        if ((i % 4) == 0) printf(" ");        printf(" 0x%1x,0x%02x,0x%02x,", indx2charset[i] >> 16,               (indx2charset[i] >> 8) & 0xff, indx2charset[i] & 0xff);        i++;        if ((i % 4) == 0 || i == indx) printf("\n");      }      printf("};\n");    } else {      printf("static const unsigned short %s_2charset[%d] = {\n", name, indx);      for (i = 0; i < indx; ) {        if ((i % 8) == 0) printf(" ");        printf(" 0x%04x,", indx2charset[i]);        i++;        if ((i % 8) == 0 || i == indx) printf("\n");      }      
printf("};\n");    }  }  printf("\n");  for (i = 0; i < npageblocks; i++) {    printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name,           pageblocks[i].start/16, pageblocks[i].end-pageblocks[i].start);    for (j1 = pageblocks[i].start; j1 < pageblocks[i].end; ) {      if (((16*j1) % 0x100) == 0) printf("  /""* 0x%04x *""/\n", 16*j1);      if ((j1 % 4) == 0) printf(" ");      printf(" { %4d, 0x%04x },", summary_indx[j1], summary_used[j1]);      j1++;      if ((j1 % 4) == 0 || j1 == pageblocks[i].end) printf("\n");    }    printf("};\n");  }  printf("\n");  printf("static int\n");  printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);  printf("{\n");  printf("  if (n >= 2) {\n");  printf("    const Summary16 *summary = NULL;\n");  for (i = 0; i < npageblocks; i++) {    printf("    ");    if (i > 0)      printf("else ");    printf("if (wc >= 0x%04x && wc < 0x%04x)\n",           16*pageblocks[i].start, 16*pageblocks[i].end);    printf("      summary = &%s_uni2indx_page%02x[(wc>>4)", name,           pageblocks[i].start/16);    if (pageblocks[i].start > 0)      printf("-0x%03x", pageblocks[i].start);    printf("];\n");  }  printf("    if (summary) {\n");  printf("      unsigned short used = summary->used;\n");  printf("      unsigned int i = wc & 0x0f;\n");  printf("      if (used & ((unsigned short) 1 << i)) {\n");  if (monotonic || !is_large)    printf("        unsigned short c;\n");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -