📄 gen_wctype.c
字号:
#define _GNU_SOURCE#include <stdio.h>#include <stdlib.h>#include <string.h>#include <locale.h>#include <wctype.h>#include <limits.h>#include <stdint.h>#include <wchar.h>#include <ctype.h>#ifndef _CTYPE_H#define _CTYPE_H#endif#ifndef _WCTYPE_H#define _WCTYPE_H#endif#include "../../libc/sysdeps/linux/common/bits/uClibc_ctype.h"/* 0x9 : space blank *//* 0xa : space *//* 0xb : space *//* 0xc : space *//* 0xd : space *//* 0x20 : space blank *//* 0x1680 : space blank *//* 0x2000 : space blank *//* 0x2001 : space blank *//* 0x2002 : space blank *//* 0x2003 : space blank *//* 0x2004 : space blank *//* 0x2005 : space blank *//* 0x2006 : space blank *//* 0x2008 : space blank *//* 0x2009 : space blank *//* 0x200a : space blank *//* 0x200b : space blank *//* 0x2028 : space *//* 0x2029 : space *//* 0x3000 : space blank *//* typecount[ 0] = 88670 C_alpha_nonupper_nonlower *//* typecount[ 1] = 742 C_alpha_lower *//* typecount[ 2] = 4 C_alpha_upper_lower *//* typecount[ 3] = 731 C_alpha_upper *//* typecount[ 4] = 10 C_digit *//* typecount[ 5] = 10270 C_punct *//* typecount[ 6] = 0 C_graph *//* typecount[ 7] = 0 C_print_space_nonblank *//* typecount[ 8] = 14 C_print_space_blank *//* typecount[ 9] = 0 C_space_nonblank_noncntrl *//* typecount[10] = 0 C_space_blank_noncntrl *//* typecount[11] = 6 C_cntrl_space_nonblank *//* typecount[12] = 1 C_cntrl_space_blank *//* typecount[13] = 60 C_cntrl_nonspace *//* typecount[14] = 96100 C_unclassified *//* typecount[15] = 0 empty_slot *//* Set to #if 0 to restrict wchars to 16 bits. */#if 1#define RANGE 0x2ffffUL#elif 0#define RANGE 0x1ffffUL#else#define RANGE 0xffffUL /* Restrict for 16-bit wchar_t... */#endif#if 0/* Classification codes. */static const char *typename[] = { "C_unclassified", "C_alpha_nonupper_nonlower", "C_alpha_lower", "C_alpha_upper_lower", "C_alpha_upper", "C_digit", "C_punct", "C_graph", "C_print_space_nonblank", "C_print_space_blank", "C_space_nonblank_noncntrl", "C_space_blank_noncntrl", "C_cntrl_space_nonblank", "C_cntrl_space_blank", "C_cntrl_nonspace", "empty_slot"};#endif#if 0/* Taking advantage of the C99 mutual-exclusion guarantees for the various * (w)ctype classes, including the descriptions of printing and control * (w)chars, we can place each in one of the following mutually-exlusive * subsets. Since there are less than 16, we can store the data for * each (w)chars in a nibble. In contrast, glibc uses an unsigned int * per (w)char, with one bit flag for each is* type. While this allows * a simple '&' operation to determine the type vs. a range test and a * little special handling for the "blank" and "xdigit" types in my * approach, it also uses 8 times the space for the tables on the typical * 32-bit archs we supported.*/enum { __CTYPE_unclassified = 0, __CTYPE_alpha_nonupper_nonlower, __CTYPE_alpha_lower, __CTYPE_alpha_upper_lower, __CTYPE_alpha_upper, __CTYPE_digit, __CTYPE_punct, __CTYPE_graph, __CTYPE_print_space_nonblank, __CTYPE_print_space_blank, __CTYPE_space_nonblank_noncntrl, __CTYPE_space_blank_noncntrl, __CTYPE_cntrl_space_nonblank, __CTYPE_cntrl_space_blank, __CTYPE_cntrl_nonspace,};#endif#define __CTYPE_isxdigit(D,X) \ (__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))#define mywalnum(x) __CTYPE_isalnum(d)#define mywalpha(x) __CTYPE_isalpha(d)#define mywblank(x) __CTYPE_isblank(d)#define mywcntrl(x) __CTYPE_iscntrl(d)#define mywdigit(x) __CTYPE_isdigit(d)#define mywgraph(x) __CTYPE_isgraph(d)#define mywlower(x) __CTYPE_islower(d)#define mywprint(x) __CTYPE_isprint(d)#define mywpunct(x) __CTYPE_ispunct(d)#define mywspace(x) __CTYPE_isspace(d)#define mywupper(x) __CTYPE_isupper(d)#define mywxdigit(x) __CTYPE_isxdigit(d,x)typedef struct { short l; short u;} uldiff_entry;typedef struct { uint16_t ii_len; uint16_t ti_len; uint16_t ut_len; unsigned char ii_shift; unsigned char ti_shift; unsigned char *ii; unsigned char *ti; unsigned char *ut;} table_data;void output_table(FILE *fp, const char *name, table_data *tbl){ size_t i; fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN %7u\n", name, tbl->ii_len); fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN %7u\n", name, tbl->ti_len); fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN %7u\n", name, tbl->ut_len); fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT %7u\n", name, tbl->ii_shift); fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT %7u\n", name, tbl->ti_shift); fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name); i = tbl->ii_len + tbl->ti_len + tbl->ut_len; fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i); for (i=0 ; i < tbl->ii_len ; i++) { if (i % 12 == 0) { fprintf(fp, "\n"); } fprintf(fp, " %#04x,", tbl->ii[i]); } for (i=0 ; i < tbl->ti_len ; i++) { if (i % 12 == 0) { fprintf(fp, "\n"); } fprintf(fp, " %#04x,", tbl->ti[i]); } for (i=0 ; i < tbl->ut_len ; i++) { if (i % 12 == 0) { fprintf(fp, "\n"); } fprintf(fp, " %#04x,", tbl->ut[i]); } fprintf(fp, "\n};\n\n"); fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);}static void dump_table_data(table_data *tbl){ printf("ii_shift = %d ti_shift = %d\n" "ii_len = %d ti_len = %d ut_len = %d\n" "total = %d\n", tbl->ii_shift, tbl->ti_shift, tbl->ii_len, tbl->ti_len, tbl->ut_len, (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);}/* For sorting the blocks of unsigned chars. */static size_t nu_val;int nu_memcmp(const void *a, const void *b){ return memcmp(*(unsigned char**)a, *(unsigned char**)b, nu_val);}static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);#define MAXTO 255 /* Restrict to minimal unsigned char max. */int main(int argc, char **argv){ long int u, l, tt; size_t smallest, t; unsigned int c; unsigned int d; int i, n; int ul_count = 0; uldiff_entry uldiff[MAXTO]; table_data cttable; table_data ultable; table_data combtable; table_data widthtable; long int last_comb = 0; unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */ unsigned char ult[RANGE+1]; /* upper/lower table */ unsigned char combt[(RANGE/4)+1]; /* combining */ unsigned char widtht[(RANGE/4)+1]; /* width */ wctrans_t totitle; wctype_t is_comb, is_comb3; long int typecount[16]; const char *typename[16]; static const char empty_slot[] = "empty_slot"; int built = 0;#define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X for (i=0 ; i < 16 ; i++) { typename[i] = empty_slot; } INIT_TYPENAME(unclassified); INIT_TYPENAME(alpha_nonupper_nonlower); INIT_TYPENAME(alpha_lower); INIT_TYPENAME(alpha_upper_lower); INIT_TYPENAME(alpha_upper); INIT_TYPENAME(digit); INIT_TYPENAME(punct); INIT_TYPENAME(graph); INIT_TYPENAME(print_space_nonblank); INIT_TYPENAME(print_space_blank); INIT_TYPENAME(space_nonblank_noncntrl); INIT_TYPENAME(space_blank_noncntrl); INIT_TYPENAME(cntrl_space_nonblank); INIT_TYPENAME(cntrl_space_blank); INIT_TYPENAME(cntrl_nonspace); setvbuf(stdout, NULL, _IONBF, 0); while (--argc) { if (!setlocale(LC_CTYPE, *++argv)) { printf("setlocale(LC_CTYPE,%s) failed!\n", *argv); continue; } if (!(totitle = wctrans("totitle"))) { printf("no totitle transformation.\n"); } if (!(is_comb = wctype("combining"))) { printf("no combining wctype.\n"); } if (!(is_comb3 = wctype("combining_level3"))) { printf("no combining_level3 wctype.\n"); } if (!built) { built = 1; ul_count = 1; uldiff[0].u = uldiff[0].l = 0; memset(wct, 0, sizeof(wct)); memset(combt, 0, sizeof(combt)); memset(widtht, 0, sizeof(widtht)); for (i = 0 ; i < 16 ; i++) { typecount[i] = 0; } for (c=0 ; c <= RANGE ; c++) { if (iswdigit(c)) { d = __CTYPE_digit; } else if (iswalpha(c)) { d = __CTYPE_alpha_nonupper_nonlower; if (iswlower(c)) { d = __CTYPE_alpha_lower; if (iswupper(c)) { d = __CTYPE_alpha_upper_lower; } } else if (iswupper(c)) { d = __CTYPE_alpha_upper; } } else if (iswpunct(c)) { d = __CTYPE_punct; } else if (iswgraph(c)) { d = __CTYPE_graph; } else if (iswprint(c)) { d = __CTYPE_print_space_nonblank; if (iswblank(c)) { d = __CTYPE_print_space_blank; } } else if (iswspace(c) && !iswcntrl(c)) { d = __CTYPE_space_nonblank_noncntrl; if (iswblank(c)) { d = __CTYPE_space_blank_noncntrl; } } else if (iswcntrl(c)) { d = __CTYPE_cntrl_nonspace; if (iswspace(c)) { d = __CTYPE_cntrl_space_nonblank; if (iswblank(c)) { d = __CTYPE_cntrl_space_blank; } } } else { d = __CTYPE_unclassified; } ++typecount[d];#if 0 if (iswspace(c)) { if (iswblank(c)) { printf("%#8x : space blank\n", c); } else { printf("%#8x : space\n", c); } }#endif#if 0 if (c < 256) { unsigned int glibc; glibc = 0; if (isalnum(c)) ++glibc; glibc <<= 1; if (isalpha(c)) ++glibc; glibc <<= 1; if (isblank(c)) ++glibc; glibc <<= 1; if (iscntrl(c)) ++glibc; glibc <<= 1; if (isdigit(c)) ++glibc; glibc <<= 1; if (isgraph(c)) ++glibc; glibc <<= 1; if (islower(c)) ++glibc; glibc <<= 1; if (isprint(c)) ++glibc; glibc <<= 1; if (ispunct(c)) ++glibc; glibc <<= 1; if (isspace(c)) ++glibc; glibc <<= 1; if (isupper(c)) ++glibc; glibc <<= 1; if (isxdigit(c)) ++glibc; printf("%#8x : ctype %#4x\n", c, glibc); }#endif#if 1 /* Paranoid checking... */ { unsigned int glibc; unsigned int mine; glibc = 0; if (iswalnum(c)) ++glibc; glibc <<= 1; if (iswalpha(c)) ++glibc; glibc <<= 1; if (iswblank(c)) ++glibc; glibc <<= 1; if (iswcntrl(c)) ++glibc; glibc <<= 1; if (iswdigit(c)) ++glibc; glibc <<= 1; if (iswgraph(c)) ++glibc; glibc <<= 1; if (iswlower(c)) ++glibc; glibc <<= 1; if (iswprint(c)) ++glibc; glibc <<= 1; if (iswpunct(c)) ++glibc; glibc <<= 1; if (iswspace(c)) ++glibc; glibc <<= 1; if (iswupper(c)) ++glibc; glibc <<= 1; if (iswxdigit(c)) ++glibc; mine = 0; if (mywalnum(c)) ++mine; mine <<= 1; if (mywalpha(c)) ++mine; mine <<= 1; if (mywblank(c)) ++mine; mine <<= 1; if (mywcntrl(c)) ++mine; mine <<= 1; if (mywdigit(c)) ++mine; mine <<= 1; if (mywgraph(c)) ++mine; mine <<= 1; if (mywlower(c)) ++mine; mine <<= 1; if (mywprint(c)) ++mine; mine <<= 1; if (mywpunct(c)) ++mine; mine <<= 1; if (mywspace(c)) ++mine; mine <<= 1; if (mywupper(c)) ++mine; mine <<= 1; if (mywxdigit(c)) ++mine; if (glibc != mine) { printf("%#8x : glibc %#4x != %#4x mine %u\n", c, glibc, mine, d); return EXIT_FAILURE; }#if 0 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {/* if (!iswpunct(c)) { */ printf("%#8x : %d %d %#4x\n", c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);/* } */ }#endif#if 0 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) { if (!last_comb) { printf("%#8x - ", c); last_comb = c; } else if (last_comb + 1 < c) { printf("%#8x\n%#8x - ", last_comb, c); last_comb = c; } else { last_comb = c; } }#endif }#endif combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3)) << ((c & 3) << 1));/* comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -