📄 ucdata.c
字号:
intuccanondecomp(const ac_uint4 *in, int inlen, ac_uint4 **out, int *outlen, void *ctx){ return uccanoncompatdecomp(in, inlen, out, outlen, 0, ctx);}intuccompatdecomp(const ac_uint4 *in, int inlen, ac_uint4 **out, int *outlen, void *ctx){ return uccanoncompatdecomp(in, inlen, out, outlen, 1, ctx);}/************************************************************************** * * Support for combining classes. * **************************************************************************/#if !HARDCODE_DATAstatic ac_uint4 _uccmcl_size;static ac_uint4 *_uccmcl_nodes;/* * Return -1 on error, 0 if okay */static int_uccmcl_load(char *paths, int reload){ FILE *in; ac_uint4 i; _ucheader_t hdr; if (_uccmcl_size > 0) { if (!reload) /* * The combining classes have already been loaded. */ return 0; free((char *) _uccmcl_nodes); _uccmcl_size = 0; } if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.bytes = endian_long(hdr.size.bytes); } _uccmcl_size = hdr.cnt * 3; _uccmcl_nodes = (ac_uint4 *) malloc(hdr.size.bytes); /* * Read the combining classes in. */ fread((char *) _uccmcl_nodes, sizeof(ac_uint4), _uccmcl_size, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < _uccmcl_size; i++) _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); } fclose(in); return 0;}static void_uccmcl_unload(void){ if (_uccmcl_size == 0) return; free((char *) _uccmcl_nodes); _uccmcl_size = 0;}#endifac_uint4uccombining_class(ac_uint4 code){ long l, r, m; l = 0; r = _uccmcl_size - 1; while (l <= r) { m = (l + r) >> 1; m -= (m % 3); if (code > _uccmcl_nodes[m + 1]) l = m + 3; else if (code < _uccmcl_nodes[m]) r = m - 3; else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1]) return _uccmcl_nodes[m + 2]; } return 0;}/************************************************************************** * * Support for numeric values. * **************************************************************************/#if !HARDCODE_DATAstatic ac_uint4 *_ucnum_nodes;static ac_uint4 _ucnum_size;static short *_ucnum_vals;/* * Return -1 on error, 0 if okay */static int_ucnumb_load(char *paths, int reload){ FILE *in; ac_uint4 size, i; _ucheader_t hdr; if (_ucnum_size > 0) { if (!reload) /* * The numbers have already been loaded. */ return 0; free((char *) _ucnum_nodes); _ucnum_size = 0; } if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.bytes = endian_long(hdr.size.bytes); } _ucnum_size = hdr.cnt; _ucnum_nodes = (ac_uint4 *) malloc(hdr.size.bytes); _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size); /* * Read the combining classes in. */ fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < _ucnum_size; i++) _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]); /* * Determine the number of values that have to be adjusted. */ size = (hdr.size.bytes - (_ucnum_size * (sizeof(ac_uint4) << 1))) / sizeof(short); for (i = 0; i < size; i++) _ucnum_vals[i] = endian_short(_ucnum_vals[i]); } fclose(in); return 0;}static void_ucnumb_unload(void){ if (_ucnum_size == 0) return; free((char *) _ucnum_nodes); _ucnum_size = 0;}#endifintucnumber_lookup(ac_uint4 code, struct ucnumber *num){ long l, r, m; short *vp; l = 0; r = _ucnum_size - 1; while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a code+offset pair. */ m = (l + r) >> 1; m -= (m & 1); if (code > _ucnum_nodes[m]) l = m + 2; else if (code < _ucnum_nodes[m]) r = m - 2; else { vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1]; num->numerator = (int) *vp++; num->denominator = (int) *vp; return 1; } } return 0;}intucdigit_lookup(ac_uint4 code, int *digit){ long l, r, m; short *vp; l = 0; r = _ucnum_size - 1; while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a code+offset pair. */ m = (l + r) >> 1; m -= (m & 1); if (code > _ucnum_nodes[m]) l = m + 2; else if (code < _ucnum_nodes[m]) r = m - 2; else { vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1]; if (*vp == *(vp + 1)) { *digit = *vp; return 1; } return 0; } } return 0;}struct ucnumberucgetnumber(ac_uint4 code){ struct ucnumber num; /* * Initialize with some arbitrary value, because the caller simply cannot * tell for sure if the code is a number without calling the ucisnumber() * macro before calling this function. */ num.numerator = num.denominator = -111; (void) ucnumber_lookup(code, &num); return num;}intucgetdigit(ac_uint4 code){ int dig; /* * Initialize with some arbitrary value, because the caller simply cannot * tell for sure if the code is a number without calling the ucisdigit() * macro before calling this function. */ dig = -111; (void) ucdigit_lookup(code, &dig); return dig;}/************************************************************************** * * Setup and cleanup routines. * **************************************************************************/#if HARDCODE_DATAint ucdata_load(char *paths, int masks) { return 0; }void ucdata_unload(int masks) { }int ucdata_reload(char *paths, int masks) { return 0; }#else/* * Return 0 if okay, negative on error */intucdata_load(char *paths, int masks){ int error = 0; if (masks & UCDATA_CTYPE) error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0; if (masks & UCDATA_CASE) error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0; if (masks & UCDATA_DECOMP) error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0; if (masks & UCDATA_CMBCL) error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0; if (masks & UCDATA_NUM) error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0; if (masks & UCDATA_COMP) error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0; if (masks & UCDATA_KDECOMP) error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0; return -error;}voiducdata_unload(int masks){ if (masks & UCDATA_CTYPE) _ucprop_unload(); if (masks & UCDATA_CASE) _uccase_unload(); if (masks & UCDATA_DECOMP) _ucdcmp_unload(); if (masks & UCDATA_CMBCL) _uccmcl_unload(); if (masks & UCDATA_NUM) _ucnumb_unload(); if (masks & UCDATA_COMP) _uccomp_unload(); if (masks & UCDATA_KDECOMP) _uckdcmp_unload();}/* * Return 0 if okay, negative on error */intucdata_reload(char *paths, int masks){ int error = 0; if (masks & UCDATA_CTYPE) error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0; if (masks & UCDATA_CASE) error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0; if (masks & UCDATA_DECOMP) error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0; if (masks & UCDATA_CMBCL) error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0; if (masks & UCDATA_NUM) error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0; if (masks & UCDATA_COMP) error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0; if (masks & UCDATA_KDECOMP) error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0; return -error;}#endif#ifdef TESTvoidmain(void){ int dig; ac_uint4 i, lo, *dec; struct ucnumber num;/* ucdata_setup("."); */ if (ucisweak(0x30)) printf("WEAK\n"); else printf("NOT WEAK\n"); printf("LOWER 0x%04lX\n", uctolower(0xff3a)); printf("UPPER 0x%04lX\n", uctoupper(0xff5a)); if (ucisalpha(0x1d5)) printf("ALPHA\n"); else printf("NOT ALPHA\n"); if (ucisupper(0x1d5)) { printf("UPPER\n"); lo = uctolower(0x1d5); printf("0x%04lx\n", lo); lo = uctotitle(0x1d5); printf("0x%04lx\n", lo); } else printf("NOT UPPER\n"); if (ucistitle(0x1d5)) printf("TITLE\n"); else printf("NOT TITLE\n"); if (uciscomposite(0x1d5)) printf("COMPOSITE\n"); else printf("NOT COMPOSITE\n"); if (ucdecomp(0x1d5, &lo, &dec)) { for (i = 0; i < lo; i++) printf("0x%04lx ", dec[i]); putchar('\n'); } if ((lo = uccombining_class(0x41)) != 0) printf("0x41 CCL %ld\n", lo); if (ucisxdigit(0xfeff)) printf("0xFEFF HEX DIGIT\n"); else printf("0xFEFF NOT HEX DIGIT\n"); if (ucisdefined(0x10000)) printf("0x10000 DEFINED\n"); else printf("0x10000 NOT DEFINED\n"); if (ucnumber_lookup(0x30, &num)) { if (num.denominator != 1) printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0x30 = %d\n", num.numerator); } else printf("UCNUMBER: 0x30 NOT A NUMBER\n"); if (ucnumber_lookup(0xbc, &num)) { if (num.denominator != 1) printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0xbc = %d\n", num.numerator); } else printf("UCNUMBER: 0xbc NOT A NUMBER\n"); if (ucnumber_lookup(0xff19, &num)) { if (num.denominator != 1) printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0xff19 = %d\n", num.numerator); } else printf("UCNUMBER: 0xff19 NOT A NUMBER\n"); if (ucnumber_lookup(0x4e00, &num)) { if (num.denominator != 1) printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0x4e00 = %d\n", num.numerator); } else printf("UCNUMBER: 0x4e00 NOT A NUMBER\n"); if (ucdigit_lookup(0x06f9, &dig)) printf("UCDIGIT: 0x6f9 = %d\n", dig); else printf("UCDIGIT: 0x6f9 NOT A NUMBER\n"); dig = ucgetdigit(0x0969); printf("UCGETDIGIT: 0x969 = %d\n", dig); num = ucgetnumber(0x30); if (num.denominator != 1) printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); else printf("UCGETNUMBER: 0x30 = %d\n", num.numerator); num = ucgetnumber(0xbc); if (num.denominator != 1) printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); else printf("UCGETNUMBER: 0xbc = %d\n", num.numerator); num = ucgetnumber(0xff19); if (num.denominator != 1) printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); else printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);/* ucdata_cleanup(); */ exit(0);}#endif /* TEST */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -