📄 ucdata.c
字号:
r = (l + _uccase_len[1]) - 1; } return _uccase_lookup(code, l, r, field);}/************************************************************************** * * Support for compositions. * **************************************************************************/#if !HARDCODE_DATAstatic ac_uint4 _uccomp_size;static ac_uint4 *_uccomp_data;/* * Return -1 on error, 0 if okay */static int_uccomp_load(char *paths, int reload){ FILE *in; ac_uint4 size, i; _ucheader_t hdr; if (_uccomp_size > 0) { if (!reload) /* * The compositions have already been loaded. */ return 0; free((char *) _uccomp_data); _uccomp_size = 0; } if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.bytes = endian_long(hdr.size.bytes); } _uccomp_size = hdr.cnt; _uccomp_data = (ac_uint4 *) malloc(hdr.size.bytes); /* * Read the composition data in. */ size = hdr.size.bytes / sizeof(ac_uint4); fread((char *) _uccomp_data, sizeof(ac_uint4), size, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < size; i++) _uccomp_data[i] = endian_long(_uccomp_data[i]); } /* * Assume that the data is ordered on count, so that all compositions * of length 2 come first. Only handling length 2 for now. */ for (i = 1; i < size; i += 4) if (_uccomp_data[i] != 2) break; _uccomp_size = i - 1; fclose(in); return 0;}static void_uccomp_unload(void){ if (_uccomp_size == 0) return; free((char *) _uccomp_data); _uccomp_size = 0;}#endifintuccomp(ac_uint4 node1, ac_uint4 node2, ac_uint4 *comp){ int l, r, m; l = 0; r = _uccomp_size - 1; while (l <= r) { m = ((r + l) >> 1); m -= m & 3; if (node1 > _uccomp_data[m+2]) l = m + 4; else if (node1 < _uccomp_data[m+2]) r = m - 4; else if (node2 > _uccomp_data[m+3]) l = m + 4; else if (node2 < _uccomp_data[m+3]) r = m - 4; else { *comp = _uccomp_data[m]; return 1; } } return 0;}intuccomp_hangul(ac_uint4 *str, int len){ const int SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7, LCount = 19, VCount = 21, TCount = 28, NCount = VCount * TCount, /* 588 */ SCount = LCount * NCount; /* 11172 */ int i, rlen; ac_uint4 ch, last, lindex, sindex; last = str[0]; rlen = 1; for ( i = 1; i < len; i++ ) { ch = str[i]; /* check if two current characters are L and V */ lindex = last - LBase; if (lindex < (ac_uint4) LCount) { ac_uint4 vindex = ch - VBase; if (vindex < (ac_uint4) VCount) { /* make syllable of form LV */ last = SBase + (lindex * VCount + vindex) * TCount; str[rlen-1] = last; /* reset last */ continue; } } /* check if two current characters are LV and T */ sindex = last - SBase; if (sindex < (ac_uint4) SCount && (sindex % TCount) == 0) { ac_uint4 tindex = ch - TBase; if (tindex <= (ac_uint4) TCount) { /* make syllable of form LVT */ last += tindex; str[rlen-1] = last; /* reset last */ continue; } } /* if neither case was true, just add the character */ last = ch; str[rlen] = ch; rlen++; } return rlen;}intuccanoncomp(ac_uint4 *str, int len){ int i, stpos, copos; ac_uint4 cl, prevcl, st, ch, co; st = str[0]; stpos = 0; copos = 1; prevcl = uccombining_class(st) == 0 ? 0 : 256; for (i = 1; i < len; i++) { ch = str[i]; cl = uccombining_class(ch); if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0)) st = str[stpos] = co; else { if (cl == 0) { stpos = copos; st = ch; } prevcl = cl; str[copos++] = ch; } } return uccomp_hangul(str, copos);}/************************************************************************** * * Support for decompositions. * **************************************************************************/#if !HARDCODE_DATAstatic ac_uint4 _ucdcmp_size;static ac_uint4 *_ucdcmp_nodes;static ac_uint4 *_ucdcmp_decomp;static ac_uint4 _uckdcmp_size;static ac_uint4 *_uckdcmp_nodes;static ac_uint4 *_uckdcmp_decomp;/* * Return -1 on error, 0 if okay */static int_ucdcmp_load(char *paths, int reload){ FILE *in; ac_uint4 size, i; _ucheader_t hdr; if (_ucdcmp_size > 0) { if (!reload) /* * The decompositions have already been loaded. */ return 0; free((char *) _ucdcmp_nodes); _ucdcmp_size = 0; } if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.bytes = endian_long(hdr.size.bytes); } _ucdcmp_size = hdr.cnt << 1; _ucdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes); _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1); /* * Read the decomposition data in. */ size = hdr.size.bytes / sizeof(ac_uint4); fread((char *) _ucdcmp_nodes, sizeof(ac_uint4), size, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < size; i++) _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); } fclose(in); return 0;}/* * Return -1 on error, 0 if okay */static int_uckdcmp_load(char *paths, int reload){ FILE *in; ac_uint4 size, i; _ucheader_t hdr; if (_uckdcmp_size > 0) { if (!reload) /* * The decompositions have already been loaded. */ return 0; free((char *) _uckdcmp_nodes); _uckdcmp_size = 0; } if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.bytes = endian_long(hdr.size.bytes); } _uckdcmp_size = hdr.cnt << 1; _uckdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes); _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1); /* * Read the decomposition data in. */ size = hdr.size.bytes / sizeof(ac_uint4); fread((char *) _uckdcmp_nodes, sizeof(ac_uint4), size, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < size; i++) _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]); } fclose(in); return 0;}static void_ucdcmp_unload(void){ if (_ucdcmp_size == 0) return; /* * Only need to free the offsets because the memory is allocated as a * single block. */ free((char *) _ucdcmp_nodes); _ucdcmp_size = 0;}static void_uckdcmp_unload(void){ if (_uckdcmp_size == 0) return; /* * Only need to free the offsets because the memory is allocated as a * single block. */ free((char *) _uckdcmp_nodes); _uckdcmp_size = 0;}#endifintucdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp){ long l, r, m; if (code < _ucdcmp_nodes[0]) { return 0; } l = 0; r = _ucdcmp_nodes[_ucdcmp_size] - 1; while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a code+offset pair. */ m = (l + r) >> 1; m -= (m & 1); if (code > _ucdcmp_nodes[m]) l = m + 2; else if (code < _ucdcmp_nodes[m]) r = m - 2; else if (code == _ucdcmp_nodes[m]) { *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1]; *decomp = (ac_uint4*)&_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; return 1; } } return 0;}intuckdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp){ long l, r, m; if (code < _uckdcmp_nodes[0]) { return 0; } l = 0; r = _uckdcmp_nodes[_uckdcmp_size] - 1; while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a code+offset pair. */ m = (l + r) >> 1; m -= (m & 1); if (code > _uckdcmp_nodes[m]) l = m + 2; else if (code < _uckdcmp_nodes[m]) r = m - 2; else if (code == _uckdcmp_nodes[m]) { *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1]; *decomp = (ac_uint4*)&_uckdcmp_decomp[_uckdcmp_nodes[m + 1]]; return 1; } } return 0;}intucdecomp_hangul(ac_uint4 code, ac_uint4 *num, ac_uint4 decomp[]){ if (!ucishangul(code)) return 0; code -= 0xac00; decomp[0] = 0x1100 + (ac_uint4) (code / 588); decomp[1] = 0x1161 + (ac_uint4) ((code % 588) / 28); decomp[2] = 0x11a7 + (ac_uint4) (code % 28); *num = (decomp[2] != 0x11a7) ? 3 : 2; return 1;}/* mode == 0 for canonical, mode == 1 for compatibility */static intuccanoncompatdecomp(const ac_uint4 *in, int inlen, ac_uint4 **out, int *outlen, short mode, void *ctx){ int l, size; unsigned i, j, k; ac_uint4 num, class, *decomp, hangdecomp[3]; size = inlen * 2; *out = (ac_uint4 *) ber_memalloc_x(size * sizeof(**out), ctx); if (*out == NULL) return *outlen = -1; i = 0; for (j = 0; j < (unsigned) inlen; j++) { if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) { if ( size - i < num) { size = inlen + i - j + num - 1; *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx ); if (*out == NULL) return *outlen = -1; } for (k = 0; k < num; k++) { class = uccombining_class(decomp[k]); if (class == 0) { (*out)[i] = decomp[k]; } else { for (l = i; l > 0; l--) if (class >= uccombining_class((*out)[l-1])) break; AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out)); (*out)[l] = decomp[k]; } i++; } } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) { if (size - i < num) { size = inlen + i - j + num - 1; *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx); if (*out == NULL) return *outlen = -1; } for (k = 0; k < num; k++) { (*out)[i] = hangdecomp[k]; i++; } } else { if (size - i < 1) { size = inlen + i - j; *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx); if (*out == NULL) return *outlen = -1; } class = uccombining_class(in[j]); if (class == 0) { (*out)[i] = in[j]; } else { for (l = i; l > 0; l--) if (class >= uccombining_class((*out)[l-1])) break; AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out)); (*out)[l] = in[j]; } i++; } } return *outlen = i;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -