📄 ucgendat.c
字号:
* Convert the combining class code from decimal. */ for (ccl_code = 0, e = s; *e && *e != ';'; e++) ccl_code = (ccl_code * 10) + (*e - '0'); /* * Add the code if it not 0. */ if (ccl_code != 0) ordered_ccl_insert(code, ccl_code); /* * Locate the second character property field. */ for (s = e; *s != 0 && i < 4; s++) { if (*s == ';') i++; } for (e = s; *e && *e != ';'; e++) ; ordered_range_insert(code, s, e - s); /* * Check for a decomposition. */ s = ++e; if (*s != ';') { compat = *s == '<'; if (compat) { /* * Skip compatibility formatting tag. */ while (*s++ != '>'); } /* * Collect the codes of the decomposition. */ for (dectmp_size = 0; *s != ';'; ) { /* * Skip all leading non-hex digits. */ while (!ishdigit(*s)) s++; for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) { dectmp[dectmp_size] <<= 4; if (*s >= '0' && *s <= '9') dectmp[dectmp_size] += *s - '0'; else if (*s >= 'A' && *s <= 'F') dectmp[dectmp_size] += (*s - 'A') + 10; else if (*s >= 'a' && *s <= 'f') dectmp[dectmp_size] += (*s - 'a') + 10; } dectmp_size++; } /* * If there are any codes in the temporary decomposition array, * then add the character with its decomposition. */ if (dectmp_size > 0) { if (!compat) { add_decomp(code, 0); } add_decomp(code, 1); } } /* * Skip to the number field. */ for (i = 0; i < 3 && *s; s++) { if (*s == ';') i++; } /* * Scan the number in. */ number[0] = number[1] = 0; for (e = s, neg = wnum = 0; *e && *e != ';'; e++) { if (*e == '-') { neg = 1; continue; } if (*e == '/') { /* * Move the the denominator of the fraction. */ if (neg) number[wnum] *= -1; neg = 0; e++; wnum++; } number[wnum] = (number[wnum] * 10) + (*e - '0'); } if (e > s) { /* * Adjust the denominator in case of integers and add the number. */ if (wnum == 0) number[1] = 1; add_number(code, number[0], number[1]); } /* * Skip to the start of the possible case mappings. */ for (s = e, i = 0; i < 4 && *s; s++) { if (*s == ';') i++; } /* * Collect the case mappings. */ cases[0] = cases[1] = cases[2] = 0; for (i = 0; i < 3; i++) { while (ishdigit(*s)) { cases[i] <<= 4; if (*s >= '0' && *s <= '9') cases[i] += *s - '0'; else if (*s >= 'A' && *s <= 'F') cases[i] += (*s - 'A') + 10; else if (*s >= 'a' && *s <= 'f') cases[i] += (*s - 'a') + 10; s++; } if (*s == ';') s++; } if (cases[0] && cases[1]) /* * Add the upper and lower mappings for a title case character. */ add_title(code); else if (cases[1]) /* * Add the lower and title case mappings for the upper case * character. */ add_upper(code); else if (cases[0]) /* * Add the upper and title case mappings for the lower case * character. */ add_lower(code); }}static _decomp_t *find_decomp(ac_uint4 code, short compat){ long l, r, m; _decomp_t *decs; l = 0; r = (compat ? kdecomps_used : decomps_used) - 1; decs = compat ? kdecomps : decomps; while (l <= r) { m = (l + r) >> 1; if (code > decs[m].code) l = m + 1; else if (code < decs[m].code) r = m - 1; else return &decs[m]; } return 0;}static voiddecomp_it(_decomp_t *d, short compat){ ac_uint4 i; _decomp_t *dp; for (i = 0; i < d->used; i++) { if ((dp = find_decomp(d->decomp[i], compat)) != 0) decomp_it(dp, compat); else dectmp[dectmp_size++] = d->decomp[i]; }}/* * Expand all decompositions by recursively decomposing each character * in the decomposition. */static voidexpand_decomp(void){ ac_uint4 i; for (i = 0; i < decomps_used; i++) { dectmp_size = 0; decomp_it(&decomps[i], 0); if (dectmp_size > 0) add_decomp(decomps[i].code, 0); } for (i = 0; i < kdecomps_used; i++) { dectmp_size = 0; decomp_it(&kdecomps[i], 1); if (dectmp_size > 0) add_decomp(kdecomps[i].code, 1); }}static intcmpcomps(const void *v_comp1, const void *v_comp2){ const _comp_t *comp1 = v_comp1, *comp2 = v_comp2; long diff = comp1->code1 - comp2->code1; if (!diff) diff = comp1->code2 - comp2->code2; return (int) diff;}/* * Load composition exclusion data */static voidread_compexdata(FILE *in){ ac_uint2 i; ac_uint4 code; char line[512], *s; (void) memset((char *) compexs, 0, sizeof(compexs)); while (fgets(line, sizeof(line), in)) { if( (s=strchr(line, '\n')) ) *s = '\0'; /* * Skip blank lines and lines that start with a '#'. */ if (line[0] == 0 || line[0] == '#') continue; /* * Collect the code. Assume max 6 digits */ for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) { if (isspace((unsigned char)*s)) break; code <<= 4; if (*s >= '0' && *s <= '9') code += *s - '0'; else if (*s >= 'A' && *s <= 'F') code += (*s - 'A') + 10; else if (*s >= 'a' && *s <= 'f') code += (*s - 'a') + 10; } COMPEX_SET(code); }}/* * Creates array of compositions from decomposition array */static voidcreate_comps(void){ ac_uint4 i, cu; comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t)); for (i = cu = 0; i < decomps_used; i++) { if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code)) continue; comps[cu].comp = decomps[i].code; comps[cu].count = 2; comps[cu].code1 = decomps[i].decomp[0]; comps[cu].code2 = decomps[i].decomp[1]; cu++; } comps_used = cu; qsort(comps, comps_used, sizeof(_comp_t), cmpcomps);}#if HARDCODE_DATAstatic voidwrite_case(FILE *out, _case_t *tab, int num, int first){ int i; for (i=0; i<num; i++) { if (first) first = 0; else fprintf(out, ","); fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx", (unsigned long) tab[i].key, (unsigned long) tab[i].other1, (unsigned long) tab[i].other2); }}#define PREF "static const "#endifstatic voidwrite_cdata(char *opath){ FILE *out; ac_uint4 bytes; ac_uint4 i, idx, nprops;#if !(HARDCODE_DATA) ac_uint2 casecnt[2];#endif char path[BUFSIZ];#if HARDCODE_DATA int j, k; /***************************************************************** * * Generate the ctype data. * *****************************************************************/ /* * Open the output file. */ snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath); if ((out = fopen(path, "w")) == 0) return;#else /* * Open the ctype.dat file. */ snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath); if ((out = fopen(path, "wb")) == 0) return;#endif /* * Collect the offsets for the properties. The offsets array is * on a 4-byte boundary to keep things efficient for architectures * that need such a thing. */ for (i = idx = 0; i < NUMPROPS; i++) { propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff; idx += proptbl[i].used; } /* * Add the sentinel index which is used by the binary search as the upper * bound for a search. */ propcnt[i] = idx; /* * Record the actual number of property lists. This may be different than * the number of offsets actually written because of aligning on a 4-byte * boundary. */ hdr[1] = NUMPROPS; /* * Calculate the byte count needed and pad the property counts array to a * 4-byte boundary. */ if ((bytes = sizeof(ac_uint2) * (NUMPROPS + 1)) & 3) bytes += 4 - (bytes & 3); nprops = bytes / sizeof(ac_uint2); bytes += sizeof(ac_uint4) * idx;#if HARDCODE_DATA fprintf(out, PREF "ac_uint4 _ucprop_size = %d;\n\n", NUMPROPS); fprintf(out, PREF "ac_uint2 _ucprop_offsets[] = {"); for (i = 0; i<nprops; i++) { if (i) fprintf(out, ","); if (!(i&7)) fprintf(out, "\n\t"); else fprintf(out, " "); fprintf(out, "0x%04x", propcnt[i]); } fprintf(out, "\n};\n\n"); fprintf(out, PREF "ac_uint4 _ucprop_ranges[] = {"); k = 0; for (i = 0; i < NUMPROPS; i++) { if (proptbl[i].used > 0) { for (j=0; j<proptbl[i].used; j++) { if (k) fprintf(out, ","); if (!(k&3)) fprintf(out,"\n\t"); else fprintf(out, " "); k++; fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]); } } } fprintf(out, "\n};\n\n");#else /* * Write the header. */ fwrite((char *) hdr, sizeof(ac_uint2), 2, out); /* * Write the byte count. */ fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); /* * Write the property list counts. */ fwrite((char *) propcnt, sizeof(ac_uint2), nprops, out); /* * Write the property lists. */ for (i = 0; i < NUMPROPS; i++) { if (proptbl[i].used > 0) fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4), proptbl[i].used, out); } fclose(out);#endif /***************************************************************** * * Generate the case mapping data. * *****************************************************************/#if HARDCODE_DATA fprintf(out, PREF "ac_uint4 _uccase_size = %ld;\n\n", (long) (upper_used + lower_used + title_used)); fprintf(out, PREF "ac_uint2 _uccase_len[2] = {%ld, %ld};\n\n", (long) upper_used, (long) lower_used); fprintf(out, PREF "ac_uint4 _uccase_map[] = {"); if (upper_used > 0) /* * Write the upper case table. */ write_case(out, upper, upper_used, 1); if (lower_used > 0) /* * Write the lower case table. */ write_case(out, lower, lower_used, !upper_used); if (title_used > 0) /* * Write the title case table. */ write_case(out, title, title_used, !(upper_used||lower_used)); if (!(upper_used || lower_used || title_used)) fprintf(out, "\t0"); fprintf(out, "\n};\n\n");#else /* * Open the case.dat file. */ snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath); if ((out = fopen(path, "wb")) == 0) return; /* * Write the case mapping tables. */ hdr[1] = upper_used + lower_used + title_used; casecnt[0] = upper_used; casecnt[1] = lower_used; /* * Write the header. */ fwrite((char *) hdr, sizeof(ac_uint2), 2, out); /* * Write the upper and lower case table sizes. */ fwrite((char *) casecnt, sizeof(ac_uint2), 2, out);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -