📄 ucgendat.c
字号:
/* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucgendat.c,v 1.36.2.4 2007/01/02 21:43:51 kurt Exp $ *//* This work is part of OpenLDAP Software <http://www.openldap.org/>. * * Copyright 1998-2007 The OpenLDAP Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP * Public License. * * A copy of this license is available in file LICENSE in the * top-level directory of the distribution or, alternatively, at * <http://www.OpenLDAP.org/license.html>. *//* Copyright 2001 Computing Research Labs, New Mexico State University * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. *//* $Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */#include "portable.h"#include "ldap_config.h"#include <stdio.h>#include <ac/ctype.h>#include <ac/stdlib.h>#include <ac/string.h>#include <ac/unistd.h>#include <ac/bytes.h>#include <lutil.h>#ifndef HARDCODE_DATA#define HARDCODE_DATA 1#endif#undef ishdigit#define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\ ((cc) >= 'A' && (cc) <= 'F') ||\ ((cc) >= 'a' && (cc) <= 'f'))/* * A header written to the output file with the byte-order-mark and the number * of property nodes. */static ac_uint2 hdr[2] = {0xfeff, 0};#define NUMPROPS 50#define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))typedef struct { char *name; int len;} _prop_t;/* * List of properties expected to be found in the Unicode Character Database * including some implementation specific properties. * * The implementation specific properties are: * Cm = Composed (can be decomposed) * Nb = Non-breaking * Sy = Symmetric (has left and right forms) * Hd = Hex digit * Qm = Quote marks * Mr = Mirroring * Ss = Space, other * Cp = Defined character */static _prop_t props[NUMPROPS] = { {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2}, {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2}, {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2}, {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2}, {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1}, {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1}, {"S", 1}, {"WS", 2}, {"ON", 2}, {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2}, {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}};typedef struct { ac_uint4 *ranges; ac_uint2 used; ac_uint2 size;} _ranges_t;static _ranges_t proptbl[NUMPROPS];/* * Make sure this array is sized to be on a 4-byte boundary at compile time. */static ac_uint2 propcnt[NEEDPROPS];/* * Array used to collect a decomposition before adding it to the decomposition * table. */static ac_uint4 dectmp[64];static ac_uint4 dectmp_size;typedef struct { ac_uint4 code; ac_uint2 size; ac_uint2 used; ac_uint4 *decomp;} _decomp_t;/* * List of decomposition. Created and expanded in order as the characters are * encountered. First list contains canonical mappings, second also includes * compatibility mappings. */static _decomp_t *decomps;static ac_uint4 decomps_used;static ac_uint4 decomps_size;static _decomp_t *kdecomps;static ac_uint4 kdecomps_used;static ac_uint4 kdecomps_size;/* * Composition exclusion table stuff. */#define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))#define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))static ac_uint4 compexs[8192];/* * Struct for holding a composition pair, and array of composition pairs */typedef struct { ac_uint4 comp; ac_uint4 count; ac_uint4 code1; ac_uint4 code2;} _comp_t;static _comp_t *comps;static ac_uint4 comps_used;/* * Types and lists for handling lists of case mappings. */typedef struct { ac_uint4 key; ac_uint4 other1; ac_uint4 other2;} _case_t;static _case_t *upper;static _case_t *lower;static _case_t *title;static ac_uint4 upper_used;static ac_uint4 upper_size;static ac_uint4 lower_used;static ac_uint4 lower_size;static ac_uint4 title_used;static ac_uint4 title_size;/* * Array used to collect case mappings before adding them to a list. */static ac_uint4 cases[3];/* * An array to hold ranges for combining classes. */static ac_uint4 *ccl;static ac_uint4 ccl_used;static ac_uint4 ccl_size;/* * Structures for handling numbers. */typedef struct { ac_uint4 code; ac_uint4 idx;} _codeidx_t;typedef struct { short numerator; short denominator;} _num_t;/* * Arrays to hold the mapping of codes to numbers. */static _codeidx_t *ncodes;static ac_uint4 ncodes_used;static ac_uint4 ncodes_size;static _num_t *nums;static ac_uint4 nums_used;static ac_uint4 nums_size;/* * Array for holding numbers. */static _num_t *nums;static ac_uint4 nums_used;static ac_uint4 nums_size;static voidadd_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2){ int i, j, k, len; _ranges_t *rlp; char *name; for (k = 0; k < 2; k++) { if (k == 0) { name = p1; len = 2; } else { if (p2 == 0) break; name = p2; len = 1; } for (i = 0; i < NUMPROPS; i++) { if (props[i].len == len && memcmp(props[i].name, name, len) == 0) break; } if (i == NUMPROPS) continue; rlp = &proptbl[i]; /* * Resize the range list if necessary. */ if (rlp->used == rlp->size) { if (rlp->size == 0) rlp->ranges = (ac_uint4 *) malloc(sizeof(ac_uint4) << 3); else rlp->ranges = (ac_uint4 *) realloc((char *) rlp->ranges, sizeof(ac_uint4) * (rlp->size + 8)); rlp->size += 8; } /* * If this is the first code for this property list, just add it * and return. */ if (rlp->used == 0) { rlp->ranges[0] = start; rlp->ranges[1] = end; rlp->used += 2; continue; } /* * Optimize the case of adding the range to the end. */ j = rlp->used - 1; if (start > rlp->ranges[j]) { j = rlp->used; rlp->ranges[j++] = start; rlp->ranges[j++] = end; rlp->used = j; continue; } /* * Need to locate the insertion point. */ for (i = 0; i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ; /* * If the start value lies in the current range, then simply set the * new end point of the range to the end value passed as a parameter. */ if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) { rlp->ranges[i + 1] = end; return; } /* * Shift following values up by two. */ for (j = rlp->used; j > i; j -= 2) { rlp->ranges[j] = rlp->ranges[j - 2]; rlp->ranges[j + 1] = rlp->ranges[j - 1]; } /* * Add the new range at the insertion point. */ rlp->ranges[i] = start; rlp->ranges[i + 1] = end; rlp->used += 2; }}static voidordered_range_insert(ac_uint4 c, char *name, int len){ int i, j; ac_uint4 s, e; _ranges_t *rlp; if (len == 0) return; /* * Deal with directionality codes introduced in Unicode 3.0. */ if ((len == 2 && memcmp(name, "BN", 2) == 0) || (len == 3 && (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 || memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 || memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) { /* * Mark all of these as Other Neutral to preserve compatibility with * older versions. */ len = 2; name = "ON"; } for (i = 0; i < NUMPROPS; i++) { if (props[i].len == len && memcmp(props[i].name, name, len) == 0) break; } if (i == NUMPROPS) return; /* * Have a match, so insert the code in order. */ rlp = &proptbl[i]; /* * Resize the range list if necessary. */ if (rlp->used == rlp->size) { if (rlp->size == 0) rlp->ranges = (ac_uint4 *) malloc(sizeof(ac_uint4) << 3); else rlp->ranges = (ac_uint4 *) realloc((char *) rlp->ranges, sizeof(ac_uint4) * (rlp->size + 8)); rlp->size += 8; } /* * If this is the first code for this property list, just add it * and return. */ if (rlp->used == 0) { rlp->ranges[0] = rlp->ranges[1] = c; rlp->used += 2; return; } /* * Optimize the cases of extending the last range and adding new ranges to * the end. */ j = rlp->used - 1; e = rlp->ranges[j]; s = rlp->ranges[j - 1]; if (c == e + 1) { /* * Extend the last range. */ rlp->ranges[j] = c; return; } if (c > e + 1) { /* * Start another range on the end. */ j = rlp->used; rlp->ranges[j] = rlp->ranges[j + 1] = c; rlp->used += 2; return; } if (c >= s) /* * The code is a duplicate of a code in the last range, so just return. */ return; /* * The code should be inserted somewhere before the last range in the * list. Locate the insertion point. */ for (i = 0; i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ; s = rlp->ranges[i]; e = rlp->ranges[i + 1]; if (c == e + 1) /* * Simply extend the current range. */ rlp->ranges[i + 1] = c; else if (c < s) { /* * Add a new entry before the current location. Shift all entries * before the current one up by one to make room. */ for (j = rlp->used; j > i; j -= 2) { rlp->ranges[j] = rlp->ranges[j - 2]; rlp->ranges[j + 1] = rlp->ranges[j - 1]; } rlp->ranges[i] = rlp->ranges[i + 1] = c; rlp->used += 2; }}static voidadd_decomp(ac_uint4 code, short compat){ ac_uint4 i, j, size; _decomp_t **pdecomps; ac_uint4 *pdecomps_used; ac_uint4 *pdecomps_size; if (compat) { pdecomps = &kdecomps; pdecomps_used = &kdecomps_used; pdecomps_size = &kdecomps_size; } else { pdecomps = &decomps; pdecomps_used = &decomps_used; pdecomps_size = &decomps_size; } /* * Add the code to the composite property. */ if (!compat) { ordered_range_insert(code, "Cm", 2); } /* * Locate the insertion point for the code. */ for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ; /* * Allocate space for a new decomposition. */ if (*pdecomps_used == *pdecomps_size) { if (*pdecomps_size == 0) *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3); else *pdecomps = (_decomp_t *) realloc((char *) *pdecomps, sizeof(_decomp_t) * (*pdecomps_size + 8)); (void) memset((char *) (*pdecomps + *pdecomps_size), '\0', sizeof(_decomp_t) << 3); *pdecomps_size += 8; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -