⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ure.c

📁 OpenLdap是LDAP的开源项目
💻 C
📖 第 1 页 / 共 5 页
字号:
     * Return the number of characters consumed.     */    return sp - pp;}/* * Collect a hex number with 1 to 4 digits and return the number * of characters used. */static unsigned long_ure_hex(ucs2_t *np, unsigned long limit, ucs4_t *n){    ucs2_t i;    ucs2_t *sp, *ep;    ucs4_t nn;    sp = np;    ep = sp + limit;    for (nn = 0, i = 0; i < 4 && sp < ep; i++, sp++) {        if (*sp >= '0' && *sp <= '9')          nn = (nn << 4) + (*sp - '0');        else if (*sp >= 'A' && *sp <= 'F')          nn = (nn << 4) + ((*sp - 'A') + 10);        else if (*sp >= 'a' && *sp <= 'f')          nn = (nn << 4) + ((*sp - 'a') + 10);        else          /*           * Encountered something that is not a hex digit.           */          break;    }    /*     * Assign the character code collected and return the number of     * characters used.     */    *n = nn;    return sp - np;}/* * Insert a range into a character class, removing duplicates and ordering * them in increasing range-start order. */static void_ure_add_range(_ure_ccl_t *ccl, _ure_range_t *r, _ure_buffer_t *b){    ucs2_t i;    ucs4_t tmp;    _ure_range_t *rp;    /*     * If the `casefold' flag is set, then make sure both endpoints of the     * range are converted to lower case.     */    if (b->flags & _URE_DFA_CASEFOLD) {        r->min_code = _ure_tolower(r->min_code);        r->max_code = _ure_tolower(r->max_code);    }    /*     * Swap the range endpoints if they are not in increasing order.     */    if (r->min_code > r->max_code) {        tmp = r->min_code;        r->min_code = r->max_code;        r->max_code = tmp;    }    for (i = 0, rp = ccl->ranges;         i < ccl->ranges_used && r->min_code < rp->min_code; i++, rp++) ;    /*     * Check for a duplicate.     */    if (i < ccl->ranges_used &&        r->min_code == rp->min_code && r->max_code == rp->max_code)      return;    if (ccl->ranges_used == ccl->ranges_size) {        if (ccl->ranges_size == 0)          ccl->ranges = (_ure_range_t *) malloc(sizeof(_ure_range_t) << 3);        else          ccl->ranges = (_ure_range_t *)              realloc((char *) ccl->ranges,                      sizeof(_ure_range_t) * (ccl->ranges_size + 8));        ccl->ranges_size += 8;    }    rp = ccl->ranges + ccl->ranges_used;    if (i < ccl->ranges_used)      _ure_memmove((char *) (rp + 1), (char *) rp,                   sizeof(_ure_range_t) * (ccl->ranges_used - i));    ccl->ranges_used++;    rp->min_code = r->min_code;    rp->max_code = r->max_code;}#define _URE_ALPHA_MASK  (_URE_UPPER|_URE_LOWER|_URE_OTHERLETTER|\_URE_MODIFIER|_URE_TITLE|_URE_NONSPACING|_URE_COMBINING)#define _URE_ALNUM_MASK  (_URE_ALPHA_MASK|_URE_NUMDIGIT)#define _URE_PUNCT_MASK  (_URE_DASHPUNCT|_URE_OPENPUNCT|_URE_CLOSEPUNCT|\_URE_OTHERPUNCT)#define _URE_GRAPH_MASK (_URE_NUMDIGIT|_URE_NUMOTHER|_URE_ALPHA_MASK|\_URE_MATHSYM|_URE_CURRENCYSYM|_URE_OTHERSYM)#define _URE_PRINT_MASK (_URE_GRAPH_MASK|_URE_SPACESEP)#define _URE_SPACE_MASK  (_URE_SPACESEP|_URE_LINESEP|_URE_PARASEP)typedef void (*_ure_cclsetup_t)(    _ure_symtab_t *sym,    unsigned long mask,    _ure_buffer_t *b);typedef struct {    ucs2_t key;    unsigned long len;    unsigned long next;    _ure_cclsetup_t func;    unsigned long mask;} _ure_trie_t;static void_ure_ccl_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b){    sym->props |= mask;}static void_ure_space_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b){    _ure_range_t range;    sym->props |= mask;    /*     * Add the additional characters needed for handling isspace().     */    range.min_code = range.max_code = '\t';    _ure_add_range(&sym->sym.ccl, &range, b);    range.min_code = range.max_code = '\r';    _ure_add_range(&sym->sym.ccl, &range, b);    range.min_code = range.max_code = '\n';    _ure_add_range(&sym->sym.ccl, &range, b);    range.min_code = range.max_code = '\f';    _ure_add_range(&sym->sym.ccl, &range, b);    range.min_code = range.max_code = 0xfeff;    _ure_add_range(&sym->sym.ccl, &range, b);}static void_ure_xdigit_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b){    _ure_range_t range;    /*     * Add the additional characters needed for handling isxdigit().     */    range.min_code = '0';    range.max_code = '9';    _ure_add_range(&sym->sym.ccl, &range, b);    range.min_code = 'A';    range.max_code = 'F';    _ure_add_range(&sym->sym.ccl, &range, b);    range.min_code = 'a';    range.max_code = 'f';    _ure_add_range(&sym->sym.ccl, &range, b);}static _ure_trie_t cclass_trie[] = {    {0x003a, 1, 1, 0, 0},    {0x0061, 9, 10, 0, 0},    {0x0063, 8, 19, 0, 0},    {0x0064, 7, 24, 0, 0},    {0x0067, 6, 29, 0, 0},    {0x006c, 5, 34, 0, 0},    {0x0070, 4, 39, 0, 0},    {0x0073, 3, 49, 0, 0},    {0x0075, 2, 54, 0, 0},    {0x0078, 1, 59, 0, 0},    {0x006c, 1, 11, 0, 0},    {0x006e, 2, 13, 0, 0},    {0x0070, 1, 16, 0, 0},    {0x0075, 1, 14, 0, 0},    {0x006d, 1, 15, 0, 0},    {0x003a, 1, 16, _ure_ccl_setup, _URE_ALNUM_MASK},    {0x0068, 1, 17, 0, 0},    {0x0061, 1, 18, 0, 0},    {0x003a, 1, 19, _ure_ccl_setup, _URE_ALPHA_MASK},    {0x006e, 1, 20, 0, 0},    {0x0074, 1, 21, 0, 0},    {0x0072, 1, 22, 0, 0},    {0x006c, 1, 23, 0, 0},    {0x003a, 1, 24, _ure_ccl_setup, _URE_CNTRL},    {0x0069, 1, 25, 0, 0},    {0x0067, 1, 26, 0, 0},    {0x0069, 1, 27, 0, 0},    {0x0074, 1, 28, 0, 0},    {0x003a, 1, 29, _ure_ccl_setup, _URE_NUMDIGIT},    {0x0072, 1, 30, 0, 0},    {0x0061, 1, 31, 0, 0},    {0x0070, 1, 32, 0, 0},    {0x0068, 1, 33, 0, 0},    {0x003a, 1, 34, _ure_ccl_setup, _URE_GRAPH_MASK},    {0x006f, 1, 35, 0, 0},    {0x0077, 1, 36, 0, 0},    {0x0065, 1, 37, 0, 0},    {0x0072, 1, 38, 0, 0},    {0x003a, 1, 39, _ure_ccl_setup, _URE_LOWER},    {0x0072, 2, 41, 0, 0},    {0x0075, 1, 45, 0, 0},    {0x0069, 1, 42, 0, 0},    {0x006e, 1, 43, 0, 0},    {0x0074, 1, 44, 0, 0},    {0x003a, 1, 45, _ure_ccl_setup, _URE_PRINT_MASK},    {0x006e, 1, 46, 0, 0},    {0x0063, 1, 47, 0, 0},    {0x0074, 1, 48, 0, 0},    {0x003a, 1, 49, _ure_ccl_setup, _URE_PUNCT_MASK},    {0x0070, 1, 50, 0, 0},    {0x0061, 1, 51, 0, 0},    {0x0063, 1, 52, 0, 0},    {0x0065, 1, 53, 0, 0},    {0x003a, 1, 54, _ure_space_setup, _URE_SPACE_MASK},    {0x0070, 1, 55, 0, 0},    {0x0070, 1, 56, 0, 0},    {0x0065, 1, 57, 0, 0},    {0x0072, 1, 58, 0, 0},    {0x003a, 1, 59, _ure_ccl_setup, _URE_UPPER},    {0x0064, 1, 60, 0, 0},    {0x0069, 1, 61, 0, 0},    {0x0067, 1, 62, 0, 0},    {0x0069, 1, 63, 0, 0},    {0x0074, 1, 64, 0, 0},    {0x003a, 1, 65, _ure_xdigit_setup, 0},};/* * Probe for one of the POSIX colon delimited character classes in the static * trie. */static unsigned long_ure_posix_ccl(ucs2_t *cp, unsigned long limit, _ure_symtab_t *sym,               _ure_buffer_t *b){    int i;    unsigned long n;    _ure_trie_t *tp;    ucs2_t *sp, *ep;    /*     * If the number of characters left is less than 7, then this cannot be     * interpreted as one of the colon delimited classes.     */    if (limit < 7)      return 0;    sp = cp;    ep = sp + limit;    tp = cclass_trie;    for (i = 0; sp < ep && i < 8; i++, sp++) {        n = tp->len;        for (; n > 0 && tp->key != *sp; tp++, n--) ;        if (n == 0)          return 0;        if (*sp == ':' && (i == 6 || i == 7)) {            sp++;            break;        }        if (sp + 1 < ep)          tp = cclass_trie + tp->next;    }    if (tp->func == 0)      return 0;    (*tp->func)(sym, tp->mask, b);    return sp - cp;}/* * Construct a list of ranges and return the number of characters consumed. */static unsigned long_ure_cclass(ucs2_t *cp, unsigned long limit, _ure_symtab_t *symp,            _ure_buffer_t *b){    int range_end;    unsigned long n;    ucs2_t *sp, *ep;    ucs4_t c, last;    _ure_ccl_t *cclp;    _ure_range_t range;    sp = cp;    ep = sp + limit;    if (*sp == '^') {      symp->type = _URE_NCCLASS;      sp++;    } else      symp->type = _URE_CCLASS;    for (last = 0, range_end = 0;         b->error == _URE_OK && sp < ep && *sp != ']'; ) {        c = *sp++;        if (c == '\\') {            if (sp == ep) {                /*                 * The EOS was encountered when expecting the reverse solidus                 * to be followed by the character it is escaping.  Set an                 * error code and return the number of characters consumed up                 * to this point.                 */                b->error = _URE_UNEXPECTED_EOS;                return sp - cp;            }            c = *sp++;            switch (c) {              case 'a':                c = 0x07;                break;              case 'b':                c = 0x08;                break;              case 'f':                c = 0x0c;                break;              case 'n':                c = 0x0a;                break;              case 'r':                c = 0x0d;                break;              case 't':                c = 0x09;                break;              case 'v':                c = 0x0b;                break;              case 'p':              case 'P':                sp += _ure_prop_list(sp, ep - sp, &symp->props, b);                /*                 * Invert the bit mask of the properties if this is a negated                 * character class or if 'P' is used to specify a list of                 * character properties that should *not* match in a                 * character class.                 */                if (c == 'P')                  symp->props = ~symp->props;                continue;                break;              case 'x':              case 'X':              case 'u':              case 'U':                if (sp < ep &&                    ((*sp >= '0' && *sp <= '9') ||                     (*sp >= 'A' && *sp <= 'F') ||                     (*sp >= 'a' && *sp <= 'f')))                  sp += _ure_hex(sp, ep - sp, &c);            }        } else if (c == ':') {            /*             * Probe for a POSIX colon delimited character class.             */            sp--;            if ((n = _ure_posix_ccl(sp, ep - sp, symp, b)) == 0)              sp++;            else {                sp += n;                continue;            }        }        cclp = &symp->sym.ccl;        /*         * Check to see if the current character is a low surrogate that needs         * to be combined with a preceding high surrogate.         */        if (last != 0) {            if (c >= 0xdc00 && c <= 0xdfff)              /*               * Construct the UTF16 character code.               */              c = 0x10000 + (((last & 0x03ff) << 10) | (c & 0x03ff));            else {                /*                 * Add the isolated high surrogate to the range.                 */                if (range_end == 1)                  range.max_code = last & 0xffff;                else                  range.min_code = range.max_code = last & 0xffff;                _ure_add_range(cclp, &range, b);                range_end = 0;            }        }        /*         * Clear the last character code.         */        last = 0;        /*         * This slightly awkward code handles the different cases needed to         * construct a range.         */        if (c >= 0xd800 && c <= 0xdbff) {            /*             * If the high surrogate is followed by a range indicator, simply             * add it as the range start.  Otherwise, save it in case the next             * character is a low surrogate.             */            if (*sp == '-') {                sp++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -