⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ure.c

📁 ldap服务器源码
💻 C
📖 第 1 页 / 共 5 页
字号:
                range.min_code = c;                range_end = 1;            } else              last = c;        } else if (range_end == 1) {            range.max_code = c;            _ure_add_range(cclp, &range, b);            range_end = 0;        } else {            range.min_code = range.max_code = c;            if (*sp == '-') {                sp++;                range_end = 1;            } else              _ure_add_range(cclp, &range, b);        }    }    if (sp < ep && *sp == ']')      sp++;    else      /*       * The parse was not terminated by the character class close symbol       * (']'), so set an error code.       */      b->error = _URE_CCLASS_OPEN;    return sp - cp;}/* * Probe for a low surrogate hex code. */static unsigned long_ure_probe_ls(ucs2_t *ls, unsigned long limit, ucs4_t *c){    ucs4_t i, code;    ucs2_t *sp, *ep;    for (i = code = 0, sp = ls, ep = sp + limit; i < 4 && sp < ep; sp++) {        if (*sp >= '0' && *sp <= '9')          code = (code << 4) + (*sp - '0');        else if (*sp >= 'A' && *sp <= 'F')          code = (code << 4) + ((*sp - 'A') + 10);        else if (*sp >= 'a' && *sp <= 'f')          code = (code << 4) + ((*sp - 'a') + 10);        else          break;    }    *c = code;    return (0xdc00 <= code && code <= 0xdfff) ? sp - ls : 0;}static unsigned long_ure_compile_symbol(ucs2_t *sym, unsigned long limit, _ure_symtab_t *symp,                    _ure_buffer_t *b){    ucs4_t c;    ucs2_t *sp, *ep;    sp = sym;    ep = sym + limit;    if ((c = *sp++) == '\\') {        if (sp == ep) {            /*             * The EOS was encountered when expecting the reverse solidus to             * be followed by the character it is escaping.  Set an error code             * and return the number of characters consumed up to this point.             */            b->error = _URE_UNEXPECTED_EOS;            return sp - sym;        }        c = *sp++;        switch (c) {          case 'p':          case 'P':            symp->type = (c == 'p') ? _URE_CCLASS : _URE_NCCLASS;            sp += _ure_prop_list(sp, ep - sp, &symp->props, b);            break;          case 'a':            symp->type = _URE_CHAR;            symp->sym.chr = 0x07;            break;          case 'b':            symp->type = _URE_CHAR;            symp->sym.chr = 0x08;            break;          case 'f':            symp->type = _URE_CHAR;            symp->sym.chr = 0x0c;            break;          case 'n':            symp->type = _URE_CHAR;            symp->sym.chr = 0x0a;            break;          case 'r':            symp->type = _URE_CHAR;            symp->sym.chr = 0x0d;            break;          case 't':            symp->type = _URE_CHAR;            symp->sym.chr = 0x09;            break;          case 'v':            symp->type = _URE_CHAR;            symp->sym.chr = 0x0b;            break;          case 'x':          case 'X':          case 'u':          case 'U':            /*             * Collect between 1 and 4 digits representing a UCS2 code.  Fall             * through to the next case.             */            if (sp < ep &&                ((*sp >= '0' && *sp <= '9') ||                 (*sp >= 'A' && *sp <= 'F') ||                 (*sp >= 'a' && *sp <= 'f')))              sp += _ure_hex(sp, ep - sp, &c);            /* FALLTHROUGH */          default:            /*             * Simply add an escaped character here.             */            symp->type = _URE_CHAR;            symp->sym.chr = c;        }    } else if (c == '^' || c == '$')      /*       * Handle the BOL and EOL anchors.  This actually consists simply of       * setting a flag that indicates that the user supplied anchor match       * function should be called.  This needs to be done instead of simply       * matching line/paragraph separators because beginning-of-text and       * end-of-text tests are needed as well.       */      symp->type = (c == '^') ? _URE_BOL_ANCHOR : _URE_EOL_ANCHOR;    else if (c == '[')      /*       * Construct a character class.       */      sp += _ure_cclass(sp, ep - sp, symp, b);    else if (c == '.')      symp->type = _URE_ANY_CHAR;    else {        symp->type = _URE_CHAR;        symp->sym.chr = c;    }    /*     * If the symbol type happens to be a character and is a high surrogate,     * then probe forward to see if it is followed by a low surrogate that     * needs to be added.     */    if (sp < ep && symp->type == _URE_CHAR &&        0xd800 <= symp->sym.chr && symp->sym.chr <= 0xdbff) {        if (0xdc00 <= *sp && *sp <= 0xdfff) {            symp->sym.chr = 0x10000 + (((symp->sym.chr & 0x03ff) << 10) |                                       (*sp & 0x03ff));            sp++;        } else if (*sp == '\\' && (*(sp + 1) == 'x' || *(sp + 1) == 'X' ||                                 *(sp + 1) == 'u' || *(sp + 1) == 'U')) {            sp += _ure_probe_ls(sp + 2, ep - (sp + 2), &c);            if (0xdc00 <= c && c <= 0xdfff) {                /*                 * Take into account the \[xu] in front of the hex code.                 */                sp += 2;                symp->sym.chr = 0x10000 + (((symp->sym.chr & 0x03ff) << 10) |                                           (c & 0x03ff));            }        }    }    /*     * Last, make sure any _URE_CHAR type symbols are changed to lower case if     * the `casefold' flag is set.     */    if ((b->flags & _URE_DFA_CASEFOLD) && symp->type == _URE_CHAR)      symp->sym.chr = _ure_tolower(symp->sym.chr);    /*     * If the symbol constructed is anything other than one of the anchors,     * make sure the _URE_DFA_BLANKLINE flag is removed.     */    if (symp->type != _URE_BOL_ANCHOR && symp->type != _URE_EOL_ANCHOR)      b->flags &= ~_URE_DFA_BLANKLINE;    /*     * Return the number of characters consumed.     */    return sp - sym;}static int_ure_sym_neq(_ure_symtab_t *a, _ure_symtab_t *b){    if (a->type != b->type || a->mods != b->mods || a->props != b->props)      return 1;    if (a->type == _URE_CCLASS || a->type == _URE_NCCLASS) {        if (a->sym.ccl.ranges_used != b->sym.ccl.ranges_used)          return 1;        if (a->sym.ccl.ranges_used > 0 &&            memcmp((char *) a->sym.ccl.ranges, (char *) b->sym.ccl.ranges,                   sizeof(_ure_range_t) * a->sym.ccl.ranges_used) != 0)          return 1;    } else if (a->type == _URE_CHAR && a->sym.chr != b->sym.chr)      return 1;    return 0;}/* * Construct a symbol, but only keep unique symbols. */static ucs2_t_ure_make_symbol(ucs2_t *sym, unsigned long limit, unsigned long *consumed,                 _ure_buffer_t *b){    ucs2_t i;    _ure_symtab_t *sp, symbol;    /*     * Build the next symbol so we can test to see if it is already in the     * symbol table.     */    (void) memset((char *) &symbol, '\0', sizeof(_ure_symtab_t));    *consumed = _ure_compile_symbol(sym, limit, &symbol, b);    /*     * Check to see if the symbol exists.     */    for (i = 0, sp = b->symtab;         i < b->symtab_used && _ure_sym_neq(&symbol, sp); i++, sp++) ;    if (i < b->symtab_used) {        /*         * Free up any ranges used for the symbol.         */        if ((symbol.type == _URE_CCLASS || symbol.type == _URE_NCCLASS) &&            symbol.sym.ccl.ranges_size > 0)          free((char *) symbol.sym.ccl.ranges);        return b->symtab[i].id;    }    /*     * Need to add the new symbol.     */    if (b->symtab_used == b->symtab_size) {        if (b->symtab_size == 0)          b->symtab = (_ure_symtab_t *) malloc(sizeof(_ure_symtab_t) << 3);        else          b->symtab = (_ure_symtab_t *)              realloc((char *) b->symtab,                      sizeof(_ure_symtab_t) * (b->symtab_size + 8));        sp = b->symtab + b->symtab_size;        (void) memset((char *) sp, '\0', sizeof(_ure_symtab_t) << 3);        b->symtab_size += 8;    }    symbol.id = b->symtab_used++;    (void) AC_MEMCPY((char *) &b->symtab[symbol.id], (char *) &symbol,                  sizeof(_ure_symtab_t));    return symbol.id;}/************************************************************************* * * End symbol parse functions. * *************************************************************************/static ucs2_t_ure_make_expr(ucs2_t type, ucs2_t lhs, ucs2_t rhs, _ure_buffer_t *b){    ucs2_t i;    if (b == 0)      return _URE_NOOP;    /*     * Determine if the expression already exists or not.     */    for (i = 0; i < b->expr_used; i++) {        if (b->expr[i].type == type && b->expr[i].lhs == lhs &&            b->expr[i].rhs == rhs)          break;    }    if (i < b->expr_used)      return i;    /*     * Need to add a new expression.     */    if (b->expr_used == b->expr_size) {        if (b->expr_size == 0)          b->expr = (_ure_elt_t *) malloc(sizeof(_ure_elt_t) << 3);        else          b->expr = (_ure_elt_t *)              realloc((char *) b->expr,                      sizeof(_ure_elt_t) * (b->expr_size + 8));        b->expr_size += 8;    }    b->expr[b->expr_used].onstack = 0;    b->expr[b->expr_used].type = type;    b->expr[b->expr_used].lhs = lhs;    b->expr[b->expr_used].rhs = rhs;    return b->expr_used++;}static unsigned char spmap[] = {    0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,    0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,};#define _ure_isspecial(cc) ((cc) > 0x20 && (cc) < 0x7f && \                            (spmap[(cc) >> 3] & (1 << ((cc) & 7))))/* * Convert the regular expression into an NFA in a form that will be easy to * reduce to a DFA.  The starting state for the reduction will be returned. */static ucs2_t_ure_re2nfa(ucs2_t *re, unsigned long relen, _ure_buffer_t *b){    ucs2_t c, state, top, sym, *sp, *ep;    unsigned long used;    state = _URE_NOOP;    sp = re;    ep = sp + relen;    while (b->error == _URE_OK && sp < ep) {        c = *sp++;        switch (c) {          case '(':            _ure_push(_URE_PAREN, b);            break;          case ')':            /*             * Check for the case of too many close parentheses.             */            if (_ure_peek(b) == _URE_NOOP) {                b->error = _URE_UNBALANCED_GROUP;                break;            }            while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)              /*               * Make an expression with the AND or OR operator and its right               * hand side.               */              state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);            /*             * Remove the _URE_PAREN off the stack.             */            (void) _ure_pop(b);            break;          case '*':            state = _ure_make_expr(_URE_STAR, state, _URE_NOOP, b);            break;          case '+':            state = _ure_make_expr(_URE_PLUS, state, _URE_NOOP, b);            break;          case '?':            state = _ure_make_expr(_URE_QUEST, state, _URE_NOOP, b);            break;          case '|':            while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)              /*               * Make an expression with the AND or OR operator and its right               * hand side.               */              state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);            _ure_push(state, b);            _ure_push(_URE_OR, b);            break;          default:            sp--;            sym = _ure_make_symbol(sp, ep - sp, &used, b);            sp += used;            state = _ure_make_expr(_URE_SYMBOL, sym, _URE_NOOP, b);            break;        }        if (c != '(' && c != '|' && sp < ep &&            (!_ure_isspecial(*sp) || *sp == '(')) {            _ure_push(state, b);            _ure_push(_URE_AND, b);        }    }    while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)      /*       * Make an expression with the AND or OR operator and its right       * hand side.       */      state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);    if (b->stack.slist_used > 0)      b->error = _URE_UNBALANCED_GROUP;    return (b->error == _URE_OK) ? state : _URE_NOOP;}static void_ure_add_symstate(ucs2_t sym, ucs2_t state, _ure_buffer_t *b){    ucs2_t i, *stp;    _ure_symtab_t *sp;    /*     * Locate the symbol in the symbol table so the state can be added.     * If the symbol doesn't exist, then a real problem exists.     */    for (i = 0, sp = b->symtab; i < b->symtab_used && sym != sp->id;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -