ure.c

来自「ldap服务器源码」· C语言代码 · 共 2,132 行 · 第 1/5 页
2,132 行
    buf->stack.slist_used = 0;    buf->expr_used = 0;    for (i = 0; i < buf->symtab_used; i++)      buf->symtab[i].states.slist_used = 0;    buf->symtab_used = 0;    for (i = 0; i < buf->states.states_used; i++) {        buf->states.states[i].st.slist_used = 0;        buf->states.states[i].trans_used = 0;    }    buf->states.states_used = 0;    /*     * Construct the NFA.  If this stage returns a 0, then an error occured or     * an empty expression was passed.     */    if ((state = _ure_re2nfa(re, relen, buf)) == _URE_NOOP)      return 0;    /*     * Do the expression reduction to get the initial DFA.     */    _ure_reduce(state, buf);    /*     * Merge all the equivalent DFA states.     */    _ure_merge_equiv(buf);    /*     * Construct the minimal DFA.     */    dfa = (ure_dfa_t) malloc(sizeof(_ure_dfa_t));    (void) memset((char *) dfa, '\0', sizeof(_ure_dfa_t));    dfa->flags = buf->flags & (_URE_DFA_CASEFOLD|_URE_DFA_BLANKLINE);    /*     * Free up the NFA state groups and transfer the symbols from the buffer     * to the DFA.     */    for (i = 0; i < buf->symtab_size; i++) {        if (buf->symtab[i].states.slist_size > 0)          free((char *) buf->symtab[i].states.slist);    }    dfa->syms = buf->symtab;    dfa->nsyms = buf->symtab_used;    buf->symtab_used = buf->symtab_size = 0;    /*     * Collect the total number of states and transitions needed for the DFA.     */    for (i = state = 0, sp = buf->states.states; i < buf->states.states_used;         i++, sp++) {        if (sp->id == state) {            dfa->nstates++;            dfa->ntrans += sp->trans_used;            state++;        }    }    /*     * Allocate enough space for the states and transitions.     */    dfa->states = (_ure_dstate_t *) malloc(sizeof(_ure_dstate_t) *                                           dfa->nstates);    dfa->trans = (_ure_trans_t *) malloc(sizeof(_ure_trans_t) * dfa->ntrans);    /*     * Actually transfer the DFA states from the buffer.     */    dsp = dfa->states;    tp = dfa->trans;    for (i = state = 0, sp = buf->states.states; i < buf->states.states_used;         i++, sp++) {        if (sp->id == state) {            dsp->trans = tp;            dsp->ntrans = sp->trans_used;            dsp->accepting = sp->accepting;            /*             * Add the transitions for the state.             */            for (j = 0; j < dsp->ntrans; j++, tp++) {                tp->symbol = sp->trans[j].lhs;                tp->next_state = buf->states.states[sp->trans[j].rhs].id;            }            dsp++;            state++;        }    }    return dfa;}voidure_dfa_free(ure_dfa_t dfa){    ucs2_t i;    if (dfa == 0)      return;    for (i = 0; i < dfa->nsyms; i++) {        if ((dfa->syms[i].type == _URE_CCLASS ||             dfa->syms[i].type == _URE_NCCLASS) &&            dfa->syms[i].sym.ccl.ranges_size > 0)          free((char *) dfa->syms[i].sym.ccl.ranges);    }    if (dfa->nsyms > 0)      free((char *) dfa->syms);    if (dfa->nstates > 0)      free((char *) dfa->states);    if (dfa->ntrans > 0)      free((char *) dfa->trans);    free((char *) dfa);}voidure_write_dfa(ure_dfa_t dfa, FILE *out){    ucs2_t i, j, k, h, l;    _ure_dstate_t *sp;    _ure_symtab_t *sym;    _ure_range_t *rp;    if (dfa == 0 || out == 0)      return;    /*     * Write all the different character classes.     */    for (i = 0, sym = dfa->syms; i < dfa->nsyms; i++, sym++) {        if (sym->type == _URE_CCLASS || sym->type == _URE_NCCLASS) {            fprintf(out, "C%hd = ", sym->id);            if (sym->sym.ccl.ranges_used > 0) {                putc('[', out);                if (sym->type == _URE_NCCLASS)                  putc('^', out);            }            if (sym->props != 0) {                if (sym->type == _URE_NCCLASS)                  fprintf(out, "\\P");                else                  fprintf(out, "\\p");                for (k = h = 0; k < 32; k++) {                    if (sym->props & (1 << k)) {                        if (h != 0)                          putc(',', out);                        fprintf(out, "%hd", k + 1);                        h = 1;                    }                }            }            /*             * Dump the ranges.             */            for (k = 0, rp = sym->sym.ccl.ranges;                 k < sym->sym.ccl.ranges_used; k++, rp++) {                /*                 * Check for UTF16 characters.                 */                if (0x10000 <= rp->min_code &&                    rp->min_code <= 0x10ffff) {                    h = (ucs2_t) (((rp->min_code - 0x10000) >> 10) + 0xd800);                    l = (ucs2_t) (((rp->min_code - 0x10000) & 1023) + 0xdc00);                    fprintf(out, "\\x%04hX\\x%04hX", h, l);                } else                  fprintf(out, "\\x%04lX", rp->min_code & 0xffff);                if (rp->max_code != rp->min_code) {                    putc('-', out);                    if (rp->max_code >= 0x10000 &&                        rp->max_code <= 0x10ffff) {                        h = (ucs2_t) (((rp->max_code - 0x10000) >> 10) + 0xd800);                        l = (ucs2_t) (((rp->max_code - 0x10000) & 1023) + 0xdc00);                        fprintf(out, "\\x%04hX\\x%04hX", h, l);                    } else                      fprintf(out, "\\x%04lX", rp->max_code & 0xffff);                }            }            if (sym->sym.ccl.ranges_used > 0)              putc(']', out);            putc('\n', out);        }    }    for (i = 0, sp = dfa->states; i < dfa->nstates; i++, sp++) {        fprintf(out, "S%hd = ", i);        if (sp->accepting) {            fprintf(out, "1 ");            if (sp->ntrans)              fprintf(out, "| ");        }        for (j = 0; j < sp->ntrans; j++) {            if (j > 0)              fprintf(out, "| ");            sym = dfa->syms + sp->trans[j].symbol;            switch (sym->type) {              case _URE_CHAR:                if (0x10000 <= sym->sym.chr && sym->sym.chr <= 0x10ffff) {                    /*                     * Take care of UTF16 characters.                     */                    h = (ucs2_t) (((sym->sym.chr - 0x10000) >> 10) + 0xd800);                    l = (ucs2_t) (((sym->sym.chr - 0x10000) & 1023) + 0xdc00);                    fprintf(out, "\\x%04hX\\x%04hX ", h, l);                } else                  fprintf(out, "\\x%04lX ", sym->sym.chr & 0xffff);                break;              case _URE_ANY_CHAR:                fprintf(out, "<any> ");                break;              case _URE_BOL_ANCHOR:                fprintf(out, "<bol-anchor> ");                break;              case _URE_EOL_ANCHOR:                fprintf(out, "<eol-anchor> ");                break;              case _URE_CCLASS:              case _URE_NCCLASS:                fprintf(out, "[C%hd] ", sym->id);                break;            }            fprintf(out, "S%hd", sp->trans[j].next_state);            if (j + 1 < sp->ntrans)              putc(' ', out);        }        putc('\n', out);    }}#define _ure_issep(cc) ((cc) == '\n' || (cc) == '\r' || (cc) == 0x2028 ||\                        (cc) == 0x2029)inture_exec(ure_dfa_t dfa, int flags, ucs2_t *text, unsigned long textlen,         unsigned long *match_start, unsigned long *match_end){    int i, j, matched, found, skip;    unsigned long ms, me;    ucs4_t c;    ucs2_t *sp, *ep, *lp;    _ure_dstate_t *stp;    _ure_symtab_t *sym;    _ure_range_t *rp;    if (dfa == 0 || text == 0)      return 0;    /*     * Handle the special case of an empty string matching the "^$" pattern.     */    if (textlen == 0 && (dfa->flags & _URE_DFA_BLANKLINE)) {        *match_start = *match_end = 0;        return 1;    }    sp = text;    ep = sp + textlen;    ms = me = ~0;    stp = dfa->states;    for (found = skip = 0; found == 0 && sp < ep; ) {        lp = sp;        c = *sp++;        /*         * Check to see if this is a high surrogate that should be         * combined with a following low surrogate.         */        if (sp < ep && 0xd800 <= c && c <= 0xdbff &&            0xdc00 <= *sp && *sp <= 0xdfff)          c = 0x10000 + (((c & 0x03ff) << 10) | (*sp++ & 0x03ff));        /*         * Determine if the character is non-spacing and should be skipped.         */        if (_ure_matches_properties(_URE_NONSPACING, c) &&            (flags & URE_IGNORE_NONSPACING)) {            sp++;            continue;        }        if (dfa->flags & _URE_DFA_CASEFOLD)          c = _ure_tolower(c);        /*         * See if one of the transitions matches.         */        for (i = 0, matched = 0; matched == 0 && i < stp->ntrans; i++) {            sym = dfa->syms + stp->trans[i].symbol;            switch (sym->type) {              case _URE_ANY_CHAR:                if ((flags & URE_DOT_MATCHES_SEPARATORS) ||                    !_ure_issep(c))                  matched = 1;                break;              case _URE_CHAR:                if (c == sym->sym.chr)                  matched = 1;                break;              case _URE_BOL_ANCHOR:                if (lp == text) {                    sp = lp;                    matched = 1;                } else if (_ure_issep(c)) {                    if (c == '\r' && sp < ep && *sp == '\n')                      sp++;                    lp = sp;                    matched = 1;                }                break;              case _URE_EOL_ANCHOR:                if (_ure_issep(c)) {                    /*                     * Put the pointer back before the separator so the match                     * end position will be correct.  This case will also                     * cause the `sp' pointer to be advanced over the current                     * separator once the match end point has been recorded.                     */                    sp = lp;                    matched = 1;                }                break;              case _URE_CCLASS:              case _URE_NCCLASS:                if (sym->props != 0)                  matched = _ure_matches_properties(sym->props, c);                for (j = 0, rp = sym->sym.ccl.ranges;                     j < sym->sym.ccl.ranges_used; j++, rp++) {                    if (rp->min_code <= c && c <= rp->max_code)                      matched = 1;                }                if (sym->type == _URE_NCCLASS)                  matched = !matched;                break;            }            if (matched) {                if (ms == ~0UL)                  ms = lp - text;                else                  me = sp - text;                stp = dfa->states + stp->trans[i].next_state;                /*                 * If the match was an EOL anchor, adjust the pointer past the                 * separator that caused the match.  The correct match                 * position has been recorded already.                 */                if (sym->type == _URE_EOL_ANCHOR) {                    /*                     * Skip the character that caused the match.                     */                    sp++;                    /*                     * Handle the infamous CRLF situation.                     */                    if (sp < ep && c == '\r' && *sp == '\n')                      sp++;                }            }        }        if (matched == 0) {            if (stp->accepting == 0) {                /*                 * If the last state was not accepting, then reset                 * and start over.                 */                stp = dfa->states;                ms = me = ~0;            } else              /*               * The last state was accepting, so terminate the matching               * loop to avoid more work.               */              found = 1;        } else if (sp == ep) {            if (!stp->accepting) {                /*                 * This ugly hack is to make sure the end-of-line anchors                 * match when the source text hits the end.  This is only done                 * if the last subexpression matches.                 */                for (i = 0; found == 0 && i < stp->ntrans; i++) {                    sym = dfa->syms + stp->trans[i].symbol;                    if (sym->type ==_URE_EOL_ANCHOR) {                        stp = dfa->states + stp->trans[i].next_state;                        if (stp->accepting) {                            me = sp - text;                            found = 1;                        } else                          break;                    }                }            } else {                /*                 * Make sure any conditions that match all the way to the end                 * of the string match.                 */                found = 1;                me = sp - text;            }        }    }    if (found == 0)      ms = me = ~0;    *match_start = ms;    *match_end = me;    return (ms != ~0UL) ? 1 : 0;}
ure.c - 源码说明

本页面展示了「ldap服务器源码」中的 ure.c 源码文件，采用 C语言编程语言编写，共 2,132 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ldap相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?