⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gnuregex.c

📁 -
💻 C
📖 第 1 页 / 共 5 页
字号:
	    case '=':		BUF_PUSH(at_dot);		break;	    case 's':		laststart = b;		PATFETCH(c);		BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);		break;	    case 'S':		laststart = b;		PATFETCH(c);		BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);		break;#endif /* emacs */	    case 'w':		laststart = b;		BUF_PUSH(wordchar);		break;	    case 'W':		laststart = b;		BUF_PUSH(notwordchar);		break;	    case '<':		BUF_PUSH(wordbeg);		break;	    case '>':		BUF_PUSH(wordend);		break;	    case 'b':		BUF_PUSH(wordbound);		break;	    case 'B':		BUF_PUSH(notwordbound);		break;	    case '`':		BUF_PUSH(begbuf);		break;	    case '\'':		BUF_PUSH(endbuf);		break;	    case '1':	    case '2':	    case '3':	    case '4':	    case '5':	    case '6':	    case '7':	    case '8':	    case '9':		if (syntax & RE_NO_BK_REFS)		    goto normal_char;		c1 = c - '0';		if (c1 > regnum)		    return REG_ESUBREG;		/* Can't back reference to a subexpression if inside of it.  */		if (group_in_compile_stack(compile_stack, c1))		    goto normal_char;		laststart = b;		BUF_PUSH_2(duplicate, c1);		break;	    case '+':	    case '?':		if (syntax & RE_BK_PLUS_QM)		    goto handle_plus;		else		    goto normal_backslash;	    default:	      normal_backslash:		/* You might think it would be useful for \ to mean		 * not to translate; but if we don't translate it		 * it will never match anything.  */		c = TRANSLATE(c);		goto normal_char;	    }	    break;	default:	    /* Expects the character in `c'.  */	  normal_char:	    /* If no exactn currently being built.  */	    if (!pending_exact	    /* If last exactn not at current position.  */		|| pending_exact + *pending_exact + 1 != b	    /* We have only one byte following the exactn for the count.  */		|| *pending_exact == (1 << BYTEWIDTH) - 1	    /* If followed by a repetition operator.  */		|| *p == '*' || *p == '^'		|| ((syntax & RE_BK_PLUS_QM)		    ? 
*p == '\\' && (p[1] == '+' || p[1] == '?')
                    : (*p == '+' || *p == '?'))
                || ((syntax & RE_INTERVALS)
                    && ((syntax & RE_NO_BK_BRACES)
                        ? *p == '{'
                        : (p[0] == '\\' && p[1] == '{')))) {
                /* Start building a new exactn.  */
                laststart = b;
                BUF_PUSH_2(exactn, 0);
                /* Presumably BUF_PUSH_2 has advanced `b' past both bytes, so
                 * `b - 1' is the count byte that was just pushed as 0.  */
                pending_exact = b - 1;
            }
            /* Append the character and bump the byte `pending_exact'
             * points at.  */
            BUF_PUSH(c);
            (*pending_exact)++;
            break;
        }                       /* switch (c) */
    }                           /* while p != pend */

    /* Through the pattern now.  */
    if (fixup_alt_jump)
        STORE_JUMP(jump_past_alt, fixup_alt_jump, b);

    /* NOTE(review): this return happens before the free() of
     * compile_stack.stack just below, so that free() is not reached on
     * this path -- confirm whether the stack is released elsewhere.  */
    if (!COMPILE_STACK_EMPTY)
        return REG_EPAREN;

    free(compile_stack.stack);

    /* We have succeeded; set the length of the buffer.  */
    bufp->used = b - bufp->buffer;

#ifdef DEBUG
    if (debug) {
        DEBUG_PRINT1("\nCompiled pattern: ");
        print_compiled_pattern(bufp);
    }
#endif /* DEBUG */

    return REG_NOERROR;
}                               /* regex_compile */

/* Subroutines for `regex_compile'.  */

/* Store OP at LOC followed by two-byte integer parameter ARG.
 *
 * Writes OP (narrowed to unsigned char) at LOC[0] and hands LOC + 1 to
 * STORE_NUMBER for ARG.  No bounds checking is done here; the caller is
 * responsible for LOC having room for the opcode and its operand.  */
static void
store_op1(op, loc, arg)
     re_opcode_t op;
     unsigned char *loc;
     int arg;
{
    *loc = (unsigned char) op;
    STORE_NUMBER(loc + 1, arg);
}

/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.
 *
 * ARG1 is stored starting at LOC + 1 and ARG2 starting at LOC + 3.  */
static void
store_op2(op, loc, arg1, arg2)
     re_opcode_t op;
     unsigned char *loc;
     int arg1, arg2;
{
    *loc = (unsigned char) op;
    STORE_NUMBER(loc + 1, arg1);
    STORE_NUMBER(loc + 3, arg2);
}

/* Copy the bytes from LOC to END to open up three bytes of space at LOC
 * for OP followed by two-byte integer parameter ARG.
 *
 * The bytes in [LOC, END) are moved up by three positions, so the three
 * bytes starting at END are written; no check for that room is made
 * here.  */
static void
insert_op1(op, loc, arg, end)
     re_opcode_t op;
     unsigned char *loc;
     int arg;
     unsigned char *end;
{
    register unsigned char *pfrom = end;
    register unsigned char *pto = end + 3;

    /* Source and destination overlap, so copy from the high end downward;
     * that way no byte is overwritten before it has been read.  */
    while (pfrom != loc)
        *--pto = *--pfrom;

    store_op1(op, loc, arg);
}

/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  
*/static voidinsert_op2(op, loc, arg1, arg2, end)     re_opcode_t op;     unsigned char *loc;     int arg1, arg2;     unsigned char *end;{    register unsigned char *pfrom = end;    register unsigned char *pto = end + 5;    while (pfrom != loc)	*--pto = *--pfrom;    store_op2(op, loc, arg1, arg2);}/* P points to just after a ^ in PATTERN.  Return true if that ^ comes * after an alternative or a begin-subexpression.  We assume there is at * least one character before the ^.  */static booleanat_begline_loc_p(pattern, p, syntax)     const char *pattern, *p;     reg_syntax_t syntax;{    const char *prev = p - 2;    boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';    return    /* After a subexpression?  */	(*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))    /* After an alternative?  */	|| (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));}/* The dual of at_begline_loc_p.  This one is for $.  We assume there is * at least one character after the $, i.e., `P < PEND'.  */static booleanat_endline_loc_p(p, pend, syntax)     const char *p, *pend;     int syntax;{    const char *next = p;    boolean next_backslash = *next == '\\';    const char *next_next = p + 1 < pend ? p + 1 : NULL;    return    /* Before a subexpression?  */	(syntax & RE_NO_BK_PARENS ? *next == ')'	: next_backslash && next_next && *next_next == ')')    /* Before an alternative?  */	|| (syntax & RE_NO_BK_VBAR ? *next == '|'	: next_backslash && next_next && *next_next == '|');}/* Returns true if REGNUM is in one of COMPILE_STACK's elements and  * false if it's not.  
*/
/* COMPILE_STACK is received by value.  The elements are scanned from
 * index `avail - 1' down to 0, comparing each element's `regnum' field
 * with REGNUM.  */
static boolean
group_in_compile_stack(compile_stack, regnum)
     compile_stack_type compile_stack;
     regnum_t regnum;
{
    int this_element;

    for (this_element = compile_stack.avail - 1;
         this_element >= 0;
         this_element--)
        if (compile_stack.stack[this_element].regnum == regnum)
            return true;

    return false;
}

/* Read the ending character of a range (in a bracket expression) from the
 * uncompiled pattern *P_PTR (which ends at PEND).  We assume the
 * starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
 * Then we set the translation of all bits between the starting and
 * ending characters (inclusive) in the compiled pattern B.
 *
 * Return an error code: REG_ERANGE if the pattern ends before the range
 * end (in which case *P_PTR is left unchanged), REG_ERANGE or REG_NOERROR
 * for an empty range depending on RE_NO_EMPTY_RANGES in SYNTAX, and
 * REG_NOERROR otherwise.
 *
 * We use these short variable names so we can use the same macros as
 * `regex_compile' itself.  */
static reg_errcode_t
compile_range(p_ptr, pend, translate, syntax, b)
     const char **p_ptr, *pend;
     char *translate;
     reg_syntax_t syntax;
     unsigned char *b;
{
    unsigned this_char;
    const char *p = *p_ptr;
    int range_start, range_end;

    if (p == pend)
        return REG_ERANGE;

    /* Even though the pattern is a signed `char *', we need to fetch
     * with unsigned char *'s; if the high bit of the pattern character
     * is set, the range endpoints will be negative if we fetch using a
     * signed char *.
     *
     * We also want to fetch the endpoints without translating them; the
     * appropriate translation is done in the bit-setting loop below.  */
    range_start = ((unsigned char *) p)[-2];
    range_end = ((unsigned char *) p)[0];

    /* Have to increment the pointer into the pattern string, so the
     * caller isn't still at the ending character.  */
    (*p_ptr)++;

    /* If the start is after the end, the range is empty.  */
    if (range_start > range_end)
        return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;

    /* Here we see why `this_char' has to be larger than an `unsigned
     * char' -- the range is inclusive, so if `range_end' == 0xff
     * (assuming 8-bit characters), we would otherwise go into an infinite
     * loop, since all characters <= 0xff.
     *
     * Both endpoints were fetched through `unsigned char *', so they are
     * non-negative and the unsigned/int comparison below is safe.  */
    for (this_char = range_start; this_char <= range_end; this_char++) {
        SET_LIST_BIT(TRANSLATE(this_char));
    }

    return REG_NOERROR;
}

/* Failure stack declarations and macros; both re_compile_fastmap and
 * re_match_2 use a failure stack.  These have to be macros because of
 * REGEX_ALLOCATE.  */

/* Number of failure points for which to initially allocate space
 * when matching.  If this number is exceeded, we allocate more
 * space, so it is not a hard limit.  */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 5
#endif

/* Roughly the maximum number of failure points on the stack.  Would be
 * exactly that if always used MAX_FAILURE_SPACE each time we failed.
 * This is a variable only so users of regex can assign to it; we never
 * change it ourselves.  */
int re_max_failures = 2000;

/* One slot of the failure stack.  */
typedef const unsigned char *fail_stack_elt_t;

/* The failure stack: `stack' holds `size' slots, of which the first
 * `avail' are in use.  */
typedef struct {
    fail_stack_elt_t *stack;
    unsigned size;
    unsigned avail;             /* Offset of next open position.  */
} fail_stack_type;

/* These helpers refer to a variable named `fail_stack' (or a pointer
 * named `fail_stack_ptr') in the scope where they are expanded.  */
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
/* NOTE(review): `avail' is the offset of the next open position, so this
 * names the slot one past the most recently pushed item rather than the
 * item itself -- confirm whether `avail - 1' was intended before relying
 * on this macro.  */
#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])

/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.
 *
 * Because of that `return', this can only be expanded inside a function
 * for which returning -2 is meaningful.  */
#define INIT_FAIL_STACK()                                               \
  do {                                                                  \
    fail_stack.stack = (fail_stack_elt_t *)                             \
      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
                                                                        \
    if (fail_stack.stack == NULL)                                       \
      return -2;                                                        \
                                                                        \
    fail_stack.size = INIT_FAILURE_ALLOC;                               \
    fail_stack.avail = 0;                                               \
  } while (0)

/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 
*
 * Return 1 if succeeds, and 0 if either ran out of memory
 * allocating space for it or it was already too large.
 *
 * REGEX_REALLOCATE requires `destination' be declared.
 *
 * The size limit is tested before doubling, which is why the bound is
 * only approximate.  The result of REGEX_REALLOCATE is stored into
 * `stack' before it is tested, so on failure `stack' is left NULL.
 * NOTE(review): whether the previous block is then lost depends on how
 * REGEX_REALLOCATE is defined -- confirm.  */
#define DOUBLE_FAIL_STACK(fail_stack)                                   \
  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
   ? 0                                                                  \
   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
        REGEX_REALLOCATE ((fail_stack).stack,                           \
          (fail_stack).size * sizeof (fail_stack_elt_t),                \
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
                                                                        \
      (fail_stack).stack == NULL                                        \
      ? 0                                                               \
      : ((fail_stack).size <<= 1,                                       \
         1)))

/* Push PATTERN_OP on FAIL_STACK.
 *
 * Return 1 if was able to do so and 0 if ran out of memory allocating
 * space to do so.
 *
 * The fullness test goes through FAIL_STACK_FULL (), which always looks
 * at the variable named `fail_stack', while the push itself uses the
 * macro argument; the two agree only when the argument is that same
 * variable.  */
#define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
  ((FAIL_STACK_FULL ()                                                  \
    && !DOUBLE_FAIL_STACK (fail_stack))                                 \
    ? 0                                                                 \
    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,           \
       1))

/* This pushes an item onto the failure stack.  Must be a four-byte
 * value.  Assumes the variable `fail_stack'.  Probably should only
 * be called from within `PUSH_FAILURE_POINT'.
 *
 * No capacity check is made here, and neither ITEM nor the expansion is
 * parenthesized, so the cast applies only to the leading operand of a
 * compound ITEM expression.  */
#define PUSH_FAILURE_ITEM(item)                                         \
  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item

/* The complement operation.  Assumes `fail_stack' is nonempty.
 *
 * Decrements `avail' and yields the slot it then refers to.  */
#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging.
 *
 * Without DEBUG both macros expand to nothing, so their arguments are
 * not evaluated.  */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_ITEM
#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif

/* Push the information about the state we will need
 * if we ever fail back to it.
 *
 * Requires variables fail_stack, regstart, regend, reg_info, and
 * num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
 * declared.
 *
 * Does `return FAILURE_CODE' if runs out of memory.  
*/#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\  do {									\    char *destination;							\    /* Must be int, so when we don't save any registers, the arithmetic	\       of 0 + -1 isn't done as unsigned.  */				\    int this_reg;							\    									\    DEBUG_STATEMENT (failure_id++);					\    DEBUG_STATEMENT (nfailure_points_pushed++);				\    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\    DEBUG_PRINT2 ("  Bef

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -