⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gnuregex.c

📁 -
💻 C
📖 第 1 页 / 共 5 页
字号:
			    return ret;		    } else if (p[0] == '-' && p[1] != ']') {	/* This handles ranges made up of characters only.  */			reg_errcode_t ret;			/* Move past the `-'.  */			PATFETCH(c1);			ret = compile_range(&p, pend, translate, syntax, b);			if (ret != REG_NOERROR)			    return ret;		    }		    /* See if we're at the beginning of a possible character		     * class.  */		    else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') {	/* Leave room for the null.  */			char str[CHAR_CLASS_MAX_LENGTH + 1];			PATFETCH(c);			c1 = 0;			/* If pattern is `[[:'.  */			if (p == pend)			    return REG_EBRACK;			for (;;) {			    PATFETCH(c);			    if (c == ':' || c == ']' || p == pend				|| c1 == CHAR_CLASS_MAX_LENGTH)				break;			    str[c1++] = c;			}			str[c1] = '\0';			/* If isn't a word bracketed by `[:' and:`]':			 * undo the ending character, the letters, and leave 			 * the leading `:' and `[' (but set bits for them).  */			if (c == ':' && *p == ']') {			    int ch;			    boolean is_alnum = STREQ(str, "alnum");			    boolean is_alpha = STREQ(str, "alpha");			    boolean is_blank = STREQ(str, "blank");			    boolean is_cntrl = STREQ(str, "cntrl");			    boolean is_digit = STREQ(str, "digit");			    boolean is_graph = STREQ(str, "graph");			    boolean is_lower = STREQ(str, "lower");			    boolean is_print = STREQ(str, "print");			    boolean is_punct = STREQ(str, "punct");			    boolean is_space = STREQ(str, "space");			    boolean is_upper = STREQ(str, "upper");			    boolean is_xdigit = STREQ(str, "xdigit");			    if (!IS_CHAR_CLASS(str))				return REG_ECTYPE;			    /* Throw away the ] at the end of the character			     * class.  */			    PATFETCH(c);			    if (p == pend)				return REG_EBRACK;			    for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {				if ((is_alnum && ISALNUM(ch))				    || (is_alpha && ISALPHA(ch))				    || (is_blank && ISBLANK(ch))				    || (is_cntrl && ISCNTRL(ch))				    || (is_digit && ISDIGIT(ch))				    || (is_graph && ISGRAPH(ch))				    || (is_lower && ISLOWER(ch))				    || (is_print && ISPRINT(ch))				    || (is_punct && ISPUNCT(ch))				    || (is_space && ISSPACE(ch))				    || (is_upper && ISUPPER(ch))				    || (is_xdigit && ISXDIGIT(ch)))				    SET_LIST_BIT(ch);			    }			    had_char_class = true;			} else {			    c1++;			    while (c1--)				PATUNFETCH;			    SET_LIST_BIT('[');			    SET_LIST_BIT(':');			    had_char_class = false;			}		    } else {			had_char_class = false;			SET_LIST_BIT(c);		    }		}		/* Discard any (non)matching list bytes that are all 0 at the		 * end of the map.  Decrease the map-length byte too.  */		while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)		    b[-1]--;		b += b[-1];	    }	    break;	case '(':	    if (syntax & RE_NO_BK_PARENS)		goto handle_open;	    else		goto normal_char;	case ')':	    if (syntax & RE_NO_BK_PARENS)		goto handle_close;	    else		goto normal_char;	case '\n':	    if (syntax & RE_NEWLINE_ALT)		goto handle_alt;	    else		goto normal_char;	case '|':	    if (syntax & RE_NO_BK_VBAR)		goto handle_alt;	    else		goto normal_char;	case '{':	    if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)		goto handle_interval;	    else		goto normal_char;	case '\\':	    if (p == pend)		return REG_EESCAPE;	    /* Do not translate the character after the \, so that we can	     * distinguish, e.g., \B from \b, even if we normally would	     * translate, e.g., B to b.  */	    PATFETCH_RAW(c);	    switch (c) {	    case '(':		if (syntax & RE_NO_BK_PARENS)		    goto normal_backslash;	      handle_open:		bufp->re_nsub++;		regnum++;		if (COMPILE_STACK_FULL) {		    RETALLOC(compile_stack.stack, compile_stack.size << 1,			compile_stack_elt_t);		    if (compile_stack.stack == NULL)			return REG_ESPACE;		    compile_stack.size <<= 1;		}		/* These are the values to restore when we hit end of this		 * group.  They are all relative offsets, so that if the		 * whole pattern moves because of realloc, they will still		 * be valid.  */		COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;		COMPILE_STACK_TOP.fixup_alt_jump		    = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;		COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;		COMPILE_STACK_TOP.regnum = regnum;		/* We will eventually replace the 0 with the number of		 * groups inner to this one.  But do not push a		 * start_memory for groups beyond the last one we can		 * represent in the compiled pattern.  */		if (regnum <= MAX_REGNUM) {		    COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;		    BUF_PUSH_3(start_memory, regnum, 0);		}		compile_stack.avail++;		fixup_alt_jump = 0;		laststart = 0;		begalt = b;		/* If we've reached MAX_REGNUM groups, then this open		 * won't actually generate any code, so we'll have to		 * clear pending_exact explicitly.  */		pending_exact = 0;		break;	    case ')':		if (syntax & RE_NO_BK_PARENS)		    goto normal_backslash;		if (COMPILE_STACK_EMPTY) {		    if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)			goto normal_backslash;		    else			return REG_ERPAREN;		}	      handle_close:		if (fixup_alt_jump) {	/* Push a dummy failure point at the end of the					 * alternative for a possible future					 * `pop_failure_jump' to pop.  See comments at					 * `push_dummy_failure' in `re_match_2'.  */		    BUF_PUSH(push_dummy_failure);		    /* We allocated space for this jump when we assigned		     * to `fixup_alt_jump', in the `handle_alt' case below.  */		    STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);		}		/* See similar code for backslashed left paren above.  */		if (COMPILE_STACK_EMPTY) {		    if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)			goto normal_char;		    else			return REG_ERPAREN;		}		/* Since we just checked for an empty stack above, this		 * ``can't happen''.  */		assert(compile_stack.avail != 0);		{		    /* We don't just want to restore into `regnum', because		     * later groups should continue to be numbered higher,		     * as in `(ab)c(de)' -- the second group is #2.  */		    regnum_t this_group_regnum;		    compile_stack.avail--;		    begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;		    fixup_alt_jump			= COMPILE_STACK_TOP.fixup_alt_jump			? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1			: 0;		    laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;		    this_group_regnum = COMPILE_STACK_TOP.regnum;		    /* If we've reached MAX_REGNUM groups, then this open		     * won't actually generate any code, so we'll have to		     * clear pending_exact explicitly.  */		    pending_exact = 0;		    /* We're at the end of the group, so now we know how many		     * groups were inside this one.  */		    if (this_group_regnum <= MAX_REGNUM) {			unsigned char *inner_group_loc			= bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;			*inner_group_loc = regnum - this_group_regnum;			BUF_PUSH_3(stop_memory, this_group_regnum,			    regnum - this_group_regnum);		    }		}		break;	    case '|':		/* `\|'.  */		if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)		    goto normal_backslash;	      handle_alt:		if (syntax & RE_LIMITED_OPS)		    goto normal_char;		/* Insert before the previous alternative a jump which		 * jumps to this alternative if the former fails.  */		GET_BUFFER_SPACE(3);		INSERT_JUMP(on_failure_jump, begalt, b + 6);		pending_exact = 0;		b += 3;		/* The alternative before this one has a jump after it		 * which gets executed if it gets matched.  Adjust that		 * jump so it will jump to this alternative's analogous		 * jump (put in below, which in turn will jump to the next		 * (if any) alternative's such jump, etc.).  The last such		 * jump jumps to the correct final destination.  A picture:		 * _____ _____ 		 * |   | |   |   		 * |   v |   v 		 * a | b   | c   		 * 		 * If we are at `b', then fixup_alt_jump right now points to a		 * three-byte space after `a'.  We'll put in the jump, set		 * fixup_alt_jump to right after `b', and leave behind three		 * bytes which we'll fill in when we get to after `c'.  */		if (fixup_alt_jump)		    STORE_JUMP(jump_past_alt, fixup_alt_jump, b);		/* Mark and leave space for a jump after this alternative,		 * to be filled in later either by next alternative or		 * when know we're at the end of a series of alternatives.  */		fixup_alt_jump = b;		GET_BUFFER_SPACE(3);		b += 3;		laststart = 0;		begalt = b;		break;	    case '{':		/* If \{ is a literal.  */		if (!(syntax & RE_INTERVALS)		/* If we're at `\{' and it's not the open-interval 		 * operator.  */		    || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))		    || (p - 2 == pattern && p == pend))		    goto normal_backslash;	      handle_interval:		{		    /* If got here, then the syntax allows intervals.  */		    /* At least (most) this many matches must be made.  */		    int lower_bound = -1, upper_bound = -1;		    beg_interval = p - 1;		    if (p == pend) {			if (syntax & RE_NO_BK_BRACES)			    goto unfetch_interval;			else			    return REG_EBRACE;		    }		    GET_UNSIGNED_NUMBER(lower_bound);		    if (c == ',') {			GET_UNSIGNED_NUMBER(upper_bound);			if (upper_bound < 0)			    upper_bound = RE_DUP_MAX;		    } else			/* Interval such as `{1}' => match exactly once. */			upper_bound = lower_bound;		    if (lower_bound < 0 || upper_bound > RE_DUP_MAX			|| lower_bound > upper_bound) {			if (syntax & RE_NO_BK_BRACES)			    goto unfetch_interval;			else			    return REG_BADBR;		    }		    if (!(syntax & RE_NO_BK_BRACES)) {			if (c != '\\')			    return REG_EBRACE;			PATFETCH(c);		    }		    if (c != '}') {			if (syntax & RE_NO_BK_BRACES)			    goto unfetch_interval;			else			    return REG_BADBR;		    }		    /* We just parsed a valid interval.  */		    /* If it's invalid to have no preceding re.  */		    if (!laststart) {			if (syntax & RE_CONTEXT_INVALID_OPS)			    return REG_BADRPT;			else if (syntax & RE_CONTEXT_INDEP_OPS)			    laststart = b;			else			    goto unfetch_interval;		    }		    /* If the upper bound is zero, don't want to succeed at		     * all; jump from `laststart' to `b + 3', which will be		     * the end of the buffer after we insert the jump.  */		    if (upper_bound == 0) {			GET_BUFFER_SPACE(3);			INSERT_JUMP(jump, laststart, b + 3);			b += 3;		    }		    /* Otherwise, we have a nontrivial interval.  When		     * we're all done, the pattern will look like:		     * set_number_at <jump count> <upper bound>		     * set_number_at <succeed_n count> <lower bound>		     * succeed_n <after jump addr> <succed_n count>		     * <body of loop>		     * jump_n <succeed_n addr> <jump count>		     * (The upper bound and `jump_n' are omitted if		     * `upper_bound' is 1, though.)  */		    else {	/* If the upper bound is > 1, we need to insert				 * more at the end of the loop.  */			unsigned nbytes = 10 + (upper_bound > 1) * 10;			GET_BUFFER_SPACE(nbytes);			/* Initialize lower bound of the `succeed_n', even			 * though it will be set during matching by its			 * attendant `set_number_at' (inserted next),			 * because `re_compile_fastmap' needs to know.			 * Jump to the `jump_n' we might insert below.  */			INSERT_JUMP2(succeed_n, laststart,			    b + 5 + (upper_bound > 1) * 5,			    lower_bound);			b += 5;			/* Code to initialize the lower bound.  Insert 			 * before the `succeed_n'.  The `5' is the last two			 * bytes of this `set_number_at', plus 3 bytes of			 * the following `succeed_n'.  */			insert_op2(set_number_at, laststart, 5, lower_bound, b);			b += 5;			if (upper_bound > 1) {	/* More than one repetition is allowed, so						 * append a backward jump to the `succeed_n'						 * that starts this interval.						 * 						 * When we've reached this during matching,						 * we'll have matched the interval once, so						 * jump back only `upper_bound - 1' times.  */			    STORE_JUMP2(jump_n, b, laststart + 5,				upper_bound - 1);			    b += 5;			    /* The location we want to set is the second			     * parameter of the `jump_n'; that is `b-2' as			     * an absolute address.  `laststart' will be			     * the `set_number_at' we're about to insert;			     * `laststart+3' the number to set, the source			     * for the relative address.  But we are			     * inserting into the middle of the pattern --			     * so everything is getting moved up by 5.			     * Conclusion: (b - 2) - (laststart + 3) + 5,			     * i.e., b - laststart.			     * 			     * We insert this at the beginning of the loop			     * so that if we fail during matching, we'll			     * reinitialize the bounds.  */			    insert_op2(set_number_at, laststart, b - laststart,				upper_bound - 1, b);			    b += 5;			}		    }		    pending_exact = 0;		    beg_interval = NULL;		}		break;	      unfetch_interval:		/* If an invalid interval, match the characters as literals.  */		assert(beg_interval);		p = beg_interval;		beg_interval = NULL;		/* normal_char and normal_backslash need `c'.  */		PATFETCH(c);		if (!(syntax & RE_NO_BK_BRACES)) {		    if (p > pattern && p[-1] == '\\')			goto normal_backslash;		}		goto normal_char;#ifdef emacs		/* There is no way to specify the before_dot and after_dot		 * operators.  rms says this is ok.  --karl  */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -