⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gnuregex.c

📁 -
💻 C
📖 第 1 页 / 共 5 页
字号:
 * correct places in the new one.  If extending the buffer results in it * being larger than MAX_BUF_SIZE, then flag memory exhausted.  */#define EXTEND_BUFFER()							\  do { 									\    unsigned char *old_buffer = bufp->buffer;				\    if (bufp->allocated == MAX_BUF_SIZE) 				\      return REG_ESIZE;							\    bufp->allocated <<= 1;						\    if (bufp->allocated > MAX_BUF_SIZE)					\      bufp->allocated = MAX_BUF_SIZE; 					\    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\    if (bufp->buffer == NULL)						\      return REG_ESPACE;						\    /* If the buffer moved, move all the pointers into it.  */		\    if (old_buffer != bufp->buffer)					\      {									\        b = (b - old_buffer) + bufp->buffer;				\        begalt = (begalt - old_buffer) + bufp->buffer;			\        if (fixup_alt_jump)						\          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\        if (laststart)							\          laststart = (laststart - old_buffer) + bufp->buffer;		\        if (pending_exact)						\          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\      }									\  } while (0)/* Since we have one byte reserved for the register number argument to * {start,stop}_memory, the maximum number of groups we can report * things about is what fits in that byte.  */#define MAX_REGNUM 255/* But patterns can have more than `MAX_REGNUM' registers.  We just * ignore the excess.  */typedef unsigned regnum_t;/* Macros for the compile stack.  *//* Since offsets can go either forwards or backwards, this type needs to * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */typedef int pattern_offset_t;typedef struct {    pattern_offset_t begalt_offset;    pattern_offset_t fixup_alt_jump;    pattern_offset_t inner_group_offset;    pattern_offset_t laststart_offset;    regnum_t regnum;} compile_stack_elt_t;typedef struct {    compile_stack_elt_t *stack;    unsigned size;    unsigned avail;		/* Offset of next open position.  */} compile_stack_type;#define INIT_COMPILE_STACK_SIZE 32#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)/* The next available element.  */#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])/* Set the bit for character C in a list.  */#define SET_LIST_BIT(c)                               \  (b[((unsigned char) (c)) / BYTEWIDTH]               \   |= 1 << (((unsigned char) c) % BYTEWIDTH))/* Get the next unsigned number in the uncompiled pattern.  */#define GET_UNSIGNED_NUMBER(num) 					\  { if (p != pend)							\     {									\       PATFETCH (c); 							\       while (ISDIGIT (c)) 						\         { 								\           if (num < 0)							\              num = 0;							\           num = num * 10 + c - '0'; 					\           if (p == pend) 						\              break; 							\           PATFETCH (c);						\         } 								\       } 								\    }#define CHAR_CLASS_MAX_LENGTH  6	/* Namely, `xdigit'.  */#define IS_CHAR_CLASS(string)						\   (STREQ (string, "alpha") || STREQ (string, "upper")			\    || STREQ (string, "lower") || STREQ (string, "digit")		\    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\    || STREQ (string, "space") || STREQ (string, "print")		\    || STREQ (string, "punct") || STREQ (string, "graph")		\    || STREQ (string, "cntrl") || STREQ (string, "blank"))/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. * Returns one of error codes defined in `regex.h', or zero for success. *  * Assumes the `allocated' (and perhaps `buffer') and `translate' * fields are set in BUFP on entry. *  * If it succeeds, results are put in BUFP (if it returns an error, the * contents of BUFP are undefined): * `buffer' is the compiled pattern; * `syntax' is set to SYNTAX; * `used' is set to the length of the compiled pattern; * `fastmap_accurate' is zero; * `re_nsub' is the number of subexpressions in PATTERN; * `not_bol' and `not_eol' are zero; *  * The `fastmap' and `newline_anchor' fields are neither * examined nor set.  */static reg_errcode_tregex_compile(pattern, size, syntax, bufp)     const char *pattern;     int size;     reg_syntax_t syntax;     struct re_pattern_buffer *bufp;{    /* We fetch characters from PATTERN here.  Even though PATTERN is     * `char *' (i.e., signed), we declare these variables as unsigned, so     * they can be reliably used as array indices.  */    register unsigned char c, c1;    /* A random tempory spot in PATTERN.  */    const char *p1;    /* Points to the end of the buffer, where we should append.  */    register unsigned char *b;    /* Keeps track of unclosed groups.  */    compile_stack_type compile_stack;    /* Points to the current (ending) position in the pattern.  */    const char *p = pattern;    const char *pend = pattern + size;    /* How to translate the characters in the pattern.  */    char *translate = bufp->translate;    /* Address of the count-byte of the most recently inserted `exactn'     * command.  This makes it possible to tell if a new exact-match     * character can be added to that command or if the character requires     * a new `exactn' command.  */    unsigned char *pending_exact = 0;    /* Address of start of the most recently finished expression.     * This tells, e.g., postfix * where to find the start of its     * operand.  Reset at the beginning of groups and alternatives.  */    unsigned char *laststart = 0;    /* Address of beginning of regexp, or inside of last group.  */    unsigned char *begalt;    /* Place in the uncompiled pattern (i.e., the {) to     * which to go back if the interval is invalid.  */    const char *beg_interval;    /* Address of the place where a forward jump should go to the end of     * the containing expression.  Each alternative of an `or' -- except the     * last -- ends with a forward jump of this sort.  */    unsigned char *fixup_alt_jump = 0;    /* Counts open-groups as they are encountered.  Remembered for the     * matching close-group on the compile stack, so the same register     * number is put in the stop_memory as the start_memory.  */    regnum_t regnum = 0;#ifdef DEBUG    DEBUG_PRINT1("\nCompiling pattern: ");    if (debug) {	unsigned debug_count;	for (debug_count = 0; debug_count < size; debug_count++)	    printchar(pattern[debug_count]);	putchar('\n');    }#endif /* DEBUG */    /* Initialize the compile stack.  */    compile_stack.stack = TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);    if (compile_stack.stack == NULL)	return REG_ESPACE;    compile_stack.size = INIT_COMPILE_STACK_SIZE;    compile_stack.avail = 0;    /* Initialize the pattern buffer.  */    bufp->syntax = syntax;    bufp->fastmap_accurate = 0;    bufp->not_bol = bufp->not_eol = 0;    /* Set `used' to zero, so that if we return an error, the pattern     * printer (for debugging) will think there's no pattern.  We reset it     * at the end.  */    bufp->used = 0;    /* Always count groups, whether or not bufp->no_sub is set.  */    bufp->re_nsub = 0;#if !defined (emacs) && !defined (SYNTAX_TABLE)    /* Initialize the syntax table.  */    init_syntax_once();#endif    if (bufp->allocated == 0) {	if (bufp->buffer) {	/* If zero allocated, but buffer is non-null, try to realloc				 * enough space.  This loses if buffer's address is bogus, but				 * that is the user's responsibility.  */	    RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);	} else {		/* Caller did not allocate a buffer.  Do it for them.  */	    bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);	}	if (!bufp->buffer)	    return REG_ESPACE;	bufp->allocated = INIT_BUF_SIZE;    }    begalt = b = bufp->buffer;    /* Loop through the uncompiled pattern until we're at the end.  */    while (p != pend) {	PATFETCH(c);	switch (c) {	case '^':	    {		if (		/* If at start of pattern, it's an operator.  */		    p == pattern + 1		/* If context independent, it's an operator.  */		    || syntax & RE_CONTEXT_INDEP_ANCHORS		/* Otherwise, depends on what's come before.  */		    || at_begline_loc_p(pattern, p, syntax))		    BUF_PUSH(begline);		else		    goto normal_char;	    }	    break;	case '$':	    {		if (		/* If at end of pattern, it's an operator.  */		    p == pend		/* If context independent, it's an operator.  */		    || syntax & RE_CONTEXT_INDEP_ANCHORS		/* Otherwise, depends on what's next.  */		    || at_endline_loc_p(p, pend, syntax))		    BUF_PUSH(endline);		else		    goto normal_char;	    }	    break;	case '+':	case '?':	    if ((syntax & RE_BK_PLUS_QM)		|| (syntax & RE_LIMITED_OPS))		goto normal_char;	  handle_plus:	case '*':	    /* If there is no previous pattern... */	    if (!laststart) {		if (syntax & RE_CONTEXT_INVALID_OPS)		    return REG_BADRPT;		else if (!(syntax & RE_CONTEXT_INDEP_OPS))		    goto normal_char;	    } {		/* Are we optimizing this jump?  */		boolean keep_string_p = false;		/* 1 means zero (many) matches is allowed.  */		char zero_times_ok = 0, many_times_ok = 0;		/* If there is a sequence of repetition chars, collapse it		 * down to just one (the right one).  We can't combine		 * interval operators with these because of, e.g., `a{2}*',		 * which should only match an even number of `a's.  */		for (;;) {		    zero_times_ok |= c != '+';		    many_times_ok |= c != '?';		    if (p == pend)			break;		    PATFETCH(c);		    if (c == '*'			|| (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')));		    else if (syntax & RE_BK_PLUS_QM && c == '\\') {			if (p == pend)			    return REG_EESCAPE;			PATFETCH(c1);			if (!(c1 == '+' || c1 == '?')) {			    PATUNFETCH;			    PATUNFETCH;			    break;			}			c = c1;		    } else {			PATUNFETCH;			break;		    }		    /* If we get here, we found another repeat character.  */		}		/* Star, etc. applied to an empty pattern is equivalent		 * to an empty pattern.  */		if (!laststart)		    break;		/* Now we know whether or not zero matches is allowed		 * and also whether or not two or more matches is allowed.  */		if (many_times_ok) {	/* More than one repetition is allowed, so put in at the					 * end a backward relative jump from `b' to before the next					 * jump we're going to put in below (which jumps from					 * laststart to after this jump).  					 * 					 * But if we are at the `*' in the exact sequence `.*\n',					 * insert an unconditional jump backwards to the .,					 * instead of the beginning of the loop.  This way we only					 * push a failure point once, instead of every time					 * through the loop.  */		    assert(p - 1 > pattern);		    /* Allocate the space for the jump.  */		    GET_BUFFER_SPACE(3);		    /* We know we are not at the first character of the pattern,		     * because laststart was nonzero.  And we've already		     * incremented `p', by the way, to be the character after		     * the `*'.  Do we have to do something analogous here		     * for null bytes, because of RE_DOT_NOT_NULL?  */		    if (TRANSLATE(*(p - 2)) == TRANSLATE('.')			&& zero_times_ok			&& p < pend && TRANSLATE(*p) == TRANSLATE('\n')			&& !(syntax & RE_DOT_NEWLINE)) {	/* We have .*\n.  */			STORE_JUMP(jump, b, laststart);			keep_string_p = true;		    } else			/* Anything else.  */			STORE_JUMP(maybe_pop_jump, b, laststart - 3);		    /* We've added more stuff to the buffer.  */		    b += 3;		}		/* On failure, jump from laststart to b + 3, which will be the		 * end of the buffer after this jump is inserted.  */		GET_BUFFER_SPACE(3);		INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump		    : on_failure_jump,		    laststart, b + 3);		pending_exact = 0;		b += 3;		if (!zero_times_ok) {		    /* At least one repetition is required, so insert a		     * `dummy_failure_jump' before the initial		     * `on_failure_jump' instruction of the loop. This		     * effects a skip over that instruction the first time		     * we hit that loop.  */		    GET_BUFFER_SPACE(3);		    INSERT_JUMP(dummy_failure_jump, laststart, laststart + 6);		    b += 3;		}	    }	    break;	case '.':	    laststart = b;	    BUF_PUSH(anychar);	    break;	case '[':	    {		boolean had_char_class = false;		if (p == pend)		    return REG_EBRACK;		/* Ensure that we have enough space to push a charset: the		 * opcode, the length count, and the bitset; 34 bytes in all.  */		GET_BUFFER_SPACE(34);		laststart = b;		/* We test `*p == '^' twice, instead of using an if		 * statement, so we only need one BUF_PUSH.  */		BUF_PUSH(*p == '^' ? charset_not : charset);		if (*p == '^')		    p++;		/* Remember the first position in the bracket expression.  */		p1 = p;		/* Push the number of bytes in the bitmap.  */		BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);		/* Clear the whole map.  */		memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);		/* charset_not matches newline according to a syntax bit.  */		if ((re_opcode_t) b[-2] == charset_not		    && (syntax & RE_HAT_LISTS_NOT_NEWLINE))		    SET_LIST_BIT('\n');		/* Read in characters and ranges, setting map bits.  */		for (;;) {		    if (p == pend)			return REG_EBRACK;		    PATFETCH(c);		    /* \ might escape characters inside [...] and [^...].  */		    if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') {			if (p == pend)			    return REG_EESCAPE;			PATFETCH(c1);			SET_LIST_BIT(c1);			continue;		    }		    /* Could be the end of the bracket expression.  If it's		     * not (i.e., when the bracket expression is `[]' so		     * far), the ']' character bit gets set way below.  */		    if (c == ']' && p != p1 + 1)			break;		    /* Look ahead to see if it's a range when the last thing		     * was a character class.  */		    if (had_char_class && c == '-' && *p != ']')			return REG_ERANGE;		    /* Look ahead to see if it's a range when the last thing		     * was a character: if this is a hyphen not at the		     * beginning or the end of a list, then it's the range		     * operator.  */		    if (c == '-'			&& !(p - 2 >= pattern && p[-2] == '[')			&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')			&& *p != ']') {			reg_errcode_t ret			= compile_range(&p, pend, translate, syntax, b);			if (ret != REG_NOERROR)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -