⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gnuregex.c

📁 -
💻 C
📖 第 1 页 / 共 5 页
字号:
/* It is useful to test things that ``must'' be true when debugging.  */#include <assert.h>static int debug = 0;#define DEBUG_STATEMENT(e) e#define DEBUG_PRINT1(x) if (debug) printf (x)#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\  if (debug) print_partial_compiled_pattern (s, e)#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\  if (debug) print_double_string (w, s1, sz1, s2, sz2)extern void printchar();/* Print the fastmap in human-readable form.  */voidprint_fastmap(fastmap)     char *fastmap;{    unsigned was_a_range = 0;    unsigned i = 0;    while (i < (1 << BYTEWIDTH)) {	if (fastmap[i++]) {	    was_a_range = 0;	    printchar(i - 1);	    while (i < (1 << BYTEWIDTH) && fastmap[i]) {		was_a_range = 1;		i++;	    }	    if (was_a_range) {		printf("-");		printchar(i - 1);	    }	}    }    putchar('\n');}/* Print a compiled pattern string in human-readable form, starting at * the START pointer into it and ending just before the pointer END.  */voidprint_partial_compiled_pattern(start, end)     unsigned char *start;     unsigned char *end;{    int mcnt, mcnt2;    unsigned char *p = start;    unsigned char *pend = end;    if (start == NULL) {	printf("(null)\n");	return;    }    /* Loop over pattern commands.  */    while (p < pend) {	switch ((re_opcode_t) * p++) {	case no_op:	    printf("/no_op");	    break;	case exactn:	    mcnt = *p++;	    printf("/exactn/%d", mcnt);	    do {		putchar('/');		printchar(*p++);	    }	    while (--mcnt);	    break;	case start_memory:	    mcnt = *p++;	    printf("/start_memory/%d/%d", mcnt, *p++);	    break;	case stop_memory:	    mcnt = *p++;	    printf("/stop_memory/%d/%d", mcnt, *p++);	    break;	case duplicate:	    printf("/duplicate/%d", *p++);	    break;	case anychar:	    printf("/anychar");	    break;	case charset:	case charset_not:	    {		register int c;		printf("/charset%s",		    (re_opcode_t) * (p - 1) == charset_not ? "_not" : "");		assert(p + *p < pend);		for (c = 0; c < *p; c++) {		    unsigned bit;		    unsigned char map_byte = p[1 + c];		    putchar('/');		    for (bit = 0; bit < BYTEWIDTH; bit++)			if (map_byte & (1 << bit))			    printchar(c * BYTEWIDTH + bit);		}		p += 1 + *p;		break;	    }	case begline:	    printf("/begline");	    break;	case endline:	    printf("/endline");	    break;	case on_failure_jump:	    extract_number_and_incr(&mcnt, &p);	    printf("/on_failure_jump/0/%d", mcnt);	    break;	case on_failure_keep_string_jump:	    extract_number_and_incr(&mcnt, &p);	    printf("/on_failure_keep_string_jump/0/%d", mcnt);	    break;	case dummy_failure_jump:	    extract_number_and_incr(&mcnt, &p);	    printf("/dummy_failure_jump/0/%d", mcnt);	    break;	case push_dummy_failure:	    printf("/push_dummy_failure");	    break;	case maybe_pop_jump:	    extract_number_and_incr(&mcnt, &p);	    printf("/maybe_pop_jump/0/%d", mcnt);	    break;	case pop_failure_jump:	    extract_number_and_incr(&mcnt, &p);	    printf("/pop_failure_jump/0/%d", mcnt);	    break;	case jump_past_alt:	    extract_number_and_incr(&mcnt, &p);	    printf("/jump_past_alt/0/%d", mcnt);	    break;	case jump:	    extract_number_and_incr(&mcnt, &p);	    printf("/jump/0/%d", mcnt);	    break;	case succeed_n:	    extract_number_and_incr(&mcnt, &p);	    extract_number_and_incr(&mcnt2, &p);	    printf("/succeed_n/0/%d/0/%d", mcnt, mcnt2);	    break;	case jump_n:	    extract_number_and_incr(&mcnt, &p);	    extract_number_and_incr(&mcnt2, &p);	    printf("/jump_n/0/%d/0/%d", mcnt, mcnt2);	    break;	case set_number_at:	    extract_number_and_incr(&mcnt, &p);	    extract_number_and_incr(&mcnt2, &p);	    printf("/set_number_at/0/%d/0/%d", mcnt, mcnt2);	    break;	case wordbound:	    printf("/wordbound");	    break;	case notwordbound:	    printf("/notwordbound");	    break;	case wordbeg:	    printf("/wordbeg");	    break;	case wordend:	    printf("/wordend");#ifdef emacs	case before_dot:	    printf("/before_dot");	    break;	case at_dot:	    printf("/at_dot");	    break;	case after_dot:	    printf("/after_dot");	    break;	case syntaxspec:	    printf("/syntaxspec");	    mcnt = *p++;	    printf("/%d", mcnt);	    break;	case notsyntaxspec:	    printf("/notsyntaxspec");	    mcnt = *p++;	    printf("/%d", mcnt);	    break;#endif /* emacs */	case wordchar:	    printf("/wordchar");	    break;	case notwordchar:	    printf("/notwordchar");	    break;	case begbuf:	    printf("/begbuf");	    break;	case endbuf:	    printf("/endbuf");	    break;	default:	    printf("?%d", *(p - 1));	}    }    printf("/\n");}voidprint_compiled_pattern(bufp)     struct re_pattern_buffer *bufp;{    unsigned char *buffer = bufp->buffer;    print_partial_compiled_pattern(buffer, buffer + bufp->used);    printf("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);    if (bufp->fastmap_accurate && bufp->fastmap) {	printf("fastmap: ");	print_fastmap(bufp->fastmap);    }    printf("re_nsub: %d\t", bufp->re_nsub);    printf("regs_alloc: %d\t", bufp->regs_allocated);    printf("can_be_null: %d\t", bufp->can_be_null);    printf("newline_anchor: %d\n", bufp->newline_anchor);    printf("no_sub: %d\t", bufp->no_sub);    printf("not_bol: %d\t", bufp->not_bol);    printf("not_eol: %d\t", bufp->not_eol);    printf("syntax: %d\n", bufp->syntax);    /* Perhaps we should print the translate table?  */}voidprint_double_string(where, string1, size1, string2, size2)     const char *where;     const char *string1;     const char *string2;     int size1;     int size2;{    unsigned this_char;    if (where == NULL)	printf("(null)");    else {	if (FIRST_STRING_P(where)) {	    for (this_char = where - string1; this_char < size1; this_char++)		printchar(string1[this_char]);	    where = string2;	}	for (this_char = where - string2; this_char < size2; this_char++)	    printchar(string2[this_char]);    }}#else /* not DEBUG */#undef assert#define assert(e)#define DEBUG_STATEMENT(e)#define DEBUG_PRINT1(x)#define DEBUG_PRINT2(x1, x2)#define DEBUG_PRINT3(x1, x2, x3)#define DEBUG_PRINT4(x1, x2, x3, x4)#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)#endif /* not DEBUG *//* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can * also be assigned to arbitrarily: each pattern buffer stores its own * syntax, so it can be changed between regex compilations.  */reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;/* Specify the precise syntax of regexps for compilation.  This provides * for compatibility for various utilities which historically have * different, incompatible syntaxes. *  * The argument SYNTAX is a bit mask comprised of the various bits * defined in regex.h.  We return the old syntax.  */reg_syntax_tre_set_syntax(syntax)     reg_syntax_t syntax;{    reg_syntax_t ret = re_syntax_options;    re_syntax_options = syntax;    return ret;}/* This table gives an error message for each of the error codes listed * in regex.h.  Obviously the order here has to be same as there.  */static const char *re_error_msg[] ={NULL,				/* REG_NOERROR */    "No match",			/* REG_NOMATCH */    "Invalid regular expression",	/* REG_BADPAT */    "Invalid collation character",	/* REG_ECOLLATE */    "Invalid character class name",	/* REG_ECTYPE */    "Trailing backslash",	/* REG_EESCAPE */    "Invalid back reference",	/* REG_ESUBREG */    "Unmatched [ or [^",	/* REG_EBRACK */    "Unmatched ( or \\(",	/* REG_EPAREN */    "Unmatched \\{",		/* REG_EBRACE */    "Invalid content of \\{\\}",	/* REG_BADBR */    "Invalid range end",	/* REG_ERANGE */    "Memory exhausted",		/* REG_ESPACE */    "Invalid preceding regular expression",	/* REG_BADRPT */    "Premature end of regular expression",	/* REG_EEND */    "Regular expression too big",	/* REG_ESIZE */    "Unmatched ) or \\)",	/* REG_ERPAREN */};/* Subroutine declarations and macros for regex_compile.  */static void store_op1(), store_op2();static void insert_op1(), insert_op2();static boolean at_begline_loc_p(), at_endline_loc_p();static boolean group_in_compile_stack();static reg_errcode_t compile_range();/* Fetch the next character in the uncompiled pattern---translating it  * if necessary.  Also cast from a signed character in the constant * string passed to us by the user to an unsigned char that we can use * as an array index (in, e.g., `translate').  */#define PATFETCH(c)							\  do {if (p == pend) return REG_EEND;					\    c = (unsigned char) *p++;						\    if (translate) c = translate[c]; 					\  } while (0)/* Fetch the next character in the uncompiled pattern, with no * translation.  */#define PATFETCH_RAW(c)							\  do {if (p == pend) return REG_EEND;					\    c = (unsigned char) *p++; 						\  } while (0)/* Go backwards one character in the pattern.  */#define PATUNFETCH p--/* If `translate' is non-null, return translate[D], else just D.  We * cast the subscript to translate because some data is declared as * `char *', to avoid warnings when a string constant is passed.  But * when we use a character as a subscript we must make it unsigned.  */#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))/* Macros for outputting the compiled pattern into `buffer'.  *//* If the buffer isn't allocated when it comes in, use this.  */#define INIT_BUF_SIZE  32/* Make sure we have at least N more bytes of space in buffer.  */#define GET_BUFFER_SPACE(n)						\    while (b - bufp->buffer + (n) > bufp->allocated)			\      EXTEND_BUFFER ()/* Make sure we have one more byte of buffer space and then add C to it.  */#define BUF_PUSH(c)							\  do {									\    GET_BUFFER_SPACE (1);						\    *b++ = (unsigned char) (c);						\  } while (0)/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */#define BUF_PUSH_2(c1, c2)						\  do {									\    GET_BUFFER_SPACE (2);						\    *b++ = (unsigned char) (c1);					\    *b++ = (unsigned char) (c2);					\  } while (0)/* As with BUF_PUSH_2, except for three bytes.  */#define BUF_PUSH_3(c1, c2, c3)						\  do {									\    GET_BUFFER_SPACE (3);						\    *b++ = (unsigned char) (c1);					\    *b++ = (unsigned char) (c2);					\    *b++ = (unsigned char) (c3);					\  } while (0)/* Store a jump with opcode OP at LOC to location TO.  We store a * relative address offset by the three bytes the jump itself occupies.  */#define STORE_JUMP(op, loc, to) \  store_op1 (op, loc, (to) - (loc) - 3)/* Likewise, for a two-argument jump.  */#define STORE_JUMP2(op, loc, to, arg) \  store_op2 (op, loc, (to) - (loc) - 3, arg)/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */#define INSERT_JUMP(op, loc, to) \  insert_op1 (op, loc, (to) - (loc) - 3, b)/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */#define INSERT_JUMP2(op, loc, to, arg) \  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)/* This is not an arbitrary limit: the arguments which represent offsets * into the pattern are two bytes long.  So if 2^16 bytes turns out to * be too small, many things would have to change.  */#define MAX_BUF_SIZE (1L << 16)/* Extend the buffer by twice its current size via realloc and * reset the pointers that pointed into the old block to point to the

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -