⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexpr.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
{ \
	bufp->allocated = alloc; \
	bufp->buffer = pattern; \
	bufp->used = pattern_offset; \
}
    
/*
 * GETHEX(var): consume two pattern characters with NEXTCHAR, interpret
 * them as hexadecimal digits (most significant digit first) and store
 * the resulting value in var.
 *
 * A result of 16 from hex_char_to_decimal() is treated as "not a hex
 * digit" and transfers control to the hex_error label of the enclosing
 * function.  NEXTCHAR, hex_char_to_decimal and hex_error must therefore
 * be available where the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so that "GETHEX(x);" is a
 * single statement and remains correct inside an unbraced if/else.
 */
#define GETHEX(var) \
do { \
	unsigned char gethex_hi, gethex_lo; \
	NEXTCHAR(gethex_hi); \
	gethex_hi = hex_char_to_decimal(gethex_hi); \
	if (gethex_hi == 16) \
		goto hex_error; \
	NEXTCHAR(gethex_lo); \
	gethex_lo = hex_char_to_decimal(gethex_lo); \
	if (gethex_lo == 16) \
		goto hex_error; \
	(var) = gethex_hi * 16 + gethex_lo; \
} while (0)

/*
 * ANSI_TRANSLATE(ch): ch holds the character that followed the quote
 * character in the pattern; replace it in place with the character the
 * escape sequence denotes.
 *
 *   a b f n r t v  (either case)  -> the corresponding control character
 *   x / X                         -> the value of the next two hex
 *                                    digits, read with GETHEX
 *   anything else                 -> passed through, mapped via the
 *                                    translate table when one is set
 *
 * ch must be a modifiable lvalue, and a `translate` pointer must be in
 * scope at the expansion site.  Wrapped in do { } while (0) so the
 * macro behaves as one statement even inside an unbraced if/else.
 */
#define ANSI_TRANSLATE(ch) \
do { \
	switch (ch) \
	{ \
	case 'a': case 'A': \
		(ch) = 7; /* audible bell */ \
		break; \
	case 'b': case 'B': \
		(ch) = 8; /* backspace */ \
		break; \
	case 'f': case 'F': \
		(ch) = 12; /* form feed */ \
		break; \
	case 'n': case 'N': \
		(ch) = 10; /* line feed */ \
		break; \
	case 'r': case 'R': \
		(ch) = 13; /* carriage return */ \
		break; \
	case 't': case 'T': \
		(ch) = 9; /* tab */ \
		break; \
	case 'v': case 'V': \
		(ch) = 11; /* vertical tab */ \
		break; \
	case 'x': case 'X': /* hex code */ \
		GETHEX(ch); \
		break; \
	default: \
		/* other characters passed through */ \
		if (translate) \
			(ch) = translate[(unsigned char)(ch)]; \
		break; \
	} \
} while (0)

/*
 * Compile the regular expression in regex[0 .. size-1] into the pattern
 * buffer described by bufp.
 *
 * bufp->translate, when non-NULL, is applied to pattern characters
 * before they are classified.  If bufp has no usable buffer yet
 * (allocated == 0 or buffer == NULL) an initial 256-byte buffer is
 * obtained with malloc().  Compiled code is emitted through the
 * ALLOC/STORE family of macros, which are defined outside this function;
 * SET_FIELDS is invoked on every exit path (presumably it writes the
 * local pattern/alloc/pattern_offset back into bufp -- confirm against
 * the macro definition).
 *
 * Returns NULL on success, otherwise a pointer to a constant string
 * describing the error.  The caller must not modify or free that string.
 */
char *re_compile_pattern(unsigned char *regex, int size, regexp_t bufp)
{
	int a;
	int pos;
	int op;
	int current_level;
	int level;
	int opcode;
	int pattern_offset = 0, alloc;
	int starts[NUM_LEVELS * MAX_NESTING];
	int starts_base;
	int future_jumps[MAX_NESTING];
	int num_jumps;
	unsigned char ch = '\0';
	unsigned char *pattern;
	unsigned char *translate;
	int next_register;
	int paren_depth;
	int num_open_registers;
	int open_registers[RE_NREGS];
	int beginning_context;
	
	/* Run the one-time initialization if it has not happened yet. */
	if (!re_compile_initialized)
		re_compile_initialize();
	bufp->used = 0;
	bufp->fastmap_accurate = 0;
	bufp->uses_registers = 1;
	bufp->num_registers = 1;
	translate = bufp->translate;
	pattern = bufp->buffer;
	alloc = bufp->allocated;
	/* No usable buffer yet: start with a 256-byte allocation. */
	if (alloc == 0 || pattern == NULL)
	{
		alloc = 256;
		pattern = malloc(alloc);
		if (!pattern)
			goto out_of_memory;
	}
	pattern_offset = 0;
	starts_base = 0;
	num_jumps = 0;
	current_level = 0;
	SET_LEVEL_START;
	num_open_registers = 0;
	next_register = 1;
	paren_depth = 0;
	beginning_context = 1;
	op = -1;
	/* we use Rend dummy to ensure that pending jumps are updated
	   (due to low priority of Rend) before exiting the loop. */
	pos = 0;
	while (op != Rend)
	{
		/* Input exhausted: feed the Rend pseudo-operator. */
		if (pos >= size)
			op = Rend;
		else
		{
			/* Classify the (optionally translated) character via the
			   plain-operator table.  A quote operator consumes one
			   more character and is classified via the
			   quoted-operator table instead. */
			NEXTCHAR(ch);
			if (translate)
				ch = translate[(unsigned char)ch];
			op = regexp_plain_ops[(unsigned char)ch];
			if (op == Rquote)
			{
				NEXTCHAR(ch);
				op = regexp_quoted_ops[(unsigned char)ch];
				if (op == Rnormal && regexp_ansi_sequences)
					ANSI_TRANSLATE(ch);
			}
		}
		/* Precedence level of the operator that was just read. */
		level = regexp_precedences[op];
		/* printf("ch='%c' op=%d level=%d current_level=%d
		   curlevstart=%d\n", ch, op, level, current_level,
		   CURRENT_LEVEL_START); */
		/* Moving to a higher level runs SET_LEVEL_START for every level
		   stepped through.  Dropping to a lower level resolves the
		   pending forward jumps recorded at or after that level's start
		   so that they refer to the current end of the pattern. */
		if (level > current_level)
		{
			for (current_level++; current_level < level; current_level++)
				SET_LEVEL_START;
			SET_LEVEL_START;
		}
		else
			if (level < current_level)
			{
				current_level = level;
				for (;num_jumps > 0 &&
					     future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
				     num_jumps--)
					PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
			}
		switch (op)
		{
		case Rend:
		{
			break;
		}
		case Rnormal:
		{
			/* Literal character: emit Cexact followed by the character.
			   Both labels below are shared entry points; other cases
			   jump here with opcode (and ch) already prepared. */
		  normal_char:
			opcode = Cexact;
		  store_opcode_and_arg: /* opcode & ch must be set */
			SET_LEVEL_START;
			ALLOC(2);
			STORE(opcode);
			STORE(ch);
			break;
		}
		case Ranychar:
		{
			opcode = Canychar;
			/* Shared entry point for every opcode without an argument. */
		  store_opcode:
			SET_LEVEL_START;
			ALLOC(1);
			STORE(opcode);
			break;
		}
		case Rquote:
		{
			/* The quote operator is resolved while reading the operator
			   above, so it is not expected to reach the switch. */
			abort();
			/*NOTREACHED*/
		}
		case Rbol:
		{
			/* Outside a beginning context the operator is either an
			   error (context-independent operators enabled) or is
			   compiled as a literal character. */
			if (!beginning_context) {
				if (regexp_context_indep_ops)
					goto op_error;
				else
					goto normal_char;
			}
			opcode = Cbol;
			goto store_opcode;
		}
		case Reol:
		{
			/* Special only at the end of the pattern, or directly
			   before an alternation bar ('\174' is '|') or a closing
			   parenthesis; whether those need a preceding backslash
			   ('\134') depends on the RE_NO_BK_VBAR and
			   RE_NO_BK_PARENS syntax bits. */
			if (!((pos >= size) ||
			      ((regexp_syntax & RE_NO_BK_VBAR) ?
			       (regex[pos] == '\174') :
			       (pos+1 < size && regex[pos] == '\134' &&
				regex[pos+1] == '\174')) ||
			      ((regexp_syntax & RE_NO_BK_PARENS)?
			       (regex[pos] == ')'):
			       (pos+1 < size && regex[pos] == '\134' &&
				regex[pos+1] == ')')))) {
				if (regexp_context_indep_ops)
					goto op_error;
				else
					goto normal_char;
			}
			opcode = Ceol;
			goto store_opcode;
			/* NOTREACHED */
			break;
		}
		case Roptional:
		{
			if (beginning_context) {
				if (regexp_context_indep_ops)
					goto op_error;
				else
					goto normal_char;
			}
			if (CURRENT_LEVEL_START == pattern_offset)
				break; /* ignore empty patterns for ? */
			/* Put a failure jump in front of the preceding item that
			   targets the code following it (3 bytes are reserved for
			   the inserted jump). */
			ALLOC(3);
			INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
				    pattern_offset + 3);
			break;
		}
		case Rstar:
		case Rplus:
		{
			if (beginning_context) {
				if (regexp_context_indep_ops)
					goto op_error;
				else
					goto normal_char;
			}
			if (CURRENT_LEVEL_START == pattern_offset)
				break; /* ignore empty patterns for + and * */
			/* Up to three jumps are inserted (failure jump before the
			   item, star jump after it, and for + a dummy failure jump
			   in front); ALLOC(9) reserves room for all of them. */
			ALLOC(9);
			INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
				    pattern_offset + 6);
			INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
			if (op == Rplus)  /* jump over initial failure_jump */
				INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
					    CURRENT_LEVEL_START + 6);
			break;
		}
		case Ror:
		{
			/* Alternation: a failure jump in front of the left branch,
			   then a Cjump with a two-byte placeholder whose offset is
			   remembered in future_jumps[] and patched once the level
			   is closed (see the PUT_ADDR loop above). */
			ALLOC(6);
			INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
				    pattern_offset + 6);
			if (num_jumps >= MAX_NESTING)
				goto too_complex;
			STORE(Cjump);
			future_jumps[num_jumps++] = pattern_offset;
			STORE(0);
			STORE(0);
			SET_LEVEL_START;
			break;
		}
		case Ropenpar:
		{
			SET_LEVEL_START;
			/* Only groups numbered below RE_NREGS get a memory
			   register; later groups still nest but are not recorded. */
			if (next_register < RE_NREGS)
			{
				bufp->uses_registers = 1;
				ALLOC(2);
				STORE(Cstart_memory);
				STORE(next_register);
				open_registers[num_open_registers++] = next_register;
				bufp->num_registers++;
				next_register++;
			}
			paren_depth++;
			PUSH_LEVEL_STARTS;
			current_level = 0;
			SET_LEVEL_START;
			break;
		}
		case Rclosepar:
		{
			if (paren_depth <= 0)
				goto parenthesis_error;
			POP_LEVEL_STARTS;
			current_level = regexp_precedences[Ropenpar];
			paren_depth--;
			/* Close the most recently opened register, if the group
			   being closed was given one. */
			if (paren_depth < num_open_registers)
			{
				bufp->uses_registers = 1;
				ALLOC(2);
				STORE(Cend_memory);
				num_open_registers--;
				STORE(open_registers[num_open_registers]);
			}
			break;
		}
		case Rmemory:
		{
			/* Single-digit back-reference; register 0 is rejected. */
			if (ch == '0')
				goto bad_match_register;
			assert(ch >= '0' && ch <= '9');
			bufp->uses_registers = 1;
			opcode = Cmatch_memory;
			ch -= '0';
			goto store_opcode_and_arg;
		}
		case Rextended_memory:
		{
			/* Two-digit back-reference: both following characters must
			   be decimal digits.
			   NOTE(review): the SECOND digit read (a) is weighted as
			   tens and the first (ch) as units, so the digits "12"
			   select register 21.  Confirm whether this reversed order
			   is intended before relying on it. */
			NEXTCHAR(ch);
			if (ch < '0' || ch > '9')
				goto bad_match_register;
			NEXTCHAR(a);
			if (a < '0' || a > '9')
				goto bad_match_register;
			ch = 10 * (a - '0') + ch - '0';
			if (ch == 0 || ch >= RE_NREGS)
				goto bad_match_register;
			bufp->uses_registers = 1;
			opcode = Cmatch_memory;
			goto store_opcode_and_arg;
		}
		case Ropenset:
		{
			int complement;
			int prev;
			int offset;
			int range;
                        int firstchar;
                        
			/* Emit Cset followed by a 256/8-byte bitmap that starts
			   out all zero; offset remembers where the bitmap begins. */
			SET_LEVEL_START;
			ALLOC(1+256/8);
			STORE(Cset);
			offset = pattern_offset;
			for (a = 0; a < 256/8; a++)
				STORE(0);
			NEXTCHAR(ch);
			if (translate)
				ch = translate[(unsigned char)ch];
			/* A leading '^' ('\136') requests the complemented set. */
			if (ch == '\136')
			{
				complement = 1;
				NEXTCHAR(ch);
				if (translate)
					ch = translate[(unsigned char)ch];
			}
			else
				complement = 0;
			prev = -1;
			range = 0;
			firstchar = 1;
			/* Read members until the closing ']' ('\135'); firstchar
			   makes a ']' in the first position an ordinary member. */
			while (ch != '\135' || firstchar)
			{
				firstchar = 0;
				if (regexp_ansi_sequences && ch == '\134')
				{
					NEXTCHAR(ch);
					ANSI_TRANSLATE(ch);
				}
				if (range)
				{
					/* Second endpoint of "prev-ch": set every bit from
					   prev through ch inclusive. */
					for (a = prev; a <= (int)ch; a++)
						SETBIT(pattern, offset, a);
					prev = -1;
					range = 0;
				}
				else
					if (prev != -1 && ch == '-')
						range = 1;
					else
					{
						SETBIT(pattern, offset, ch);
						prev = ch;
					}
				NEXTCHAR(ch);
				if (translate)
					ch = translate[(unsigned char)ch];
			}
			/* A '-' directly before the closing bracket is a literal. */
			if (range)
				SETBIT(pattern, offset, '-');
			/* Complemented set: invert every byte of the bitmap. */
			if (complement)
			{
				for (a = 0; a < 256/8; a++)
					pattern[offset+a] ^= 0xff;
			}
			break;
		}
		case Rbegbuf:
		{
			opcode = Cbegbuf;
			goto store_opcode;
		}
		case Rendbuf:
		{
			opcode = Cendbuf;
			goto store_opcode;
		}
		case Rwordchar:
		{
			opcode = Csyntaxspec;
			ch = Sword;
			goto store_opcode_and_arg;
		}
		case Rnotwordchar:
		{
			opcode = Cnotsyntaxspec;
			ch = Sword;
			goto store_opcode_and_arg;
		}
		case Rwordbeg:
		{
			opcode = Cwordbeg;
			goto store_opcode;
		}
		case Rwordend:
		{
			opcode = Cwordend;
			goto store_opcode;
		}
		case Rwordbound:
		{
			opcode = Cwordbound;
			goto store_opcode;
		}
		case Rnotwordbound:
		{
			opcode = Cnotwordbound;
			goto store_opcode;
		}
		default:
		{
			abort();
		}
		}
		/* Only directly after an opening parenthesis or an alternation
		   is the next operator at the beginning of a (sub)expression. */
		beginning_context = (op == Ropenpar || op == Ror);
	}
	/* A non-zero starts_base here is reported as a parenthesis error
	   (presumably PUSH_LEVEL_STARTS without a matching
	   POP_LEVEL_STARTS, i.e. an unclosed group -- confirm against the
	   macro definitions). */
	if (starts_base != 0)
		goto parenthesis_error;
	assert(num_jumps == 0);
	ALLOC(1);
	STORE(Cend);
	SET_FIELDS;
	if(!re_optimize(bufp))
		return "Optimization error";
	return NULL;

	/* Error exits: each one runs SET_FIELDS and then returns a constant
	   message.  Labels with no visible goto in this function
	   (ends_prematurely, and out_of_memory apart from the initial
	   malloc) are presumably targeted from inside the helper macros. */
  op_error:
	SET_FIELDS;
	return "Badly placed special character";

  bad_match_register:
	SET_FIELDS;
	return "Bad match register number";
   
  hex_error:
	SET_FIELDS;
	return "Bad hexadecimal number";
   
  parenthesis_error:
	SET_FIELDS;
	return "Badly placed parenthesis";
   
  out_of_memory:
	SET_FIELDS;
	return "Out of memory";
   
  ends_prematurely:
	SET_FIELDS;
	return "Regular expression ends prematurely";

  too_complex:
	SET_FIELDS;
	return "Regular expression too complex";
}

/*
 * Drop the helper macros used by re_compile_pattern() so that their
 * names do not leak into the rest of the file (NEXTCHAR is given a new
 * definition right after this block).  ANSI_TRANSLATE is removed as
 * well: it expands to GETHEX, which is undefined here, so it could not
 * be expanded correctly past this point anyway.
 */
#undef CHARAT
#undef NEXTCHAR
#undef GETHEX
#undef ANSI_TRANSLATE
#undef ALLOC
#undef STORE
#undef CURRENT_LEVEL_START
#undef SET_LEVEL_START
#undef PUSH_LEVEL_STARTS
#undef POP_LEVEL_STARTS
#undef PUT_ADDR
#undef INSERT_JUMP
#undef SETBIT
#undef SET_FIELDS

/*
 * PREFETCH: jump to the enclosing function's `fail` label when the text
 * pointer has reached textend, i.e. there is no further character to
 * read.  Requires `text`, `textend` and a `fail` label at the expansion
 * site.  Wrapped in do { } while (0) so that a following `else` cannot
 * attach to the macro's internal `if`.
 */
#define PREFETCH do { if (text == textend) goto fail; } while (0)

/*
 * NEXTCHAR(var): matcher-side character fetch.  Fails (via PREFETCH)
 * when the text is exhausted; otherwise stores the next text byte in var,
 * advances `text`, and maps the byte through `translate` when that table
 * is non-NULL.  var must be a modifiable lvalue.
 *
 * Wrapped in do { } while (0) so all three steps stay together when the
 * macro is the body of an unbraced if/else or loop.
 */
#define NEXTCHAR(var) \
do { \
	PREFETCH; \
	(var) = (unsigned char)*text++; \
	if (translate) \
		(var) = translate[(var)]; \
} while (0)

int re_match(regexp_t bufp, unsigned char *string, int size, int pos,
             regexp_registers_t old_regs)
{
	unsigned char *code;
	unsigned char *translate;
	unsigned char *text;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -