⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexpr.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
		regexp_quoted_ops['\''] = Rendbuf;
	}
	if (regexp_syntax & RE_ANSI_HEX)
		regexp_quoted_ops['v'] = Rextended_memory;
	for (a = 0; a < Rnum_ops; a++)
		regexp_precedences[a] = 4;
	if (regexp_syntax & RE_TIGHT_VBAR)
	{
		regexp_precedences[Ror] = 3;
		regexp_precedences[Rbol] = 2;
		regexp_precedences[Reol] = 2;
	}
	else
	{
		regexp_precedences[Ror] = 2;
		regexp_precedences[Rbol] = 3;
		regexp_precedences[Reol] = 3;
	}
	regexp_precedences[Rclosepar] = 1;
	regexp_precedences[Rend] = 0;
	regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
	regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
}

int re_set_syntax(int syntax)
{
	int ret;
	
	ret = regexp_syntax;
	regexp_syntax = syntax;
	re_syntax = syntax; /* Exported copy */
	re_compile_initialize();
	return ret;
}

static int hex_char_to_decimal(int ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return 16;
}

static void re_compile_fastmap_aux(unsigned char *code, int pos,
                                   unsigned char *visited,
                                   unsigned char *can_be_null,
                                   unsigned char *fastmap)
{
	int a;
	int b;
	int syntaxcode;
	
	if (visited[pos])
		return;  /* we have already been here */
	visited[pos] = 1;
	for (;;)
		switch (code[pos++]) {
		case Cend:
			{
				*can_be_null = 1;
				return;
			}
		case Cbol:
		case Cbegbuf:
		case Cendbuf:
		case Cwordbeg:
		case Cwordend:
		case Cwordbound:
		case Cnotwordbound:
		{
			for (a = 0; a < 256; a++)
				fastmap[a] = 1;
			break;
		}
		case Csyntaxspec:
		{
			syntaxcode = code[pos++];
			for (a = 0; a < 256; a++)
				if (SYNTAX(a) & syntaxcode) 
					fastmap[a] = 1;
			return;
		}
		case Cnotsyntaxspec:
		{
			syntaxcode = code[pos++];
			for (a = 0; a < 256; a++)
				if (!(SYNTAX(a) & syntaxcode) )
					fastmap[a] = 1;
			return;
		}
		case Ceol:
		{
			fastmap['\n'] = 1;
			if (*can_be_null == 0)
				*can_be_null = 2; /* can match null, but only at end of buffer*/
			return;
		}
		case Cset:
		{
			for (a = 0; a < 256/8; a++)
				if (code[pos + a] != 0)
					for (b = 0; b < 8; b++)
						if (code[pos + a] & (1 << b))
							fastmap[(a << 3) + b] = 1;
			pos += 256/8;
			return;
		}
		case Cexact:
		{
			fastmap[(unsigned char)code[pos]] = 1;
			return;
		}
		case Canychar:
		{
			for (a = 0; a < 256; a++)
				if (a != '\n')
					fastmap[a] = 1;
			return;
		}
		case Cstart_memory:
		case Cend_memory:
		{
			pos++;
			break;
		}
		case Cmatch_memory:
		{
			for (a = 0; a < 256; a++)
				fastmap[a] = 1;
			*can_be_null = 1;
			return;
		}
		case Cjump:
		case Cdummy_failure_jump:
		case Cupdate_failure_jump:
		case Cstar_jump:
		{
			a = (unsigned char)code[pos++];
			a |= (unsigned char)code[pos++] << 8;
			pos += (int)SHORT(a);
			if (visited[pos])
			{
				/* argh... the regexp contains empty loops.  This is not
				   good, as this may cause a failure stack overflow when
				   matching.  Oh well. */
				/* this path leads nowhere; pursue other paths. */
				return;
			}
			visited[pos] = 1;
			break;
		}
		case Cfailure_jump:
		{
			a = (unsigned char)code[pos++];
			a |= (unsigned char)code[pos++] << 8;
			a = pos + (int)SHORT(a);
			re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
			break;
		}
		case Crepeat1:
		{
			pos += 2;
			break;
		}
		default:
		{
		        PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
		        return;
			/*NOTREACHED*/
		}
		}
}

static int re_do_compile_fastmap(unsigned char *buffer, int used, int pos,
                                 unsigned char *can_be_null,
                                 unsigned char *fastmap)
{
	unsigned char small_visited[512], *visited;
   
	if (used <= sizeof(small_visited))
		visited = small_visited;
	else
	{
		visited = malloc(used);
		if (!visited)
			return 0;
	}
	*can_be_null = 0;
	memset(fastmap, 0, 256);
	memset(visited, 0, used);
	re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
	if (visited != small_visited)
		free(visited);
	return 1;
}

void re_compile_fastmap(regexp_t bufp)
{
	if (!bufp->fastmap || bufp->fastmap_accurate)
		return;
	assert(bufp->used > 0);
	if (!re_do_compile_fastmap(bufp->buffer,
				   bufp->used,
				   0,
				   &bufp->can_be_null,
				   bufp->fastmap))
		return;
	if (PyErr_Occurred()) return;
	if (bufp->buffer[0] == Cbol)
		bufp->anchor = 1;   /* begline */
	else
		if (bufp->buffer[0] == Cbegbuf)
			bufp->anchor = 2; /* begbuf */
		else
			bufp->anchor = 0; /* none */
	bufp->fastmap_accurate = 1;
}

/* 
 * star is coded as:
 * 1: failure_jump 2
 *    ... code for operand of star
 *    star_jump 1
 * 2: ... code after star
 *
 * We change the star_jump to update_failure_jump if we can determine
 * that it is safe to do so; otherwise we change it to an ordinary
 * jump.
 *
 * plus is coded as
 *
 *    jump 2
 * 1: failure_jump 3
 * 2: ... code for operand of plus
 *    star_jump 1
 * 3: ... code after plus
 *
 * For star_jump considerations this is processed identically to star.
 *
 */

static int re_optimize_star_jump(regexp_t bufp, unsigned char *code)
{
	unsigned char map[256];
	unsigned char can_be_null;
	unsigned char *p1;
	unsigned char *p2;
	unsigned char ch;
	int a;
	int b;
	int num_instructions = 0;

	a = (unsigned char)*code++;
	a |= (unsigned char)*code++ << 8;
	a = (int)SHORT(a);
	
	p1 = code + a + 3; /* skip the failure_jump */
	/* Check that the jump is within the pattern */
	if (p1<bufp->buffer || bufp->buffer+bufp->used<p1)
	  {
	    PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (failure_jump opt)");
	    return 0;
	  }
	
	assert(p1[-3] == Cfailure_jump);
	p2 = code;
	/* p1 points inside loop, p2 points to after loop */
	if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
				   (int)(p2 - bufp->buffer),
				   &can_be_null, map))
		goto make_normal_jump;
	
	/* If we might introduce a new update point inside the
	 * loop, we can't optimize because then update_jump would
	 * update a wrong failure point.  Thus we have to be
	 * quite careful here.
	 */
	
	/* loop until we find something that consumes a character */
  loop_p1:
	num_instructions++;
	switch (*p1++)
	{
	case Cbol:
	case Ceol:
	case Cbegbuf:
	case Cendbuf:
	case Cwordbeg:
	case Cwordend:
	case Cwordbound:
	case Cnotwordbound:
	{
		goto loop_p1;
	}
	case Cstart_memory:
	case Cend_memory:
	{
		p1++;
		goto loop_p1;
	}
	case Cexact:
	{
		ch = (unsigned char)*p1++;
		if (map[(int)ch])
			goto make_normal_jump;
		break;
	}
	case Canychar:
	{
		for (b = 0; b < 256; b++)
			if (b != '\n' && map[b])
				goto make_normal_jump;
		break;
	}
	case Cset:
	{
		for (b = 0; b < 256; b++)
			if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
				goto make_normal_jump;
		p1 += 256/8;
		break;
	}
	default:
	{
		goto make_normal_jump;
	}
	}
	/* now we know that we can't backtrack. */
	while (p1 != p2 - 3)
	{
		num_instructions++;
		switch (*p1++)
		{
		case Cend:
		{
			return 0;
		}
		case Cbol:
		case Ceol:
		case Canychar:
		case Cbegbuf:
		case Cendbuf:
		case Cwordbeg:
		case Cwordend:
		case Cwordbound:
		case Cnotwordbound:
		{
			break;
		}
		case Cset:
		{
			p1 += 256/8;
			break;
		}
		case Cexact:
		case Cstart_memory:
		case Cend_memory:
		case Cmatch_memory:
		case Csyntaxspec:
		case Cnotsyntaxspec:
		{
			p1++;
			break;
		}
		case Cjump:
		case Cstar_jump:
		case Cfailure_jump:
		case Cupdate_failure_jump:
		case Cdummy_failure_jump:
		{
			goto make_normal_jump;
		}
		default:
		{
			return 0;
		}
		}
	}
	
	/* make_update_jump: */
	code -= 3;
	a += 3;  /* jump to after the Cfailure_jump */
	code[0] = Cupdate_failure_jump;
	code[1] = a & 0xff;
	code[2] = a >> 8;
	if (num_instructions > 1)
		return 1;
	assert(num_instructions == 1);
	/* if the only instruction matches a single character, we can do
	 * better */
	p1 = code + 3 + a;   /* start of sole instruction */
	if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar ||
	    *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec)
		code[0] = Crepeat1;
	return 1;
	
  make_normal_jump:
	code -= 3;
	*code = Cjump;
	return 1;
}

static int re_optimize(regexp_t bufp)
{
	unsigned char *code;
	
	code = bufp->buffer;
	
	while(1)
	{
		switch (*code++)
		{
		case Cend:
		{
			return 1;
		}
		case Canychar:
		case Cbol:
		case Ceol:
		case Cbegbuf:
		case Cendbuf:
		case Cwordbeg:
		case Cwordend:
		case Cwordbound:
		case Cnotwordbound:
		{
			break;
		}
		case Cset:
		{
			code += 256/8;
			break;
		}
		case Cexact:
		case Cstart_memory:
		case Cend_memory:
		case Cmatch_memory:
		case Csyntaxspec:
		case Cnotsyntaxspec:
		{
			code++;
			break;
		}
		case Cstar_jump:
		{
			if (!re_optimize_star_jump(bufp, code))
			{
				return 0;
			}
			/* fall through */
		}
		case Cupdate_failure_jump:
		case Cjump:
		case Cdummy_failure_jump:
		case Cfailure_jump:
		case Crepeat1:
		{
			code += 2;
			break;
		}
		default:
		{
			return 0;
		}
		}
	}
}

#define NEXTCHAR(var) \
{ \
	if (pos >= size) \
		goto ends_prematurely; \
	(var) = regex[pos]; \
	pos++; \
}

#define ALLOC(amount) \
{ \
	  if (pattern_offset+(amount) > alloc) \
	  { \
		  alloc += 256 + (amount); \
		  pattern = realloc(pattern, alloc); \
		  if (!pattern) \
			  goto out_of_memory; \
	  } \
}

#define STORE(ch) pattern[pattern_offset++] = (ch)

#define CURRENT_LEVEL_START (starts[starts_base + current_level])

#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset

#define PUSH_LEVEL_STARTS \
if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
	starts_base += NUM_LEVELS; \
else \
	goto too_complex \

#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS

#define PUT_ADDR(offset,addr) \
{ \
	int disp = (addr) - (offset) - 2; \
	pattern[(offset)] = disp & 0xff; \
	pattern[(offset)+1] = (disp>>8) & 0xff; \
}

#define INSERT_JUMP(pos,type,addr) \
{ \
	int a, p = (pos), t = (type), ad = (addr); \
	for (a = pattern_offset - 1; a >= p; a--) \
		pattern[a + 3] = pattern[a]; \
	pattern[p] = t; \
	PUT_ADDR(p+1,ad); \
	pattern_offset += 3; \
}

#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))

#define SET_FIELDS \

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -