📄 regexpr.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
	unsigned char *textstart;
	unsigned char *textend;
	int a;
	int b;
	int ch;
	int reg;
	int match_end;
	unsigned char *regstart;
	unsigned char *regend;
	int regsize;
	match_state state;
  
	assert(pos >= 0 && size >= 0);
	assert(pos <= size);
  
	text = string + pos;
	textstart = string;
	textend = string + size;
  
	code = bufp->buffer;
  
	translate = bufp->translate;
  
	NEW_STATE(state, bufp->num_registers);

  continue_matching:
	switch (*code++)
	{
	case Cend:
	{
		match_end = text - textstart;
		if (old_regs)
		{
			old_regs->start[0] = pos;
			old_regs->end[0] = match_end;
			if (!bufp->uses_registers)
			{
				for (a = 1; a < RE_NREGS; a++)
				{
					old_regs->start[a] = -1;
					old_regs->end[a] = -1;
				}
			}
			else
			{
				for (a = 1; a < bufp->num_registers; a++)
				{
					if ((GET_REG_START(state, a) == NULL) ||
					    (GET_REG_END(state, a) == NULL))
					{
						old_regs->start[a] = -1;
						old_regs->end[a] = -1;
						continue;
					}
					old_regs->start[a] = GET_REG_START(state, a) - textstart;
					old_regs->end[a] = GET_REG_END(state, a) - textstart;
				}
				for (; a < RE_NREGS; a++)
				{
					old_regs->start[a] = -1;
					old_regs->end[a] = -1;
				}
			}
		}
		FREE_STATE(state);
		return match_end - pos;
	}
	case Cbol:
	{
		if (text == textstart || text[-1] == '\n')
			goto continue_matching;
		goto fail;
	}
	case Ceol:
	{
		if (text == textend || *text == '\n')
			goto continue_matching;
		goto fail;
	}
	case Cset:
	{
		NEXTCHAR(ch);
		if (code[ch/8] & (1<<(ch & 7)))
		{
			code += 256/8;
			goto continue_matching;
		}
		goto fail;
	}
	case Cexact:
	{
		NEXTCHAR(ch);
		if (ch != (unsigned char)*code++)
			goto fail;
		goto continue_matching;
	}
	case Canychar:
	{
		NEXTCHAR(ch);
		if (ch == '\n')
			goto fail;
		goto continue_matching;
	}
	case Cstart_memory:
	{
		reg = *code++;
		SET_REG_START(state, reg, text, goto error);
		goto continue_matching;
	}
	case Cend_memory:
	{
		reg = *code++;
		SET_REG_END(state, reg, text, goto error);
		goto continue_matching;
	}
	case Cmatch_memory:
	{
		reg = *code++;
		regstart = GET_REG_START(state, reg);
		regend = GET_REG_END(state, reg);
		if ((regstart == NULL) || (regend == NULL))
			goto fail;  /* or should we just match nothing? */
		regsize = regend - regstart;

		if (regsize > (textend - text))
			goto fail;
		if(translate)
		{
			for (; regstart < regend; regstart++, text++)
				if (translate[*regstart] != translate[*text])
					goto fail;
		}
		else
			for (; regstart < regend; regstart++, text++)
				if (*regstart != *text)
					goto fail;
		goto continue_matching;
	}
	case Cupdate_failure_jump:
	{
		UPDATE_FAILURE(state, text, goto error);
		/* fall to next case */
	}
	/* treat Cstar_jump just like Cjump if it hasn't been optimized */
	case Cstar_jump:
	case Cjump:
	{
		a = (unsigned char)*code++;
		a |= (unsigned char)*code++ << 8;
		code += (int)SHORT(a);
		if (code<bufp->buffer || bufp->buffer+bufp->used<code) {
		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cjump)");
			FREE_STATE(state);
            	        return -2;
         	}
		goto continue_matching;
	}
	case Cdummy_failure_jump:
	{
                unsigned char *failuredest;
	  
		a = (unsigned char)*code++;
		a |= (unsigned char)*code++ << 8;
		a = (int)SHORT(a);
		assert(*code == Cfailure_jump);
		b = (unsigned char)code[1];
		b |= (unsigned char)code[2] << 8;
                failuredest = code + (int)SHORT(b) + 3;
		if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) {
		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump failuredest)");
			FREE_STATE(state);
            	        return -2;
		}
		PUSH_FAILURE(state, failuredest, NULL, goto error);
		code += a;
		if (code<bufp->buffer || bufp->buffer+bufp->used < code) {
		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump code)");
			FREE_STATE(state);
            	        return -2;
         	}
		goto continue_matching;
	}
	case Cfailure_jump:
	{
		a = (unsigned char)*code++;
		a |= (unsigned char)*code++ << 8;
		a = (int)SHORT(a);
		if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) {
		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cfailure_jump)");
			FREE_STATE(state);
            	        return -2;
         	}
		PUSH_FAILURE(state, code + a, text, goto error);
		goto continue_matching;
	}
	case Crepeat1:
	{
		unsigned char *pinst;
		a = (unsigned char)*code++;
		a |= (unsigned char)*code++ << 8;
		a = (int)SHORT(a);
		pinst = code + a;
		if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) {
		        PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Crepeat1)");
			FREE_STATE(state);
            	        return -2;
         	}
		/* pinst is sole instruction in loop, and it matches a
		 * single character.  Since Crepeat1 was originally a
		 * Cupdate_failure_jump, we also know that backtracking
		 * is useless: so long as the single-character
		 * expression matches, it must be used.  Also, in the
		 * case of +, we've already matched one character, so +
		 * can't fail: nothing here can cause a failure.  */
		switch (*pinst++)
		{
		case Cset:
		  {
		        if (translate)
			{
				while (text < textend)
				{
					ch = translate[(unsigned char)*text];
					if (pinst[ch/8] & (1<<(ch & 7)))
						text++;
					else
						break;
				}
			}
			else
			{
				while (text < textend)
				{
					ch = (unsigned char)*text;
					if (pinst[ch/8] & (1<<(ch & 7)))
						text++;
					else
						break;
				}
			}
			break;
                }
		case Cexact:
		{
			ch = (unsigned char)*pinst;
			if (translate)
			{
				while (text < textend &&
				       translate[(unsigned char)*text] == ch)
					text++;
			}
			else
			{
				while (text < textend && (unsigned char)*text == ch)
					text++;
			}
			break;
		}
		case Canychar:
		{
			while (text < textend && (unsigned char)*text != '\n')
				text++;
			break;
		}
		case Csyntaxspec:
		{
			a = (unsigned char)*pinst;
			if (translate)
			{
				while (text < textend &&
				       (SYNTAX(translate[*text]) & a) )
					text++;
			}
			else
			{
				while (text < textend && (SYNTAX(*text) & a) )
					text++;
			}
			break;
		}
		case Cnotsyntaxspec:
		{
			a = (unsigned char)*pinst;
			if (translate)
			{
				while (text < textend &&
				       !(SYNTAX(translate[*text]) & a) )
					text++;
			}
			else
			{
				while (text < textend && !(SYNTAX(*text) & a) )
					text++;
			}
			break;
		}
		default:
		{
		        FREE_STATE(state);
		        PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
		        return -2;
			/*NOTREACHED*/
		}
		}
		/* Due to the funky way + and * are compiled, the top
		 * failure-stack entry at this point is actually a
		 * success entry -- update it and pop it. */
		UPDATE_FAILURE(state, text, goto error);
		goto fail;      /* i.e., succeed <wink/sigh> */
	}
	case Cbegbuf:
	{
		if (text == textstart)
			goto continue_matching;
		goto fail;
	}
	case Cendbuf:
	{
		if (text == textend)
			goto continue_matching;
		goto fail;
	}
	case Cwordbeg:
	{
		if (text == textend)
			goto fail;
		if (!(SYNTAX(*text) & Sword)) 
			goto fail;
		if (text == textstart)
			goto continue_matching;
		if (!(SYNTAX(text[-1]) & Sword))
			goto continue_matching;
		goto fail;
	}
	case Cwordend:
	{
		if (text == textstart)
			goto fail;
		if (!(SYNTAX(text[-1]) & Sword))
			goto fail;
		if (text == textend)
			goto continue_matching;
		if (!(SYNTAX(*text) & Sword))
		        goto continue_matching;
                goto fail;
	}
	case Cwordbound:
	{
		/* Note: as in gnu regexp, this also matches at the
		 * beginning and end of buffer.  */

		if (text == textstart || text == textend)
			goto continue_matching;
		if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
			goto continue_matching;
		goto fail;
	}
	case Cnotwordbound:
	{
		/* Note: as in gnu regexp, this never matches at the
		 * beginning and end of buffer.  */
		if (text == textstart || text == textend)
			goto fail;
		if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
      		        goto continue_matching;
		goto fail;
	}
	case Csyntaxspec:
	{
		NEXTCHAR(ch);
		if (!(SYNTAX(ch) & (unsigned char)*code++))
			goto fail;
		goto continue_matching;
	}
	case Cnotsyntaxspec:
	{
		NEXTCHAR(ch);
		if (SYNTAX(ch) & (unsigned char)*code++)
			goto fail;
		goto continue_matching;
	}
	default:
	{
	        FREE_STATE(state);
	        PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
		return -2;
		/*NOTREACHED*/
	}
	}
	
	

#if 0 /* This line is never reached --Guido */
	abort();
#endif
	/*
	 *NOTREACHED
	 */

	/* Using "break;" in the above switch statement is equivalent to "goto fail;" */
  fail:
	POP_FAILURE(state, code, text, goto done_matching, goto error);
	goto continue_matching;
  
  done_matching:
/*   if(translated != NULL) */
/*      free(translated); */
	FREE_STATE(state);
	return -1;

  error:
/*   if (translated != NULL) */
/*      free(translated); */
	FREE_STATE(state);
	return -2;
}
	

/* Retire the matcher's helper macros so they do not leak into the code
 * below.  NEXTCHAR is used by the opcode cases above; PREFETCH is
 * presumably its companion (its definition and uses are not visible
 * in this part of the file -- confirm). */
#undef PREFETCH
#undef NEXTCHAR

/*
 * re_search -- find a position in `string' at which `bufp' matches.
 *
 * Candidate start positions run from `pos' to `pos + range' inclusive;
 * a negative `range' makes the search run backwards.  Every candidate
 * is handed to re_match().  When the pattern has a usable fastmap, it
 * is consulted first to skip candidates whose first character cannot
 * begin a match.
 *
 *   bufp    compiled pattern (its fastmap is rebuilt here if stale)
 *   string  text buffer of `size' bytes
 *   pos     first candidate start position
 *   range   signed number of further candidates to try after `pos'
 *   regs    passed through unchanged to re_match()
 *
 * Returns the start position at which re_match() succeeded, -1 if no
 * candidate matched, or -2 if re_match() returned -2 or a Python error
 * is pending after re_compile_fastmap().
 */
int re_search(regexp_t bufp, unsigned char *string, int size, int pos,
              int range, regexp_registers_t regs)
{
	unsigned char *fastmap;
	unsigned char *translate;
	unsigned char *text;
	unsigned char *partstart;
	unsigned char *partend;
	int dir;
	int ret;
	unsigned char anchor;
  
	assert(size >= 0 && pos >= 0);
	assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
  
	fastmap = bufp->fastmap;
	translate = bufp->translate;
	if (fastmap && !bufp->fastmap_accurate) {
		re_compile_fastmap(bufp);
		if (PyErr_Occurred()) return -2;
	}
	
	anchor = bufp->anchor;
	if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
		fastmap = NULL;

	/* From here on `range' is a non-negative count of remaining
	 * steps and `dir' carries the direction. */
	if (range < 0)
	{
		dir = -1;
		range = -range;
	}
	else
		dir = 1;

	if (anchor == 2) {
		/* Only position 0 is ever tried for this anchor kind. */
		if (pos != 0)
			return -1;
		else
			range = 0;
	}

	for (; range >= 0; range--, pos += dir)
	{
		if (fastmap)
		{
			if (dir == 1)
			{ /* searching forwards */

				text = string + pos;
				/* Stop at the last permitted start position
				 * (pos + range), not at the end of the
				 * buffer: scanning further would drive
				 * `range' negative and still try a match
				 * outside the requested range. */
				partend = text + range;
				partstart = text;
				if (translate)
					while (text != partend &&
					       !fastmap[(unsigned char) translate[(unsigned char)*text]])
						text++;
				else
					while (text != partend && !fastmap[(unsigned char)*text])
						text++;
				pos += text - partstart;
				range -= text - partstart;
				/* Nothing left to look at and the pattern
				 * cannot match the empty string. */
				if (pos == size && bufp->can_be_null == 0)
					return -1;
			}
			else if (pos < size)
			{ /* searching backwards */
				/* At pos == size there is no character to
				 * look up (string[size] is past the buffer),
				 * so that position is left to re_match(). */
				text = string + pos;
				partstart = string + pos - range;
				partend = text;
				if (translate)
					while (text != partstart &&
					       !fastmap[(unsigned char)
						       translate[(unsigned char)*text]])
						text--;
				else
					while (text != partstart &&
					       !fastmap[(unsigned char)*text])
						text--;
				pos -= partend - text;
				range -= partend - text;
			}
		}
		if (anchor == 1)
		{ /* anchored to begline */
			if (pos > 0 && (string[pos - 1] != '\n'))
				continue;
		}
		assert(pos >= 0 && pos <= size);
		ret = re_match(bufp, string, size, pos, regs);
		if (ret >= 0)
			return pos;
		if (ret == -2)
			return -2;
	}
	return -1;
}

/*
** Local Variables:
** mode: c
** c-file-style: "python"
** End:
*/
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -