📄 regexp.c

📁 VIM文本编辑器
💻 C
📖 第 1 页 / 共 5 页
字号:
	EMSG_RETURN((char_u *)"\\{ follows nothing")
	/* NOTREACHED */
      case Magic('*'):
	if (reg_magic)
	    EMSG_RETURN((char_u *)"* follows nothing")
	else
	    EMSG_RETURN((char_u *)"\\* follows nothing")
	/* break; Not Reached */
      case Magic('~'):		/* previous substitute pattern */
	    if (reg_prev_sub)
	    {
		char_u	    *p;

		ret = regnode(EXACTLY);
		p = reg_prev_sub;
		while (*p)
		{
		    regc(*p++);
		}
		regc('\0');
		if (p - reg_prev_sub)
		{
		    *flagp |= HASWIDTH;
		    if ((p - reg_prev_sub) == 1)
			*flagp |= SIMPLE;
		}
	    }
	    else
		EMSG_RETURN(e_nopresub);
	    break;
      case Magic('1'):
      case Magic('2'):
      case Magic('3'):
      case Magic('4'):
      case Magic('5'):
      case Magic('6'):
      case Magic('7'):
      case Magic('8'):
      case Magic('9'):
	    {
	    int		    refnum;

	    ungetchr();
	    refnum = getchr() - Magic('0');
	    /*
	     * Check if the back reference is legal. We use the parentheses
	     * pointers to mark encountered close parentheses, but this
	     * is only available in the second pass. Checking opens is
	     * always possible.
	     * Should also check that we don't refer to something that
	     * is repeated (+*=): what instance of the repetition should
	     * we match? TODO.
	     */
	    if (refnum < regnpar &&
		(regendp == NULL || regendp[refnum] != NULL))
		ret = regnode(BACKREF + refnum);
	    else
		EMSG_RETURN((char_u *)"Illegal back reference");
	}
	break;
      case Magic('['):
	{
	    char_u	*p;

	    /*
	     * If there is no matching ']', we assume the '[' is a normal
	     * character. This makes ":help [" work.
	     */
	    p = skip_range(regparse);
	    if (*p == ']')	/* there is a matching ']' */
	    {
		/*
		 * In a character class, different parsing rules apply.
		 * Not even \ is special anymore, nothing is.
		 */
		if (*regparse == '^') {	    /* Complement of range. */
		    ret = regnode(ANYBUT);
		    regparse++;
		}
		else
		    ret = regnode(ANYOF);
		if (*regparse == ']' || *regparse == '-')
		    regc(*regparse++);
		while (*regparse != '\0' && *regparse != ']')
		{
		    if (*regparse == '-')
		    {
			regparse++;
			if (*regparse == ']' || *regparse == '\0')
			    regc('-');
			else
			{
			    int		cclass;
			    int		cclassend;

			    cclass = UCHARAT(regparse - 2) + 1;
			    cclassend = UCHARAT(regparse);
			    if (cclass > cclassend + 1)
				EMSG_RETURN(e_invrange);
			    for (; cclass <= cclassend; cclass++)
				regc(cclass);
			    regparse++;
			}
		    }
		    /*
		     * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
		     * accepts "\t", "\e", etc., but only when the 'l' flag in
		     * 'cpoptions' is not included.
		     */
		    else if (*regparse == '\\' &&
			    (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL ||
			     (!cpo_lit &&
			       vim_strchr(REGEXP_ABBR, regparse[1]) != NULL)))
		    {
			regparse++;
			regc(backslash_trans(*regparse++));
		    }
		    else if (*regparse == '[')
		    {
			int (*func)__ARGS((int));
			int cu;

			if ((func = skip_class_name(&regparse)) == NULL)
			    regc(*regparse++);
			else
			    /* Characters assumed to be 8 bits */
			    for (cu = 0; cu <= 255; cu++)
				if ((*func)(cu))
				    regc(cu);
		    }
		    else
			regc(*regparse++);
		}
		regc('\0');
		if (*regparse != ']')
		    EMSG_RETURN(e_toomsbra);
		skipchr();	    /* let's be friends with the lexer again */
		*flagp |= HASWIDTH | SIMPLE;
		break;
	    }
	}
	/* FALLTHROUGH */

      default:
	{
	    int		    len;
	    int		    chr;

	    ungetchr();
	    len = 0;
	    ret = regnode(EXACTLY);
	    /*
	     * Always take at least one character, for '[' without matching
	     * ']'.
	     */
	    while ((chr = peekchr()) != '\0' && (chr < Magic(0) || len == 0))
	    {
		regc(chr);
		skipchr();
		len++;
	    }
#ifdef DEBUG
	    if (len == 0)
		 EMSG_RETURN((char_u *)"Unexpected magic character; check META.");
#endif
	    /*
	     * If there is a following *, \+ or \= we need the character
	     * in front of it as a single character operand
	     */
	    if (len > 1 && re_ismult(chr))
	    {
		unregc();	    /* Back off of *+= operand */
		ungetchr();	    /* and put it back for next time */
		--len;
	    }
	    regc('\0');
	    *flagp |= HASWIDTH;
	    if (len == 1)
		*flagp |= SIMPLE;
	}
	break;
    }

    return ret;
}

/*
 * regnode - emit a node
 *
 * A node is three bytes: the opcode "op" followed by a two-byte "next"
 * field that starts out as zero.
 *
 * Returns the value regcode had on entry.  When regcode is JUST_CALC_SIZE
 * nothing is stored; regsize is increased by the three bytes instead.
 */
    static char_u *		/* Location. */
regnode(op)
    int		op;
{
    char_u  *node = regcode;

    if (node != JUST_CALC_SIZE)
    {
	node[0] = op;
	node[1] = '\0';		/* Null "next" pointer. */
	node[2] = '\0';
	regcode = node + 3;
    }
    else
	regsize += 3;

    return node;
}

/*
 * regc - emit (if appropriate) a byte of code
 *
 * When regcode is JUST_CALC_SIZE the byte is only counted in regsize;
 * otherwise "b" is stored at regcode and regcode moves one byte forward.
 */
    static void
regc(b)
    int		b;
{
    if (regcode == JUST_CALC_SIZE)
    {
	regsize++;
	return;
    }
    *regcode++ = b;
}

/*
 * unregc - take back (if appropriate) a byte of code
 *
 * Reverses one regc(): regsize is decremented when regcode is
 * JUST_CALC_SIZE, otherwise regcode moves one byte back.
 */
    static void
unregc()
{
    if (regcode == JUST_CALC_SIZE)
    {
	regsize--;
	return;
    }
    regcode--;
}

/*
 * reginsert - insert an operator in front of already-emitted operand
 *
 * Means relocating the operand: everything from "opnd" up to regcode is
 * moved three bytes up and a node (opcode "op" plus a zeroed two-byte
 * "next" field) is written where the operand used to start.
 * When regcode is JUST_CALC_SIZE only regsize is increased by three.
 */
    static void
reginsert(op, opnd)
    int		op;
    char_u     *opnd;
{
    char_u  *from;
    char_u  *to;

    if (regcode == JUST_CALC_SIZE)
    {
	regsize += 3;
	return;
    }

    /* Old and new locations overlap, so copy starting at the top. */
    from = regcode;
    to = regcode + 3;
    regcode = to;
    while (from > opnd)
    {
	--from;
	--to;
	*to = *from;
    }

    /* Op node, where operand used to be. */
    opnd[0] = op;
    opnd[1] = '\0';
    opnd[2] = '\0';
}

/*
 * reginsert_limits - insert an operator in front of already-emitted operand.
 * The operator has the given limit values as operands.  Also set next pointer.
 *
 * Means relocating the operand: everything from "opnd" up to regcode is
 * moved seven bytes up.  The seven bytes written at "opnd" are the opcode,
 * a zeroed two-byte "next" field, and the low 16 bits of minval and of
 * maxval, each stored high byte first.  Afterwards regtail() is called with
 * "opnd" and the address just past those seven bytes.
 * When regcode is JUST_CALC_SIZE only regsize is increased by seven.
 */
    static void
reginsert_limits(op, minval, maxval, opnd)
    int		op;
    int		minval;
    int		maxval;
    char_u     *opnd;
{
    char_u  *from;
    char_u  *to;

    if (regcode == JUST_CALC_SIZE)
    {
	regsize += 7;
	return;
    }

    /* Old and new locations overlap, so copy starting at the top. */
    from = regcode;
    to = regcode + 7;
    regcode = to;
    while (from > opnd)
    {
	--from;
	--to;
	*to = *from;
    }

    /* Op node, where operand used to be. */
    opnd[0] = op;
    opnd[1] = '\0';
    opnd[2] = '\0';
    opnd[3] = (char_u) (((unsigned)minval >> 8) & 0377);
    opnd[4] = (char_u) (minval & 0377);
    opnd[5] = (char_u) (((unsigned)maxval >> 8) & 0377);
    opnd[6] = (char_u) (maxval & 0377);
    regtail(opnd, opnd + 7);
}

/*
 * regtail - set the next-pointer at the end of a node chain
 *
 * Follows regnext() from "p" until it returns NULL, then stores the
 * distance between that last node and "val" in the node's two-byte "next"
 * field, high byte first.  For a BACK node the distance is taken as
 * (node - val), for every other node as (val - node).
 * Does nothing when "p" is JUST_CALC_SIZE.
 */
    static void
regtail(p, val)
    char_u	   *p;
    char_u	   *val;
{
    char_u  *last;
    char_u  *following;
    int	    dist;

    if (p == JUST_CALC_SIZE)
	return;

    /* Find last node. */
    last = p;
    while ((following = regnext(last)) != NULL)
	last = following;

    dist = (OP(last) == BACK) ? (int)(last - val) : (int)(val - last);
    last[1] = (char_u) (((unsigned)dist >> 8) & 0377);
    last[2] = (char_u) (dist & 0377);
}

/*
 * regoptail - regtail on operand of first argument; nop if operandless
 *
 * Only a BRANCH node or one of BRACE_COMPLEX .. BRACE_COMPLEX + 9 is
 * treated as having an operand.  A NULL or JUST_CALC_SIZE "p" is ignored.
 */
    static void
regoptail(p, val)
    char_u	   *p;
    char_u	   *val;
{
    if (p == NULL || p == JUST_CALC_SIZE)
	return;

    if (OP(p) == BRANCH
	    || (OP(p) >= BRACE_COMPLEX && OP(p) <= BRACE_COMPLEX + 9))
	regtail(OPERAND(p), val);
}

/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The variables below are the state of that lexer.  They are reset by
 * initchr() and updated by skipchr() and ungetchr(); -1 in one of the
 * character variables means "no character".
 */

/* static int	    curchr; */
static int	prevchr;    /* value of curchr before the last skipchr() */
static int	nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 */
static int	at_start;	/* True when on the first character */
static int	prev_at_start;  /* True when on the second character */

    static void
initchr(str)
    char_u *str;
{
    /* Start reading the pattern at "str". */
    regparse = str;

    /* No pushed-back, previous or current character yet. */
    nextchr = -1;
    prevchr = -1;
    curchr = -1;

    /* The first character is the next one to be read. */
    at_start = TRUE;
    prev_at_start = FALSE;
}

    static int
peekchr()
{
    if (curchr < 0)
    {
	switch (curchr = regparse[0])
	{
	case '.':
    /*	case '+':*/
    /*	case '=':*/
	case '[':
	case '~':
	    if (reg_magic)
		curchr = Magic(curchr);
	    break;
	case '*':
	    /* * is not magic as the very first character, eg "?*ptr" and when
	     * after '^', eg "/^*ptr" */
	    if (reg_magic && !at_start
				 && !(prev_at_start && prevchr == Magic('^')))
		curchr = Magic('*');
	    break;
	case '^':
	    /* ^ is only magic as the very first character */
	    if (at_start)
		curchr = Magic('^');
	    break;
	case '$':
	    /* $ is only magic as the very last char and in front of '\|' */
	    if (regparse[1] == NUL
			       || (regparse[1] == '\\' && regparse[2] == '|'))
		curchr = Magic('$');
	    break;
	case '\\':
	    regparse++;
	    if (regparse[0] == NUL)
	    {
		curchr = '\\';	/* trailing '\' */
		--regparse;	/* there is no extra character to skip */
	    }
	    else if (vim_strchr(META, regparse[0]))
	    {
		/*
		 * META contains everything that may be magic sometimes, except
		 * ^ and $ ("\^" and "\$" are never magic).
		 * We now fetch the next character and toggle its magicness.
		 * Therefore, \ is so meta-magic that it is not in META.
		 */
		curchr = -1;
		prev_at_start = at_start;
		at_start = FALSE;	/* be able to say "/\*ptr" */
		peekchr();
		curchr ^= Magic(0);
	    }
	    else if (vim_strchr(REGEXP_ABBR, regparse[0]))
	    {
		/*
		 * Handle abbreviations, like "\t" for TAB -- webb
		 */
		curchr = backslash_trans(regparse[0]);
	    }
	    else
	    {
		/*
		 * Next character can never be (made) magic?
		 * Then backslashing it won't do anything.
		 */
		curchr = regparse[0];
	    }
	    break;
	}
    }

    return curchr;
}

    static void
skipchr()
{
    /* Shift the character history: current becomes previous. */
    prevchr = curchr;
    curchr = nextchr;	    /* use previously unget char, or -1 */
    nextchr = -1;

    /* Same for the start-of-pattern flags. */
    prev_at_start = at_start;
    at_start = FALSE;

    ++regparse;
}

    static int
getchr()
{
    int c = peekchr();	    /* look at the current character ... */

    skipchr();		    /* ... and move past it */
    return c;
}

/*
 * put character back. Works only once!
 *
 * The current character is kept in nextchr, from where the next skipchr()
 * picks it up again, and the previous character becomes current.
 */
    static void
ungetchr()
{
    /*
     * Backup regparse as well; not because we will use what it points at,
     * but because skipchr() will bump it again.
     */
    --regparse;

    /* Step the start-of-pattern flags one position back. */
    at_start = prev_at_start;
    prev_at_start = FALSE;

    /* Step the character history one position back. */
    nextchr = curchr;
    curchr = prevchr;
}

/*
 * read_limits - Read two integers to be taken as a minimum and maximum.
 * If the first character is '-', then the range is reversed.
 * Should end with 'end'.  If minval is missing, zero is default, if maxval is
 * missing, a very big number is the default.
 */
    static int
read_limits(start, end, minval, maxval)
    int	    start;
    int	    end;
    int	    *minval;
    int	    *maxval;
{
    int	    reverse = FALSE;
    char_u  *first_char;

    if (*regparse == '-')
    {
	/* Starts with '-', so reverse the range later */
	regparse++;
	reverse = TRUE;
    }
    first_char = regparse;
    *minval = getdigits(&regparse);
    if (*regparse == ',')	    /* There is a comma */
    {
	if (isdigit(*++regparse))
	    *maxval = getdigits(&regparse);
	else
	    *maxval = MAX_LIMIT;
    }
    else if (isdigit(*first_char))
	*maxval = *minval;	    /* It was \{n} or \{-n} */
    else
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -