📄 regexp.c
字号:
EMSG_RETURN((char_u *)"\\{ follows nothing")
/* NOTREACHED */
case Magic('*'):
if (reg_magic)
EMSG_RETURN((char_u *)"* follows nothing")
else
EMSG_RETURN((char_u *)"\\* follows nothing")
/* break; Not Reached */
case Magic('~'): /* previous substitute pattern */
if (reg_prev_sub)
{
char_u *p;
ret = regnode(EXACTLY);
p = reg_prev_sub;
while (*p)
{
regc(*p++);
}
regc('\0');
if (p - reg_prev_sub)
{
*flagp |= HASWIDTH;
if ((p - reg_prev_sub) == 1)
*flagp |= SIMPLE;
}
}
else
EMSG_RETURN(e_nopresub);
break;
case Magic('1'):
case Magic('2'):
case Magic('3'):
case Magic('4'):
case Magic('5'):
case Magic('6'):
case Magic('7'):
case Magic('8'):
case Magic('9'):
{
int refnum;
ungetchr();
refnum = getchr() - Magic('0');
/*
* Check if the back reference is legal. We use the parentheses
* pointers to mark encountered close parentheses, but this
* is only available in the second pass. Checking opens is
* always possible.
* Should also check that we don't refer to something that
* is repeated (+*=): what instance of the repetition should
* we match? TODO.
*/
if (refnum < regnpar &&
(regendp == NULL || regendp[refnum] != NULL))
ret = regnode(BACKREF + refnum);
else
EMSG_RETURN((char_u *)"Illegal back reference");
}
break;
case Magic('['):
{
char_u *p;
/*
* If there is no matching ']', we assume the '[' is a normal
* character. This makes ":help [" work.
*/
p = skip_range(regparse);
if (*p == ']') /* there is a matching ']' */
{
/*
* In a character class, different parsing rules apply.
* Not even \ is special anymore, nothing is.
*/
if (*regparse == '^') { /* Complement of range. */
ret = regnode(ANYBUT);
regparse++;
}
else
ret = regnode(ANYOF);
if (*regparse == ']' || *regparse == '-')
regc(*regparse++);
while (*regparse != '\0' && *regparse != ']')
{
if (*regparse == '-')
{
regparse++;
if (*regparse == ']' || *regparse == '\0')
regc('-');
else
{
int cclass;
int cclassend;
cclass = UCHARAT(regparse - 2) + 1;
cclassend = UCHARAT(regparse);
if (cclass > cclassend + 1)
EMSG_RETURN(e_invrange);
for (; cclass <= cclassend; cclass++)
regc(cclass);
regparse++;
}
}
/*
* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
* accepts "\t", "\e", etc., but only when the 'l' flag in
* 'cpoptions' is not included.
*/
else if (*regparse == '\\' &&
(vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL ||
(!cpo_lit &&
vim_strchr(REGEXP_ABBR, regparse[1]) != NULL)))
{
regparse++;
regc(backslash_trans(*regparse++));
}
else if (*regparse == '[')
{
int (*func)__ARGS((int));
int cu;
if ((func = skip_class_name(®parse)) == NULL)
regc(*regparse++);
else
/* Characters assumed to be 8 bits */
for (cu = 0; cu <= 255; cu++)
if ((*func)(cu))
regc(cu);
}
else
regc(*regparse++);
}
regc('\0');
if (*regparse != ']')
EMSG_RETURN(e_toomsbra);
skipchr(); /* let's be friends with the lexer again */
*flagp |= HASWIDTH | SIMPLE;
break;
}
}
/* FALLTHROUGH */
default:
{
int len;
int chr;
ungetchr();
len = 0;
ret = regnode(EXACTLY);
/*
* Always take at least one character, for '[' without matching
* ']'.
*/
while ((chr = peekchr()) != '\0' && (chr < Magic(0) || len == 0))
{
regc(chr);
skipchr();
len++;
}
#ifdef DEBUG
if (len == 0)
EMSG_RETURN((char_u *)"Unexpected magic character; check META.");
#endif
/*
* If there is a following *, \+ or \= we need the character
* in front of it as a single character operand
*/
if (len > 1 && re_ismult(chr))
{
unregc(); /* Back off of *+= operand */
ungetchr(); /* and put it back for next time */
--len;
}
regc('\0');
*flagp |= HASWIDTH;
if (len == 1)
*flagp |= SIMPLE;
}
break;
}
return ret;
}
/*
 * regnode - emit a node
 *
 * A node header is three bytes: the opcode "op" followed by two zero bytes
 * that form a null "next" pointer.  The header is written at regcode and
 * regcode is advanced past it.  When regcode equals JUST_CALC_SIZE nothing
 * is written and regsize is increased by three instead.
 *
 * Returns the value regcode had on entry: the location of the new node, or
 * JUST_CALC_SIZE.
 */
static char_u *
regnode(op)
    int op;
{
    char_u *node = regcode;

    if (node != JUST_CALC_SIZE)
    {
        node[0] = op;
        node[1] = '\0';     /* Null "next" pointer. */
        node[2] = '\0';
        regcode = node + 3;
    }
    else
        regsize += 3;       /* only account for the node header */

    return node;
}
/*
 * regc - emit (if appropriate) a byte of code
 *
 * When regcode equals JUST_CALC_SIZE only regsize is incremented; otherwise
 * "b" is stored at regcode and regcode moves one byte forward.
 */
static void
regc(b)
    int b;
{
    if (regcode == JUST_CALC_SIZE)
    {
        regsize++;
        return;
    }
    *regcode++ = b;
}
/*
 * unregc - take back (if appropriate) a byte of code
 *
 * The counterpart of regc(): decrements regsize when regcode equals
 * JUST_CALC_SIZE, otherwise moves regcode one byte back.
 */
static void
unregc()
{
    if (regcode == JUST_CALC_SIZE)
        regsize--;
    else
        regcode--;
}
/*
 * reginsert - insert an operator in front of already-emitted operand
 *
 * The bytes from "opnd" up to the current regcode are moved three bytes
 * up, and a node header (opcode "op" plus a null "next" pointer) is written
 * where the operand used to start.  regcode is advanced by three.
 * When regcode equals JUST_CALC_SIZE only regsize is increased by three.
 */
static void
reginsert(op, opnd)
    int     op;
    char_u  *opnd;
{
    char_u  *from;
    char_u  *to;

    if (regcode == JUST_CALC_SIZE)
    {
        regsize += 3;
        return;
    }

    /*
     * Relocate the operand.  Source and destination overlap, so copy from
     * the last byte down to the first.
     */
    from = regcode;
    to = from + 3;
    regcode = to;
    while (from > opnd)
    {
        --to;
        --from;
        *to = *from;
    }

    /* Op node, where operand used to be. */
    opnd[0] = op;
    opnd[1] = '\0';
    opnd[2] = '\0';
}
/*
 * reginsert_limits - insert an operator in front of already-emitted operand.
 * The operator has the given limit values as operands.  Also set next pointer.
 *
 * The inserted node takes seven bytes: the opcode "op", a two-byte "next"
 * pointer, then "minval" and "maxval" each stored as two bytes with the
 * high byte first.  The operand starting at "opnd" is moved seven bytes up
 * to make room, and regtail() is then called on the new node with the
 * address just past those seven bytes.
 * When regcode equals JUST_CALC_SIZE only regsize is increased by seven.
 */
static void
reginsert_limits(op, minval, maxval, opnd)
    int     op;
    int     minval;
    int     maxval;
    char_u  *opnd;
{
    char_u  *from;
    char_u  *to;
    char_u  *node;

    if (regcode == JUST_CALC_SIZE)
    {
        regsize += 7;
        return;
    }

    /* Relocate the operand; the regions overlap, so copy backwards. */
    from = regcode;
    to = from + 7;
    regcode = to;
    while (from > opnd)
    {
        --to;
        --from;
        *to = *from;
    }

    /* Op node, where operand used to be. */
    node = opnd;
    node[0] = op;
    node[1] = '\0';
    node[2] = '\0';
    node[3] = (char_u) (((unsigned)minval >> 8) & 0377);
    node[4] = (char_u) (minval & 0377);
    node[5] = (char_u) (((unsigned)maxval >> 8) & 0377);
    node[6] = (char_u) (maxval & 0377);

    regtail(opnd, node + 7);
}
/*
 * regtail - set the next-pointer at the end of a node chain
 *
 * Starting at "p", follow regnext() until it returns NULL; that node is the
 * last one of the chain.  The distance between that node and "val" is then
 * stored in its bytes 1 and 2, high byte first.  For a BACK node the
 * distance is measured from "val" up to the node, otherwise from the node
 * up to "val".
 * Does nothing when "p" equals JUST_CALC_SIZE.
 */
static void
regtail(p, val)
    char_u  *p;
    char_u  *val;
{
    char_u  *last;
    char_u  *next;
    int     dist;

    if (p == JUST_CALC_SIZE)
        return;

    /* Find last node. */
    last = p;
    while ((next = regnext(last)) != NULL)
        last = next;

    dist = (OP(last) == BACK) ? (int)(last - val) : (int)(val - last);

    last[1] = (char_u) (((unsigned)dist >> 8) & 0377);
    last[2] = (char_u) (dist & 0377);
}
/*
 * regoptail - regtail on operand of first argument; nop if operandless
 *
 * Only BRANCH and the opcodes BRACE_COMPLEX .. BRACE_COMPLEX + 9 are treated
 * as having an operand.  For those, regtail() is applied to OPERAND(p).
 * Nothing happens when "p" is NULL, equals JUST_CALC_SIZE, or has any other
 * opcode.
 */
static void
regoptail(p, val)
    char_u  *p;
    char_u  *val;
{
    if (p == NULL || p == JUST_CALC_SIZE)
        return;

    if (OP(p) == BRANCH
            || (OP(p) >= BRACE_COMPLEX && OP(p) <= BRACE_COMPLEX + 9))
        regtail(OPERAND(p), val);
}
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The variables below are the state of that lexical analyzer.  They are
 * reset by initchr(), advanced by skipchr() and stepped back once by
 * ungetchr().  A value of -1 in curchr means "not computed yet": peekchr()
 * then works it out from the pattern text.
 */
/* static int curchr;  -- NOTE(review): not declared here; presumably declared elsewhere in the file, confirm */
static int prevchr; /* value curchr had before the last skipchr() */
static int nextchr; /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 * NOTE(review): this looks like the reason the start of the pattern is
 * tracked with the explicit flags below instead of testing prevchr for -1.
 */
static int at_start; /* True when on the first character */
static int prev_at_start; /* True when on the second character */
/*
 * initchr - start reading the pattern "str"
 *
 * Points regparse at the pattern and resets the lexer state: there is no
 * current, previous or put-back character, and the next character read is
 * the first one of the pattern.
 */
static void
initchr(str)
    char_u *str;
{
    regparse = str;

    nextchr = -1;
    prevchr = -1;
    curchr = -1;

    prev_at_start = FALSE;
    at_start = TRUE;
}
static int
peekchr()
{
if (curchr < 0)
{
switch (curchr = regparse[0])
{
case '.':
/* case '+':*/
/* case '=':*/
case '[':
case '~':
if (reg_magic)
curchr = Magic(curchr);
break;
case '*':
/* * is not magic as the very first character, eg "?*ptr" and when
* after '^', eg "/^*ptr" */
if (reg_magic && !at_start
&& !(prev_at_start && prevchr == Magic('^')))
curchr = Magic('*');
break;
case '^':
/* ^ is only magic as the very first character */
if (at_start)
curchr = Magic('^');
break;
case '$':
/* $ is only magic as the very last char and in front of '\|' */
if (regparse[1] == NUL
|| (regparse[1] == '\\' && regparse[2] == '|'))
curchr = Magic('$');
break;
case '\\':
regparse++;
if (regparse[0] == NUL)
{
curchr = '\\'; /* trailing '\' */
--regparse; /* there is no extra character to skip */
}
else if (vim_strchr(META, regparse[0]))
{
/*
* META contains everything that may be magic sometimes, except
* ^ and $ ("\^" and "\$" are never magic).
* We now fetch the next character and toggle its magicness.
* Therefore, \ is so meta-magic that it is not in META.
*/
curchr = -1;
prev_at_start = at_start;
at_start = FALSE; /* be able to say "/\*ptr" */
peekchr();
curchr ^= Magic(0);
}
else if (vim_strchr(REGEXP_ABBR, regparse[0]))
{
/*
* Handle abbreviations, like "\t" for TAB -- webb
*/
curchr = backslash_trans(regparse[0]);
}
else
{
/*
* Next character can never be (made) magic?
* Then backslashing it won't do anything.
*/
curchr = regparse[0];
}
break;
}
}
return curchr;
}
/*
 * skipchr - consume the current character
 *
 * Saves the current character and start flag so that ungetchr() can restore
 * them, advances regparse by one, and makes a character previously put back
 * by ungetchr() (or -1 when there is none) the current one.
 */
static void
skipchr()
{
    /* Remember what ungetchr() needs to restore. */
    prevchr = curchr;
    prev_at_start = at_start;

    /* Step forward in the pattern. */
    at_start = FALSE;
    regparse++;

    /* use previously unget char, or -1 */
    curchr = nextchr;
    nextchr = -1;
}
/*
 * getchr - return the current pattern character and consume it
 *
 * Equivalent to peekchr() followed by skipchr().
 */
static int
getchr()
{
    int c = peekchr();

    skipchr();
    return c;
}
/*
 * ungetchr - put character back.  Works only once!
 *
 * The current character is parked in nextchr, from where the next skipchr()
 * picks it up again, and the character and start flag saved by the last
 * skipchr() become current.  Only one level of history is kept, hence a
 * second call in a row cannot restore anything further.
 */
static void
ungetchr()
{
    /*
     * Backup regparse as well; not because we will use what it points at,
     * but because skipchr() will bump it again.
     */
    regparse--;

    nextchr = curchr;
    curchr = prevchr;

    at_start = prev_at_start;
    prev_at_start = FALSE;
}
/*
* read_limits - Read two integers to be taken as a minimum and maximum.
* If the first character is '-', then the range is reversed.
* Should end with 'end'. If minval is missing, zero is default, if maxval is
* missing, a very big number is the default.
*/
static int
read_limits(start, end, minval, maxval)
int start;
int end;
int *minval;
int *maxval;
{
int reverse = FALSE;
char_u *first_char;
if (*regparse == '-')
{
/* Starts with '-', so reverse the range later */
regparse++;
reverse = TRUE;
}
first_char = regparse;
*minval = getdigits(®parse);
if (*regparse == ',') /* There is a comma */
{
if (isdigit(*++regparse))
*maxval = getdigits(®parse);
else
*maxval = MAX_LIMIT;
}
else if (isdigit(*first_char))
*maxval = *minval; /* It was \{n} or \{-n} */
else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -