📄 regexp.c
字号:
*maxval = MAX_LIMIT; /* It was \{} or \{-} */
if (*regparse == '\\')
regparse++; /* Allow either \{...} or \{...\} */
if ( (*regparse != end && *regparse != NUL)
|| (*maxval == 0 && *minval == 0))
{
sprintf((char *)IObuff, "Syntax error in \\%c...%c", start, end);
emsg(IObuff);
rc_did_emsg = TRUE;
return FAIL;
}
/*
* Reverse the range if there was a '-', or make sure it is in the right
* order otherwise.
*/
if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
{
int tmp;
tmp = *minval;
*minval = *maxval;
*maxval = tmp;
}
skipchr(); /* let's be friends with the lexer again */
return OK;
}
/*
 * vim_regexec and friends
 */

/*
 * Global work variables for vim_regexec().
 *
 * regtry() loads these for every match attempt: reginput is set to the
 * position being tried, and regstartp/regendp are pointed at the startp/endp
 * arrays of the program being executed.  regmatch() then advances reginput
 * while it consumes input.
 */
static char_u *reginput;        /* String-input pointer. */
static char_u *regbol;          /* Beginning of input, for ^ check. */
static char_u **regstartp;      /* Pointer to startp array. */
static char_u **regendp;        /* Ditto for endp. */
static int need_clear_subexpr;  /* *regstartp and *regendp still need
                                   to be cleared */

/* Matching helpers that are private to this file. */
static int regtry __ARGS((vim_regexp *, char_u *));
static void clear_subexpr __ARGS((void));
static int regmatch __ARGS((char_u *));
static int regrepeat __ARGS((char_u *));

#ifdef DEBUG
int regnarrate = 0;             /* non-zero: regmatch() reports every node it
                                   visits */
#endif
/*
 * vim_regexec - match a regexp against a string
 *
 * "prog" is the compiled program, "string" the text to search.  When "at_bol"
 * is non-zero the start of "string" counts as the beginning of a line, so
 * that ^ can match there; otherwise ^ cannot match at all.
 * Uses current value of reg_ic.
 *
 * Return non-zero if there is a match.  Returns 0 when there is no match,
 * and also (after giving an error message and setting rc_did_emsg) when an
 * argument is NULL or the program does not start with MAGIC.
 */
int
vim_regexec(prog, string, at_bol)
    vim_regexp *prog;
    char_u *string;
    int at_bol;
{
    char_u *pos;

    /* Refuse to work with missing arguments. */
    if (prog == NULL || string == NULL)
    {
        emsg(e_null);
        rc_did_emsg = TRUE;
        return 0;
    }

    /* A program that does not begin with MAGIC is corrupted. */
    if (UCHARAT(prog->program) != MAGIC)
    {
        emsg(e_re_corr);
        rc_did_emsg = TRUE;
        return 0;
    }

    /*
     * Cheap rejection: when the program has a "must appear" string, it has to
     * occur somewhere in the input, otherwise no match is possible.
     */
    if (prog->regmust != NULL)
    {
        for (pos = cstrchr(string, prog->regmust[0]); pos != NULL;
                                pos = cstrchr(pos + 1, prog->regmust[0]))
            if (cstrncmp(pos, prog->regmust, prog->regmlen) == 0)
                break;          /* Found it. */
        if (pos == NULL)        /* Not present. */
            return 0;
    }

    /* Remember where ^ may match; NULL means it can't match anywhere. */
    regbol = at_bol ? string : NULL;

    /* Anchored pattern: the start of the string is the only candidate. */
    if (prog->reganch)
    {
        if (prog->regstart == '\0'
                || prog->regstart == string[0]
                || (reg_ic
                    && TO_LOWER(prog->regstart) == TO_LOWER(string[0])))
            return regtry(prog, string);
        return 0;               /* First character can't match. */
    }

    /* Unanchored pattern: try successive start positions. */
    if (prog->regstart != '\0')
    {
        /* The first character is known, only try where it occurs. */
        for (pos = cstrchr(string, prog->regstart); pos != NULL;
                                pos = cstrchr(pos + 1, prog->regstart))
            if (regtry(prog, pos))
                return 1;
    }
    else
    {
        /* General case: every position, including the terminating NUL. */
        pos = string;
        for (;;)
        {
            if (regtry(prog, pos))
                return 1;
            if (*pos == '\0')
                break;
            ++pos;
        }
    }

    /* Nothing matched. */
    return 0;
}
/*
 * regtry - try match at specific point
 *
 * Sets up the global work variables for "prog" and runs regmatch() on the
 * node following the first byte of the program, starting at "string".
 * On success startp[0] is set to "string" and endp[0] to the value reginput
 * has when regmatch() returns.
 *
 * Returns 1 for a match, 0 for failure.
 */
static int
regtry(prog, string)
    vim_regexp *prog;
    char_u *string;
{
    /* Point the work variables at this attempt. */
    reginput = string;
    regstartp = prog->startp;
    regendp = prog->endp;
    need_clear_subexpr = TRUE;  /* zeroing the arrays is postponed */

    if (!regmatch(prog->program + 1))
        return 0;

    /* Make sure the arrays were zeroed before storing the whole match. */
    clear_subexpr();
    prog->startp[0] = string;
    prog->endp[0] = reginput;
    return 1;
}
/*
 * Zero the NSUBEXP entries of the regstartp and regendp arrays, unless this
 * was already done since need_clear_subexpr was last set.
 * Postponing the work until a subexpression is actually used avoids the
 * memset calls for attempts that never need them (to increase speed).
 */
static void
clear_subexpr()
{
    if (!need_clear_subexpr)
        return;                 /* already cleared */

    vim_memset(regstartp, 0, sizeof(char_u *) * NSUBEXP);
    vim_memset(regendp, 0, sizeof(char_u *) * NSUBEXP);
    need_clear_subexpr = FALSE;
}
/*
* regmatch - main matching routine
*
* Conceptually the strategy is simple: Check to see whether the current
* node matches, call self recursively to see whether the rest matches,
* and then act accordingly. In practice we make some effort to avoid
* recursion, in particular by going through "ordinary" nodes (that don't
* need to know whether the rest of the match failed) by a loop instead of
* by recursion.
*/
static int /* 0 failure, 1 success */
regmatch(prog)
char_u *prog;
{
char_u *scan; /* Current node. */
char_u *next; /* Next node. */
int minval = -1;
int maxval = -1;
scan = prog;
#ifdef DEBUG
if (scan != NULL && regnarrate)
{
mch_errmsg(regprop(scan));
mch_errmsg("(\n");
}
#endif
while (scan != NULL)
{
#ifdef DEBUG
if (regnarrate)
{
mch_errmsg(regprop(scan));
mch_errmsg("...\n");
}
#endif
next = regnext(scan);
switch (OP(scan))
{
case BOL:
if (reginput != regbol)
return 0;
break;
case EOL:
if (*reginput != '\0')
return 0;
break;
case BOW: /* \<word; reginput points to w */
if (reginput != regbol && vim_iswordc(reginput[-1]))
return 0;
if (!reginput[0] || !vim_iswordc(reginput[0]))
return 0;
break;
case EOW: /* word\>; reginput points after d */
if (reginput == regbol || !vim_iswordc(reginput[-1]))
return 0;
if (reginput[0] && vim_iswordc(reginput[0]))
return 0;
break;
case ANY:
if (*reginput == '\0')
return 0;
reginput++;
break;
case IDENT:
if (!vim_isIDc(*reginput))
return 0;
reginput++;
break;
case KWORD:
if (!vim_iswordc(*reginput))
return 0;
reginput++;
break;
case FNAME:
if (!vim_isfilec(*reginput))
return 0;
reginput++;
break;
case PRINT:
if (charsize(*reginput) != 1)
return 0;
reginput++;
break;
case SIDENT:
if (isdigit(*reginput) || !vim_isIDc(*reginput))
return 0;
reginput++;
break;
case SWORD:
if (isdigit(*reginput) || !vim_iswordc(*reginput))
return 0;
reginput++;
break;
case SFNAME:
if (isdigit(*reginput) || !vim_isfilec(*reginput))
return 0;
reginput++;
break;
case SPRINT:
if (isdigit(*reginput) || charsize(*reginput) != 1)
return 0;
reginput++;
break;
case WHITE:
if (!vim_iswhite(*reginput))
return 0;
reginput++;
break;
case NWHITE:
if (*reginput == NUL || vim_iswhite(*reginput))
return 0;
reginput++;
break;
case DIGIT:
if (!ri_digit(*reginput))
return 0;
reginput++;
break;
case NDIGIT:
if (*reginput == NUL || ri_digit(*reginput))
return 0;
reginput++;
break;
case HEX:
if (!ri_hex(*reginput))
return 0;
reginput++;
break;
case NHEX:
if (*reginput == NUL || ri_hex(*reginput))
return 0;
reginput++;
break;
case OCTAL:
if (!ri_octal(*reginput))
return 0;
reginput++;
break;
case NOCTAL:
if (*reginput == NUL || ri_octal(*reginput))
return 0;
reginput++;
break;
case WORD:
if (!ri_word(*reginput))
return 0;
reginput++;
break;
case NWORD:
if (*reginput == NUL || ri_word(*reginput))
return 0;
reginput++;
break;
case HEAD:
if (!ri_head(*reginput))
return 0;
reginput++;
break;
case NHEAD:
if (*reginput == NUL || ri_head(*reginput))
return 0;
reginput++;
break;
case ALPHA:
if (!ri_alpha(*reginput))
return 0;
reginput++;
break;
case NALPHA:
if (*reginput == NUL || ri_alpha(*reginput))
return 0;
reginput++;
break;
case LOWER:
if (!ri_lower(*reginput))
return 0;
reginput++;
break;
case NLOWER:
if (*reginput == NUL || ri_lower(*reginput))
return 0;
reginput++;
break;
case UPPER:
if (!ri_upper(*reginput))
return 0;
reginput++;
break;
case NUPPER:
if (*reginput == NUL || ri_upper(*reginput))
return 0;
reginput++;
break;
case EXACTLY:
{
int len;
char_u *opnd;
opnd = OPERAND(scan);
/* Inline the first character, for speed. */
if (*opnd != *reginput
&& (!reg_ic || TO_LOWER(*opnd) != TO_LOWER(*reginput)))
return 0;
len = STRLEN(opnd);
if (len > 1 && cstrncmp(opnd, reginput, len) != 0)
return 0;
reginput += len;
}
break;
case ANYOF:
if (*reginput == '\0' || cstrchr(OPERAND(scan), *reginput) == NULL)
return 0;
reginput++;
break;
case ANYBUT:
if (*reginput == '\0' || cstrchr(OPERAND(scan), *reginput) != NULL)
return 0;
reginput++;
break;
case NOTHING:
break;
case BACK:
break;
case MOPEN + 1:
case MOPEN + 2:
case MOPEN + 3:
case MOPEN + 4:
case MOPEN + 5:
case MOPEN + 6:
case MOPEN + 7:
case MOPEN + 8:
case MOPEN + 9:
{
int no;
char_u *save;
clear_subexpr();
no = OP(scan) - MOPEN;
save = regstartp[no];
regstartp[no] = reginput; /* Tentatively */
#ifdef DEBUG
if (regnarrate)
printf("MOPEN %d pre @'%s' ('%s' )'%s'\n",
no, save,
regstartp[no] ? (char *)regstartp[no] : "NULL",
regendp[no] ? (char *)regendp[no] : "NULL");
#endif
if (regmatch(next))
{
#ifdef DEBUG
if (regnarrate)
printf("MOPEN %d post @'%s' ('%s' )'%s'\n",
no, save,
regstartp[no] ? (char *)regstartp[no] : "NULL",
regendp[no] ? (char *)regendp[no] : "NULL");
#endif
return 1;
}
regstartp[no] = save; /* We were wrong... */
return 0;
}
/* break; Not Reached */
case MCLOSE + 1:
case MCLOSE + 2:
case MCLOSE + 3:
case MCLOSE + 4:
case MCLOSE + 5:
case MCLOSE + 6:
case MCLOSE + 7:
case MCLOSE + 8:
case MCLOSE + 9:
{
int no;
char_u *save;
clear_subexpr();
no = OP(scan) - MCLOSE;
save = regendp[no];
regendp[no] = reginput; /* Tentatively */
#ifdef DEBUG
if (regnarrate)
printf("MCLOSE %d pre @'%s' ('%s' )'%s'\n",
no, save,
regstartp[no] ? (char *)regstartp[no] : "NULL",
regendp[no] ? (char *)regendp[no] : "NULL");
#endif
if (regmatch(next))
{
#ifdef DEBUG
if (regnarrate)
printf("MCLOSE %d post @'%s' ('%s' )'%s'\n",
no, save,
regstartp[no] ? (char *)regstartp[no] : "NULL",
regendp[no] ? (char *)regendp[no] : "NULL");
#endif
return 1;
}
regendp[no] = save; /* We were wrong... */
return 0;
}
/* break; Not Reached */
case BACKREF + 1:
case BACKREF + 2:
case BACKREF + 3:
case BACKREF + 4:
case BACKREF + 5:
case BACKREF + 6:
case BACKREF + 7:
case BACKREF + 8:
case BACKREF + 9:
{
int no;
int len;
clear_subexpr();
no = OP(scan) - BACKREF;
if (regendp[no] != NULL)
{
len = (int)(regendp[no] - regstartp[no]);
if (cstrncmp(regstartp[no], reginput, len) != 0)
return 0;
reginput += len;
}
else
{
/*emsg("backref to 0-repeat");*/
/*return 0;*/
}
}
break;
case BRANCH:
{
char_u *save;
if (OP(next) != BRANCH) /* No choice. */
next = OPERAND(scan); /* Avoid recursion. */
else
{
do
{
save = reginput;
if (regmatch(OPERAND(scan)))
return 1;
reginput = save;
scan = regnext(scan);
} while (scan != NULL && OP(scan) == BRANCH);
return 0;
/* NOTREACHED */
}
}
break;
case BRACE_LIMITS:
{
int no;
if (OP(next) == BRACE_SIMPLE)
{
minval = OPERAND_MIN(scan);
maxval = OPERAND_MAX(scan);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -