📄 pcre_compile.c

📁 Scheme跨平台编译器
💻 C
📖 第 1 页 / 共 5 页
字号:
return -1;}#endif/**************************************************            Check for counted repeat            **************************************************//* This function is called when a '{' is encountered in a place where it mightstart a quantifier. It looks ahead to see if it really is a quantifier or not.It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}where the ddds are digits.Arguments:  p         pointer to the first char after '{'Returns:    TRUE or FALSE*/static BOOLis_counted_repeat(const uschar *p){if ((digitab[*p++] & ctype_digit) == 0) return FALSE;while ((digitab[*p] & ctype_digit) != 0) p++;if (*p == '}') return TRUE;if (*p++ != ',') return FALSE;if (*p == '}') return TRUE;if ((digitab[*p++] & ctype_digit) == 0) return FALSE;while ((digitab[*p] & ctype_digit) != 0) p++;return (*p == '}');}/**************************************************         Read repeat counts                     **************************************************//* Read an item of the form {n,m} and return the values. This is called onlyafter is_counted_repeat() has confirmed that a repeat-count quantifier exists,so the syntax is guaranteed to be correct, but we need to check the values.Arguments:  p              pointer to first char after '{'  minp           pointer to int for min  maxp           pointer to int for max                 returned as -1 if no max  errorcodeptr   points to error code variableReturns:         pointer to '}' on success;                 current ptr on error, with errorcodeptr set non-zero*/static const uschar *read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr){int min = 0;int max = -1;/* Read the minimum value and do a paranoid check: a negative value indicatesan integer overflow. */while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';if (min < 0 || min > 65535)  {  *errorcodeptr = ERR5;  return p;  }/* Read the maximum value if there is one, and again do a paranoid on its size.Also, max must not be less than min. */if (*p == '}') max = min; else  {  if (*(++p) != '}')    {    max = 0;    while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';    if (max < 0 || max > 65535)      {      *errorcodeptr = ERR5;      return p;      }    if (max < min)      {      *errorcodeptr = ERR4;      return p;      }    }  }/* Fill in the required variables, and pass back the pointer to the terminating'}'. */*minp = min;*maxp = max;return p;}/**************************************************       Find forward referenced subpattern       **************************************************//* This function scans along a pattern's text looking for capturingsubpatterns, and counting them. If it finds a named pattern that matches thename it is given, it returns its number. Alternatively, if the name is NULL, itreturns when it reaches a given numbered subpattern. This is used for forwardreferences to subpatterns. We know that if (?P< is encountered, the name willbe terminated by '>' because that is checked in the first pass.Arguments:  ptr          current position in the pattern  cd           compile background data  name         name to seek, or NULL if seeking a numbered subpattern  lorn         name length, or subpattern number if name is NULL  xmode        TRUE if we are in /x modeReturns:       the number of the named subpattern, or -1 if not found*/static intfind_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,  BOOL xmode){const uschar *thisname;int count = cd->bracount;for (; *ptr != 0; ptr++)  {  int term;  /* Skip over backslashed characters and also entire \Q...\E */  if (*ptr == '\\')    {    if (*(++ptr) == 0) return -1;    if (*ptr == 'Q') for (;;)      {      while (*(++ptr) != 0 && *ptr != '\\');      if (*ptr == 0) return -1;      if (*(++ptr) == 'E') break;      }    continue;    }  /* Skip over character classes; this logic must be similar to the way they  are handled for real. If the first character is '^', skip it. Also, if the  first few characters (either before or after ^) are \Q\E or \E we skip them  too. This makes for compatibility with Perl. */  if (*ptr == '[')    {    BOOL negate_class = FALSE;    for (;;)      {      int c = *(++ptr);      if (c == '\\')        {        if (ptr[1] == 'E') ptr++;          else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            else break;        }      else if (!negate_class && c == '^')        negate_class = TRUE;      else break;      }    /* If the next character is ']', it is a data character that must be    skipped, except in JavaScript compatibility mode. */    if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)      ptr++;    while (*(++ptr) != ']')      {      if (*ptr == 0) return -1;      if (*ptr == '\\')        {        if (*(++ptr) == 0) return -1;        if (*ptr == 'Q') for (;;)          {          while (*(++ptr) != 0 && *ptr != '\\');          if (*ptr == 0) return -1;          if (*(++ptr) == 'E') break;          }        continue;        }      }    continue;    }  /* Skip comments in /x mode */  if (xmode && *ptr == '#')    {    while (*(++ptr) != 0 && *ptr != '\n');    if (*ptr == 0) return -1;    continue;    }  /* An opening parens must now be a real metacharacter */  if (*ptr != '(') continue;  if (ptr[1] != '?' && ptr[1] != '*')    {    count++;    if (name == NULL && count == lorn) return count;    continue;    }  ptr += 2;  if (*ptr == 'P') ptr++;                      /* Allow optional P */  /* We have to disambiguate (?<! and (?<= from (?<name> */  if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&       *ptr != '\'')    continue;  count++;  if (name == NULL && count == lorn) return count;  term = *ptr++;  if (term == '<') term = '>';  thisname = ptr;  while (*ptr != term) ptr++;  if (name != NULL && lorn == ptr - thisname &&      strncmp((const char *)name, (const char *)thisname, lorn) == 0)    return count;  }return -1;}/**************************************************      Find first significant op code            **************************************************//* This is called by several functions that scan a compiled expression lookingfor a fixed first character, or an anchoring op code etc. It skips over thingsthat do not influence this. For some calls, a change of option is important.For some calls, it makes sense to skip negative forward and all backwardassertions, and also the \b assertion; for others it does not.Arguments:  code         pointer to the start of the group  options      pointer to external options  optbit       the option bit whose changing is significant, or                 zero if none are  skipassert   TRUE if certain assertions are to be skippedReturns:       pointer to the first significant opcode*/static const uschar*first_significant_code(const uschar *code, int *options, int optbit,  BOOL skipassert){for (;;)  {  switch ((int)*code)    {    case OP_OPT:    if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))      *options = (int)code[1];    code += 2;    break;    case OP_ASSERT_NOT:    case OP_ASSERTBACK:    case OP_ASSERTBACK_NOT:    if (!skipassert) return code;    do code += GET(code, 1); while (*code == OP_ALT);    code += _pcre_OP_lengths[*code];    break;    case OP_WORD_BOUNDARY:    case OP_NOT_WORD_BOUNDARY:    if (!skipassert) return code;    /* Fall through */    case OP_CALLOUT:    case OP_CREF:    case OP_RREF:    case OP_DEF:    code += _pcre_OP_lengths[*code];    break;    default:    return code;    }  }/* Control never reaches here */}/**************************************************        Find the fixed length of a pattern      **************************************************//* Scan a pattern and compute the fixed length of subject that will match it,if the length is fixed. This is needed for dealing with backward assertions.In UTF8 mode, the result is in characters rather than bytes.Arguments:  code     points to the start of the pattern (the bracket)  options  the compiling optionsReturns:   the fixed length, or -1 if there is no fixed length,             or -2 if \C was encountered*/static intfind_fixedlength(uschar *code, int options){int length = -1;register int branchlength = 0;register uschar *cc = code + 1 + LINK_SIZE;/* Scan along the opcodes for this branch. If we get to the end of thebranch, check the length against that of the other branches. */for (;;)  {  int d;  register int op = *cc;  switch (op)    {    case OP_CBRA:    case OP_BRA:    case OP_ONCE:    case OP_COND:    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);    if (d < 0) return d;    branchlength += d;    do cc += GET(cc, 1); while (*cc == OP_ALT);    cc += 1 + LINK_SIZE;    break;    /* Reached end of a branch; if it's a ket it is the end of a nested    call. If it's ALT it is an alternation in a nested call. If it is    END it's the end of the outer call. All can be handled by the same code. */    case OP_ALT:    case OP_KET:    case OP_KETRMAX:    case OP_KETRMIN:    case OP_END:    if (length < 0) length = branchlength;      else if (length != branchlength) return -1;    if (*cc != OP_ALT) return length;    cc += 1 + LINK_SIZE;    branchlength = 0;    break;    /* Skip over assertive subpatterns */    case OP_ASSERT:    case OP_ASSERT_NOT:    case OP_ASSERTBACK:    case OP_ASSERTBACK_NOT:    do cc += GET(cc, 1); while (*cc == OP_ALT);    /* Fall through */    /* Skip over things that don't match chars */    case OP_REVERSE:    case OP_CREF:    case OP_RREF:    case OP_DEF:    case OP_OPT:    case OP_CALLOUT:    case OP_SOD:    case OP_SOM:    case OP_EOD:    case OP_EODN:    case OP_CIRC:    case OP_DOLL:    case OP_NOT_WORD_BOUNDARY:    case OP_WORD_BOUNDARY:    cc += _pcre_OP_lengths[*cc];    break;    /* Handle literal characters */    case OP_CHAR:    case OP_CHARNC:    case OP_NOT:    branchlength++;    cc += 2;#ifdef SUPPORT_UTF8    if ((options & PCRE_UTF8) != 0)      {      while ((*cc & 0xc0) == 0x80) cc++;      }#endif    break;    /* Handle exact repetitions. The count is already in characters, but we    need to skip over a multibyte character in UTF8 mode.  */    case OP_EXACT:    branchlength += GET2(cc,1);    cc += 4;#ifdef SUPPORT_UTF8    if ((options & PCRE_UTF8) != 0)      {      while((*cc & 0x80) == 0x80) cc++;      }#endif    break;    case OP_TYPEEXACT:    branchlength += GET2(cc,1);    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;    cc += 4;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -