📄 pcre_compile.c

📁 SDL文件。SDL_ERROwenjian.....
💻 C
📖 第 1 页 / 共 5 页
字号:
*************************************************//* The error texts are now all in one long string, to save on relocations. Assome of the text is of unknown length, we can't use a table of offsets.Instead, just count through the strings. This is not a performance issuebecause it happens only when there has been a compilation error.Argument:   the error numberReturns:    pointer to the error string*/static const char *find_error_text(int n){const char *s = error_texts;for (; n > 0; n--) while (*s++ != 0);return s;}/**************************************************            Handle escapes                      **************************************************//* This function is called when a \ has been encountered. It either returns apositive value for a simple escape such as \n, or a negative value whichencodes one of the more complicated things such as \d. A backreference to groupn is returned as -(ESC_REF + n); ESC_REF is the highest ESC_xxx macro. WhenUTF-8 is enabled, a positive value greater than 255 may be returned. On entry,ptr is pointing at the \. On exit, it is on the final character of the escapesequence.Arguments:  ptrptr         points to the pattern position pointer  errorcodeptr   points to the errorcode variable  bracount       number of previous extracting brackets  options        the options bits  isclass        TRUE if inside a character classReturns:         zero or positive => a data character                 negative => a special escape sequence                 on error, errorcodeptr is set*/static intcheck_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  int options, BOOL isclass){BOOL utf8 = (options & PCRE_UTF8) != 0;const uschar *ptr = *ptrptr + 1;int c, i;GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */ptr--;                            /* Set pointer back to the last byte *//* If backslash is at the end of the pattern, it's an error. */if (c == 0) *errorcodeptr = ERR1;/* Non-alphamerics are literals. For digits or letters, do an initial lookup ina table. A non-zero result is something that can be returned immediately.Otherwise further processing may be required. */#ifndef EBCDIC  /* ASCII coding */else if (c < '0' || c > 'z') {}                           /* Not alphameric */else if ((i = escapes[c - '0']) != 0) c = i;#else           /* EBCDIC coding */else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */else if ((i = escapes[c - 0x48]) != 0)  c = i;#endif/* Escapes that need further processing, or are illegal. */else  {  const uschar *oldptr;  BOOL braced, negated;  switch (c)    {    /* A number of Perl escapes are not handled by PCRE. We give an explicit    error. */    case 'l':    case 'L':    case 'N':    case 'u':    case 'U':    *errorcodeptr = ERR37;    break;    /* \g must be followed by a number, either plain or braced. If positive, it    is an absolute backreference. If negative, it is a relative backreference.    This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a    reference to a named group. This is part of Perl's movement towards a    unified syntax for back references. As this is synonymous with \k{name}, we    fudge it up by pretending it really was \k. */    case 'g':    if (ptr[1] == '{')      {      const uschar *p;      for (p = ptr+2; *p != 0 && *p != '}'; p++)        if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;      if (*p != 0 && *p != '}')        {        c = -ESC_k;        break;        }      braced = TRUE;      ptr++;      }    else braced = FALSE;    if (ptr[1] == '-')      {      negated = TRUE;      ptr++;      }    else negated = FALSE;    c = 0;    while ((digitab[ptr[1]] & ctype_digit) != 0)      c = c * 10 + *(++ptr) - '0';    if (c < 0)      {      *errorcodeptr = ERR61;      break;      }    if (c == 0 || (braced && *(++ptr) != '}'))      {      *errorcodeptr = ERR57;      break;      }    if (negated)      {      if (c > bracount)        {        *errorcodeptr = ERR15;        break;        }      c = bracount - (c - 1);      }    c = -(ESC_REF + c);    break;    /* The handling of escape sequences consisting of a string of digits    starting with one that is not zero is not straightforward. By experiment,    the way Perl works seems to be as follows:    Outside a character class, the digits are read as a decimal number. If the    number is less than 10, or if there are that many previous extracting    left brackets, then it is a back reference. Otherwise, up to three octal    digits are read to form an escaped byte. Thus \123 is likely to be octal    123 (cf \0123, which is octal 012 followed by the literal 3). If the octal    value is greater than 377, the least significant 8 bits are taken. Inside a    character class, \ followed by a digit is always an octal number. */    case '1': case '2': case '3': case '4': case '5':    case '6': case '7': case '8': case '9':    if (!isclass)      {      oldptr = ptr;      c -= '0';      while ((digitab[ptr[1]] & ctype_digit) != 0)        c = c * 10 + *(++ptr) - '0';      if (c < 0)        {        *errorcodeptr = ERR61;        break;        }      if (c < 10 || c <= bracount)        {        c = -(ESC_REF + c);        break;        }      ptr = oldptr;      /* Put the pointer back and fall through */      }    /* Handle an octal number following \. If the first digit is 8 or 9, Perl    generates a binary zero byte and treats the digit as a following literal.    Thus we have to pull back the pointer by one. */    if ((c = *ptr) >= '8')      {      ptr--;      c = 0;      break;      }    /* \0 always starts an octal number, but we may drop through to here with a    larger first octal digit. The original code used just to take the least    significant 8 bits of octal numbers (I think this is what early Perls used    to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more    than 3 octal digits. */    case '0':    c -= '0';    while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')        c = c * 8 + *(++ptr) - '0';    if (!utf8 && c > 255) *errorcodeptr = ERR51;    break;    /* \x is complicated. \x{ddd} is a character number which can be greater    than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is    treated as a data character. */    case 'x':    if (ptr[1] == '{')      {      const uschar *pt = ptr + 2;      int count = 0;      c = 0;      while ((digitab[*pt] & ctype_xdigit) != 0)        {        register int cc = *pt++;        if (c == 0 && cc == '0') continue;     /* Leading zeroes */        count++;#ifndef EBCDIC  /* ASCII coding */        if (cc >= 'a') cc -= 32;               /* Convert to upper case */        c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));#else           /* EBCDIC coding */        if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */        c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));#endif        }      if (*pt == '}')        {        if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;        ptr = pt;        break;        }      /* If the sequence of hex digits does not end with '}', then we don't      recognize this construct; fall through to the normal \x handling. */      }    /* Read just a single-byte hex-defined char */    c = 0;    while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      {      int cc;                               /* Some compilers don't like ++ */      cc = *(++ptr);                        /* in initializers */#ifndef EBCDIC  /* ASCII coding */      if (cc >= 'a') cc -= 32;              /* Convert to upper case */      c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));#else           /* EBCDIC coding */      if (cc <= 'z') cc += 64;              /* Convert to upper case */      c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));#endif      }    break;    /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.    This coding is ASCII-specific, but then the whole concept of \cx is    ASCII-specific. (However, an EBCDIC equivalent has now been added.) */    case 'c':    c = *(++ptr);    if (c == 0)      {      *errorcodeptr = ERR2;      break;      }#ifndef EBCDIC  /* ASCII coding */    if (c >= 'a' && c <= 'z') c -= 32;    c ^= 0x40;#else           /* EBCDIC coding */    if (c >= 'a' && c <= 'z') c += 64;    c ^= 0xC0;#endif    break;    /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any    other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,    for Perl compatibility, it is a literal. This code looks a bit odd, but    there used to be some cases other than the default, and there may be again    in future, so I haven't "optimized" it. */    default:    if ((options & PCRE_EXTRA) != 0) switch(c)      {      default:      *errorcodeptr = ERR3;      break;      }    break;    }  }*ptrptr = ptr;return c;}#ifdef SUPPORT_UCP/**************************************************               Handle \P and \p                 **************************************************//* This function is called after \P or \p has been encountered, provided thatPCRE is compiled with support for Unicode properties. On entry, ptrptr ispointing at the P or p. On exit, it is pointing at the final character of theescape sequence.Argument:  ptrptr         points to the pattern position pointer  negptr         points to a boolean that is set TRUE for negation else FALSE  dptr           points to an int that is set to the detailed property value  errorcodeptr   points to the error code variableReturns:         type value from ucp_type_table, or -1 for an invalid type*/static intget_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr){int c, i, bot, top;const uschar *ptr = *ptrptr;char name[32];c = *(++ptr);if (c == 0) goto ERROR_RETURN;*negptr = FALSE;/* \P or \p can be followed by a name in {}, optionally preceded by ^ fornegation. */if (c == '{')  {  if (ptr[1] == '^')    {    *negptr = TRUE;    ptr++;    }  for (i = 0; i < (int)sizeof(name) - 1; i++)    {    c = *(++ptr);    if (c == 0) goto ERROR_RETURN;    if (c == '}') break;    name[i] = c;    }  if (c !='}') goto ERROR_RETURN;  name[i] = 0;  }/* Otherwise there is just one following character */else  {  name[0] = c;  name[1] = 0;  }*ptrptr = ptr;/* Search for a recognized property name using binary chop */bot = 0;top = _pcre_utt_size;while (bot < top)  {  i = (bot + top) >> 1;  c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);  if (c == 0)    {    *dptr = _pcre_utt[i].value;    return _pcre_utt[i].type;    }  if (c > 0) bot = i + 1; else top = i;  }*errorcodeptr = ERR47;*ptrptr = ptr;return -1;ERROR_RETURN:*errorcodeptr = ERR46;*ptrptr = ptr;return -1;}#endif/**************************************************            Check for counted repeat            **************************************************//* This function is called when a '{' is encountered in a place where it mightstart a quantifier. It looks ahead to see if it really is a quantifier or not.It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}where the ddds are digits.Arguments:  p         pointer to the first char after '{'Returns:    TRUE or FALSE*/static BOOLis_counted_repeat(const uschar *p){if ((digitab[*p++] & ctype_digit) == 0) return FALSE;while ((digitab[*p] & ctype_digit) != 0) p++;if (*p == '}') return TRUE;if (*p++ != ',') return FALSE;if (*p == '}') return TRUE;if ((digitab[*p++] & ctype_digit) == 0) return FALSE;while ((digitab[*p] & ctype_digit) != 0) p++;return (*p == '}');}/**************************************************         Read repeat counts                     **************************************************//* Read an item of the form {n,m} and return the values. This is called onlyafter is_counted_repeat() has confirmed that a repeat-count quantifier exists,
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -