📄 pcre_compile.c

📁 SDL文件。SDL_ERROwenjian.....
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
/**************************************************          Check POSIX class name                **************************************************//* This function is called to check the name given in a POSIX-style class entrysuch as [:alnum:].Arguments:  ptr        points to the first letter  len        the length of the nameReturns:     a value representing the name, or -1 if unknown*/static intcheck_posix_name(const uschar *ptr, int len){const char *pn = posix_names;register int yield = 0;while (posix_name_lengths[yield] != 0)  {  if (len == posix_name_lengths[yield] &&    strncmp((const char *)ptr, pn, len) == 0) return yield;  pn += posix_name_lengths[yield] + 1;  yield++;  }return -1;}/**************************************************    Adjust OP_RECURSE items in repeated group   **************************************************//* OP_RECURSE items contain an offset from the start of the regex to the groupthat is referenced. This means that groups can be replicated for fixedrepetition simply by copying (because the recursion is allowed to refer toearlier groups that are outside the current group). However, when a group isoptional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted beforeit, after it has been compiled. This means that any OP_RECURSE items within itthat refer to the group itself or any contained groups have to have theiroffsets adjusted. That one of the jobs of this function. Before it is called,the partially compiled regex must be temporarily terminated with OP_END.This function has been extended with the possibility of forward references forrecursions and subroutine calls. It must also check the list of such referencesfor the group we are dealing with. If it finds that one of the recursions inthe current group is on this list, it adjusts the offset in the list, not thevalue in the reference (which is a group number).Arguments:  group      points to the start of the group  adjust     the amount by which the group is to be moved  utf8       TRUE in UTF-8 mode  cd         contains pointers to tables etc.  save_hwm   the hwm forward reference pointer at the start of the groupReturns:     nothing*/static voidadjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  uschar *save_hwm){uschar *ptr = group;while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  {  int offset;  uschar *hc;  /* See if this recursion is on the forward reference list. If so, adjust the  reference. */  for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    {    offset = GET(hc, 0);    if (cd->start_code + offset == ptr + 1)      {      PUT(hc, 0, offset + adjust);      break;      }    }  /* Otherwise, adjust the recursion offset if it's after the start of this  group. */  if (hc >= cd->hwm)    {    offset = GET(ptr, 1);    if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);    }  ptr += 1 + LINK_SIZE;  }}/**************************************************        Insert an automatic callout point       **************************************************//* This function is called when the PCRE_AUTO_CALLOUT option is set, to insertcallout points before each pattern item.Arguments:  code           current code pointer  ptr            current pattern pointer  cd             pointers to tables etcReturns:         new code pointer*/static uschar *auto_callout(uschar *code, const uschar *ptr, compile_data *cd){*code++ = OP_CALLOUT;*code++ = 255;PUT(code, 0, ptr - cd->start_pattern);  /* Pattern offset */PUT(code, LINK_SIZE, 0);                /* Default length */return code + 2*LINK_SIZE;}/**************************************************         Complete a callout item                **************************************************//* A callout item contains the length of the next item in the pattern, whichwe can't fill in till after we have reached the relevant point. This is usedfor both automatic and manual callouts.Arguments:  previous_callout   points to previous callout item  ptr                current pattern pointer  cd                 pointers to tables etcReturns:             nothing*/static voidcomplete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd){int length = ptr - cd->start_pattern - GET(previous_callout, 2);PUT(previous_callout, 2 + LINK_SIZE, length);}#ifdef SUPPORT_UCP/**************************************************           Get othercase range                  **************************************************//* This function is passed the start and end of a class range, in UTF-8 modewith UCP support. It searches up the characters, looking for internal ranges ofcharacters in the "other" case. Each call returns the next one, updating thestart address.Arguments:  cptr        points to starting character value; updated  d           end value  ocptr       where to put start of othercase range  odptr       where to put end of othercase rangeYield:        TRUE when range returned; FALSE when no more*/static BOOLget_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  unsigned int *odptr){unsigned int c, othercase, next;for (c = *cptr; c <= d; c++)  { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }if (c > d) return FALSE;*ocptr = othercase;next = othercase + 1;for (++c; c <= d; c++)  {  if (_pcre_ucp_othercase(c) != next) break;  next++;  }*odptr = next - 1;*cptr = c;return TRUE;}#endif  /* SUPPORT_UCP *//**************************************************     Check if auto-possessifying is possible    **************************************************//* This function is called for unlimited repeats of certain items, to seewhether the next thing could possibly match the repeated item. If not, it makessense to automatically possessify the repeated item.Arguments:  op_code       the repeated op code  this          data for this item, depends on the opcode  utf8          TRUE in UTF-8 mode  utf8_char     used for utf8 character bytes, NULL if not relevant  ptr           next character in pattern  options       options bits  cd            contains pointers to tables etc.Returns:        TRUE if possessifying is wanted*/static BOOLcheck_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,  const uschar *ptr, int options, compile_data *cd){int next;/* Skip whitespace and comments in extended mode */if ((options & PCRE_EXTENDED) != 0)  {  for (;;)    {    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;    if (*ptr == '#')      {      while (*(++ptr) != 0)        if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }      }    else break;    }  }/* If the next item is one that we can handle, get its value. A non-negativevalue is a character, a negative value is an escape value. */if (*ptr == '\\')  {  int temperrorcode = 0;  next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);  if (temperrorcode != 0) return FALSE;  ptr++;    /* Point after the escape sequence */  }else if ((cd->ctypes[*ptr] & ctype_meta) == 0)  {#ifdef SUPPORT_UTF8  if (utf8) { GETCHARINC(next, ptr); } else#endif  next = *ptr++;  }else return FALSE;/* Skip whitespace and comments in extended mode */if ((options & PCRE_EXTENDED) != 0)  {  for (;;)    {    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;    if (*ptr == '#')      {      while (*(++ptr) != 0)        if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }      }    else break;    }  }/* If the next thing is itself optional, we have to give up. */if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  return FALSE;/* Now compare the next item with the previous opcode. If the previous is apositive single character match, "item" either contains the character or, if"item" is greater than 127 in utf8 mode, the character's bytes are inutf8_char. *//* Handle cases when the next item is a character. */if (next >= 0) switch(op_code)  {  case OP_CHAR:#ifdef SUPPORT_UTF8  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif  return item != next;  /* For CHARNC (caseless character) we must check the other case. If we have  Unicode property support, we can use it to test the other case of  high-valued characters. */  case OP_CHARNC:#ifdef SUPPORT_UTF8  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif  if (item == next) return FALSE;#ifdef SUPPORT_UTF8  if (utf8)    {    unsigned int othercase;    if (next < 128) othercase = cd->fcc[next]; else#ifdef SUPPORT_UCP    othercase = _pcre_ucp_othercase((unsigned int)next);#else    othercase = NOTACHAR;#endif    return (unsigned int)item != othercase;    }  else#endif  /* SUPPORT_UTF8 */  return (item != cd->fcc[next]);  /* Non-UTF-8 mode */  /* For OP_NOT, "item" must be a single-byte character. */  case OP_NOT:  if (next < 0) return FALSE;  /* Not a character */  if (item == next) return TRUE;  if ((options & PCRE_CASELESS) == 0) return FALSE;#ifdef SUPPORT_UTF8  if (utf8)    {    unsigned int othercase;    if (next < 128) othercase = cd->fcc[next]; else#ifdef SUPPORT_UCP    othercase = _pcre_ucp_othercase(next);#else    othercase = NOTACHAR;#endif    return (unsigned int)item == othercase;    }  else#endif  /* SUPPORT_UTF8 */  return (item == cd->fcc[next]);  /* Non-UTF-8 mode */  case OP_DIGIT:  return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;  case OP_NOT_DIGIT:  return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;  case OP_WHITESPACE:  return next > 127 || (cd->ctypes[next] & ctype_space) == 0;  case OP_NOT_WHITESPACE:  return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;  case OP_WORDCHAR:  return next > 127 || (cd->ctypes[next] & ctype_word) == 0;  case OP_NOT_WORDCHAR:  return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;  case OP_HSPACE:  case OP_NOT_HSPACE:  switch(next)    {    case 0x09:    case 0x20:    case 0xa0:    case 0x1680:    case 0x180e:    case 0x2000:    case 0x2001:    case 0x2002:    case 0x2003:    case 0x2004:    case 0x2005:    case 0x2006:    case 0x2007:    case 0x2008:    case 0x2009:    case 0x200A:    case 0x202f:    case 0x205f:    case 0x3000:    return op_code != OP_HSPACE;    default:    return op_code == OP_HSPACE;    }  case OP_VSPACE:  case OP_NOT_VSPACE:  switch(next)    {    case 0x0a:    case 0x0b:    case 0x0c:    case 0x0d:    case 0x85:    case 0x2028:    case 0x2029:    return op_code != OP_VSPACE;    default:    return op_code == OP_VSPACE;    }  default:  return FALSE;  }/* Handle the case when the next item is \d, \s, etc. */switch(op_code)  {  case OP_CHAR:  case OP_CHARNC:#ifdef SUPPORT_UTF8  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif  switch(-next)    {    case ESC_d:    return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;    case ESC_D:    return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;    case ESC_s:    return item > 127 || (cd->ctypes[item] & ctype_space) == 0;    case ESC_S:    return item <= 127 && (cd->ctypes[item] & cty
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -