📄 pcre_compile.c

📁 GLib library for the C language
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
PUT(code, LINK_SIZE, 0);                /* Default length */return code + 2*LINK_SIZE;}/**************************************************         Complete a callout item                **************************************************//* A callout item contains the length of the next item in the pattern, whichwe can't fill in till after we have reached the relevant point. This is usedfor both automatic and manual callouts.Arguments:  previous_callout   points to previous callout item  ptr                current pattern pointer  cd                 pointers to tables etcReturns:             nothing*/static voidcomplete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd){int length = ptr - cd->start_pattern - GET(previous_callout, 2);PUT(previous_callout, 2 + LINK_SIZE, length);}#ifdef SUPPORT_UCP/**************************************************           Get othercase range                  **************************************************//* This function is passed the start and end of a class range, in UTF-8 modewith UCP support. It searches up the characters, looking for internal ranges ofcharacters in the "other" case. 
Each call returns the next one, updating thestart address.Arguments:  cptr        points to starting character value; updated  d           end value  ocptr       where to put start of othercase range  odptr       where to put end of othercase rangeYield:        TRUE when range returned; FALSE when no more*/static BOOLget_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  unsigned int *odptr){unsigned int c, othercase, next;for (c = *cptr; c <= d; c++)  { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }if (c > d) return FALSE;*ocptr = othercase;next = othercase + 1;for (++c; c <= d; c++)  {  if (_pcre_ucp_othercase(c) != next) break;  next++;  }*odptr = next - 1;*cptr = c;return TRUE;}#endif  /* SUPPORT_UCP *//**************************************************     Check if auto-possessifying is possible    **************************************************//* This function is called for unlimited repeats of certain items, to seewhether the next thing could possibly match the repeated item. If not, it makessense to automatically possessify the repeated item.Arguments:  op_code       the repeated op code  this          data for this item, depends on the opcode  utf8          TRUE in UTF-8 mode  utf8_char     used for utf8 character bytes, NULL if not relevant  ptr           next character in pattern  options       options bits  cd            contains pointers to tables etc.Returns:        TRUE if possessifying is wanted*/static BOOLcheck_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,  const uschar *ptr, int options, compile_data *cd){int next;/* Skip whitespace and comments in extended mode */if ((options & PCRE_EXTENDED) != 0)  {  for (;;)    {    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;    if (*ptr == '#')      {      while (*(++ptr) != 0)        if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }      }    else break;    }  }/* If the next item is one that we can handle, get its value. 
A non-negativevalue is a character, a negative value is an escape value. */if (*ptr == '\\')  {  int temperrorcode = 0;  next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);  if (temperrorcode != 0) return FALSE;  ptr++;    /* Point after the escape sequence */  }else if ((cd->ctypes[*ptr] & ctype_meta) == 0)  {#ifdef SUPPORT_UTF8  if (utf8) { GETCHARINC(next, ptr); } else#endif  next = *ptr++;  }else return FALSE;/* Skip whitespace and comments in extended mode */if ((options & PCRE_EXTENDED) != 0)  {  for (;;)    {    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;    if (*ptr == '#')      {      while (*(++ptr) != 0)        if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }      }    else break;    }  }/* If the next thing is itself optional, we have to give up. */if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  return FALSE;/* Now compare the next item with the previous opcode. If the previous is apositive single character match, "item" either contains the character or, if"item" is greater than 127 in utf8 mode, the character's bytes are inutf8_char. *//* Handle cases when the next item is a character. */if (next >= 0) switch(op_code)  {  case OP_CHAR:#ifdef SUPPORT_UTF8  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif  return item != next;  /* For CHARNC (caseless character) we must check the other case. If we have  Unicode property support, we can use it to test the other case of  high-valued characters. 
*/  case OP_CHARNC:#ifdef SUPPORT_UTF8  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif  if (item == next) return FALSE;#ifdef SUPPORT_UTF8  if (utf8)    {    unsigned int othercase;    if (next < 128) othercase = cd->fcc[next]; else#ifdef SUPPORT_UCP    othercase = _pcre_ucp_othercase((unsigned int)next);#else    othercase = NOTACHAR;#endif    return (unsigned int)item != othercase;    }  else#endif  /* SUPPORT_UTF8 */  return (item != cd->fcc[next]);  /* Non-UTF-8 mode */  /* For OP_NOT, "item" must be a single-byte character. */  case OP_NOT:  if (item == next) return TRUE;  if ((options & PCRE_CASELESS) == 0) return FALSE;#ifdef SUPPORT_UTF8  if (utf8)    {    unsigned int othercase;    if (next < 128) othercase = cd->fcc[next]; else#ifdef SUPPORT_UCP    othercase = _pcre_ucp_othercase(next);#else    othercase = NOTACHAR;#endif    return (unsigned int)item == othercase;    }  else#endif  /* SUPPORT_UTF8 */  return (item == cd->fcc[next]);  /* Non-UTF-8 mode */  case OP_DIGIT:  return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;  case OP_NOT_DIGIT:  return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;  case OP_WHITESPACE:  return next > 127 || (cd->ctypes[next] & ctype_space) == 0;  case OP_NOT_WHITESPACE:  return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;  case OP_WORDCHAR:  return next > 127 || (cd->ctypes[next] & ctype_word) == 0;  case OP_NOT_WORDCHAR:  return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;  case OP_HSPACE:  case OP_NOT_HSPACE:  switch(next)    {    case 0x09:    case 0x20:    case 0xa0:    case 0x1680:    case 0x180e:    case 0x2000:    case 0x2001:    case 0x2002:    case 0x2003:    case 0x2004:    case 0x2005:    case 0x2006:    case 0x2007:    case 0x2008:    case 0x2009:    case 0x200A:    case 0x202f:    case 0x205f:    case 0x3000:    return op_code != OP_HSPACE;    default:    return op_code == OP_HSPACE;    }  case OP_VSPACE:  case OP_NOT_VSPACE:  switch(next)    {    case 0x0a:    case 
0x0b:    case 0x0c:    case 0x0d:    case 0x85:    case 0x2028:    case 0x2029:    return op_code != OP_VSPACE;    default:    return op_code == OP_VSPACE;    }  default:  return FALSE;  }/* Handle the case when the next item is \d, \s, etc. */switch(op_code)  {  case OP_CHAR:  case OP_CHARNC:#ifdef SUPPORT_UTF8  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif  switch(-next)    {    case ESC_d:    return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;    case ESC_D:    return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;    case ESC_s:    return item > 127 || (cd->ctypes[item] & ctype_space) == 0;    case ESC_S:    return item <= 127 && (cd->ctypes[item] & ctype_space) != 0;    case ESC_w:    return item > 127 || (cd->ctypes[item] & ctype_word) == 0;    case ESC_W:    return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;    case ESC_h:    case ESC_H:    switch(item)      {      case 0x09:      case 0x20:      case 0xa0:      case 0x1680:      case 0x180e:      case 0x2000:      case 0x2001:      case 0x2002:      case 0x2003:      case 0x2004:      case 0x2005:      case 0x2006:      case 0x2007:      case 0x2008:      case 0x2009:      case 0x200A:      case 0x202f:      case 0x205f:      case 0x3000:      return -next != ESC_h;      default:      return -next == ESC_h;      }    case ESC_v:    case ESC_V:    switch(item)      {      case 0x0a:      case 0x0b:      case 0x0c:      case 0x0d:      case 0x85:      case 0x2028:      case 0x2029:      return -next != ESC_v;      default:      return -next == ESC_v;      }    default:    return FALSE;    }  case OP_DIGIT:  return next == -ESC_D || next == -ESC_s || next == -ESC_W ||         next == -ESC_h || next == -ESC_v;  case OP_NOT_DIGIT:  return next == -ESC_d;  case OP_WHITESPACE:  return next == -ESC_S || next == -ESC_d || next == -ESC_w;  case OP_NOT_WHITESPACE:  return next == -ESC_s || next == -ESC_h || next == -ESC_v;  case OP_HSPACE:  return next == -ESC_S || next == -ESC_H 
|| next == -ESC_d || next == -ESC_w;  case OP_NOT_HSPACE:  return next == -ESC_h;  /* Can't have \S in here because VT matches \S (Perl anomaly) */  case OP_VSPACE:  return next == -ESC_V || next == -ESC_d || next == -ESC_w;  case OP_NOT_VSPACE:  return next == -ESC_v;  case OP_WORDCHAR:  return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;  case OP_NOT_WORDCHAR:  return next == -ESC_w || next == -ESC_d;  default:  return FALSE;  }/* Control does not reach here */}/**************************************************           Compile one branch                   **************************************************//* Scan the pattern, compiling it into the a vector. If the options arechanged during the branch, the pointer is used to change the external optionsbits. This function is used during the pre-compile phase when we are tryingto find out the amount of memory needed, as well as during the real compilephase. The value of lengthptr distinguishes the two phases.Arguments:  optionsptr     pointer to the option bits  codeptr        points to the pointer to the current code point  ptrptr         points to the current pattern pointer  errorcodeptr   points to error code variable  firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)  reqbyteptr     set to the last literal character required, else < 0  bcptr          points to current branch chain  cd             contains pointers to tables etc.  
lengthptr      NULL during the real compile phase                 points to length accumulator during pre-compile phaseReturns:         TRUE on success                 FALSE, with *errorcodeptr set non-zero on error*/static BOOLcompile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,  compile_data *cd, int *lengthptr){int repeat_type, op_type;int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */int bravalue = 0;int greedy_default, greedy_non_default;int firstbyte, reqbyte;int zeroreqbyte, zerofirstbyte;int req_caseopt, reqvary, tempreqvary;int options = *optionsptr;int after_manual_callout = 0;int length_prevgroup = 0;register int c;register uschar *code = *codeptr;uschar *last_code = code;uschar *orig_code = code;uschar *tempcode;BOOL inescq = FALSE;BOOL groupsetfirstbyte = FALSE
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -