⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 5 页
字号:
        case '!':                 /* Negative lookahead */        bravalue = OP_ASSERT_NOT;        ptr++;        break;        case '<':                 /* Lookbehinds */        switch (*(++ptr))          {          case '=':               /* Positive lookbehind */          bravalue = OP_ASSERTBACK;          ptr++;          break;          case '!':               /* Negative lookbehind */          bravalue = OP_ASSERTBACK_NOT;          ptr++;          break;          default:                /* Syntax error */          *errorptr = ERR24;          goto FAILED;          }        break;        case '>':                 /* One-time brackets */        bravalue = OP_ONCE;        ptr++;        break;        case 'R':                 /* Pattern recursion */        *code++ = OP_RECURSE;        ptr++;        continue;        default:                  /* Option setting */        set = unset = 0;        optset = &set;        while (*ptr != ')' && *ptr != ':')          {          switch (*ptr++)            {            case '-': optset = &unset; break;            case 'i': *optset |= PCRE_CASELESS; break;            case 'm': *optset |= PCRE_MULTILINE; break;            case 's': *optset |= PCRE_DOTALL; break;            case 'x': *optset |= PCRE_EXTENDED; break;            case 'U': *optset |= PCRE_UNGREEDY; break;            case 'X': *optset |= PCRE_EXTRA; break;            default:            *errorptr = ERR12;            goto FAILED;            }          }        /* Set up the changed option bits, but don't change anything yet. */        newoptions = (options | set) & (~unset);        /* If the options ended with ')' this is not the start of a nested        group with option changes, so the options change at this level. At top        level there is nothing else to be done (the options will in fact have        been set from the start of compiling as a result of the first pass) but        at an inner level we must compile code to change the ims options if        necessary, and pass the new setting back so that it can be put at the        start of any following branches, and when this group ends, a resetting        item can be compiled. */        if (*ptr == ')')          {          if ((options & PCRE_INGROUP) != 0 &&              (options & PCRE_IMS) != (newoptions & PCRE_IMS))            {            *code++ = OP_OPT;            *code++ = *optchanged = newoptions & PCRE_IMS;            }          options = newoptions;  /* Change options at this level */          previous = NULL;       /* This item can't be repeated */          continue;              /* It is complete */          }        /* If the options ended with ':' we are heading into a nested group        with possible change of options. Such groups are non-capturing and are        not assertions of any kind. All we need to do is skip over the ':';        the newoptions value is handled below. */        bravalue = OP_BRA;        ptr++;        }      }    /* Else we have a referencing group; adjust the opcode. If the bracket    number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and    arrange for the true number to follow later, in an OP_BRANUMBER item. */    else      {      if (++(*brackets) > EXTRACT_BASIC_MAX)        {        bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;        code[3] = OP_BRANUMBER;        code[4] = *brackets >> 8;        code[5] = *brackets & 255;        skipbytes = 3;        }      else bravalue = OP_BRA + *brackets;      }    /* Process nested bracketed re. Assertions may not be repeated, but other    kinds can be. We copy code into a non-register variable in order to be able    to pass its address because some compilers complain otherwise. Pass in a    new setting for the ims options if they have changed. */    previous = (bravalue >= OP_ONCE)? code : NULL;    *code = bravalue;    tempcode = code;    if (!compile_regex(         options | PCRE_INGROUP,       /* Set for all nested groups */         ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?           newoptions & PCRE_IMS : -1, /* Pass ims options if changed */         brackets,                     /* Extracting bracket count */         &tempcode,                    /* Where to put code (updated) */         &ptr,                         /* Input pointer (updated) */         errorptr,                     /* Where to put an error message */         (bravalue == OP_ASSERTBACK ||          bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */         skipbytes,                    /* Skip over OP_COND/OP_BRANUMBER */         &subreqchar,                  /* For possible last char */         &subcountlits,                /* For literal count */         cd))                          /* Tables block */      goto FAILED;    /* At the end of compiling, code is still pointing to the start of the    group, while tempcode has been updated to point past the end of the group    and any option resetting that may follow it. The pattern pointer (ptr)    is on the bracket. */    /* If this is a conditional bracket, check that there are no more than    two branches in the group. */    else if (bravalue == OP_COND)      {      uschar *tc = code;      condcount = 0;      do {         condcount++;         tc += (tc[1] << 8) | tc[2];         }      while (*tc != OP_KET);      if (condcount > 2)        {        *errorptr = ERR27;        goto FAILED;        }      }    /* Handle updating of the required character. If the subpattern didn't    set one, leave it as it was. Otherwise, update it for normal brackets of    all kinds, forward assertions, and conditions with two branches. Don't    update the literal count for forward assertions, however. If the bracket    is followed by a quantifier with zero repeat, we have to back off. Hence    the definition of prevreqchar and subcountlits outside the main loop so    that they can be accessed for the back off. */    if (subreqchar > 0 &&         (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||         (bravalue == OP_COND && condcount == 2)))      {      prevreqchar = *reqchar;      *reqchar = subreqchar;      if (bravalue != OP_ASSERT) *countlits += subcountlits;      }    /* Now update the main code pointer to the end of the group. */    code = tempcode;    /* Error if hit end of pattern */    if (*ptr != ')')      {      *errorptr = ERR14;      goto FAILED;      }    break;    /* Check \ for being a real metacharacter; if not, fall through and handle    it as a data character at the start of a string. Escape items are checked    for validity in the pre-compiling pass. */    case '\\':    tempptr = ptr;    c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);    /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values    are arranged to be the negation of the corresponding OP_values. For the    back references, the values are ESC_REF plus the reference number. Only    back references and those types that consume a character may be repeated.    We can test for values between ESC_b and ESC_Z for the latter; this may    have to change if any new ones are ever created. */    if (c < 0)      {      if (-c >= ESC_REF)        {        int number = -c - ESC_REF;        previous = code;        *code++ = OP_REF;        *code++ = number >> 8;        *code++ = number & 255;        }      else        {        previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;        *code++ = -c;        }      continue;      }    /* Data character: reset and fall through */    ptr = tempptr;    c = '\\';    /* Handle a run of data characters until a metacharacter is encountered.    The first character is guaranteed not to be whitespace or # when the    extended flag is set. */    NORMAL_CHAR:    default:    previous = code;    *code = OP_CHARS;    code += 2;    length = 0;    do      {      if ((options & PCRE_EXTENDED) != 0)        {        if ((cd->ctypes[c] & ctype_space) != 0) continue;        if (c == '#')          {          /* The space before the ; is to avoid a warning on a silly compiler          on the Macintosh. */          while ((c = *(++ptr)) != 0 && c != NEWLINE) ;          if (c == 0) break;          continue;          }        }      /* Backslash may introduce a data char or a metacharacter. Escaped items      are checked for validity in the pre-compiling pass. Stop the string      before a metaitem. */      if (c == '\\')        {        tempptr = ptr;        c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);        if (c < 0) { ptr = tempptr; break; }        /* If a character is > 127 in UTF-8 mode, we have to turn it into        two or more characters in the UTF-8 encoding. */#ifdef SUPPORT_UTF8        if (c > 127 && (options & PCRE_UTF8) != 0)          {          uschar buffer[8];          int len = ord2utf8(c, buffer);          for (c = 0; c < len; c++) *code++ = buffer[c];          length += len;          continue;          }#endif        }      /* Ordinary character or single-char escape */      *code++ = c;      length++;      }    /* This "while" is the end of the "do" above. */    while (length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);    /* Update the last character and the count of literals */    prevreqchar = (length > 1)? code[-2] : *reqchar;    *reqchar = code[-1];    *countlits += length;    /* Compute the length and set it in the data vector, and advance to    the next state. */    previous[1] = length;    if (length < MAXLIT) ptr--;    break;    }  }                   /* end of big loop *//* Control never reaches here by falling through, only by a goto for all theerror states. Pass back the position in the pattern so that it can be displayedto the user for diagnosing the error. */FAILED:*ptrptr = ptr;return FALSE;}/**************************************************     Compile sequence of alternatives           **************************************************//* On entry, ptr is pointing past the bracket character, but on returnit points to the closing bracket, or vertical bar, or end of string.The code variable is pointing at the byte into which the BRA operator has beenstored. If the ims options are changed at the start (for a (?ims: group) orduring any branch, we need to insert an OP_OPT item at the start of everyfollowing branch to ensure they get set correctly at run time, and also passthe new options into every subsequent branch compile.Argument:  options     the option bits  optchanged  new ims options to set as if (?ims) were at the start, or -1               for no change  brackets    -> int containing the number of extracting brackets used  codeptr     -> the address of the current code pointer  ptrptr      -> the address of the current pattern pointer  errorptr    -> pointer to error message  lookbehind  TRUE if this is a lookbehind assertion  skipbytes   skip this many bytes at start (for OP_COND, OP_BRANUMBER)  reqchar     -> place to put the last required character, or a negative number  countlits   -> place to put the shortest literal count of any branch  cd          points to the data block with tables pointersReturns:      TRUE on success*/static BOOLcompile_regex(int options, int optchanged, int *brackets, uschar **codeptr,  const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes,  int *reqchar, int *countlits, compile_data *cd){const uschar *ptr = *ptrptr;uschar *code = *codeptr;uschar *last_branch = code;uschar *start_bracket = code;uschar *reverse_count = NULL;int oldoptions = options & PCRE_IMS;int branchreqchar, branchcountlits;*reqchar = -1;*countlits = INT_MAX;code += 3 + skipbytes;/* Loop for each alternative branch */for (;;)  {  int length;  /* Handle change of options */  if (optchanged >= 0)    {    *code++ = OP_OPT;    *code++ = optchanged;    options = (options & ~PCRE_IMS) | optchanged;    }  /* Set up dummy OP_REVERSE if lookbehind assertion */  if (lookbehind)    {    *code++ = OP_REVERSE;    reverse_count = code;    *code++ = 0;    *code++ = 0;    }  /* Now compile the branch */  if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,      &branchreqchar, &branchcountlits, cd))    {    *ptrptr = ptr;    return FALSE;    }  /* Fill in the length of the last branch */  length = code - last_branch;  last_branch[1] = length >> 8;  last_branch[2] = length & 255;  /* Save the last required character if all branches have the same; a current  value of -1 means unset, while -2 means "previous branch had no last required  char".  */  if (*reqchar != -2)    {    if (branchreqchar >= 0)      {      if (*reqchar == -1) *reqchar = branchreqchar;      else if (*reqchar != branchreqchar) *reqchar = -2;      }    else *reqchar = -2;    }  /* Keep the shortest literal count */  if (branchcountlits < *countlits) *countlits = branchcountlits;  DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));  /* If lookbehind, check that this branch matches a fixed-length string,  and put the length into the OP_REVERSE item. Temporarily mark the end of  the branch with OP_END. */  if (lookbehind)    {    *code = OP_END;    length = find_fixedlength(last_branch, options);    DPRINTF(("fixed length = %d\n", length));    if (length < 0)      {      *errorptr = ERR25;      *ptrptr = ptr;      return FALSE;      }    reverse_count[0] = (length >> 8);    reverse_count[1] = length & 255;    }  /* Reach

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -