⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_compile.c

📁 this is a glib for c language
💻 C
📖 第 1 页 / 共 5 页
字号:
      case OP_TYPEMINQUERY:      case OP_TYPEPOSSTAR:      case OP_TYPEPOSPLUS:      case OP_TYPEPOSQUERY:      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;      break;      case OP_TYPEPOSUPTO:      case OP_TYPEUPTO:      case OP_TYPEMINUPTO:      case OP_TYPEEXACT:      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;      break;      }    /* Add in the fixed length from the table */    code += _pcre_OP_lengths[c];    /* In UTF-8 mode, opcodes that are followed by a character may be followed    by a multi-byte character. The length in the table is a minimum, so we have    to arrange to skip the extra bytes. */#ifdef SUPPORT_UTF8    if (utf8) switch(c)      {      case OP_CHAR:      case OP_CHARNC:      case OP_EXACT:      case OP_UPTO:      case OP_MINUPTO:      case OP_POSUPTO:      case OP_STAR:      case OP_MINSTAR:      case OP_POSSTAR:      case OP_PLUS:      case OP_MINPLUS:      case OP_POSPLUS:      case OP_QUERY:      case OP_MINQUERY:      case OP_POSQUERY:      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];      break;      }#endif    }  }}/**************************************************    Scan compiled branch for non-emptiness      **************************************************//* This function scans through a branch of a compiled pattern to see whether itcan match the empty string or not. It is called from could_be_empty()below and from compile_branch() when checking for an unlimited repeat of agroup that can match nothing. Note that first_significant_code() skips overbackward and negative forward assertions when its final argument is TRUE. 
If wehit an unclosed bracket, we return "empty" - this means we've struck an innerbracket whose current branch will already have been scanned.Arguments:  code        points to start of search  endcode     points to where to stop  utf8        TRUE if in UTF8 modeReturns:      TRUE if what is matched could be empty*/static BOOLcould_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8){register int c;for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);     code < endcode;     code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE))  {  const uschar *ccode;  c = *code;  /* Skip over forward assertions; the other assertions are skipped by  first_significant_code() with a TRUE final argument. */  if (c == OP_ASSERT)    {    do code += GET(code, 1); while (*code == OP_ALT);    c = *code;    continue;    }  /* Groups with zero repeats can of course be empty; skip them. */  if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)    {    code += _pcre_OP_lengths[c];    do code += GET(code, 1); while (*code == OP_ALT);    c = *code;    continue;    }  /* For other groups, scan the branches. */  if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)    {    BOOL empty_branch;    if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */    /* Scan a closed bracket */    empty_branch = FALSE;    do      {      if (!empty_branch && could_be_empty_branch(code, endcode, utf8))        empty_branch = TRUE;      code += GET(code, 1);      }    while (*code == OP_ALT);    if (!empty_branch) return FALSE;   /* All branches are non-empty */    c = *code;    continue;    }  /* Handle the other opcodes */  switch (c)    {    /* Check for quantifiers after a class. XCLASS is used for classes that    cannot be represented just by a bit map. This includes negated single    high-valued characters. 
The length in _pcre_OP_lengths[] is zero; the    actual length is stored in the compiled code, so we must update "code"    here. */#ifdef SUPPORT_UTF8    case OP_XCLASS:    ccode = code += GET(code, 1);    goto CHECK_CLASS_REPEAT;#endif    case OP_CLASS:    case OP_NCLASS:    ccode = code + 33;#ifdef SUPPORT_UTF8    CHECK_CLASS_REPEAT:#endif    switch (*ccode)      {      case OP_CRSTAR:            /* These could be empty; continue */      case OP_CRMINSTAR:      case OP_CRQUERY:      case OP_CRMINQUERY:      break;      default:                   /* Non-repeat => class must match */      case OP_CRPLUS:            /* These repeats aren't empty */      case OP_CRMINPLUS:      return FALSE;      case OP_CRRANGE:      case OP_CRMINRANGE:      if (GET2(ccode, 1) > 0) return FALSE;  /* Minimum > 0 */      break;      }    break;    /* Opcodes that must match a character */    case OP_PROP:    case OP_NOTPROP:    case OP_EXTUNI:    case OP_NOT_DIGIT:    case OP_DIGIT:    case OP_NOT_WHITESPACE:    case OP_WHITESPACE:    case OP_NOT_WORDCHAR:    case OP_WORDCHAR:    case OP_ANY:    case OP_ALLANY:    case OP_ANYBYTE:    case OP_CHAR:    case OP_CHARNC:    case OP_NOT:    case OP_PLUS:    case OP_MINPLUS:    case OP_POSPLUS:    case OP_EXACT:    case OP_NOTPLUS:    case OP_NOTMINPLUS:    case OP_NOTPOSPLUS:    case OP_NOTEXACT:    case OP_TYPEPLUS:    case OP_TYPEMINPLUS:    case OP_TYPEPOSPLUS:    case OP_TYPEEXACT:    return FALSE;    /* These are going to continue, as they may be empty, but we have to    fudge the length for the \p and \P cases. 
*/    case OP_TYPESTAR:    case OP_TYPEMINSTAR:    case OP_TYPEPOSSTAR:    case OP_TYPEQUERY:    case OP_TYPEMINQUERY:    case OP_TYPEPOSQUERY:    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;    break;    /* Same for these */    case OP_TYPEUPTO:    case OP_TYPEMINUPTO:    case OP_TYPEPOSUPTO:    if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;    break;    /* End of branch */    case OP_KET:    case OP_KETRMAX:    case OP_KETRMIN:    case OP_ALT:    return TRUE;    /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,    MINUPTO, and POSUPTO may be followed by a multibyte character */#ifdef SUPPORT_UTF8    case OP_STAR:    case OP_MINSTAR:    case OP_POSSTAR:    case OP_QUERY:    case OP_MINQUERY:    case OP_POSQUERY:    case OP_UPTO:    case OP_MINUPTO:    case OP_POSUPTO:    if (utf8) while ((code[2] & 0xc0) == 0x80) code++;    break;#endif    }  }return TRUE;}/**************************************************    Scan compiled regex for non-emptiness       **************************************************//* This function is called to check for left recursive calls. We want to checkthe current branch of the current pattern to see if it could match the emptystring. 
If it could, we must look outwards for branches at other levels,
stopping when we pass beyond the bracket which is the subject of the recursion.

Arguments:
  code        points to start of the recursion
  endcode     points to where to stop (current RECURSE item)
  bcptr       points to the chain of current (unclosed) branch starts
  utf8        TRUE if in UTF-8 mode

Returns:      TRUE if what is matched could be empty
*/

static BOOL
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
  BOOL utf8)
{
/* Walk outwards along the chain of unclosed branches. */

for (; bcptr != NULL; bcptr = bcptr->outer)
  {
  /* A branch that starts before the recursion target lies outside the group
  being recursed into, so the walk stops there. */

  if (bcptr->current < code) break;

  /* A single branch that must consume something settles the question. */

  if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
  }

return TRUE;
}



/*************************************************
*           Check for POSIX class syntax         *
*************************************************/

/* This function is called when the sequence "[:" or "[." or "[=" is
encountered in a character class. It checks whether this is followed by a
sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
reach an unescaped ']' without the special preceding character, return FALSE.

Originally, this function only recognized a sequence of letters between the
terminators, but it seems that Perl recognizes any sequence of characters,
though of course unknown POSIX names are subsequently rejected. Perl gives an
"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
didn't consider this to be a POSIX class. Likewise for [:1234:].

The problem in trying to be exactly like Perl is in the handling of escapes. We
have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
below handles the special case of \], but does not try to do any other escape
processing. This makes it different from Perl for cases such as [:l\ower:]
where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
"l\ower".
This is a lesser evil that not diagnosing bad classes when Perl does,I think.Arguments:  ptr      pointer to the initial [  endptr   where to return the end pointerReturns:   TRUE or FALSE*/static BOOLcheck_posix_syntax(const uschar *ptr, const uschar **endptr){int terminator;          /* Don't combine these lines; the Solaris cc */terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */for (++ptr; *ptr != 0; ptr++)  {  if (*ptr == '\\' && ptr[1] == ']') ptr++; else    {    if (*ptr == ']') return FALSE;    if (*ptr == terminator && ptr[1] == ']')      {      *endptr = ptr;      return TRUE;      }    }  }return FALSE;}/**************************************************          Check POSIX class name                **************************************************//* This function is called to check the name given in a POSIX-style class entrysuch as [:alnum:].Arguments:  ptr        points to the first letter  len        the length of the nameReturns:     a value representing the name, or -1 if unknown*/static intcheck_posix_name(const uschar *ptr, int len){const char *pn = posix_names;register int yield = 0;while (posix_name_lengths[yield] != 0)  {  if (len == posix_name_lengths[yield] &&    strncmp((const char *)ptr, pn, len) == 0) return yield;  pn += posix_name_lengths[yield] + 1;  yield++;  }return -1;}/**************************************************    Adjust OP_RECURSE items in repeated group   **************************************************//* OP_RECURSE items contain an offset from the start of the regex to the groupthat is referenced. This means that groups can be replicated for fixedrepetition simply by copying (because the recursion is allowed to refer toearlier groups that are outside the current group). However, when a group isoptional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO isinserted before it, after it has been compiled. 
This means that any OP_RECURSEitems within it that refer to the group itself or any contained groups have tohave their offsets adjusted. That one of the jobs of this function. Before itis called, the partially compiled regex must be temporarily terminated withOP_END.This function has been extended with the possibility of forward references forrecursions and subroutine calls. It must also check the list of such referencesfor the group we are dealing with. If it finds that one of the recursions inthe current group is on this list, it adjusts the offset in the list, not thevalue in the reference (which is a group number).Arguments:  group      points to the start of the group  adjust     the amount by which the group is to be moved  utf8       TRUE in UTF-8 mode  cd         contains pointers to tables etc.  save_hwm   the hwm forward reference pointer at the start of the groupReturns:     nothing*/static voidadjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  uschar *save_hwm){uschar *ptr = group;while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  {  int offset;  uschar *hc;  /* See if this recursion is on the forward reference list. If so, adjust the  reference. */  for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    {    offset = GET(hc, 0);    if (cd->start_code + offset == ptr + 1)      {      PUT(hc, 0, offset + adjust);      break;      }    }  /* Otherwise, adjust the recursion offset if it's after the start of this  group. 
*/  if (hc >= cd->hwm)    {    offset = GET(ptr, 1);    if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);    }  ptr += 1 + LINK_SIZE;  }}/**************************************************        Insert an automatic callout point       **************************************************//* This function is called when the PCRE_AUTO_CALLOUT option is set, to insertcallout points before each pattern item.Arguments:  code           current code pointer  ptr            current pattern pointer  cd             pointers to tables etcReturns:         new code pointer*/static uschar *auto_callout(uschar *code, const uschar *ptr, compile_data *cd){*code++ = OP_CALLOUT;*code++ = 255;PUT(code, 0, ptr - cd->start_pattern);  /* Pattern offset */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -