⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_compile.c

📁 This is GLib, a library for the C language
💻 C
📖 第 1 页 / 共 5 页
字号:
      if (*ptr == '\\')        {        if (*(++ptr) == 0) return -1;        if (*ptr == 'Q') for (;;)          {          while (*(++ptr) != 0 && *ptr != '\\');          if (*ptr == 0) return -1;          if (*(++ptr) == 'E') break;          }        continue;        }      }    continue;    }  /* Skip comments in /x mode */  if (xmode && *ptr == '#')    {    while (*(++ptr) != 0 && *ptr != '\n');    if (*ptr == 0) return -1;    continue;    }  /* An opening parens must now be a real metacharacter */  if (*ptr != '(') continue;  if (ptr[1] != '?' && ptr[1] != '*')    {    count++;    if (name == NULL && count == lorn) return count;    continue;    }  ptr += 2;  if (*ptr == 'P') ptr++;                      /* Allow optional P */  /* We have to disambiguate (?<! and (?<= from (?<name> */  if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&       *ptr != '\'')    continue;  count++;  if (name == NULL && count == lorn) return count;  term = *ptr++;  if (term == '<') term = '>';  thisname = ptr;  while (*ptr != term) ptr++;  if (name != NULL && lorn == ptr - thisname &&      strncmp((const char *)name, (const char *)thisname, lorn) == 0)    return count;  }return -1;}/**************************************************      Find first significant op code            **************************************************//* This is called by several functions that scan a compiled expression lookingfor a fixed first character, or an anchoring op code etc. It skips over thingsthat do not influence this. 
For some calls, a change of option is important.
For some calls, it makes sense to skip negative forward and all backward
assertions, and also the \b assertion; for others it does not.

Arguments:
  code         pointer to the start of the group
  options      pointer to external options
  optbit       the option bit whose changing is significant, or
                 zero if none are
  skipassert   TRUE if certain assertions are to be skipped

Returns:       pointer to the first significant opcode
*/

static const uschar*
first_significant_code(const uschar *code, int *options, int optbit,
  BOOL skipassert)
{
/* Each pass round the loop either steps over exactly one item that is not
significant, or returns a pointer to the item that is. */

for (;;)
  {
  const int op = (int)*code;

  /* An option-setting item is always stepped over. The caller's options are
  replaced only when a bit is being watched and that bit differs from the
  current setting. */

  if (op == OP_OPT)
    {
    if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
      *options = (int)code[1];
    code += 2;
    continue;
    }

  /* Negative lookahead and both kinds of lookbehind are stepped over only
  when the caller asked for it. Follow the link of every alternative to reach
  the final item of the group, then step over that item too. */

  if (op == OP_ASSERT_NOT || op == OP_ASSERTBACK || op == OP_ASSERTBACK_NOT)
    {
    if (!skipassert) return code;
    do
      code += GET(code, 1);
    while (*code == OP_ALT);
    code += _pcre_OP_lengths[*code];
    continue;
    }

  /* Word-boundary assertions are significant unless skipping was requested.
  Callouts and the condition-test items are never significant. Any other
  opcode is the answer. */

  if (op == OP_WORD_BOUNDARY || op == OP_NOT_WORD_BOUNDARY)
    {
    if (!skipassert) return code;
    }
  else if (op != OP_CALLOUT && op != OP_CREF && op != OP_RREF && op != OP_DEF)
    return code;

  /* Step over the insignificant item using its table length. */

  code += _pcre_OP_lengths[op];
  }
/* Control never reaches here */
}



/*************************************************
*        Find the fixed length of a pattern      *
*************************************************/

/* Scan a pattern and compute the fixed length of subject that will match it,
if the length is fixed. 
This is needed for dealing with backward assertions.In UTF8 mode, the result is in characters rather than bytes.Arguments:  code     points to the start of the pattern (the bracket)  options  the compiling optionsReturns:   the fixed length, or -1 if there is no fixed length,             or -2 if \C was encountered*/static intfind_fixedlength(uschar *code, int options){int length = -1;register int branchlength = 0;register uschar *cc = code + 1 + LINK_SIZE;/* Scan along the opcodes for this branch. If we get to the end of thebranch, check the length against that of the other branches. */for (;;)  {  int d;  register int op = *cc;  switch (op)    {    case OP_CBRA:    case OP_BRA:    case OP_ONCE:    case OP_COND:    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);    if (d < 0) return d;    branchlength += d;    do cc += GET(cc, 1); while (*cc == OP_ALT);    cc += 1 + LINK_SIZE;    break;    /* Reached end of a branch; if it's a ket it is the end of a nested    call. If it's ALT it is an alternation in a nested call. If it is    END it's the end of the outer call. All can be handled by the same code. 
*/    case OP_ALT:    case OP_KET:    case OP_KETRMAX:    case OP_KETRMIN:    case OP_END:    if (length < 0) length = branchlength;      else if (length != branchlength) return -1;    if (*cc != OP_ALT) return length;    cc += 1 + LINK_SIZE;    branchlength = 0;    break;    /* Skip over assertive subpatterns */    case OP_ASSERT:    case OP_ASSERT_NOT:    case OP_ASSERTBACK:    case OP_ASSERTBACK_NOT:    do cc += GET(cc, 1); while (*cc == OP_ALT);    /* Fall through */    /* Skip over things that don't match chars */    case OP_REVERSE:    case OP_CREF:    case OP_RREF:    case OP_DEF:    case OP_OPT:    case OP_CALLOUT:    case OP_SOD:    case OP_SOM:    case OP_EOD:    case OP_EODN:    case OP_CIRC:    case OP_DOLL:    case OP_NOT_WORD_BOUNDARY:    case OP_WORD_BOUNDARY:    cc += _pcre_OP_lengths[*cc];    break;    /* Handle literal characters */    case OP_CHAR:    case OP_CHARNC:    case OP_NOT:    branchlength++;    cc += 2;#ifdef SUPPORT_UTF8    if ((options & PCRE_UTF8) != 0)      {      while ((*cc & 0xc0) == 0x80) cc++;      }#endif    break;    /* Handle exact repetitions. The count is already in characters, but we    need to skip over a multibyte character in UTF8 mode.  
*/    case OP_EXACT:    branchlength += GET2(cc,1);    cc += 4;#ifdef SUPPORT_UTF8    if ((options & PCRE_UTF8) != 0)      {      while((*cc & 0x80) == 0x80) cc++;      }#endif    break;    case OP_TYPEEXACT:    branchlength += GET2(cc,1);    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;    cc += 4;    break;    /* Handle single-char matchers */    case OP_PROP:    case OP_NOTPROP:    cc += 2;    /* Fall through */    case OP_NOT_DIGIT:    case OP_DIGIT:    case OP_NOT_WHITESPACE:    case OP_WHITESPACE:    case OP_NOT_WORDCHAR:    case OP_WORDCHAR:    case OP_ANY:    case OP_ALLANY:    branchlength++;    cc++;    break;    /* The single-byte matcher isn't allowed */    case OP_ANYBYTE:    return -2;    /* Check a class for variable quantification */#ifdef SUPPORT_UTF8    case OP_XCLASS:    cc += GET(cc, 1) - 33;    /* Fall through */#endif    case OP_CLASS:    case OP_NCLASS:    cc += 33;    switch (*cc)      {      case OP_CRSTAR:      case OP_CRMINSTAR:      case OP_CRQUERY:      case OP_CRMINQUERY:      return -1;      case OP_CRRANGE:      case OP_CRMINRANGE:      if (GET2(cc,1) != GET2(cc,3)) return -1;      branchlength += GET2(cc,1);      cc += 5;      break;      default:      branchlength++;      }    break;    /* Anything else is variable length */    default:    return -1;    }  }/* Control never gets here */}/**************************************************    Scan compiled regex for numbered bracket    **************************************************//* This little function scans through a compiled pattern until it finds acapturing bracket with the given number.Arguments:  code        points to start of expression  utf8        TRUE in UTF-8 mode  number      the required bracket numberReturns:      pointer to the opcode for the bracket, or NULL if not found*/static const uschar *find_bracket(const uschar *code, BOOL utf8, int number){for (;;)  {  register int c = *code;  if (c == OP_END) return NULL;  /* XCLASS is used for classes that 
cannot be represented just by a bit  map. This includes negated single high-valued characters. The length in  the table is zero; the actual length is stored in the compiled code. */  if (c == OP_XCLASS) code += GET(code, 1);  /* Handle capturing bracket */  else if (c == OP_CBRA)    {    int n = GET2(code, 1+LINK_SIZE);    if (n == number) return (uschar *)code;    code += _pcre_OP_lengths[c];    }  /* Otherwise, we can get the item's length from the table, except that for  repeated character types, we have to test for \p and \P, which have an extra  two bytes of parameters. */  else    {    switch(c)      {      case OP_TYPESTAR:      case OP_TYPEMINSTAR:      case OP_TYPEPLUS:      case OP_TYPEMINPLUS:      case OP_TYPEQUERY:      case OP_TYPEMINQUERY:      case OP_TYPEPOSSTAR:      case OP_TYPEPOSPLUS:      case OP_TYPEPOSQUERY:      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;      break;      case OP_TYPEUPTO:      case OP_TYPEMINUPTO:      case OP_TYPEEXACT:      case OP_TYPEPOSUPTO:      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;      break;      }    /* Add in the fixed length from the table */    code += _pcre_OP_lengths[c];  /* In UTF-8 mode, opcodes that are followed by a character may be followed by  a multi-byte character. The length in the table is a minimum, so we have to  arrange to skip the extra bytes. 
*/#ifdef SUPPORT_UTF8    if (utf8) switch(c)      {      case OP_CHAR:      case OP_CHARNC:      case OP_EXACT:      case OP_UPTO:      case OP_MINUPTO:      case OP_POSUPTO:      case OP_STAR:      case OP_MINSTAR:      case OP_POSSTAR:      case OP_PLUS:      case OP_MINPLUS:      case OP_POSPLUS:      case OP_QUERY:      case OP_MINQUERY:      case OP_POSQUERY:      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];      break;      }#endif    }  }}/**************************************************   Scan compiled regex for recursion reference  **************************************************//* This little function scans through a compiled pattern until it finds aninstance of OP_RECURSE.Arguments:  code        points to start of expression  utf8        TRUE in UTF-8 modeReturns:      pointer to the opcode for OP_RECURSE, or NULL if not found*/static const uschar *find_recurse(const uschar *code, BOOL utf8){for (;;)  {  register int c = *code;  if (c == OP_END) return NULL;  if (c == OP_RECURSE) return code;  /* XCLASS is used for classes that cannot be represented just by a bit  map. This includes negated single high-valued characters. The length in  the table is zero; the actual length is stored in the compiled code. */  if (c == OP_XCLASS) code += GET(code, 1);  /* Otherwise, we can get the item's length from the table, except that for  repeated character types, we have to test for \p and \P, which have an extra  two bytes of parameters. */  else    {    switch(c)      {      case OP_TYPESTAR:      case OP_TYPEMINSTAR:      case OP_TYPEPLUS:      case OP_TYPEMINPLUS:      case OP_TYPEQUERY:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -