📄 pcre_compile.c

📁 Scheme跨平台编译器
💻 C
📖 第 1 页 / 共 5 页
字号:
    break;    /* Handle single-char matchers */    case OP_PROP:    case OP_NOTPROP:    cc += 2;    /* Fall through */    case OP_NOT_DIGIT:    case OP_DIGIT:    case OP_NOT_WHITESPACE:    case OP_WHITESPACE:    case OP_NOT_WORDCHAR:    case OP_WORDCHAR:    case OP_ANY:    case OP_ALLANY:    branchlength++;    cc++;    break;    /* The single-byte matcher isn't allowed */    case OP_ANYBYTE:    return -2;    /* Check a class for variable quantification */#ifdef SUPPORT_UTF8    case OP_XCLASS:    cc += GET(cc, 1) - 33;    /* Fall through */#endif    case OP_CLASS:    case OP_NCLASS:    cc += 33;    switch (*cc)      {      case OP_CRSTAR:      case OP_CRMINSTAR:      case OP_CRQUERY:      case OP_CRMINQUERY:      return -1;      case OP_CRRANGE:      case OP_CRMINRANGE:      if (GET2(cc,1) != GET2(cc,3)) return -1;      branchlength += GET2(cc,1);      cc += 5;      break;      default:      branchlength++;      }    break;    /* Anything else is variable length */    default:    return -1;    }  }/* Control never gets here */}/**************************************************    Scan compiled regex for numbered bracket    **************************************************//* This little function scans through a compiled pattern until it finds acapturing bracket with the given number.Arguments:  code        points to start of expression  utf8        TRUE in UTF-8 mode  number      the required bracket numberReturns:      pointer to the opcode for the bracket, or NULL if not found*/static const uschar *find_bracket(const uschar *code, BOOL utf8, int number){for (;;)  {  register int c = *code;  if (c == OP_END) return NULL;  /* XCLASS is used for classes that cannot be represented just by a bit  map. This includes negated single high-valued characters. The length in  the table is zero; the actual length is stored in the compiled code. */  if (c == OP_XCLASS) code += GET(code, 1);  /* Handle capturing bracket */  else if (c == OP_CBRA)    {    int n = GET2(code, 1+LINK_SIZE);    if (n == number) return (uschar *)code;    code += _pcre_OP_lengths[c];    }  /* Otherwise, we can get the item's length from the table, except that for  repeated character types, we have to test for \p and \P, which have an extra  two bytes of parameters. */  else    {    switch(c)      {      case OP_TYPESTAR:      case OP_TYPEMINSTAR:      case OP_TYPEPLUS:      case OP_TYPEMINPLUS:      case OP_TYPEQUERY:      case OP_TYPEMINQUERY:      case OP_TYPEPOSSTAR:      case OP_TYPEPOSPLUS:      case OP_TYPEPOSQUERY:      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;      break;      case OP_TYPEUPTO:      case OP_TYPEMINUPTO:      case OP_TYPEEXACT:      case OP_TYPEPOSUPTO:      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;      break;      }    /* Add in the fixed length from the table */    code += _pcre_OP_lengths[c];  /* In UTF-8 mode, opcodes that are followed by a character may be followed by  a multi-byte character. The length in the table is a minimum, so we have to  arrange to skip the extra bytes. */#ifdef SUPPORT_UTF8    if (utf8) switch(c)      {      case OP_CHAR:      case OP_CHARNC:      case OP_EXACT:      case OP_UPTO:      case OP_MINUPTO:      case OP_POSUPTO:      case OP_STAR:      case OP_MINSTAR:      case OP_POSSTAR:      case OP_PLUS:      case OP_MINPLUS:      case OP_POSPLUS:      case OP_QUERY:      case OP_MINQUERY:      case OP_POSQUERY:      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];      break;      }#endif    }  }}/**************************************************   Scan compiled regex for recursion reference  **************************************************//* This little function scans through a compiled pattern until it finds aninstance of OP_RECURSE.Arguments:  code        points to start of expression  utf8        TRUE in UTF-8 modeReturns:      pointer to the opcode for OP_RECURSE, or NULL if not found*/static const uschar *find_recurse(const uschar *code, BOOL utf8){for (;;)  {  register int c = *code;  if (c == OP_END) return NULL;  if (c == OP_RECURSE) return code;  /* XCLASS is used for classes that cannot be represented just by a bit  map. This includes negated single high-valued characters. The length in  the table is zero; the actual length is stored in the compiled code. */  if (c == OP_XCLASS) code += GET(code, 1);  /* Otherwise, we can get the item's length from the table, except that for  repeated character types, we have to test for \p and \P, which have an extra  two bytes of parameters. */  else    {    switch(c)      {      case OP_TYPESTAR:      case OP_TYPEMINSTAR:      case OP_TYPEPLUS:      case OP_TYPEMINPLUS:      case OP_TYPEQUERY:      case OP_TYPEMINQUERY:      case OP_TYPEPOSSTAR:      case OP_TYPEPOSPLUS:      case OP_TYPEPOSQUERY:      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;      break;      case OP_TYPEPOSUPTO:      case OP_TYPEUPTO:      case OP_TYPEMINUPTO:      case OP_TYPEEXACT:      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;      break;      }    /* Add in the fixed length from the table */    code += _pcre_OP_lengths[c];    /* In UTF-8 mode, opcodes that are followed by a character may be followed    by a multi-byte character. The length in the table is a minimum, so we have    to arrange to skip the extra bytes. */#ifdef SUPPORT_UTF8    if (utf8) switch(c)      {      case OP_CHAR:      case OP_CHARNC:      case OP_EXACT:      case OP_UPTO:      case OP_MINUPTO:      case OP_POSUPTO:      case OP_STAR:      case OP_MINSTAR:      case OP_POSSTAR:      case OP_PLUS:      case OP_MINPLUS:      case OP_POSPLUS:      case OP_QUERY:      case OP_MINQUERY:      case OP_POSQUERY:      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];      break;      }#endif    }  }}/**************************************************    Scan compiled branch for non-emptiness      **************************************************//* This function scans through a branch of a compiled pattern to see whether itcan match the empty string or not. It is called from could_be_empty()below and from compile_branch() when checking for an unlimited repeat of agroup that can match nothing. Note that first_significant_code() skips overbackward and negative forward assertions when its final argument is TRUE. If wehit an unclosed bracket, we return "empty" - this means we've struck an innerbracket whose current branch will already have been scanned.Arguments:  code        points to start of search  endcode     points to where to stop  utf8        TRUE if in UTF8 modeReturns:      TRUE if what is matched could be empty*/static BOOLcould_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8){register int c;for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);     code < endcode;     code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE))  {  const uschar *ccode;  c = *code;  /* Skip over forward assertions; the other assertions are skipped by  first_significant_code() with a TRUE final argument. */  if (c == OP_ASSERT)    {    do code += GET(code, 1); while (*code == OP_ALT);    c = *code;    continue;    }  /* Groups with zero repeats can of course be empty; skip them. */  if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)    {    code += _pcre_OP_lengths[c];    do code += GET(code, 1); while (*code == OP_ALT);    c = *code;    continue;    }  /* For other groups, scan the branches. */  if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)    {    BOOL empty_branch;    if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */    /* Scan a closed bracket */    empty_branch = FALSE;    do      {      if (!empty_branch && could_be_empty_branch(code, endcode, utf8))        empty_branch = TRUE;      code += GET(code, 1);      }    while (*code == OP_ALT);    if (!empty_branch) return FALSE;   /* All branches are non-empty */    c = *code;    continue;    }  /* Handle the other opcodes */  switch (c)    {    /* Check for quantifiers after a class. XCLASS is used for classes that    cannot be represented just by a bit map. This includes negated single    high-valued characters. The length in _pcre_OP_lengths[] is zero; the    actual length is stored in the compiled code, so we must update "code"    here. */#ifdef SUPPORT_UTF8    case OP_XCLASS:    ccode = code += GET(code, 1);    goto CHECK_CLASS_REPEAT;#endif    case OP_CLASS:    case OP_NCLASS:    ccode = code + 33;#ifdef SUPPORT_UTF8    CHECK_CLASS_REPEAT:#endif    switch (*ccode)      {      case OP_CRSTAR:            /* These could be empty; continue */      case OP_CRMINSTAR:      case OP_CRQUERY:      case OP_CRMINQUERY:      break;      default:                   /* Non-repeat => class must match */      case OP_CRPLUS:            /* These repeats aren't empty */      case OP_CRMINPLUS:      return FALSE;      case OP_CRRANGE:      case OP_CRMINRANGE:      if (GET2(ccode, 1) > 0) return FALSE;  /* Minimum > 0 */      break;      }    break;    /* Opcodes that must match a character */    case OP_PROP:    case OP_NOTPROP:    case OP_EXTUNI:    case OP_NOT_DIGIT:    case OP_DIGIT:    case OP_NOT_WHITESPACE:    case OP_WHITESPACE:    case OP_NOT_WORDCHAR:    case OP_WORDCHAR:    case OP_ANY:    case OP_ALLANY:    case OP_ANYBYTE:    case OP_CHAR:    case OP_CHARNC:    case OP_NOT:    case OP_PLUS:    case OP_MINPLUS:    case OP_POSPLUS:    case OP_EXACT:    case OP_NOTPLUS:    case OP_NOTMINPLUS:    case OP_NOTPOSPLUS:    case OP_NOTEXACT:    case OP_TYPEPLUS:    case OP_TYPEMINPLUS:    case OP_TYPEPOSPLUS:    case OP_TYPEEXACT:    return FALSE;    /* These are going to continue, as they may be empty, but we have to    fudge the length for the \p and \P cases. */    case OP_TYPESTAR:    case OP_TYPEMINSTAR:    case OP_TYPEPOSSTAR:    case OP_TYPEQUERY:    case OP_TYPEMINQUERY:    case OP_TYPEPOSQUERY:    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;    break;    /* Same for these */    case OP_TYPEUPTO:
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -