📄 pcre_exec.c

📁 PHP v6.0 For Linux 运行环境：Win9X/ WinME/ WinNT/ Win2K/ WinXP
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
        /* Not UTF-8 mode */          {          for (fi = min;; fi++)            {            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            c = *eptr++;            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            }          }        /* Control never gets here */        }      /* If maximizing, find the longest possible run, then work backwards. */      else        {        pp = eptr;#ifdef SUPPORT_UTF8        /* UTF-8 mode */        if (utf8)          {          for (i = min; i < max; i++)            {            int len = 1;            if (eptr >= md->end_subject) break;            GETCHARLEN(c, eptr, len);            if (c > 255)              {              if (op == OP_CLASS) break;              }            else              {              if ((data[c/8] & (1 << (c&7))) == 0) break;              }            eptr += len;            }          for (;;)            {            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (eptr-- == pp) break;        /* Stop if tried at original pos */            BACKCHAR(eptr);            }          }        else#endif          /* Not UTF-8 mode */          {          for (i = min; i < max; i++)            {            if (eptr >= md->end_subject) break;            c = *eptr;            if ((data[c/8] & (1 << (c&7))) == 0) break;            eptr++;            }          while (eptr >= pp)            {            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            eptr--;            }          }        RRETURN(MATCH_NOMATCH);        }      }    /* Control never gets here */    /* Match an extended character class. This opcode is encountered only    in UTF-8 mode, because that's the only time it is compiled. 
*/#ifdef SUPPORT_UTF8    case OP_XCLASS:      {      data = ecode + 1 + LINK_SIZE;                /* Save for matching */      ecode += GET(ecode, 1);                      /* Advance past the item */      switch (*ecode)        {        case OP_CRSTAR:        case OP_CRMINSTAR:        case OP_CRPLUS:        case OP_CRMINPLUS:        case OP_CRQUERY:        case OP_CRMINQUERY:        c = *ecode++ - OP_CRSTAR;        minimize = (c & 1) != 0;        min = rep_min[c];                 /* Pick up values from tables; */        max = rep_max[c];                 /* zero for max => infinity */        if (max == 0) max = INT_MAX;        break;        case OP_CRRANGE:        case OP_CRMINRANGE:        minimize = (*ecode == OP_CRMINRANGE);        min = GET2(ecode, 1);        max = GET2(ecode, 3);        if (max == 0) max = INT_MAX;        ecode += 5;        break;        default:               /* No repeat follows */        min = max = 1;        break;        }      /* First, ensure the minimum number of matches are present. */      for (i = 1; i <= min; i++)        {        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);        GETCHARINC(c, eptr);        if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);        }      /* If max == min we can continue with the main loop without the      need to recurse. */      if (min == max) continue;      /* If minimizing, keep testing the rest of the expression and advancing      the pointer while it matches the class. */      if (minimize)        {        for (fi = min;; fi++)          {          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          GETCHARINC(c, eptr);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          }        /* Control never gets here */        }      /* If maximizing, find the longest possible run, then work backwards. 
*/      else        {        pp = eptr;        for (i = min; i < max; i++)          {          int len = 1;          if (eptr >= md->end_subject) break;          GETCHARLEN(c, eptr, len);          if (!_pcre_xclass(c, data)) break;          eptr += len;          }        for(;;)          {          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (eptr-- == pp) break;        /* Stop if tried at original pos */          BACKCHAR(eptr)          }        RRETURN(MATCH_NOMATCH);        }      /* Control never gets here */      }#endif    /* End of XCLASS */    /* Match a single character, casefully */    case OP_CHAR:#ifdef SUPPORT_UTF8    if (utf8)      {      length = 1;      ecode++;      GETCHARLEN(fc, ecode, length);      if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);      }    else#endif    /* Non-UTF-8 mode */      {      if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);      if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);      ecode += 2;      }    break;    /* Match a single character, caselessly */    case OP_CHARNC:#ifdef SUPPORT_UTF8    if (utf8)      {      length = 1;      ecode++;      GETCHARLEN(fc, ecode, length);      if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      /* If the pattern character's value is < 128, we have only one byte, and      can use the fast lookup table. */      if (fc < 128)        {        if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        }      /* Otherwise we must pick up the subject character */      else        {        int dc;        GETCHARINC(dc, eptr);        ecode += length;        /* If we have Unicode property support, we can use it to test the other        case of the character, if there is one. 
*/        if (fc != dc)          {#ifdef SUPPORT_UCP          if (dc != _pcre_ucp_othercase(fc))#endif            RRETURN(MATCH_NOMATCH);          }        }      }    else#endif   /* SUPPORT_UTF8 */    /* Non-UTF-8 mode */      {      if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);      if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);      ecode += 2;      }    break;    /* Match a single character repeatedly; different opcodes share code. */    case OP_EXACT:    min = max = GET2(ecode, 1);    ecode += 3;    goto REPEATCHAR;    case OP_UPTO:    case OP_MINUPTO:    min = 0;    max = GET2(ecode, 1);    minimize = *ecode == OP_MINUPTO;    ecode += 3;    goto REPEATCHAR;    case OP_STAR:    case OP_MINSTAR:    case OP_PLUS:    case OP_MINPLUS:    case OP_QUERY:    case OP_MINQUERY:    c = *ecode++ - OP_STAR;    minimize = (c & 1) != 0;    min = rep_min[c];                 /* Pick up values from tables; */    max = rep_max[c];                 /* zero for max => infinity */    if (max == 0) max = INT_MAX;    /* Common code for all repeated single-character matches. We can give    up quickly if there are fewer than the minimum number of characters left in    the subject. */    REPEATCHAR:#ifdef SUPPORT_UTF8    if (utf8)      {      length = 1;      charptr = ecode;      GETCHARLEN(fc, ecode, length);      if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      ecode += length;      /* Handle multibyte character matching specially here. There is      support for caseless matching if UCP support is present. 
*/      if (length > 1)        {        int oclength = 0;        uschar occhars[8];#ifdef SUPPORT_UCP        int othercase;        if ((ims & PCRE_CASELESS) != 0 &&            (othercase = _pcre_ucp_othercase(fc)) >= 0 &&             othercase >= 0)          oclength = _pcre_ord2utf8(othercase, occhars);#endif  /* SUPPORT_UCP */        for (i = 1; i <= min; i++)          {          if (memcmp(eptr, charptr, length) == 0) eptr += length;          /* Need braces because of following else */          else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }          else            {            if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);            eptr += oclength;            }          }        if (min == max) continue;        if (minimize)          {          for (fi = min;; fi++)            {            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (memcmp(eptr, charptr, length) == 0) eptr += length;            /* Need braces because of following else */            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else              {              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              eptr += oclength;              }            }          /* Control never gets here */          }        else          {          pp = eptr;          for (i = min; i < max; i++)            {            if (eptr > md->end_subject - length) break;            if (memcmp(eptr, charptr, length) == 0) eptr += length;            else if (oclength == 0) break;            else              {              if (memcmp(eptr, occhars, oclength) != 0) break;              eptr += oclength;              }            }          while (eptr >= pp)           {           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);           if (rrc != MATCH_NOMATCH) RRETURN(rrc);           
eptr -= length;           }          RRETURN(MATCH_NOMATCH);          }        /* Control never gets here */        }      /* If the length of a UTF-8 character is 1, we fall through here, and      obey the code as for non-UTF-8 characters below, though in this case the      value of fc will always be < 128. */      }    else#endif  /* SUPPORT_UTF8 */    /* When not in UTF-8 mode, load a single-byte character. */      {      if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;      }    /* The value of fc at this point is always less than 256, though we may or    may not be in UTF-8 mode. The code is duplicated for the caseless and    caseful cases, for speed, since matching characters is likely to be quite    common. First, ensure the minimum number of matches are present. If min =    max, continue at the same level without recursing. Otherwise, if    minimizing, keep trying the rest of the expression and advancing one    matching character if failing, up to the maximum. Alternatively, if    maximizing, find the maximum number of characters and work backwards. 
*/    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      max, eptr));    if ((ims & PCRE_CASELESS) != 0)      {      fc = md->lcc[fc];      for (i = 1; i <= min; i++)        if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);      if (min == max) continue;      if (minimize)        {        for (fi = min;; fi++)          {          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (fi >= max || eptr >= md->end_subject ||              fc != md->lcc[*eptr++])            RRETURN(MATCH_NOMATCH);          }        /* Control never gets here */        }      else        {        pp = eptr;        for (i = min; i < max; i++)          {          if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;          eptr++;          }        while (eptr >= pp)          {          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);          eptr--;          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          }        RRETURN(MATCH_NOMATCH);        }      /* Control never gets here */
上一页 1 2 3 4 5
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -