📄 pcre_exec.c

📁 php-4.4.7学习linux时下载的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
      RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* Word boundary assertions */    case OP_NOT_WORD_BOUNDARY:    case OP_WORD_BOUNDARY:      {      /* Find out if the previous and current characters are "word" characters.      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to      be "non-word" characters. */#ifdef SUPPORT_UTF8      if (utf8)        {        if (eptr == md->start_subject) prev_is_word = FALSE; else          {          const uschar *lastptr = eptr - 1;          while((*lastptr & 0xc0) == 0x80) lastptr--;          GETCHAR(c, lastptr);          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;          }        if (eptr >= md->end_subject) cur_is_word = FALSE; else          {          GETCHAR(c, eptr);          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;          }        }      else#endif      /* More streamlined when not in UTF-8 mode */        {        prev_is_word = (eptr != md->start_subject) &&          ((md->ctypes[eptr[-1]] & ctype_word) != 0);        cur_is_word = (eptr < md->end_subject) &&          ((md->ctypes[*eptr] & ctype_word) != 0);        }      /* Now see if the situation is what we want */      if ((*ecode++ == OP_WORD_BOUNDARY)?           cur_is_word == prev_is_word : cur_is_word != prev_is_word)        RRETURN(MATCH_NOMATCH);      }    break;    /* Match a single character type; inline for speed */    case OP_ANY:    if ((ims & PCRE_DOTALL) == 0)      {      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);      }    if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);    if (utf8)      while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;    ecode++;    break;    /* Match a single byte, even in UTF-8 mode. This opcode really does match    any byte, even newline, independent of the setting of PCRE_DOTALL. */    case OP_ANYBYTE:    if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_NOT_DIGIT:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    if (#ifdef SUPPORT_UTF8       c < 256 &&#endif       (md->ctypes[c] & ctype_digit) != 0       )      RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_DIGIT:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    if (#ifdef SUPPORT_UTF8       c >= 256 ||#endif       (md->ctypes[c] & ctype_digit) == 0       )      RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_NOT_WHITESPACE:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    if (#ifdef SUPPORT_UTF8       c < 256 &&#endif       (md->ctypes[c] & ctype_space) != 0       )      RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_WHITESPACE:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    if (#ifdef SUPPORT_UTF8       c >= 256 ||#endif       (md->ctypes[c] & ctype_space) == 0       )      RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_NOT_WORDCHAR:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    if (#ifdef SUPPORT_UTF8       c < 256 &&#endif       (md->ctypes[c] & ctype_word) != 0       )      RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_WORDCHAR:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    if (#ifdef SUPPORT_UTF8       c >= 256 ||#endif       (md->ctypes[c] & ctype_word) == 0       )      RRETURN(MATCH_NOMATCH);    ecode++;    break;    case OP_ANYNL:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);    switch(c)      {      default: RRETURN(MATCH_NOMATCH);      case 0x000d:      if (eptr < md->end_subject && *eptr == 0x0a) eptr++;      break;      case 0x000a:      case 0x000b:      case 0x000c:      case 0x0085:      case 0x2028:      case 0x2029:      break;      }    ecode++;    break;#ifdef SUPPORT_UCP    /* Check the next character by Unicode property. We will get here only    if the support is in the binary; otherwise a compile-time error occurs. */    case OP_PROP:    case OP_NOTPROP:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);      {      int chartype, script;      int category = _pcre_ucp_findprop(c, &chartype, &script);      switch(ecode[1])        {        case PT_ANY:        if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);        break;        case PT_LAMP:        if ((chartype == ucp_Lu ||             chartype == ucp_Ll ||             chartype == ucp_Lt) == (op == OP_NOTPROP))          RRETURN(MATCH_NOMATCH);         break;        case PT_GC:        if ((ecode[2] != category) == (op == OP_PROP))          RRETURN(MATCH_NOMATCH);        break;        case PT_PC:        if ((ecode[2] != chartype) == (op == OP_PROP))          RRETURN(MATCH_NOMATCH);        break;        case PT_SC:        if ((ecode[2] != script) == (op == OP_PROP))          RRETURN(MATCH_NOMATCH);        break;        default:        RRETURN(PCRE_ERROR_INTERNAL);        }      ecode += 3;      }    break;    /* Match an extended Unicode sequence. We will get here only if the support    is in the binary; otherwise a compile-time error occurs. */    case OP_EXTUNI:    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    GETCHARINCTEST(c, eptr);      {      int chartype, script;      int category = _pcre_ucp_findprop(c, &chartype, &script);      if (category == ucp_M) RRETURN(MATCH_NOMATCH);      while (eptr < md->end_subject)        {        int len = 1;        if (!utf8) c = *eptr; else          {          GETCHARLEN(c, eptr, len);          }        category = _pcre_ucp_findprop(c, &chartype, &script);        if (category != ucp_M) break;        eptr += len;        }      }    ecode++;    break;#endif    /* Match a back reference, possibly repeatedly. Look past the end of the    item to see if there is repeat information following. The code is similar    to that for character classes, but repeated for efficiency. Then obey    similar code to character type repeats - written out again for speed.    However, if the referenced string is the empty string, always treat    it as matched, any number of times (otherwise there could be infinite    loops). */    case OP_REF:      {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;                                 /* Advance past item */      /* If the reference is unset, set the length to be longer than the amount      of subject left; this ensures that every attempt at a match fails. We      can't just fail here, because of the possibility of quantifiers with zero      minima. */      length = (offset >= offset_top || md->offset_vector[offset] < 0)?        md->end_subject - eptr + 1 :        md->offset_vector[offset+1] - md->offset_vector[offset];      /* Set up for repetition, or handle the non-repeated case */      switch (*ecode)        {        case OP_CRSTAR:        case OP_CRMINSTAR:        case OP_CRPLUS:        case OP_CRMINPLUS:        case OP_CRQUERY:        case OP_CRMINQUERY:        c = *ecode++ - OP_CRSTAR;        minimize = (c & 1) != 0;        min = rep_min[c];                 /* Pick up values from tables; */        max = rep_max[c];                 /* zero for max => infinity */        if (max == 0) max = INT_MAX;        break;        case OP_CRRANGE:        case OP_CRMINRANGE:        minimize = (*ecode == OP_CRMINRANGE);        min = GET2(ecode, 1);        max = GET2(ecode, 3);        if (max == 0) max = INT_MAX;        ecode += 5;        break;        default:               /* No repeat follows */        if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        eptr += length;        continue;              /* With the main loop */        }      /* If the length of the reference is zero, just continue with the      main loop. */      if (length == 0) continue;      /* First, ensure the minimum number of matches are present. We get back      the length of the reference string explicitly rather than passing the      address of eptr, so that eptr can be a register variable. */      for (i = 1; i <= min; i++)        {        if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        eptr += length;        }      /* If min = max, continue at the same level without recursion.      They are not both allowed to be zero. */      if (min == max) continue;      /* If minimizing, keep trying and advancing the pointer */      if (minimize)        {        for (fi = min;; fi++)          {          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (fi >= max || !match_ref(offset, eptr, length, md, ims))            RRETURN(MATCH_NOMATCH);          eptr += length;          }        /* Control never gets here */        }      /* If maximizing, find the longest string and work backwards */      else        {        pp = eptr;        for (i = min; i < max; i++)          {          if (!match_ref(offset, eptr, length, md, ims)) break;          eptr += length;          }        while (eptr >= pp)          {          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          eptr -= length;          }        RRETURN(MATCH_NOMATCH);        }      }    /* Control never gets here */    /* Match a bit-mapped character class, possibly repeatedly. This op code is    used when all the characters in the class have values in the range 0-255,    and either the matching is caseful, or the characters are in the range    0-127 when UTF-8 processing is enabled. The only difference between    OP_CLASS and OP_NCLASS occurs when a data character outside the range is    encountered.    First, look past the end of the item to see if there is repeat information    following. Then obey similar code to character type repeats - written out    again for speed. */    case OP_NCLASS:    case OP_CLASS:      {      data = ecode + 1;                /* Save for matching */      ecode += 33;                     /* Advance past the item */      switch (*ecode)        {        case OP_CRSTAR:        case OP_CRMINSTAR:        case OP_CRPLUS:        case OP_CRMINPLUS:        case OP_CRQUERY:        case OP_CRMINQUERY:        c = *ecode++ - OP_CRSTAR;        minimize = (c & 1) != 0;        min = rep_min[c];                 /* Pick up values from tables; */        max = rep_max[c];                 /* zero for max => infinity */        if (max == 0) max = INT_MAX;        break;        case OP_CRRANGE:        case OP_CRMINRANGE:        minimize = (*ecode == OP_CRMINRANGE);        min = GET2(ecode, 1);        max = GET2(ecode, 3);        if (max == 0) max = INT_MAX;        ecode += 5;        break;        default:               /* No repeat follows */        min = max = 1;        break;        }      /* First, ensure the minimum number of matches are present. */#ifdef SUPPORT_UTF8      /* UTF-8 mode */      if (utf8)        {        for (i = 1; i <= min; i++)          {          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          GETCHARINC(c, eptr);          if (c > 255)            {            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);            }          else            {            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            }          }        }      else
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -