⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_exec.c

📁 PHP v6.0 For Linux 运行环境:Win9X/ WinME/ WinNT/ Win2K/ WinXP
💻 C
📖 第 1 页 / 共 5 页
字号:
      cb.callout_data     = md->callout_data;      if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);      if (rrc < 0) RRETURN(rrc);      }    ecode += 2 + 2*LINK_SIZE;    break;    /* Recursion either matches the current regex, or some subexpression. The    offset data is the offset to the starting bracket from the start of the    whole pattern. (This is so that it works from duplicated subpatterns.)    If there are any capturing brackets started but not finished, we have to    save their starting points and reinstate them after the recursion. However,    we don't know how many such there are (offset_top records the completed    total) so we just have to save all the potential data. There may be up to    65535 such values, which is too large to put on the stack, but using malloc    for small numbers seems expensive. As a compromise, the stack is used when    there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc    is used. A problem is what to do if the malloc fails ... there is no way of    returning to the top level with an error. Save the top REC_STACK_SAVE_MAX    values on the stack, and accept that the rest may be wrong.    There are also other values that have to be saved. We use a chained    sequence of blocks that actually live on the stack. Thanks to Robin Houston    for the original version of this logic. */    case OP_RECURSE:      {      callpat = md->start_code + GET(ecode, 1);      new_recursive.group_num = *callpat - OP_BRA;      /* For extended extraction brackets (large number), we have to fish out      the number from a dummy opcode at the start. */      if (new_recursive.group_num > EXTRACT_BASIC_MAX)        new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);      /* Add to "recursing stack" */      new_recursive.prevrec = md->recursive;      md->recursive = &new_recursive;      /* Find where to continue from afterwards */      ecode += 1 + LINK_SIZE;      new_recursive.after_call = ecode;      /* Now save the offset data. */      new_recursive.saved_max = md->offset_end;      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        new_recursive.offset_save = stacksave;      else        {        new_recursive.offset_save =          (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);        }      memcpy(new_recursive.offset_save, md->offset_vector,            new_recursive.saved_max * sizeof(int));      new_recursive.save_start = md->start_match;      md->start_match = eptr;      /* OK, now we can do the recursion. For each top-level alternative we      restore the offset and recursion data. */      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));      do        {        RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,            eptrb, match_isgroup);        if (rrc == MATCH_MATCH)          {          DPRINTF(("Recursion matched\n"));          md->recursive = new_recursive.prevrec;          if (new_recursive.offset_save != stacksave)            (pcre_free)(new_recursive.offset_save);          RRETURN(MATCH_MATCH);          }        else if (rrc != MATCH_NOMATCH)          {          DPRINTF(("Recursion gave error %d\n", rrc));          RRETURN(rrc);          }        md->recursive = &new_recursive;        memcpy(md->offset_vector, new_recursive.offset_save,            new_recursive.saved_max * sizeof(int));        callpat += GET(callpat, 1);        }      while (*callpat == OP_ALT);      DPRINTF(("Recursion didn't match\n"));      md->recursive = new_recursive.prevrec;      if (new_recursive.offset_save != stacksave)        (pcre_free)(new_recursive.offset_save);      RRETURN(MATCH_NOMATCH);      }    /* Control never reaches here */    /* "Once" brackets are like assertion brackets except that after a match,    the point in the subject string is not moved back. Thus there can never be    a move back into the brackets. Friedl calls these "atomic" subpatterns.    Check the alternative branches in turn - the matching won't pass the KET    for this kind of subpattern. If any one branch matches, we carry on as at    the end of a normal bracket, leaving the subject pointer. */    case OP_ONCE:      {      prev = ecode;      saved_eptr = eptr;      do        {        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);        }      while (*ecode == OP_ALT);      /* If hit the end of the group (which could be repeated), fail */      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      /* Continue as from after the assertion, updating the offsets high water      mark, since extracts may have been taken. */      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      offset_top = md->end_offset_top;      eptr = md->end_match_ptr;      /* For a non-repeating ket, just continue at this level. This also      happens for a repeating ket if no characters were matched in the group.      This is the forcible breaking of infinite loops as implemented in Perl      5.005. If there is an options reset, it will get obeyed in the normal      course of events. */      if (*ecode == OP_KET || eptr == saved_eptr)        {        ecode += 1+LINK_SIZE;        break;        }      /* The repeating kets try the rest of the pattern or restart from the      preceding bracket, in the appropriate order. We need to reset any options      that changed within the bracket before re-running it, so check the next      opcode. */      if (ecode[1+LINK_SIZE] == OP_OPT)        {        ims = (ims & ~PCRE_IMS) | ecode[4];        DPRINTF(("ims set to %02lx at group repeat\n", ims));        }      if (*ecode == OP_KETRMIN)        {        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        }      else  /* OP_KETRMAX */        {        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        }      }    RRETURN(MATCH_NOMATCH);    /* An alternation is the end of a branch; scan along to find the end of the    bracketed group and go to there. */    case OP_ALT:    do ecode += GET(ecode,1); while (*ecode == OP_ALT);    break;    /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating    that it may occur zero times. It may repeat infinitely, or not at all -    i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper    repeat limits are compiled as a number of copies, with the optional ones    preceded by BRAZERO or BRAMINZERO. */    case OP_BRAZERO:      {      next = ecode+1;      RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1+LINK_SIZE;      }    break;    case OP_BRAMINZERO:      {      next = ecode+1;      do next += GET(next,1); while (*next == OP_ALT);      RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        match_isgroup);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode++;      }    break;    /* End of a group, repeated or non-repeating. If we are at the end of    an assertion "group", stop matching and return MATCH_MATCH, but record the    current high water mark for use by positive assertions. Do this also    for the "once" (not-backup up) groups. */    case OP_KET:    case OP_KETRMIN:    case OP_KETRMAX:      {      prev = ecode - GET(ecode, 1);      saved_eptr = eptrb->epb_saved_eptr;      /* Back up the stack of bracket start pointers. */      eptrb = eptrb->epb_prev;      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ONCE)        {        md->end_match_ptr = eptr;      /* For ONCE */        md->end_offset_top = offset_top;        RRETURN(MATCH_MATCH);        }      /* In all other cases except a conditional group we have to check the      group number back at the start and if necessary complete handling an      extraction by setting the offsets and bumping the high water mark. */      if (*prev != OP_COND)        {        number = *prev - OP_BRA;        /* For extended extraction brackets (large number), we have to fish out        the number from a dummy opcode at the start. */        if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);        offset = number << 1;#ifdef DEBUG        printf("end bracket %d", number);        printf("\n");#endif        /* Test for a numbered group. This includes groups called as a result        of recursion. Note that whole-pattern recursion is coded as a recurse        into group 0, so it won't be picked up here. Instead, we catch it when        the OP_END is reached. */        if (number > 0)          {          md->capture_last = number;          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            {            md->offset_vector[offset] =              md->offset_vector[md->offset_end - number];            md->offset_vector[offset+1] = eptr - md->start_subject;            if (offset_top <= offset) offset_top = offset + 2;            }          /* Handle a recursively called group. Restore the offsets          appropriately and continue from after the call. */          if (md->recursive != NULL && md->recursive->group_num == number)            {            recursion_info *rec = md->recursive;            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            md->recursive = rec->prevrec;            md->start_match = rec->save_start;            memcpy(md->offset_vector, rec->offset_save,              rec->saved_max * sizeof(int));            ecode = rec->after_call;            ims = original_ims;            break;            }          }        }      /* Reset the value of the ims flags, in case they got changed during      the group. */      ims = original_ims;      DPRINTF(("ims reset to %02lx\n", ims));      /* For a non-repeating ket, just continue at this level. This also      happens for a repeating ket if no characters were matched in the group.      This is the forcible breaking of infinite loops as implemented in Perl      5.005. If there is an options reset, it will get obeyed in the normal      course of events. */      if (*ecode == OP_KET || eptr == saved_eptr)        {        ecode += 1 + LINK_SIZE;        break;        }      /* The repeating kets try the rest of the pattern or restart from the      preceding bracket, in the appropriate order. */      if (*ecode == OP_KETRMIN)        {        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        }      else  /* OP_KETRMAX */        {        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        }      }    RRETURN(MATCH_NOMATCH);    /* Start of subject unless notbol, or after internal newline if multiline */    case OP_CIRC:    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);    if ((ims & PCRE_MULTILINE) != 0)      {      if (eptr != md->start_subject && eptr[-1] != NEWLINE)        RRETURN(MATCH_NOMATCH);      ecode++;      break;      }    /* ... else fall through */    /* Start of subject assertion */    case OP_SOD:    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* Start of match assertion */    case OP_SOM:    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* Assert before internal newline if multiline, or before a terminating    newline unless endonly is set, else end of subject unless noteol is set. */    case OP_DOLL:    if ((ims & PCRE_MULTILINE) != 0)      {      if (eptr < md->end_subject)        { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }      else        { if (md->noteol) RRETURN(MATCH_NOMATCH); }      ecode++;      break;      }    else      {      if (md->noteol) RRETURN(MATCH_NOMATCH);      if (!md->endonly)        {        if (eptr < md->end_subject - 1 ||           (eptr == md->end_subject - 1 && *eptr != NEWLINE))          RRETURN(MATCH_NOMATCH);        ecode++;        break;        }      }    /* ... else fall through */    /* End of subject assertion (\z) */    case OP_EOD:    if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* End of subject or ending \n assertion (\Z) */    case OP_EODN:    if (eptr < md->end_subject - 1 ||       (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* Word boundary assertions */    case OP_NOT_WORD_BOUNDARY:    case OP_WORD_BOUNDARY:      {      /* Find out if the previous and current characters are "word" characters.      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to      be "non-word" characters. */#ifdef SUPPORT_UTF8      if (utf8)        {        if (eptr == md->start_subject) prev_is_word = FALSE; else          {          const uschar *lastptr = eptr - 1;          while((*lastptr & 0xc0) == 0x80) lastptr--;          GETCHAR(c, lastptr);          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;          }        if (eptr >= md->end_subject) cur_is_word = FALSE; else          {          GETCHAR(c, eptr);          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -