⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_exec.c

📁 SDL文件。SDL_ERRO文件.....
💻 C
📖 第 1 页 / 共 5 页
字号:
      else                       /* Group must match something */        {        flags = 0;        goto TAIL_RECURSE;        }      }    else                         /* Condition false & no 2nd alternative */      {      ecode += 1 + LINK_SIZE;      }    break;    /* End of the pattern, either real or forced. If we are in a top-level    recursion, we should restore the offsets appropriately and continue from    after the call. */    case OP_ACCEPT:    case OP_END:    if (md->recursive != NULL && md->recursive->group_num == 0)      {      recursion_info *rec = md->recursive;      DPRINTF(("End of pattern in a (?0) recursion\n"));      md->recursive = rec->prevrec;      memmove(md->offset_vector, rec->offset_save,        rec->saved_max * sizeof(int));      mstart = rec->save_start;      ims = original_ims;      ecode = rec->after_call;      break;      }    /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty    string - backtracking will then try other alternatives, if any. */    if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);    md->end_match_ptr = eptr;           /* Record where we ended */    md->end_offset_top = offset_top;    /* and how many extracts were taken */    md->start_match_ptr = mstart;       /* and the start (\K can modify) */    RRETURN(MATCH_MATCH);    /* Change option settings */    case OP_OPT:    ims = ecode[1];    ecode += 2;    DPRINTF(("ims set to %02lx\n", ims));    break;    /* Assertion brackets. Check the alternative branches in turn - the    matching won't pass the KET for an assertion. If any one branch matches,    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the    start of each branch to move the current point backwards, so the code at    this level is identical to the lookahead case. 
*/    case OP_ASSERT:    case OP_ASSERTBACK:    do      {      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RM4);      if (rrc == MATCH_MATCH) break;      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      ecode += GET(ecode, 1);      }    while (*ecode == OP_ALT);    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);    /* If checking an assertion for a condition, return MATCH_MATCH. */    if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);    /* Continue from after the assertion, updating the offsets high water    mark, since extracts may have been taken during the assertion. */    do ecode += GET(ecode,1); while (*ecode == OP_ALT);    ecode += 1 + LINK_SIZE;    offset_top = md->end_offset_top;    continue;    /* Negative assertion: all branches must fail to match */    case OP_ASSERT_NOT:    case OP_ASSERTBACK_NOT:    do      {      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RM5);      if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      ecode += GET(ecode,1);      }    while (*ecode == OP_ALT);    if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);    ecode += 1 + LINK_SIZE;    continue;    /* Move the subject pointer back. This occurs only at the start of    each branch of a lookbehind assertion. If we are too close to the start to    move back, this match function fails. When working with UTF-8 we move    back a number of characters, not bytes. 
*/    case OP_REVERSE:#ifdef SUPPORT_UTF8    if (utf8)      {      i = GET(ecode, 1);      while (i-- > 0)        {        eptr--;        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        BACKCHAR(eptr);        }      }    else#endif    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      {      eptr -= GET(ecode, 1);      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);      }    /* Skip to next op code */    ecode += 1 + LINK_SIZE;    break;    /* The callout item calls an external function, if one is provided, passing    details of the match so far. This is mainly for debugging, though the    function is able to force a failure. */    case OP_CALLOUT:    if (pcre_callout != NULL)      {      pcre_callout_block cb;      cb.version          = 1;   /* Version 1 of the callout block */      cb.callout_number   = ecode[1];      cb.offset_vector    = md->offset_vector;      cb.subject          = (PCRE_SPTR)md->start_subject;      cb.subject_length   = md->end_subject - md->start_subject;      cb.start_match      = mstart - md->start_subject;      cb.current_position = eptr - md->start_subject;      cb.pattern_position = GET(ecode, 2);      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);      cb.capture_top      = offset_top/2;      cb.capture_last     = md->capture_last;      cb.callout_data     = md->callout_data;      if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);      if (rrc < 0) RRETURN(rrc);      }    ecode += 2 + 2*LINK_SIZE;    break;    /* Recursion either matches the current regex, or some subexpression. The    offset data is the offset to the starting bracket from the start of the    whole pattern. (This is so that it works from duplicated subpatterns.)    If there are any capturing brackets started but not finished, we have to    save their starting points and reinstate them after the recursion. 
However,    we don't know how many such there are (offset_top records the completed    total) so we just have to save all the potential data. There may be up to    65535 such values, which is too large to put on the stack, but using malloc    for small numbers seems expensive. As a compromise, the stack is used when    there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc    is used. A problem is what to do if the malloc fails ... there is no way of    returning to the top level with an error. Save the top REC_STACK_SAVE_MAX    values on the stack, and accept that the rest may be wrong.    There are also other values that have to be saved. We use a chained    sequence of blocks that actually live on the stack. Thanks to Robin Houston    for the original version of this logic. */    case OP_RECURSE:      {      callpat = md->start_code + GET(ecode, 1);      new_recursive.group_num = (callpat == md->start_code)? 0 :        GET2(callpat, 1 + LINK_SIZE);      /* Add to "recursing stack" */      new_recursive.prevrec = md->recursive;      md->recursive = &new_recursive;      /* Find where to continue from afterwards */      ecode += 1 + LINK_SIZE;      new_recursive.after_call = ecode;      /* Now save the offset data. */      new_recursive.saved_max = md->offset_end;      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        new_recursive.offset_save = stacksave;      else        {        new_recursive.offset_save =          (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);        }      memcpy(new_recursive.offset_save, md->offset_vector,            new_recursive.saved_max * sizeof(int));      new_recursive.save_start = mstart;      mstart = eptr;      /* OK, now we can do the recursion. For each top-level alternative we      restore the offset and recursion data. 
*/      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));      flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;      do        {        RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          md, ims, eptrb, flags, RM6);        if (rrc == MATCH_MATCH)          {          DPRINTF(("Recursion matched\n"));          md->recursive = new_recursive.prevrec;          if (new_recursive.offset_save != stacksave)            (pcre_free)(new_recursive.offset_save);          RRETURN(MATCH_MATCH);          }        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          {          DPRINTF(("Recursion gave error %d\n", rrc));          RRETURN(rrc);          }        md->recursive = &new_recursive;        memcpy(md->offset_vector, new_recursive.offset_save,            new_recursive.saved_max * sizeof(int));        callpat += GET(callpat, 1);        }      while (*callpat == OP_ALT);      DPRINTF(("Recursion didn't match\n"));      md->recursive = new_recursive.prevrec;      if (new_recursive.offset_save != stacksave)        (pcre_free)(new_recursive.offset_save);      RRETURN(MATCH_NOMATCH);      }    /* Control never reaches here */    /* "Once" brackets are like assertion brackets except that after a match,    the point in the subject string is not moved back. Thus there can never be    a move back into the brackets. Friedl calls these "atomic" subpatterns.    Check the alternative branches in turn - the matching won't pass the KET    for this kind of subpattern. If any one branch matches, we carry on as at    the end of a normal bracket, leaving the subject pointer. 
*/    case OP_ONCE:    prev = ecode;    saved_eptr = eptr;    do      {      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);      if (rrc == MATCH_MATCH) break;      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      ecode += GET(ecode,1);      }    while (*ecode == OP_ALT);    /* If hit the end of the group (which could be repeated), fail */    if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);    /* Continue as from after the assertion, updating the offsets high water    mark, since extracts may have been taken. */    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);    offset_top = md->end_offset_top;    eptr = md->end_match_ptr;    /* For a non-repeating ket, just continue at this level. This also    happens for a repeating ket if no characters were matched in the group.    This is the forcible breaking of infinite loops as implemented in Perl    5.005. If there is an options reset, it will get obeyed in the normal    course of events. */    if (*ecode == OP_KET || eptr == saved_eptr)      {      ecode += 1+LINK_SIZE;      break;      }    /* The repeating kets try the rest of the pattern or restart from the    preceding bracket, in the appropriate order. The second "call" of match()    uses tail recursion, to avoid using another stack frame. We need to reset    any options that changed within the bracket before re-running it, so    check the next opcode. 
*/    if (ecode[1+LINK_SIZE] == OP_OPT)      {      ims = (ims & ~PCRE_IMS) | ecode[4];      DPRINTF(("ims set to %02lx at group repeat\n", ims));      }    if (*ecode == OP_KETRMIN)      {      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode = prev;      flags = 0;      goto TAIL_RECURSE;      }    else  /* OP_KETRMAX */      {      RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode += 1 + LINK_SIZE;      flags = 0;      goto TAIL_RECURSE;      }    /* Control never gets here */    /* An alternation is the end of a branch; scan along to find the end of the    bracketed group and go to there. */    case OP_ALT:    do ecode += GET(ecode,1); while (*ecode == OP_ALT);    break;    /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating    that it may occur zero times. It may repeat infinitely, or not at all -    i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper    repeat limits are compiled as a number of copies, with the optional ones    preceded by BRAZERO or BRAMINZERO. */    case OP_BRAZERO:      {      next = ecode+1;      RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1 + LINK_SIZE;      }    break;    case OP_BRAMINZERO:      {      next = ecode+1;      do next += GET(next, 1); while (*next == OP_ALT);      RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode++;      }    break;    /* End of a group, repeated or non-repeating. 
*/    case OP_KET:    case OP_KETRMIN:    case OP_KETRMAX:    prev = ecode - GET(ecode, 1);    /* If this was a group that remembered the subject start, in order to break    infinite repeats of empty string matches, retrieve the subject start from    the chain. Otherwise, set it NULL. */    if (*prev >= OP_SBRA)      {      saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */      eptrb = eptrb->epb_prev;              /* Backup to previous group */      }    else saved_eptr = NULL;    /* If we are at the end of an assertion group, stop matching and return    MATCH_MATCH, but record the current high water mark for use by positive    assertions. Do this also for the "once" (atomic) groups. */    if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        *prev == OP_ONCE)      {      md->end_match_ptr = eptr;      /* For ONCE */      md->end_offset_top = offset_top;      RRETURN(MATCH_MATCH);      }    /* For capturing groups we have to check the group number back at the start    and if necessary complete handling an extraction by setting the offsets and    bumping the high water mark. Note that whole-pattern recursion is coded as    a recurse into group 0, so it won't be picked up here. Instead, we catch it    when the OP_END is reached. Other recursion is handled here. */    if (*prev == OP_CBRA || *prev == OP_SCBRA)      {      number = GET2(prev, 1+LINK_SIZE);      offset = number << 1;#ifdef DEBUG      printf("end bracket %d", number);      printf("\n");#endif      md->capture_last = number;      if (offset >= md->offset_max) md->offset_overflow = TRUE; else        {        md->offset_vector[offset] =          md->offset_vector[md->offset_end - number];        md->offset_vector[offset+1] = eptr - md->start_subject;        if (offset_top <= offset) offset_top = offset + 2;        }      /* Handle a recursively called group. Restore the offsets

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -