pcre_exec.c

来自「Ubuntu packages of security software。 相」· C语言 代码 · 共 2,090 行 · 第 1/5 页

C
2,090
字号
    else#endif    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      {      eptr -= GET(ecode,1);      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);      }    /* Skip to next op code */    ecode += 1 + LINK_SIZE;    break;    /* The callout item calls an external function, if one is provided, passing    details of the match so far. This is mainly for debugging, though the    function is able to force a failure. */    case OP_CALLOUT:    if (pcre_callout != NULL)      {      pcre_callout_block cb;      cb.version          = 1;   /* Version 1 of the callout block */      cb.callout_number   = ecode[1];      cb.offset_vector    = md->offset_vector;      cb.subject          = (PCRE_SPTR)md->start_subject;      cb.subject_length   = md->end_subject - md->start_subject;      cb.start_match      = md->start_match - md->start_subject;      cb.current_position = eptr - md->start_subject;      cb.pattern_position = GET(ecode, 2);      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);      cb.capture_top      = offset_top/2;      cb.capture_last     = md->capture_last;      cb.callout_data     = md->callout_data;      if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);      if (rrc < 0) RRETURN(rrc);      }    ecode += 2 + 2*LINK_SIZE;    break;    /* Recursion either matches the current regex, or some subexpression. The    offset data is the offset to the starting bracket from the start of the    whole pattern. (This is so that it works from duplicated subpatterns.)    If there are any capturing brackets started but not finished, we have to    save their starting points and reinstate them after the recursion. However,    we don't know how many such there are (offset_top records the completed    total) so we just have to save all the potential data. There may be up to    65535 such values, which is too large to put on the stack, but using malloc    for small numbers seems expensive. 
As a compromise, the stack is used when    there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc    is used. A problem is what to do if the malloc fails ... there is no way of    returning to the top level with an error. Save the top REC_STACK_SAVE_MAX    values on the stack, and accept that the rest may be wrong.    There are also other values that have to be saved. We use a chained    sequence of blocks that actually live on the stack. Thanks to Robin Houston    for the original version of this logic. */    case OP_RECURSE:      {      callpat = md->start_code + GET(ecode, 1);      new_recursive.group_num = *callpat - OP_BRA;      /* For extended extraction brackets (large number), we have to fish out      the number from a dummy opcode at the start. */      if (new_recursive.group_num > EXTRACT_BASIC_MAX)        new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);      /* Add to "recursing stack" */      new_recursive.prevrec = md->recursive;      md->recursive = &new_recursive;      /* Find where to continue from afterwards */      ecode += 1 + LINK_SIZE;      new_recursive.after_call = ecode;      /* Now save the offset data. */      new_recursive.saved_max = md->offset_end;      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        new_recursive.offset_save = stacksave;      else        {        new_recursive.offset_save =          (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);        }      memcpy(new_recursive.offset_save, md->offset_vector,            new_recursive.saved_max * sizeof(int));      new_recursive.save_start = md->start_match;      md->start_match = eptr;      /* OK, now we can do the recursion. For each top-level alternative we      restore the offset and recursion data. 
*/      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));      do        {        RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,            eptrb, match_isgroup);        if (rrc == MATCH_MATCH)          {          DPRINTF(("Recursion matched\n"));          md->recursive = new_recursive.prevrec;          if (new_recursive.offset_save != stacksave)            (pcre_free)(new_recursive.offset_save);          RRETURN(MATCH_MATCH);          }        else if (rrc != MATCH_NOMATCH)          {          DPRINTF(("Recursion gave error %d\n", rrc));          RRETURN(rrc);          }        md->recursive = &new_recursive;        memcpy(md->offset_vector, new_recursive.offset_save,            new_recursive.saved_max * sizeof(int));        callpat += GET(callpat, 1);        }      while (*callpat == OP_ALT);      DPRINTF(("Recursion didn't match\n"));      md->recursive = new_recursive.prevrec;      if (new_recursive.offset_save != stacksave)        (pcre_free)(new_recursive.offset_save);      RRETURN(MATCH_NOMATCH);      }    /* Control never reaches here */    /* "Once" brackets are like assertion brackets except that after a match,    the point in the subject string is not moved back. Thus there can never be    a move back into the brackets. Friedl calls these "atomic" subpatterns.    Check the alternative branches in turn - the matching won't pass the KET    for this kind of subpattern. If any one branch matches, we carry on as at    the end of a normal bracket, leaving the subject pointer. 
*/    case OP_ONCE:      prev = ecode;      saved_eptr = eptr;      do        {        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);        }      while (*ecode == OP_ALT);      /* If hit the end of the group (which could be repeated), fail */      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      /* Continue as from after the assertion, updating the offsets high water      mark, since extracts may have been taken. */      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      offset_top = md->end_offset_top;      eptr = md->end_match_ptr;      /* For a non-repeating ket, just continue at this level. This also      happens for a repeating ket if no characters were matched in the group.      This is the forcible breaking of infinite loops as implemented in Perl      5.005. If there is an options reset, it will get obeyed in the normal      course of events. */      if (*ecode == OP_KET || eptr == saved_eptr)        {        ecode += 1+LINK_SIZE;        break;        }      /* The repeating kets try the rest of the pattern or restart from the    preceding bracket, in the appropriate order. The second "call" of match()    uses tail recursion, to avoid using another stack frame. We need to reset    any options that changed within the bracket before re-running it, so    check the next opcode. 
*/      if (ecode[1+LINK_SIZE] == OP_OPT)        {        ims = (ims & ~PCRE_IMS) | ecode[4];        DPRINTF(("ims set to %02lx at group repeat\n", ims));        }      if (*ecode == OP_KETRMIN)        {        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode = prev;      flags = match_isgroup;      goto TAIL_RECURSE;        }      else  /* OP_KETRMAX */        {        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode += 1 + LINK_SIZE;      flags = 0;      goto TAIL_RECURSE;      }    /* Control never gets here */    /* An alternation is the end of a branch; scan along to find the end of the    bracketed group and go to there. */    case OP_ALT:    do ecode += GET(ecode,1); while (*ecode == OP_ALT);    break;    /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating    that it may occur zero times. It may repeat infinitely, or not at all -    i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper    repeat limits are compiled as a number of copies, with the optional ones    preceded by BRAZERO or BRAMINZERO. */    case OP_BRAZERO:      {      next = ecode+1;      RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      do next += GET(next,1); while (*next == OP_ALT);      ecode = next + 1+LINK_SIZE;      }    break;    case OP_BRAMINZERO:      {      next = ecode+1;      do next += GET(next,1); while (*next == OP_ALT);      RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        match_isgroup);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode++;      }    break;    /* End of a group, repeated or non-repeating. If we are at the end of    an assertion "group", stop matching and return MATCH_MATCH, but record the    current high water mark for use by positive assertions. 
Do this also    for the "once" (not-backup up) groups. */    case OP_KET:    case OP_KETRMIN:    case OP_KETRMAX:      prev = ecode - GET(ecode, 1);      saved_eptr = eptrb->epb_saved_eptr;      /* Back up the stack of bracket start pointers. */      eptrb = eptrb->epb_prev;      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ONCE)        {        md->end_match_ptr = eptr;      /* For ONCE */        md->end_offset_top = offset_top;        RRETURN(MATCH_MATCH);        }      /* In all other cases except a conditional group we have to check the      group number back at the start and if necessary complete handling an      extraction by setting the offsets and bumping the high water mark. */      if (*prev != OP_COND)        {        number = *prev - OP_BRA;        /* For extended extraction brackets (large number), we have to fish out        the number from a dummy opcode at the start. */        if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);        offset = number << 1;#ifdef DEBUG        printf("end bracket %d", number);        printf("\n");#endif        /* Test for a numbered group. This includes groups called as a result        of recursion. Note that whole-pattern recursion is coded as a recurse        into group 0, so it won't be picked up here. Instead, we catch it when        the OP_END is reached. */        if (number > 0)          {          md->capture_last = number;          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            {            md->offset_vector[offset] =              md->offset_vector[md->offset_end - number];            md->offset_vector[offset+1] = eptr - md->start_subject;            if (offset_top <= offset) offset_top = offset + 2;            }          /* Handle a recursively called group. Restore the offsets          appropriately and continue from after the call. 
*/          if (md->recursive != NULL && md->recursive->group_num == number)            {            recursion_info *rec = md->recursive;            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            md->recursive = rec->prevrec;            md->start_match = rec->save_start;            memcpy(md->offset_vector, rec->offset_save,              rec->saved_max * sizeof(int));            ecode = rec->after_call;            ims = original_ims;            break;            }          }        }      /* Reset the value of the ims flags, in case they got changed during      the group. */      ims = original_ims;      DPRINTF(("ims reset to %02lx\n", ims));      /* For a non-repeating ket, just continue at this level. This also      happens for a repeating ket if no characters were matched in the group.      This is the forcible breaking of infinite loops as implemented in Perl      5.005. If there is an options reset, it will get obeyed in the normal      course of events. */      if (*ecode == OP_KET || eptr == saved_eptr)        {        ecode += 1 + LINK_SIZE;        break;        }      /* The repeating kets try the rest of the pattern or restart from the    preceding bracket, in the appropriate order. In the second case, we can use    tail recursion to avoid using another stack frame. 
*/      if (*ecode == OP_KETRMIN)        {        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode = prev;      flags = match_isgroup;      goto TAIL_RECURSE;        }      else  /* OP_KETRMAX */        {        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode += 1 + LINK_SIZE;      flags = 0;      goto TAIL_RECURSE;      }    /* Control never gets here */    /* Start of subject unless notbol, or after internal newline if multiline */    case OP_CIRC:    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);    if ((ims & PCRE_MULTILINE) != 0)      {      if (eptr != md->start_subject &&          (eptr == md->end_subject ||           eptr < md->start_subject + md->nllen ||           !IS_NEWLINE(eptr - md->nllen)))        RRETURN(MATCH_NOMATCH);      ecode++;      break;      }    /* ... else fall through */    /* Start of subject assertion */    case OP_SOD:    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* Start of match assertion */    case OP_SOM:    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);    ecode++;    break;    /* Assert before internal newline if multiline, or before a terminating    newline unless endonly is set, else end of subject unless noteol is set. */    case OP_DOLL:    if ((ims & PCRE_MULTILINE) != 0)      {      if (eptr < md->end_subject)        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }      else        { if (md->noteol) RRETURN(MATCH_NOMATCH); }      ecode++;      break;      }    else      {      if (md->noteol) RRETURN(MATCH_NOMATCH);      if (!md->endonly)        {        if (eptr != md->end_subject &&            (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          RRETURN(MATCH_NOMATCH);        ecode++;        break;        }      }    /* ... else fall through for endonly */

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?