⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_exec.c

📁 PHP v6.0 For Linux 运行环境:Win9X/ WinME/ WinNT/ Win2K/ WinXP
💻 C
📖 第 1 页 / 共 5 页
字号:
/* Ditto for the local variables */#ifdef SUPPORT_UTF8#define charptr            frame->Xcharptr#endif#define callpat            frame->Xcallpat#define data               frame->Xdata#define next               frame->Xnext#define pp                 frame->Xpp#define prev               frame->Xprev#define saved_eptr         frame->Xsaved_eptr#define new_recursive      frame->Xnew_recursive#define cur_is_word        frame->Xcur_is_word#define condition          frame->Xcondition#define minimize           frame->Xminimize#define prev_is_word       frame->Xprev_is_word#define original_ims       frame->Xoriginal_ims#ifdef SUPPORT_UCP#define prop_type          frame->Xprop_type#define prop_value         frame->Xprop_value#define prop_fail_result   frame->Xprop_fail_result#define prop_category      frame->Xprop_category#define prop_chartype      frame->Xprop_chartype#define prop_script        frame->Xprop_script#define prop_test_variable frame->Xprop_test_variable#endif#define ctype              frame->Xctype#define fc                 frame->Xfc#define fi                 frame->Xfi#define length             frame->Xlength#define max                frame->Xmax#define min                frame->Xmin#define number             frame->Xnumber#define offset             frame->Xoffset#define op                 frame->Xop#define save_capture_last  frame->Xsave_capture_last#define save_offset1       frame->Xsave_offset1#define save_offset2       frame->Xsave_offset2#define save_offset3       frame->Xsave_offset3#define stacksave          frame->Xstacksave#define newptrb            frame->Xnewptrb/* When recursion is being used, local variables are allocated on the stack andget preserved during recursion in the normal way. In this environment, fi andi, and fc and c, can be the same variables. */#else#define fi i#define fc c#ifdef SUPPORT_UTF8                /* Many of these variables are used only  */const uschar *charptr;             /* in small blocks of the code. My normal */#endif                             /* style of coding would have declared    */const uschar *callpat;             /* them within each of those blocks.      */const uschar *data;                /* However, in order to accommodate the   */const uschar *next;                /* version of this code that uses an      */USPTR         pp;                  /* external "stack" implemented on the    */const uschar *prev;                /* heap, it is easier to declare them all */USPTR         saved_eptr;          /* here, so the declarations can be cut   */                                   /* out in a block. The only declarations  */recursion_info new_recursive;      /* within blocks below are for variables  */                                   /* that do not have to be preserved over  */BOOL cur_is_word;                  /* a recursive call to RMATCH().          */BOOL condition;BOOL minimize;BOOL prev_is_word;unsigned long int original_ims;#ifdef SUPPORT_UCPint prop_type;int prop_value;int prop_fail_result;int prop_category;int prop_chartype;int prop_script;int *prop_test_variable;#endifint ctype;int length;int max;int min;int number;int offset;int op;int save_capture_last;int save_offset1, save_offset2, save_offset3;int stacksave[REC_STACK_SAVE_MAX];eptrblock newptrb;#endif/* These statements are here to stop the compiler complaining about unitializedvariables. */#ifdef SUPPORT_UCPprop_value = 0;prop_fail_result = 0;prop_test_variable = NULL;#endif/* OK, now we can get on with the real code of the function. Recursive callsare specified by the macro RMATCH and RRETURN is used to return. WhenNO_RECURSE is *not* defined, these just turn into a recursive call to match()and a "return", respectively (possibly with some debugging if DEBUG isdefined). However, RMATCH isn't like a function call because it's quite acomplicated macro. It has to be used in one particular way. This shouldn't,however, impact performance when true recursion is being used. *//* First check that we haven't called match() too many times, or that wehaven't exceeded the recursive call limit. */if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);original_ims = ims;    /* Save for resetting on ')' */utf8 = md->utf8;       /* Local copy of the flag *//* At the start of a bracketed group, add the current subject pointer to thestack of such pointers, to be re-instated at the end of the group when we hitthe closing ket. When match() is called in other circumstances, we don't add tothis stack. */if ((flags & match_isgroup) != 0)  {  newptrb.epb_prev = eptrb;  newptrb.epb_saved_eptr = eptr;  eptrb = &newptrb;  }/* Now start processing the operations. */for (;;)  {  op = *ecode;  minimize = FALSE;  /* For partial matching, remember if we ever hit the end of the subject after  matching at least one subject character. */  if (md->partial &&      eptr >= md->end_subject &&      eptr > md->start_match)    md->hitend = TRUE;  /* Opening capturing bracket. If there is space in the offset vector, save  the current subject position in the working slot at the top of the vector. We  mustn't change the current values of the data slot, because they may be set  from a previous iteration of this group, and be referred to by a reference  inside the group.  If the bracket fails to match, we need to restore this value and also the  values of the final offsets, in case they were set by a previous iteration of  the same bracket.  If there isn't enough space in the offset vector, treat this as if it were a  non-capturing bracket. Don't worry about setting the flag for the error case  here; that is handled in the code for KET. */  if (op > OP_BRA)    {    number = op - OP_BRA;    /* For extended extraction brackets (large number), we have to fish out the    number from a dummy opcode at the start. */    if (number > EXTRACT_BASIC_MAX)      number = GET2(ecode, 2+LINK_SIZE);    offset = number << 1;#ifdef DEBUG    printf("start bracket %d subject=", number);    pchars(eptr, 16, TRUE, md);    printf("\n");#endif    if (offset < md->offset_max)      {      save_offset1 = md->offset_vector[offset];      save_offset2 = md->offset_vector[offset+1];      save_offset3 = md->offset_vector[md->offset_end - number];      save_capture_last = md->capture_last;      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));      md->offset_vector[md->offset_end - number] = eptr - md->start_subject;      do        {        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        md->capture_last = save_capture_last;        ecode += GET(ecode, 1);        }      while (*ecode == OP_ALT);      DPRINTF(("bracket %d failed\n", number));      md->offset_vector[offset] = save_offset1;      md->offset_vector[offset+1] = save_offset2;      md->offset_vector[md->offset_end - number] = save_offset3;      RRETURN(MATCH_NOMATCH);      }    /* Insufficient room for saving captured contents */    else op = OP_BRA;    }  /* Other types of node can be handled by a switch */  switch(op)    {    case OP_BRA:     /* Non-capturing bracket: optimized */    DPRINTF(("start bracket 0\n"));    do      {      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        match_isgroup);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode += GET(ecode, 1);      }    while (*ecode == OP_ALT);    DPRINTF(("bracket 0 failed\n"));    RRETURN(MATCH_NOMATCH);    /* Conditional group: compilation checked that there are no more than    two branches. If the condition is false, skipping the first branch takes us    past the end if there is only one branch, but that's OK because that is    exactly what going to the ket would do. */    case OP_COND:    if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      {      offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */      condition = (offset == CREF_RECURSE * 2)?        (md->recursive != NULL) :        (offset < offset_top && md->offset_vector[offset] >= 0);      RMATCH(rrc, eptr, ecode + (condition?        (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        offset_top, md, ims, eptrb, match_isgroup);      RRETURN(rrc);      }    /* The condition is an assertion. Call match() to evaluate it - setting    the final argument TRUE causes it to stop at the end of an assertion. */    else      {      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,          match_condassert | match_isgroup);      if (rrc == MATCH_MATCH)        {        ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);        while (*ecode == OP_ALT) ecode += GET(ecode, 1);        }      else if (rrc != MATCH_NOMATCH)        {        RRETURN(rrc);         /* Need braces because of following else */        }      else ecode += GET(ecode, 1);      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        match_isgroup);      RRETURN(rrc);      }    /* Control never reaches here */    /* Skip over conditional reference or large extraction number data if    encountered. */    case OP_CREF:    case OP_BRANUMBER:    ecode += 3;    break;    /* End of the pattern. If we are in a recursion, we should restore the    offsets appropriately and continue from after the call. */    case OP_END:    if (md->recursive != NULL && md->recursive->group_num == 0)      {      recursion_info *rec = md->recursive;      DPRINTF(("End of pattern in a (?0) recursion\n"));      md->recursive = rec->prevrec;      memmove(md->offset_vector, rec->offset_save,        rec->saved_max * sizeof(int));      md->start_match = rec->save_start;      ims = original_ims;      ecode = rec->after_call;      break;      }    /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty    string - backtracking will then try other alternatives, if any. */    if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);    md->end_match_ptr = eptr;          /* Record where we ended */    md->end_offset_top = offset_top;   /* and how many extracts were taken */    RRETURN(MATCH_MATCH);    /* Change option settings */    case OP_OPT:    ims = ecode[1];    ecode += 2;    DPRINTF(("ims set to %02lx\n", ims));    break;    /* Assertion brackets. Check the alternative branches in turn - the    matching won't pass the KET for an assertion. If any one branch matches,    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the    start of each branch to move the current point backwards, so the code at    this level is identical to the lookahead case. */    case OP_ASSERT:    case OP_ASSERTBACK:    do      {      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        match_isgroup);      if (rrc == MATCH_MATCH) break;      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode += GET(ecode, 1);      }    while (*ecode == OP_ALT);    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);    /* If checking an assertion for a condition, return MATCH_MATCH. */    if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);    /* Continue from after the assertion, updating the offsets high water    mark, since extracts may have been taken during the assertion. */    do ecode += GET(ecode,1); while (*ecode == OP_ALT);    ecode += 1 + LINK_SIZE;    offset_top = md->end_offset_top;    continue;    /* Negative assertion: all branches must fail to match */    case OP_ASSERT_NOT:    case OP_ASSERTBACK_NOT:    do      {      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        match_isgroup);      if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      ecode += GET(ecode,1);      }    while (*ecode == OP_ALT);    if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);    ecode += 1 + LINK_SIZE;    continue;    /* Move the subject pointer back. This occurs only at the start of    each branch of a lookbehind assertion. If we are too close to the start to    move back, this match function fails. When working with UTF-8 we move    back a number of characters, not bytes. */    case OP_REVERSE:#ifdef SUPPORT_UTF8    if (utf8)      {      c = GET(ecode,1);      for (i = 0; i < c; i++)        {        eptr--;        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        BACKCHAR(eptr)        }      }    else#endif    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      {      eptr -= GET(ecode,1);      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);      }    /* Skip to next op code */    ecode += 1 + LINK_SIZE;    break;    /* The callout item calls an external function, if one is provided, passing    details of the match so far. This is mainly for debugging, though the    function is able to force a failure. */    case OP_CALLOUT:    if (pcre_callout != NULL)      {      pcre_callout_block cb;      cb.version          = 1;   /* Version 1 of the callout block */      cb.callout_number   = ecode[1];      cb.offset_vector    = md->offset_vector;      cb.subject          = (PCRE_SPTR)md->start_subject;      cb.subject_length   = md->end_subject - md->start_subject;      cb.start_match      = md->start_match - md->start_subject;      cb.current_position = eptr - md->start_subject;      cb.pattern_position = GET(ecode, 2);      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);      cb.capture_top      = offset_top/2;      cb.capture_last     = md->capture_last;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -