📄 pcre_exec.c
字号:
} } else#endif /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ { eptr -= GET(ecode, 1); if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); } /* Skip to next op code */ ecode += 1 + LINK_SIZE; break; /* The callout item calls an external function, if one is provided, passing details of the match so far. This is mainly for debugging, though the function is able to force a failure. */ case OP_CALLOUT: if (pcre_callout != NULL) { pcre_callout_block cb; cb.version = 1; /* Version 1 of the callout block */ cb.callout_number = ecode[1]; cb.offset_vector = md->offset_vector; cb.subject = (PCRE_SPTR)md->start_subject; cb.subject_length = md->end_subject - md->start_subject; cb.start_match = md->start_match - md->start_subject; cb.current_position = eptr - md->start_subject; cb.pattern_position = GET(ecode, 2); cb.next_item_length = GET(ecode, 2 + LINK_SIZE); cb.capture_top = offset_top/2; cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } ecode += 2 + 2*LINK_SIZE; break; /* Recursion either matches the current regex, or some subexpression. The offset data is the offset to the starting bracket from the start of the whole pattern. (This is so that it works from duplicated subpatterns.) If there are any capturing brackets started but not finished, we have to save their starting points and reinstate them after the recursion. However, we don't know how many such there are (offset_top records the completed total) so we just have to save all the potential data. There may be up to 65535 such values, which is too large to put on the stack, but using malloc for small numbers seems expensive. As a compromise, the stack is used when there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc is used. A problem is what to do if the malloc fails ... there is no way of returning to the top level with an error. Save the top REC_STACK_SAVE_MAX values on the stack, and accept that the rest may be wrong. There are also other values that have to be saved. We use a chained sequence of blocks that actually live on the stack. Thanks to Robin Houston for the original version of this logic. */ case OP_RECURSE: { callpat = md->start_code + GET(ecode, 1); new_recursive.group_num = (callpat == md->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE); /* Add to "recursing stack" */ new_recursive.prevrec = md->recursive; md->recursive = &new_recursive; /* Find where to continue from afterwards */ ecode += 1 + LINK_SIZE; new_recursive.after_call = ecode; /* Now save the offset data. */ new_recursive.saved_max = md->offset_end; if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) new_recursive.offset_save = stacksave; else { new_recursive.offset_save = (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); } memcpy(new_recursive.offset_save, md->offset_vector, new_recursive.saved_max * sizeof(int)); new_recursive.save_start = md->start_match; md->start_match = eptr; /* OK, now we can do the recursion. For each top-level alternative we restore the offset and recursion data. */ DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); flags = (*callpat >= OP_SBRA)? match_cbegroup : 0; do { RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, md, ims, eptrb, flags); if (rrc == MATCH_MATCH) { DPRINTF(("Recursion matched\n")); md->recursive = new_recursive.prevrec; if (new_recursive.offset_save != stacksave) (pcre_free)(new_recursive.offset_save); RRETURN(MATCH_MATCH); } else if (rrc != MATCH_NOMATCH) { DPRINTF(("Recursion gave error %d\n", rrc)); RRETURN(rrc); } md->recursive = &new_recursive; memcpy(md->offset_vector, new_recursive.offset_save, new_recursive.saved_max * sizeof(int)); callpat += GET(callpat, 1); } while (*callpat == OP_ALT); DPRINTF(("Recursion didn't match\n")); md->recursive = new_recursive.prevrec; if (new_recursive.offset_save != stacksave) (pcre_free)(new_recursive.offset_save); RRETURN(MATCH_NOMATCH); } /* Control never reaches here */ /* "Once" brackets are like assertion brackets except that after a match, the point in the subject string is not moved back. Thus there can never be a move back into the brackets. Friedl calls these "atomic" subpatterns. Check the alternative branches in turn - the matching won't pass the KET for this kind of subpattern. If any one branch matches, we carry on as at the end of a normal bracket, leaving the subject pointer. */ case OP_ONCE: prev = ecode; saved_eptr = eptr; do { RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0); if (rrc == MATCH_MATCH) break; if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += GET(ecode,1); } while (*ecode == OP_ALT); /* If hit the end of the group (which could be repeated), fail */ if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); /* Continue as from after the assertion, updating the offsets high water mark, since extracts may have been taken. */ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); offset_top = md->end_offset_top; eptr = md->end_match_ptr; /* For a non-repeating ket, just continue at this level. This also happens for a repeating ket if no characters were matched in the group. This is the forcible breaking of infinite loops as implemented in Perl 5.005. If there is an options reset, it will get obeyed in the normal course of events. */ if (*ecode == OP_KET || eptr == saved_eptr) { ecode += 1+LINK_SIZE; break; } /* The repeating kets try the rest of the pattern or restart from the preceding bracket, in the appropriate order. The second "call" of match() uses tail recursion, to avoid using another stack frame. We need to reset any options that changed within the bracket before re-running it, so check the next opcode. */ if (ecode[1+LINK_SIZE] == OP_OPT) { ims = (ims & ~PCRE_IMS) | ecode[4]; DPRINTF(("ims set to %02lx at group repeat\n", ims)); } if (*ecode == OP_KETRMIN) { RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode = prev; flags = match_tail_recursed; goto TAIL_RECURSE; } else /* OP_KETRMAX */ { RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += 1 + LINK_SIZE; flags = match_tail_recursed; goto TAIL_RECURSE; } /* Control never gets here */ /* An alternation is the end of a branch; scan along to find the end of the bracketed group and go to there. */ case OP_ALT: do ecode += GET(ecode,1); while (*ecode == OP_ALT); break; /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating that it may occur zero times. It may repeat infinitely, or not at all - i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper repeat limits are compiled as a number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO. */ case OP_BRAZERO: { next = ecode+1; RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); do next += GET(next,1); while (*next == OP_ALT); ecode = next + 1 + LINK_SIZE; } break; case OP_BRAMINZERO: { next = ecode+1; do next += GET(next, 1); while (*next == OP_ALT); RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode++; } break; /* End of a group, repeated or non-repeating. */ case OP_KET: case OP_KETRMIN: case OP_KETRMAX: prev = ecode - GET(ecode, 1); /* If this was a group that remembered the subject start, in order to break infinite repeats of empty string matches, retrieve the subject start from the chain. Otherwise, set it NULL. */ if (*prev >= OP_SBRA) { saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */ eptrb = eptrb->epb_prev; /* Backup to previous group */ } else saved_eptr = NULL; /* If we are at the end of an assertion group, stop matching and return MATCH_MATCH, but record the current high water mark for use by positive assertions. Do this also for the "once" (atomic) groups. */ if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || *prev == OP_ONCE) { md->end_match_ptr = eptr; /* For ONCE */ md->end_offset_top = offset_top; RRETURN(MATCH_MATCH); } /* For capturing groups we have to check the group number back at the start and if necessary complete handling an extraction by setting the offsets and bumping the high water mark. Note that whole-pattern recursion is coded as a recurse into group 0, so it won't be picked up here. Instead, we catch it when the OP_END is reached. Other recursion is handled here. */ if (*prev == OP_CBRA || *prev == OP_SCBRA) { number = GET2(prev, 1+LINK_SIZE); offset = number << 1;#ifdef DEBUG printf("end bracket %d", number); printf("\n");#endif md->capture_last = number; if (offset >= md->offset_max) md->offset_overflow = TRUE; else { md->offset_vector[offset] = md->offset_vector[md->offset_end - number]; md->offset_vector[offset+1] = eptr - md->start_subject; if (offset_top <= offset) offset_top = offset + 2; } /* Handle a recursively called group. Restore the offsets appropriately and continue from after the call. */ if (md->recursive != NULL && md->recursive->group_num == number) { recursion_info *rec = md->recursive; DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); md->recursive = rec->prevrec; md->start_match = rec->save_start; memcpy(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); ecode = rec->after_call; ims = original_ims; break; } } /* For both capturing and non-capturing groups, reset the value of the ims flags, in case they got changed during the group. */ ims = original_ims; DPRINTF(("ims reset to %02lx\n", ims)); /* For a non-repeating ket, just continue at this level. This also happens for a repeating ket if no characters were matched in the group. This is the forcible breaking of infinite loops as implemented in Perl 5.005. If there is an options reset, it will get obeyed in the normal course of events. */ if (*ecode == OP_KET || eptr == saved_eptr) { ecode += 1 + LINK_SIZE; break; } /* The repeating kets try the rest of the pattern or restart from the preceding bracket, in the appropriate order. In the second case, we can use tail recursion to avoid using another stack frame. */ flags = (*prev >= OP_SBRA)? match_cbegroup : 0; if (*ecode == OP_KETRMIN) { RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode = prev; flags |= match_tail_recursed; goto TAIL_RECURSE; } else /* OP_KETRMAX */ { RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags); if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += 1 + LINK_SIZE; flags = match_tail_recursed; goto TAIL_RECURSE; } /* Control never gets here */ /* Start of subject unless notbol, or after internal newline if multiline */ case OP_CIRC: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); if ((ims & PCRE_MULTILINE) != 0) { if (eptr != md->start_subject && (eptr == md->end_subject || !WAS_NEWLINE(eptr))) RRETURN(MATCH_NOMATCH); ecode++; break; } /* ... else fall through */ /* Start of subject assertion */ case OP_SOD: if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); ecode++; break; /* Start of match assertion */ case OP_SOM: if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); ecode++; break; /* Assert before internal newline if multiline, or before a terminating newline unless endonly is set, else end of subject unless noteol is set. */ case OP_DOLL: if ((ims & PCRE_MULTILINE) != 0) { if (eptr < md->end_subject) { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); } else { if (md->noteol) RRETURN(MATCH_NOMATCH); } ecode++; break; } else { if (md->noteol) RRETURN(MATCH_NOMATCH); if (!md->endonly) { if (eptr != md->end_subject && (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) RRETURN(MATCH_NOMATCH); ecode++; break; } } /* ... else fall through for endonly */ /* End of subject assertion (\z) */ case OP_EOD: if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); ecode++; break; /* End of subject or ending \n assertion (\Z) */ case OP_EODN: if (eptr != md->end_subject && (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -