📄 pcre_exec.cpp
字号:
/************************************************** Match from current position **************************************************//* On entry instructionPtr points to the first opcode, and subjectPtr to the first characterin the subject string, while substringStart holds the value of subjectPtr at the start of thelast bracketed group - used for breaking infinite loops matching zero-lengthstrings. This function is called recursively in many circumstances. Whenever itreturns a negative (error) response, the outer match() call must also return thesame response.Arguments: subjectPtr pointer in subject instructionPtr position in code offsetTop current top pointer md pointer to "static" info for the matchReturns: 1 if matched ) these values are >= 0 0 if failed to match ) a negative error value if aborted by an error condition (e.g. stopped by repeated call or recursion limit)*/static const unsigned FRAMES_ON_STACK = 16;struct MatchStack { MatchStack() : framesEnd(frames + FRAMES_ON_STACK) , currentFrame(frames) , size(1) // match() creates accesses the first frame w/o calling pushNewFrame { ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK); } MatchFrame frames[FRAMES_ON_STACK]; MatchFrame* framesEnd; MatchFrame* currentFrame; unsigned size; inline bool canUseStackBufferForNextFrame() { return size < FRAMES_ON_STACK; } inline MatchFrame* allocateNextFrame() { if (canUseStackBufferForNextFrame()) return currentFrame + 1; return new MatchFrame; } inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation) { MatchFrame* newframe = allocateNextFrame(); newframe->previousFrame = currentFrame; newframe->args.subjectPtr = currentFrame->args.subjectPtr; newframe->args.offsetTop = currentFrame->args.offsetTop; newframe->args.instructionPtr = instructionPtr; newframe->args.bracketChain = bracketChain; newframe->returnLocation = returnLocation; size++; currentFrame = newframe; } inline void popCurrentFrame() { MatchFrame* oldFrame = currentFrame; currentFrame = currentFrame->previousFrame; if (size > FRAMES_ON_STACK) delete oldFrame; size--; } void popAllFrames() { while (size) popCurrentFrame(); }};static int matchError(int errorCode, MatchStack& stack){ stack.popAllFrames(); return errorCode;}/* Get the next UTF-8 character, not advancing the pointer, incrementing length if there are extra bytes. This is called when we know we are in UTF-8 mode. */static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len){ c = *subjectPtr; if ((c & 0xc0) == 0xc0) { int gcaa = kjs_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ int gcss = 6 * gcaa; c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss; for (int gcii = 1; gcii <= gcaa; gcii++) { gcss -= 6; c |= (subjectPtr[gcii] & 0x3f) << gcss; } len += gcaa; }}static inline void startNewGroup(MatchFrame* currentFrame){ /* At the start of a bracketed group, add the current subject pointer to the stack of such pointers, to be re-instated at the end of the group when we hit the closing ket. When match() is called in other circumstances, we don't add to this stack. */ currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain; currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr; currentFrame->args.bracketChain = ¤tFrame->locals.bracketChainNode;}// FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusingstatic inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats){ // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 }; static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 }; ASSERT(instructionOffset >= 0); ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR)); minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2 minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset]; maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];}static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md){ bool isMatch = false; int min; bool minimize = false; /* Initialization not really needed, but some compilers think so. */ unsigned matchCount = 0; MatchStack stack; /* The opcode jump table. */#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP#define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode, static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };#undef EMIT_JUMP_TABLE_ENTRY#endif /* One-time setup of the opcode jump table. */#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP for (int i = 255; !opcodeJumpTable[i]; i--) opcodeJumpTable[i] = &&CAPTURING_BRACKET;#endif #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION // Shark shows this as a hot line // Using a static const here makes this line disappear, but makes later access hotter (not sure why) stack.currentFrame->returnLocation = &&RETURN;#else stack.currentFrame->returnLocation = 0;#endif stack.currentFrame->args.subjectPtr = subjectPtr; stack.currentFrame->args.instructionPtr = instructionPtr; stack.currentFrame->args.offsetTop = offsetTop; stack.currentFrame->args.bracketChain = 0; startNewGroup(stack.currentFrame); /* This is where control jumps back to to effect "recursion" */ RECURSE: if (++matchCount > matchLimit) return matchError(JSRegExpErrorHitLimit, stack); /* Now start processing the operations. */ #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP while (true)#endif { #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]#else#define BEGIN_OPCODE(opcode) case OP_##opcode#define NEXT_OPCODE continue#endif #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP NEXT_OPCODE;#else switch (*stack.currentFrame->args.instructionPtr)#endif { /* Non-capturing bracket: optimized */ BEGIN_OPCODE(BRA): NON_CAPTURING_BRACKET: DPRINTF(("start bracket 0\n")); do { RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); if (isMatch) RRETURN; stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); } while (*stack.currentFrame->args.instructionPtr == OP_ALT); DPRINTF(("bracket 0 failed\n")); RRETURN; /* Skip over large extraction number data if encountered. */ BEGIN_OPCODE(BRANUMBER): stack.currentFrame->args.instructionPtr += 3; NEXT_OPCODE; /* End of the pattern. */ BEGIN_OPCODE(END): md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */ md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */ isMatch = true; RRETURN; /* Assertion brackets. Check the alternative branches in turn - the matching won't pass the KET for an assertion. If any one branch matches, the assertion is true. Lookbehind assertions have an OP_REVERSE item at the start of each branch to move the current point backwards, so the code at this level is identical to the lookahead case. */ BEGIN_OPCODE(ASSERT): do { RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL); if (isMatch) break; stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); } while (*stack.currentFrame->args.instructionPtr == OP_ALT); if (*stack.currentFrame->args.instructionPtr == OP_KET) RRETURN_NO_MATCH; /* Continue from after the assertion, updating the offsets high water mark, since extracts may have been taken during the assertion. */ advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; stack.currentFrame->args.offsetTop = md.endOffsetTop; NEXT_OPCODE; /* Negative assertion: all branches must fail to match */ BEGIN_OPCODE(ASSERT_NOT): do { RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL); if (isMatch) RRETURN_NO_MATCH; stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); } while (*stack.currentFrame->args.instructionPtr == OP_ALT); stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; NEXT_OPCODE; /* An alternation is the end of a branch; scan along to find the end of the bracketed group and go to there. */ BEGIN_OPCODE(ALT): advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); NEXT_OPCODE; /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating that it may occur zero times. It may repeat infinitely, or not at all - i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper repeat limits are compiled as a number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -