⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_exec.cpp

📁 Google浏览器V8内核代码
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/**************************************************         Match from current position            **************************************************//* On entry instructionPtr points to the first opcode, and subjectPtr to the first characterin the subject string, while substringStart holds the value of subjectPtr at the start of thelast bracketed group - used for breaking infinite loops matching zero-lengthstrings. This function is called recursively in many circumstances. Whenever itreturns a negative (error) response, the outer match() call must also return thesame response.Arguments:   subjectPtr        pointer in subject   instructionPtr       position in code   offsetTop  current top pointer   md          pointer to "static" info for the matchReturns:       1 if matched          )  these values are >= 0               0 if failed to match  )               a negative error value if aborted by an error condition                 (e.g. stopped by repeated call or recursion limit)*/static const unsigned FRAMES_ON_STACK = 16;struct MatchStack {    MatchStack()        : framesEnd(frames + FRAMES_ON_STACK)        , currentFrame(frames)        , size(1) // match() creates accesses the first frame w/o calling pushNewFrame    {        ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);    }        MatchFrame frames[FRAMES_ON_STACK];    MatchFrame* framesEnd;    MatchFrame* currentFrame;    unsigned size;        inline bool canUseStackBufferForNextFrame()    {        return size < FRAMES_ON_STACK;    }        inline MatchFrame* allocateNextFrame()    {        if (canUseStackBufferForNextFrame())            return currentFrame + 1;        return new MatchFrame;    }        inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation)    {        MatchFrame* newframe = allocateNextFrame();        newframe->previousFrame = currentFrame;        newframe->args.subjectPtr = currentFrame->args.subjectPtr;        newframe->args.offsetTop = currentFrame->args.offsetTop;        newframe->args.instructionPtr = instructionPtr;        newframe->args.bracketChain = bracketChain;        newframe->returnLocation = returnLocation;        size++;        currentFrame = newframe;    }        inline void popCurrentFrame()    {        MatchFrame* oldFrame = currentFrame;        currentFrame = currentFrame->previousFrame;        if (size > FRAMES_ON_STACK)            delete oldFrame;        size--;    }    void popAllFrames()    {        while (size)            popCurrentFrame();    }};static int matchError(int errorCode, MatchStack& stack){    stack.popAllFrames();    return errorCode;}/* Get the next UTF-8 character, not advancing the pointer, incrementing length if there are extra bytes. This is called when we know we are in UTF-8 mode. */static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len){    c = *subjectPtr;    if ((c & 0xc0) == 0xc0) {        int gcaa = kjs_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */        int gcss = 6 * gcaa;        c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss;        for (int gcii = 1; gcii <= gcaa; gcii++) {            gcss -= 6;            c |= (subjectPtr[gcii] & 0x3f) << gcss;        }        len += gcaa;    }}static inline void startNewGroup(MatchFrame* currentFrame){    /* At the start of a bracketed group, add the current subject pointer to the     stack of such pointers, to be re-instated at the end of the group when we hit     the closing ket. When match() is called in other circumstances, we don't add to     this stack. */        currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain;    currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr;    currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;}// FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusingstatic inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats){    // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR    static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };    static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };    ASSERT(instructionOffset >= 0);    ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));    minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2    minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];    maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];}static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md){    bool isMatch = false;    int min;    bool minimize = false; /* Initialization not really needed, but some compilers think so. */    unsigned matchCount = 0;        MatchStack stack;    /* The opcode jump table. */#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP#define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,    static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };#undef EMIT_JUMP_TABLE_ENTRY#endif        /* One-time setup of the opcode jump table. */#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP    for (int i = 255; !opcodeJumpTable[i]; i--)        opcodeJumpTable[i] = &&CAPTURING_BRACKET;#endif    #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION    // Shark shows this as a hot line    // Using a static const here makes this line disappear, but makes later access hotter (not sure why)    stack.currentFrame->returnLocation = &&RETURN;#else    stack.currentFrame->returnLocation = 0;#endif    stack.currentFrame->args.subjectPtr = subjectPtr;    stack.currentFrame->args.instructionPtr = instructionPtr;    stack.currentFrame->args.offsetTop = offsetTop;    stack.currentFrame->args.bracketChain = 0;    startNewGroup(stack.currentFrame);        /* This is where control jumps back to to effect "recursion" */    RECURSE:    if (++matchCount > matchLimit)        return matchError(JSRegExpErrorHitLimit, stack);    /* Now start processing the operations. */    #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP    while (true)#endif    {        #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]#else#define BEGIN_OPCODE(opcode) case OP_##opcode#define NEXT_OPCODE continue#endif        #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP        NEXT_OPCODE;#else        switch (*stack.currentFrame->args.instructionPtr)#endif        {            /* Non-capturing bracket: optimized */                            BEGIN_OPCODE(BRA):            NON_CAPTURING_BRACKET:                DPRINTF(("start bracket 0\n"));                do {                    RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);                    if (isMatch)                        RRETURN;                    stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                } while (*stack.currentFrame->args.instructionPtr == OP_ALT);                DPRINTF(("bracket 0 failed\n"));                RRETURN;                            /* Skip over large extraction number data if encountered. */                            BEGIN_OPCODE(BRANUMBER):                stack.currentFrame->args.instructionPtr += 3;                NEXT_OPCODE;                            /* End of the pattern. */                            BEGIN_OPCODE(END):                md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */                md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */                isMatch = true;                RRETURN;                            /* Assertion brackets. Check the alternative branches in turn - the             matching won't pass the KET for an assertion. If any one branch matches,             the assertion is true. Lookbehind assertions have an OP_REVERSE item at the             start of each branch to move the current point backwards, so the code at             this level is identical to the lookahead case. */                            BEGIN_OPCODE(ASSERT):                do {                    RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);                    if (isMatch)                        break;                    stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                } while (*stack.currentFrame->args.instructionPtr == OP_ALT);                if (*stack.currentFrame->args.instructionPtr == OP_KET)                    RRETURN_NO_MATCH;                                /* Continue from after the assertion, updating the offsets high water                 mark, since extracts may have been taken during the assertion. */                                advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);                stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;                stack.currentFrame->args.offsetTop = md.endOffsetTop;                NEXT_OPCODE;                            /* Negative assertion: all branches must fail to match */                            BEGIN_OPCODE(ASSERT_NOT):                do {                    RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);                    if (isMatch)                        RRETURN_NO_MATCH;                    stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                } while (*stack.currentFrame->args.instructionPtr == OP_ALT);                                stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;                NEXT_OPCODE;                            /* An alternation is the end of a branch; scan along to find the end of the             bracketed group and go to there. */                            BEGIN_OPCODE(ALT):                advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);                NEXT_OPCODE;                            /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating             that it may occur zero times. It may repeat infinitely, or not at all -             i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper             repeat limits are compiled as a number of copies, with the optional ones             preceded by BRAZERO or BRAMINZERO. */                

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -