⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_exec.cpp

📁 Google浏览器V8内核代码
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/* This is JavaScriptCore's variant of the PCRE library. While this library
started out as a copy of PCRE, many of the features of PCRE have been
removed. This library now supports only the regular expression features
required by the JavaScript language specification, and has only the functions
needed by JavaScriptCore and the rest of WebKit.

                 Originally written by Philip Hazel
           Copyright (c) 1997-2006 University of Cambridge
    Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
    Copyright (C) 2007 Eric Seidel <eric@webkit.org>

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This module contains jsRegExpExecute(), the externally visible function
that does pattern matching using an NFA algorithm, following the rules from
the JavaScript specification. There are also some supporting functions. */

#include "config.h"

#include "pcre_internal.h"

#include "ASCIICType.h"

#include <ctype.h>
#include <limits.h>
#include <string.h> /* for memcpy */

#ifdef __GNUC__
#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
//#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
#endif

/* Avoid warnings on Windows. */
#undef min
#undef max

/* Type of the "return address" stored in each MatchFrame: a small integer
dispatched through a switch, or a label address when GCC's computed goto
extension is in use. */
#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
typedef int ReturnLocation;
#else
typedef void* ReturnLocation;
#endif

/* Structure for building a chain of data for holding the values of
the subject pointer at the start of each bracket, used to detect when
an empty string has been matched by a bracket to break infinite loops. */
struct BracketChainNode {
    BracketChainNode* previousBracket;
    const UChar* bracketStart;
};

/* One frame of the explicit "recursion" stack used by the matcher. */
struct MatchFrame {
    ReturnLocation returnLocation;      /* Where RRETURN resumes for this frame */
    struct MatchFrame* previousFrame;

    /* Function arguments that may change */
    struct {
        const UChar* subjectPtr;
        const unsigned char* instructionPtr;
        int offsetTop;
        BracketChainNode* bracketChain;
    } args;

    /* PCRE uses "fake" recursion built off of gotos, thus
     stack-based local variables are not safe to use.  Instead we have to
     store local variables on the current MatchFrame. */
    struct {
        const unsigned char* data;
        const unsigned char* startOfRepeatingBracket;
        const UChar* subjectPtrAtStartOfInstruction; /* Several instructions stash away a subjectPtr here for later compare */
        const unsigned char* instructionPtrAtStartOfOnce;

        int repeatOthercase;

        int ctype;
        int fc;
        int fi;
        int length;
        int max;
        int number;
        int offset;
        int saveOffset1;
        int saveOffset2;
        int saveOffset3;

        BracketChainNode bracketChainNode;
    } locals;
};

/* Structure for passing "static" information around between the functions
doing traditional NFA matching, so that they are thread-safe. */
struct MatchData {
    int* offsetVector;          /* Offset vector */
    int offsetEnd;              /* One past the end */
    int offsetMax;              /* The maximum usable for return data */
    bool offsetOverflow;        /* Set if too many extractions */
    const UChar* startSubject;  /* Start of the subject string */
    const UChar* endSubject;    /* End of the subject string */
    const UChar* endMatchPtr;   /* Subject position at end match */
    int endOffsetTop;           /* Highwater mark at end of match */
    bool multiline;
    bool ignoreCase;
};

/* The maximum remaining length of subject we are prepared to search for a
req_byte match. */
#define REQ_BYTE_MAX 1000

/* The below limit restricts the number of "recursive" match calls in order to
avoid spending exponential time on complex regular expressions. */
static const unsigned matchLimit = 100000;

#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested.

Arguments:
  p           points to characters
  length      number to print
  isSubject   true if printing from within md.startSubject
  md          pointer to matching data block, if isSubject is true
*/
static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
{
    /* Never read past the end of the subject string. */
    if (isSubject && length > md.endSubject - p)
        length = static_cast<int>(md.endSubject - p);

    while (length-- > 0) {
        int c = *p++;
        /* isprint() is only defined for values representable as unsigned char
           (or EOF); a UChar can exceed that range, so test the range first. */
        if (c < 256 && isprint(c))
            printf("%c", c);
        else if (c < 256)
            printf("\\x%02x", c);
        else
            printf("\\x{%x}", c);
    }
}
#endif

/*************************************************
*          Match a back-reference                *
*************************************************/

/* If a back reference hasn't been set, the length that is passed is greater
than the number of characters left in the string, so the match fails.

Arguments:
  offset      index into the offset vector
  subjectPtr  points into the subject
  length      length to be matched
  md          points to match data block

Returns:      true if matched
*/
static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
{
    /* Start of the previously captured text that the subject must repeat. */
    const UChar* p = md.startSubject + md.offsetVector[offset];

#ifdef DEBUG
    if (subjectPtr >= md.endSubject)
        printf("matching subject <null>");
    else {
        printf("matching subject ");
        pchars(subjectPtr, length, true, md);
    }
    printf(" against backref ");
    pchars(p, length, false, md);
    printf("\n");
#endif

    /* Always fail if not enough characters left */

    if (length > md.endSubject - subjectPtr)
        return false;

    /* Separate the caseless case for speed */

    if (md.ignoreCase) {
        while (length-- > 0) {
            UChar c = *p++;
            int othercase = kjs_pcre_ucp_othercase(c);
            UChar d = *subjectPtr++;
            /* Accept either the captured character or its other-case form. */
            if (c != d && othercase != d)
                return false;
        }
    }
    else {
        while (length-- > 0)
            if (*p++ != *subjectPtr++)
                return false;
    }

    return true;
}

#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION

/* Use numbered labels and switch statement at the bottom of the match function. */
#define RMATCH_WHERE(num) num
#define RRETURN_LABEL RRETURN_SWITCH

#else

/* Use GCC's computed goto extension. */

/* For one test case this is more than 40% faster than the switch statement.
We could avoid the use of the num argument entirely by using local labels,
but using it for the GCC case as well as the non-GCC case allows us to share
a bit more code and notice if we use conflicting numbers.*/

#define RMATCH_WHERE(num) &&RRETURN_##num
#define RRETURN_LABEL *stack.currentFrame->returnLocation

#endif

/* The macros below expand inside the match function and rely on names that
are expected to be in scope at the expansion site: the frame stack `stack`,
the `RECURSE` label, `startNewGroup()` and the `isMatch` flag. */

/* Jump into the matcher for the frame just pushed; RRETURN_<num> is the point
the callee returns to, where the callee's frame is popped. */
#define RECURSIVE_MATCH_COMMON(num) \
    goto RECURSE;\
    RRETURN_##num: \
    stack.popCurrentFrame();

/* "Call" the matcher with arguments (ra, rb), resuming at RRETURN_<num>. */
#define RECURSIVE_MATCH(num, ra, rb) \
    do { \
        stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
        RECURSIVE_MATCH_COMMON(num) \
    } while (0)

/* As RECURSIVE_MATCH, but calls startNewGroup() on the new frame first. */
#define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
    do { \
        stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
        startNewGroup(stack.currentFrame); \
        RECURSIVE_MATCH_COMMON(num) \
    } while (0)

/* "Return" from the current frame to the location recorded for it. */
#define RRETURN goto RRETURN_LABEL

/* "Return" from the current frame reporting that no match was found. */
#define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -