📄 pcre_compile.cpp.svn-base

📁 Google浏览器V8内核代码
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
                    *errorcodeptr = ERR2;                    return 0;                }                c = *ptr;                                /* A letter is upper-cased; then the 0x40 bit is flipped. This coding                 is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */                c = toASCIIUpper(c) ^ 0x40;                break;            }    }        *ptrptr = ptr;    return c;}/**************************************************            Check for counted repeat            **************************************************//* This function is called when a '{' is encountered in a place where it mightstart a quantifier. It looks ahead to see if it really is a quantifier or not.It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}where the ddds are digits.Arguments:  p         pointer to the first char after '{'Returns:    true or false*/static bool isCountedRepeat(const UChar* p, const UChar* patternEnd){    if (p >= patternEnd || !isASCIIDigit(*p))        return false;    p++;    while (p < patternEnd && isASCIIDigit(*p))        p++;    if (p < patternEnd && *p == '}')        return true;        if (p >= patternEnd || *p++ != ',')        return false;    if (p < patternEnd && *p == '}')        return true;        if (p >= patternEnd || !isASCIIDigit(*p))        return false;    p++;    while (p < patternEnd && isASCIIDigit(*p))        p++;        return (p < patternEnd && *p == '}');}/**************************************************         Read repeat counts                     **************************************************//* Read an item of the form {n,m} and return the values. 
This is called onlyafter isCountedRepeat() has confirmed that a repeat-count quantifier exists,so the syntax is guaranteed to be correct, but we need to check the values.Arguments:  p              pointer to first char after '{'  minp           pointer to int for min  maxp           pointer to int for max                 returned as -1 if no max  errorcodeptr   points to error code variableReturns:         pointer to '}' on success;                 current ptr on error, with errorcodeptr set non-zero*/static const UChar* readRepeatCounts(const UChar* p, int* minp, int* maxp, ErrorCode* errorcodeptr){    int min = 0;    int max = -1;        /* Read the minimum value and do a paranoid check: a negative value indicates     an integer overflow. */        while (isASCIIDigit(*p))        min = min * 10 + *p++ - '0';    if (min < 0 || min > 65535) {        *errorcodeptr = ERR5;        return p;    }        /* Read the maximum value if there is one, and again do a paranoid on its size.     Also, max must not be less than min. */        if (*p == '}')        max = min;    else {        if (*(++p) != '}') {            max = 0;            while (isASCIIDigit(*p))                max = max * 10 + *p++ - '0';            if (max < 0 || max > 65535) {                *errorcodeptr = ERR5;                return p;            }            if (max < min) {                *errorcodeptr = ERR4;                return p;            }        }    }        /* Fill in the required variables, and pass back the pointer to the terminating     '}'. */        *minp = min;    *maxp = max;    return p;}/**************************************************      Find first significant op code            **************************************************//* This is called by several functions that scan a compiled expression lookingfor a fixed first character, or an anchoring op code etc. 
It skips over thingsthat do not influence this.Arguments:  code         pointer to the start of the groupReturns:       pointer to the first significant opcode*/static const unsigned char* firstSignificantOpcode(const unsigned char* code){    while (*code == OP_BRANUMBER)        code += 3;    return code;}static const unsigned char* firstSignificantOpcodeSkippingAssertions(const unsigned char* code){    while (true) {        switch (*code) {            case OP_ASSERT_NOT:                advanceToEndOfBracket(code);                code += 1 + LINK_SIZE;                break;            case OP_WORD_BOUNDARY:            case OP_NOT_WORD_BOUNDARY:                ++code;                break;            case OP_BRANUMBER:                code += 3;                break;            default:                return code;        }    }}/**************************************************           Get othercase range                  **************************************************//* This function is passed the start and end of a class range, in UTF-8 modewith UCP support. It searches up the characters, looking for internal ranges ofcharacters in the "other" case. 
Each call returns the next one, updating the start address.

Arguments:
  cptr        points to starting character value; updated
  d           end value
  ocptr       where to put start of othercase range
  odptr       where to put end of othercase range

Yield:        true when range returned; false when no more
              (nothing is written through the pointers when false)
*/

static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr)
{
    int current = *cptr;
    int rangeStart = 0;

    /* Move forward to the first character in [*cptr, d] for which
     kjs_pcre_ucp_othercase() yields a non-negative value. */
    while (current <= d) {
        rangeStart = kjs_pcre_ucp_othercase(current);
        if (rangeStart >= 0)
            break;
        ++current;
    }

    if (current > d)
        return false;

    *ocptr = rangeStart;

    /* Extend the range while successive characters map to successive
     other-case values. */
    int expected = rangeStart + 1;
    ++current;
    while (current <= d && kjs_pcre_ucp_othercase(current) == expected) {
        ++expected;
        ++current;
    }

    *odptr = expected - 1;
    *cptr = current;            /* where the next call resumes */

    return true;
}

/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff and encodes
it as a UTF-8 character in 1 to 6 bytes.
Arguments: cvalue     the character value buffer     pointer to buffer for result - at least 6 bytes long  Returns:     number of characters placed in the buffer */static int encodeUTF8(int cvalue, unsigned char *buffer){    int i;    for (i = 0; i < kjs_pcre_utf8_table1_size; i++)        if (cvalue <= kjs_pcre_utf8_table1[i])            break;    buffer += i;    for (int j = i; j > 0; j--) {        *buffer-- = 0x80 | (cvalue & 0x3f);        cvalue >>= 6;    }    *buffer = kjs_pcre_utf8_table2[i] | cvalue;    return i + 1;}/**************************************************           Compile one branch                   **************************************************//* Scan the pattern, compiling it into the code vector.Arguments:  options        the option bits  brackets       points to number of extracting brackets used  codeptr        points to the pointer to the current code point  ptrptr         points to the current pattern pointer  errorcodeptr   points to error code variable  firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)  reqbyteptr     set to the last literal character required, else < 0  cd             contains pointers to tables etc.Returns:         true on success                 false, with *errorcodeptr set non-zero on error*/static inline bool safelyCheckNextChar(const UChar* ptr, const UChar* patternEnd, UChar expected){    return ((ptr + 1 < patternEnd) && ptr[1] == expected);}static boolcompileBranch(int options, int* brackets, unsigned char** codeptr,               const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int *firstbyteptr,               int* reqbyteptr, CompileData& cd){    int repeat_type, op_type;    int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */    int bravalue = 0;    int reqvary, tempreqvary;    int c;    unsigned char* code = *codeptr;    unsigned char* tempcode;    bool groupsetfirstbyte = false;    const UChar* ptr = *ptrptr;    const UChar* 
tempptr;    unsigned char* previous = NULL;    unsigned char classbits[32];        bool class_utf8;    unsigned char* class_utf8data;    unsigned char utf8_char[6];        /* Initialize no first byte, no required byte. REQ_UNSET means "no char     matching encountered yet". It gets changed to REQ_NONE if we hit something that     matches a non-fixed char first char; reqbyte just remains unset if we never     find one.          When we hit a repeat whose minimum is zero, we may have to adjust these values     to take the zero repeat into account. This is implemented by setting them to     zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual     item types that can be repeated set these backoff variables appropriately. */        int firstbyte = REQ_UNSET;    int reqbyte = REQ_UNSET;    int zeroreqbyte = REQ_UNSET;    int zerofirstbyte = REQ_UNSET;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -