pcre_compile.cpp.svn-base

来自「Google浏览器V8内核代码」· SVN-BASE 代码 · 共 1,470 行 · 第 1/5 页
SVN-BASE
1,470 行
/* This is JavaScriptCore's variant of the PCRE library. While this library
started out as a copy of PCRE, many of the features of PCRE have been
removed. This library now supports only the regular expression features
required by the JavaScript language specification, and has only the functions
needed by JavaScriptCore and the rest of WebKit.

                 Originally written by Philip Hazel
           Copyright (c) 1997-2006 University of Cambridge
    Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
    Copyright (C) 2007 Eric Seidel <eric@webkit.org>

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This module contains the external function jsRegExpExecute(), along with
supporting internal functions that are not used by other modules. */

/* Each preprocessor directive must sit on its own line; when several are
fused onto one line only the first is honoured and the rest are dropped. */

#include "config.h"

#include "pcre_internal.h"

#include <string.h>
#include "ASCIICType.h"

/* Negative values for the firstchar and reqchar variables */

#define REQ_UNSET (-2)
#define REQ_NONE  (-1)

/*************************************************
*      Code parameters and static tables         *
*************************************************/

/* Maximum number of items on the nested bracket stacks at compile time. This
applies to the nesting of all kinds of parentheses. It does not limit
un-nested, non-capturing parentheses. This number can be made bigger if
necessary - it is used to dimension one int and one unsigned char vector at
compile time. */

#define BRASTACK_SIZE 200

/* Table for handling escaped characters in the range '0'-'z'. Positive returns
are simple data values; negative values are for special things like \d and so
on. Zero means further processing is needed (for things like \x), or the escape
is invalid. The table is indexed by (character - '0'), so each row of eight
entries covers eight consecutive character codes, as labelled on the right. */

static const short escapes[] = {
     0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
     0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? 
*/
   '@',      0, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
     0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
     0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
     0,      0,      0,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
   '`',      7, -ESC_b,      0, -ESC_d,      0,   '\f',      0,   /* ` - g */
     0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
     0,      0,    '\r', -ESC_s,   '\t',      0,  '\v', -ESC_w,   /* p - w */
     0,      0,      0                                            /* x - z */
};

/* Error code numbers. They are given names so that they can more easily be
tracked. */

enum ErrorCode {
    ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
    ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17
};

/* The texts of compile-time error messages. These are "char *" because they
are passed to the outside world.

All messages live back to back in a single array, each one closed by a NUL,
so message N starts immediately after the (N - 1)th NUL. Codes below 2 (that
is, both ERR0 and ERR1) yield the first message.

Arguments:
  code        the error code to look up

Returns:      pointer to the NUL-terminated message inside the static array
*/

static const char* errorText(ErrorCode code)
{
    static const char errorTexts[] =
      /* 1 */
      "\\ at end of pattern\0"
      "\\c at end of pattern\0"
      "character value in \\x{...} sequence is too large\0"
      "numbers out of order in {} quantifier\0"
      /* 5 */
      "number too big in {} quantifier\0"
      "missing terminating ] for character class\0"
      "internal error: code overflow\0"
      "range out of order in character class\0"
      "nothing to repeat\0"
      /* 10 */
      "unmatched parentheses\0"
      "internal error: unexpected repeat\0"
      "unrecognized character after (?\0"
      "failed to get memory\0"
      "missing )\0"
      /* 15 */
      "reference to non-existent subpattern\0"
      "regular expression too large\0"
      "parentheses nested too deeply"
    ;

    /* Step over (code - 1) whole messages; whatever follows is the answer. */
    const char* message = errorTexts;
    for (int toSkip = code - 1; toSkip > 0; --toSkip) {
        while (*message)
            ++message;      /* run to this message's terminator ... */
        ++message;          /* ... and hop over it */
    }
    return message;
}

/* Structure for passing "static" information around between the functions
doing the compiling. 
*/struct CompileData {    CompileData() {        top_backref = 0;        backrefMap = 0;        req_varyopt = 0;        needOuterBracket = false;        numCapturingBrackets = 0;    }    int top_backref;            /* Maximum back reference */    unsigned backrefMap;       /* Bitmap of low back refs */    int req_varyopt;            /* "After variable item" flag for reqbyte */    bool needOuterBracket;    int numCapturingBrackets;};/* Definitions to allow mutual recursion */static bool compileBracket(int, int*, unsigned char**, const UChar**, const UChar*, ErrorCode*, int, int*, int*, CompileData&);static bool bracketIsAnchored(const unsigned char* code);static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap);static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool inassert);/**************************************************            Handle escapes                      **************************************************//* This function is called when a \ has been encountered. It either returns apositive value for a simple escape such as \n, or a negative value whichencodes one of the more complicated things such as \d. When UTF-8 is enabled,a positive value greater than 255 may be returned. On entry, ptr is pointing atthe \. 
On exit, it is on the final character of the escape sequence.Arguments:  ptrptr         points to the pattern position pointer  errorcodeptr   points to the errorcode variable  bracount       number of previous extracting brackets  options        the options bits  isclass        true if inside a character classReturns:         zero or positive => a data character                 negative => a special escape sequence                 on error, errorptr is set*/static int checkEscape(const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int bracount, bool isclass){    const UChar* ptr = *ptrptr + 1;    /* If backslash is at the end of the pattern, it's an error. */    if (ptr == patternEnd) {        *errorcodeptr = ERR1;        *ptrptr = ptr;        return 0;    }        int c = *ptr;        /* Non-alphamerics are literals. For digits or letters, do an initial lookup in     a table. A non-zero result is something that can be returned immediately.     Otherwise further processing may be required. */        if (c < '0' || c > 'z') { /* Not alphameric */    } else if (int escapeValue = escapes[c - '0']) {        c = escapeValue;        if (isclass) {            if (-c == ESC_b)                c = '\b'; /* \b is backslash in a class */            else if (-c == ESC_B)                c = 'B'; /* and \B is a capital B in a class (in browsers event though ECMAScript 15.10.2.19 says it raises an error) */        }    /* Escapes that need further processing, or are illegal. */        } else {        switch (c) {            case '1':            case '2':            case '3':            case '4':            case '5':            case '6':            case '7':            case '8':            case '9':                /* Escape sequences starting with a non-zero digit are backreferences,                 unless there are insufficient brackets, in which case they are octal                 escape sequences. 
Those sequences end on the first non-octal character                 or when we overflow 0-255, whichever comes first. */                                if (!isclass) {                    const UChar* oldptr = ptr;                    c -= '0';                    while ((ptr + 1 < patternEnd) && isASCIIDigit(ptr[1]) && c <= bracount)                        c = c * 10 + *(++ptr) - '0';                    if (c <= bracount) {                        c = -(ESC_REF + c);                        break;                    }                    ptr = oldptr;      /* Put the pointer back and fall through */                }                                /* Handle an octal number following \. If the first digit is 8 or 9,                 this is not octal. */                                if ((c = *ptr) >= '8')                    break;            /* \0 always starts an octal number, but we may drop through to here with a             larger first octal digit. */            case '0': {                c -= '0';                int i;                for (i = 1; i <= 2; ++i) {                    if (ptr + i >= patternEnd || ptr[i] < '0' || ptr[i] > '7')                        break;                    int cc = c * 8 + ptr[i] - '0';                    if (cc > 255)                        break;                    c = cc;                }                ptr += i - 1;                break;            }            case 'x': {                c = 0;                int i;                for (i = 1; i <= 2; ++i) {                    if (ptr + i >= patternEnd || !isASCIIHexDigit(ptr[i])) {                        c = 'x';                        i = 1;                        break;                    }                    int cc = ptr[i];                    if (cc >= 'a')                        cc -= 32;             /* Convert to upper case */                    c = c * 16 + cc - ((cc < 'A') ? 
'0' : ('A' - 10));                }                ptr += i - 1;                break;            }            case 'u': {                c = 0;                int i;                for (i = 1; i <= 4; ++i) {                    if (ptr + i >= patternEnd || !isASCIIHexDigit(ptr[i])) {                        c = 'u';                        i = 1;                        break;                    }                    int cc = ptr[i];                    if (cc >= 'a')                        cc -= 32;             /* Convert to upper case */                    c = c * 16 + cc - ((cc < 'A') ? '0' : ('A' - 10));                }                ptr += i - 1;                break;            }            case 'c':                if (++ptr == patternEnd) {
pcre_compile.cpp.svn-base - 源码说明

本页面展示了「Google浏览器V8内核代码」中的 pcre_compile.cpp.svn-base 源码文件。该文件为 C++ 源码（.svn-base 是 Subversion 保存的原始副本后缀，并非编程语言），共 1,470 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Google相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?