⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 jsregexp.c

📁 Swfdec still is development software, but has also followed a rigid no-crashes-allowed policy. I b
💻 C
📖 第 1 页 / 共 5 页
字号:
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla Communicator client code, released * March 31, 1998. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** *//* * JS regular expressions, after Perl. */#include "jsstddef.h"#include <stdlib.h>#include <string.h>#include "jstypes.h"#include "jsarena.h" /* Added by JSIFY */#include "jsutil.h" /* Added by JSIFY */#include "jsapi.h"#include "jsarray.h"#include "jsatom.h"#include "jscntxt.h"#include "jsconfig.h"#include "jsfun.h"#include "jsgc.h"#include "jsinterp.h"#include "jslock.h"#include "jsnum.h"#include "jsobj.h"#include "jsopcode.h"#include "jsregexp.h"#include "jsscan.h"#include "jsstr.h"#ifdef XP_MAC#include <MacMemory.h>#endif#if JS_HAS_REGEXPS/* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */typedef enum REOp {    REOP_EMPTY         = 0,  /* match rest of input against rest of r.e. */    REOP_ALT           = 1,  /* alternative subexpressions in kid and next */    REOP_SIMPLE_START  = 2,  /* start of 'simple opcodes' */    REOP_BOL           = 2,  /* beginning of input (or line if multiline) */    REOP_EOL           = 3,  /* end of input (or line if multiline) */    REOP_WBDRY         = 4,  /* match "" at word boundary */    REOP_WNONBDRY      = 5,  /* match "" at word non-boundary */    REOP_DOT           = 6,  /* stands for any character */    REOP_DIGIT         = 7,  /* match a digit char: [0-9] */    REOP_NONDIGIT      = 8,  /* match a non-digit char: [^0-9] */    REOP_ALNUM         = 9,  /* match an alphanumeric char: [0-9a-z_A-Z] */    REOP_NONALNUM      = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */    REOP_SPACE         = 11, /* match a whitespace char */    REOP_NONSPACE      = 12, /* match a non-whitespace char */    REOP_BACKREF       = 13, /* back-reference (e.g., \1) to a parenthetical */    REOP_FLAT          = 14, /* match a flat string */    REOP_FLAT1         = 15, /* match a single char */    REOP_FLATi         = 16, /* case-independent REOP_FLAT */    REOP_FLAT1i        = 17, /* case-independent REOP_FLAT1 */    REOP_UCFLAT1       = 18, /* single Unicode char */    REOP_UCFLAT1i      = 19, /* case-independent REOP_UCFLAT1 */    REOP_UCFLAT        = 20, /* flat Unicode string; len immediate counts chars */    REOP_UCFLATi       = 21, /* case-independent REOP_UCFLAT */    REOP_CLASS         = 22, /* character class with index */    REOP_NCLASS        = 23, /* negated character class with index */    REOP_SIMPLE_END    = 23, /* end of 'simple opcodes' */    REOP_QUANT         = 25, /* quantified atom: atom{1,2} */    REOP_STAR          = 26, /* zero or more occurrences of kid */    REOP_PLUS          = 27, /* one or more occurrences of kid */    REOP_OPT           = 28, /* optional subexpression in kid */    REOP_LPAREN        = 29, /* left paren bytecode: kid is u.num'th sub-regexp */    REOP_RPAREN        = 30, /* right paren bytecode */    REOP_JUMP          = 31, /* for deoptimized closure loops */    REOP_DOTSTAR       = 32, /* optimize .* to use a single opcode */    REOP_ANCHOR        = 33, /* like .* but skips left context to unanchored r.e. */    REOP_EOLONLY       = 34, /* $ not preceded by any pattern */    REOP_BACKREFi      = 37, /* case-independent REOP_BACKREF */    REOP_LPARENNON     = 41, /* non-capturing version of REOP_LPAREN */    REOP_ASSERT        = 43, /* zero width positive lookahead assertion */    REOP_ASSERT_NOT    = 44, /* zero width negative lookahead assertion */    REOP_ASSERTTEST    = 45, /* sentinel at end of assertion child */    REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */    REOP_MINIMALSTAR   = 47, /* non-greedy version of * */    REOP_MINIMALPLUS   = 48, /* non-greedy version of + */    REOP_MINIMALOPT    = 49, /* non-greedy version of ? */    REOP_MINIMALQUANT  = 50, /* non-greedy version of {} */    REOP_ENDCHILD      = 51, /* sentinel at end of quantifier child */    REOP_REPEAT        = 52, /* directs execution of greedy quantifier */    REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */    REOP_ALTPREREQ     = 54, /* prerequisite for ALT, either of two chars */    REOP_ALTPREREQ2    = 55, /* prerequisite for ALT, a char or a class */    REOP_ENDALT        = 56, /* end of final alternate */    REOP_CONCAT        = 57, /* concatenation of terms (parse time only) */    REOP_END} REOp;#define REOP_IS_SIMPLE(op)  ((unsigned)((op) - REOP_SIMPLE_START) <           \                             (unsigned)REOP_SIMPLE_END)struct RENode {    REOp            op;         /* r.e. op bytecode */    RENode          *next;      /* next in concatenation order */    void            *kid;       /* first operand */    union {        void        *kid2;      /* second operand */        jsint       num;        /* could be a number */        uint16      parenIndex; /* or a parenthesis index */        struct {                /* or a quantifier range */            uint16  min;            uint16  max;            JSBool  greedy;        } range;        struct {                /* or a character class */            uint16  startIndex;            uint16  kidlen;     /* length of string at kid, in jschars */            uint16  bmsize;     /* bitmap size, based on max char code */            uint16  index;      /* index into class list */            JSBool  sense;        } ucclass;        struct {                /* or a literal sequence */            jschar  chr;        /* of one character */            uint16  length;     /* or many (via the kid) */        } flat;        struct {            RENode  *kid2;      /* second operand from ALT */            jschar  ch1;        /* match char for ALTPREREQ */            jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */        } altprereq;    } u;};#define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \                             ((c >= 'a') && (c <= 'z')) )#define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \                             (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))#define CLASS_CACHE_SIZE    4typedef struct CompilerState {    JSContext       *context;    JSTokenStream   *tokenStream; /* For reporting errors */    const jschar    *cpbegin;    const jschar    *cpend;    const jschar    *cp;    uintN           flags;    uint16          parenCount;    uint16          classCount;   /* number of [] encountered */    size_t          progLength;   /* estimated bytecode length */    uintN           treeDepth;    /* maximum depth of parse tree */    RENode          *result;    struct {        const jschar *start;        /* small cache of class strings */        uint16 length;              /* since they're often the same */        uint16 index;    } classCache[CLASS_CACHE_SIZE];} CompilerState;typedef struct RECapture {    int32 index;               /* start of contents, -1 for empty  */    uint16 length;             /* length of capture */} RECapture;typedef struct REMatchState {    const jschar *cp;    RECapture parens[1];      /* first of 're->parenCount' captures,                               * allocated at end of this struct.                               */} REMatchState;struct REBackTrackData;typedef struct REProgState {    jsbytecode *continue_pc;        /* current continuation data */    jsbytecode continue_op;    uint16 index;                   /* progress in text */    uintN parenSoFar;               /* highest indexed paren started */    union {        struct {            uint16 min;             /* current quantifier limits */            uint16 max;        } quantifier;        struct {            size_t top;             /* backtrack stack state */            size_t sz;        } assertion;    } u;} REProgState;typedef struct REBackTrackData {    size_t sz;                      /* size of previous stack entry */    jsbytecode *backtrack_pc;       /* where to backtrack to */    jsbytecode backtrack_op;    const jschar *cp;               /* index in text of match at backtrack */    uint16 parenIndex;              /* start index of saved paren contents */    uint16 parenCount;              /* # of saved paren contents */    uint16 saveStateStackTop;       /* number of parent states */    /* saved parent states follow */    /* saved paren contents follow */} REBackTrackData;#define INITIAL_STATESTACK (100)#define INITIAL_BACKTRACK (8000)typedef struct REGlobalData {    JSContext *cx;    JSRegExp *regexp;               /* the RE in execution */    JSBool ok;                      /* runtime error (out_of_memory only?) */    size_t start;                   /* offset to start at */    ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */    const jschar *cpbegin, *cpend;  /* text base address and limit */    REProgState *stateStack;        /* stack of state of current parents */    uint16 stateStackTop;    uint16 stateStackLimit;    REBackTrackData *backTrackStack;/* stack of matched-so-far positions */    REBackTrackData *backTrackSP;    size_t backTrackStackSize;    size_t cursz;                   /* size of current stack entry */    JSArenaPool pool;               /* I don't understand but it's faster to                                     * use this than to malloc/free the three                                     * items that are allocated from this pool                                     */} REGlobalData;/* * 1. If IgnoreCase is false, return ch. * 2. Let u be ch converted to upper case as if by calling *    String.prototype.toUpperCase on the one-character string ch. * 3. If u does not consist of a single character, return ch. * 4. Let cu be u's character. * 5. If ch's code point value is greater than or equal to decimal 128 and cu's *    code point value is less than decimal 128, then return ch. * 6. Return cu. */static jscharupcase(jschar ch){    jschar cu = JS_TOUPPER(ch);    if (ch >= 128 && cu < 128)        return ch;    return cu;}static jschardowncase(jschar ch){    jschar cl = JS_TOLOWER(ch);    if (cl >= 128 && ch < 128)        return ch;    return cl;}/* Construct and initialize an RENode, returning NULL for out-of-memory */static RENode *NewRENode(CompilerState *state, REOp op){    JSContext *cx;    RENode *ren;    cx = state->context;    JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);    if (!ren) {        JS_ReportOutOfMemory(cx);        return NULL;    }    ren->op = op;    ren->next = NULL;    ren->kid = NULL;    return ren;}/* * Validates and converts hex ascii value. */static JSBoolisASCIIHexDigit(jschar c, uintN *digit){    uintN cv = c;    if (cv < '0')        return JS_FALSE;    if (cv <= '9') {        *digit = cv - '0';        return JS_TRUE;    }    cv |= 0x20;    if (cv >= 'a' && cv <= 'f') {        *digit = cv - 'a' + 10;        return JS_TRUE;    }    return JS_FALSE;}typedef struct {    REOp op;    const jschar *errPos;    uint16 parenIndex;} REOpData;/* * Process the op against the two top operands, reducing them to a single * operand in the penultimate slot. Update progLength and treeDepth. */static JSBoolProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, intN operandSP){    RENode *result;    switch (opData->op) {    case REOP_ALT:        result = NewRENode(state, REOP_ALT);        if (!result)            return JS_FALSE;        result->kid = operandStack[operandSP - 2];        result->u.kid2 = operandStack[operandSP - 1];        operandStack[operandSP - 2] = result;        /*         * look at both alternates to see if there's a FLAT or a CLASS at         * the start of each. If so, use a prerequisite match         */        ++state->treeDepth;        if (((RENode *) result->kid)->op == REOP_FLAT &&            ((RENode *) result->u.kid2)->op == REOP_FLAT &&            (state->flags & JSREG_FOLD) == 0) {            result->op = REOP_ALTPREREQ;            result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;            result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;            /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -