⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 jsregexp.c

📁 java script test programing source code
💻 C
📖 第 1 页 / 共 5 页
字号:
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set sw=4 ts=8 et tw=78: * * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla Communicator client code, released * March 31, 1998. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** *//* * JS regular expressions, after Perl. */#include "jsstddef.h"#include <stdlib.h>#include <string.h>#include "jstypes.h"#include "jsarena.h" /* Added by JSIFY */#include "jsutil.h" /* Added by JSIFY */#include "jsapi.h"#include "jsarray.h"#include "jsatom.h"#include "jscntxt.h"#include "jsconfig.h"#include "jsfun.h"#include "jsgc.h"#include "jsinterp.h"#include "jslock.h"#include "jsnum.h"#include "jsobj.h"#include "jsopcode.h"#include "jsregexp.h"#include "jsscan.h"#include "jsstr.h"/* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */typedef enum REOp {    REOP_EMPTY         = 0,  /* match rest of input against rest of r.e. */    REOP_ALT           = 1,  /* alternative subexpressions in kid and next */    REOP_SIMPLE_START  = 2,  /* start of 'simple opcodes' */    REOP_BOL           = 2,  /* beginning of input (or line if multiline) */    REOP_EOL           = 3,  /* end of input (or line if multiline) */    REOP_WBDRY         = 4,  /* match "" at word boundary */    REOP_WNONBDRY      = 5,  /* match "" at word non-boundary */    REOP_DOT           = 6,  /* stands for any character */    REOP_DIGIT         = 7,  /* match a digit char: [0-9] */    REOP_NONDIGIT      = 8,  /* match a non-digit char: [^0-9] */    REOP_ALNUM         = 9,  /* match an alphanumeric char: [0-9a-z_A-Z] */    REOP_NONALNUM      = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */    REOP_SPACE         = 11, /* match a whitespace char */    REOP_NONSPACE      = 12, /* match a non-whitespace char */    REOP_BACKREF       = 13, /* back-reference (e.g., \1) to a parenthetical */    REOP_FLAT          = 14, /* match a flat string */    REOP_FLAT1         = 15, /* match a single char */    REOP_FLATi         = 16, /* case-independent REOP_FLAT */    REOP_FLAT1i        = 17, /* case-independent REOP_FLAT1 */    REOP_UCFLAT1       = 18, /* single Unicode char */    REOP_UCFLAT1i      = 19, /* case-independent REOP_UCFLAT1 */    REOP_UCFLAT        = 20, /* flat Unicode string; len immediate counts chars */    REOP_UCFLATi       = 21, /* case-independent REOP_UCFLAT */    REOP_CLASS         = 22, /* character class with index */    REOP_NCLASS        = 23, /* negated character class with index */    REOP_SIMPLE_END    = 23, /* end of 'simple opcodes' */    REOP_QUANT         = 25, /* quantified atom: atom{1,2} */    REOP_STAR          = 26, /* zero or more occurrences of kid */    REOP_PLUS          = 27, /* one or more occurrences of kid */    REOP_OPT           = 28, /* optional subexpression in kid */    REOP_LPAREN        = 29, /* left paren bytecode: kid is u.num'th sub-regexp */    REOP_RPAREN        = 30, /* right paren bytecode */    REOP_JUMP          = 31, /* for deoptimized closure loops */    REOP_DOTSTAR       = 32, /* optimize .* to use a single opcode */    REOP_ANCHOR        = 33, /* like .* but skips left context to unanchored r.e. */    REOP_EOLONLY       = 34, /* $ not preceded by any pattern */    REOP_BACKREFi      = 37, /* case-independent REOP_BACKREF */    REOP_LPARENNON     = 41, /* non-capturing version of REOP_LPAREN */    REOP_ASSERT        = 43, /* zero width positive lookahead assertion */    REOP_ASSERT_NOT    = 44, /* zero width negative lookahead assertion */    REOP_ASSERTTEST    = 45, /* sentinel at end of assertion child */    REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */    REOP_MINIMALSTAR   = 47, /* non-greedy version of * */    REOP_MINIMALPLUS   = 48, /* non-greedy version of + */    REOP_MINIMALOPT    = 49, /* non-greedy version of ? */    REOP_MINIMALQUANT  = 50, /* non-greedy version of {} */    REOP_ENDCHILD      = 51, /* sentinel at end of quantifier child */    REOP_REPEAT        = 52, /* directs execution of greedy quantifier */    REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */    REOP_ALTPREREQ     = 54, /* prerequisite for ALT, either of two chars */    REOP_ALTPREREQ2    = 55, /* prerequisite for ALT, a char or a class */    REOP_ENDALT        = 56, /* end of final alternate */    REOP_CONCAT        = 57, /* concatenation of terms (parse time only) */    REOP_END} REOp;#define REOP_IS_SIMPLE(op)  ((unsigned)((op) - REOP_SIMPLE_START) <           \                             (unsigned)REOP_SIMPLE_END)struct RENode {    REOp            op;         /* r.e. op bytecode */    RENode          *next;      /* next in concatenation order */    void            *kid;       /* first operand */    union {        void        *kid2;      /* second operand */        jsint       num;        /* could be a number */        size_t      parenIndex; /* or a parenthesis index */        struct {                /* or a quantifier range */            uintN  min;            uintN  max;            JSPackedBool greedy;        } range;        struct {                /* or a character class */            size_t  startIndex;            size_t  kidlen;     /* length of string at kid, in jschars */            size_t  index;      /* index into class list */            uint16  bmsize;     /* bitmap size, based on max char code */            JSPackedBool sense;        } ucclass;        struct {                /* or a literal sequence */            jschar  chr;        /* of one character */            size_t  length;     /* or many (via the kid) */        } flat;        struct {            RENode  *kid2;      /* second operand from ALT */            jschar  ch1;        /* match char for ALTPREREQ */            jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */        } altprereq;    } u;};#define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \                             ((c >= 'a') && (c <= 'z')) )#define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \                             (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))#define CLASS_CACHE_SIZE    4typedef struct CompilerState {    JSContext       *context;    JSTokenStream   *tokenStream; /* For reporting errors */    const jschar    *cpbegin;    const jschar    *cpend;    const jschar    *cp;    size_t          parenCount;    size_t          classCount;   /* number of [] encountered */    size_t          treeDepth;    /* maximum depth of parse tree */    size_t          progLength;   /* estimated bytecode length */    RENode          *result;    size_t          classBitmapsMem; /* memory to hold all class bitmaps */    struct {        const jschar *start;        /* small cache of class strings */        size_t length;              /* since they're often the same */        size_t index;    } classCache[CLASS_CACHE_SIZE];    uint16          flags;} CompilerState;typedef struct EmitStateStackEntry {    jsbytecode      *altHead;       /* start of REOP_ALT* opcode */    jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */    jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */    jsbytecode      *endTermFixup;  /* fixup ptr. to REOPT_ALTPREREQ* offset */    RENode          *continueNode;  /* original REOP_ALT* node being stacked */    jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */    JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to                                       avoid 16-bit unsigned offset overflow */} EmitStateStackEntry;/* * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h, * the getters and setters take the pc of the offset, not of the opcode before * the offset. */#define ARG_LEN             2#define GET_ARG(pc)         ((uint16)(((pc)[0] << 8) | (pc)[1]))#define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \                             (pc)[1] = (jsbytecode) (arg))#define OFFSET_LEN          ARG_LEN#define OFFSET_MAX          (JS_BIT(ARG_LEN * 8) - 1)#define GET_OFFSET(pc)      GET_ARG(pc)/* * Maximum supported tree depth is maximum size of EmitStateStackEntry stack. * For sanity, we limit it to 2^24 bytes. */#define TREE_DEPTH_MAX  (JS_BIT(24) / sizeof(EmitStateStackEntry))/* * The maximum memory that can be allocated for class bitmaps. * For sanity, we limit it to 2^24 bytes. */#define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)/* * Functions to get size and write/read bytecode that represent small indexes * compactly. * Each byte in the code represent 7-bit chunk of the index. 8th bit when set * indicates that the following byte brings more bits to the index. Otherwise * this is the last byte in the index bytecode representing highest index bits. */static size_tGetCompactIndexWidth(size_t index){    size_t width;    for (width = 1; (index >>= 7) != 0; ++width) { }    return width;}static jsbytecode *WriteCompactIndex(jsbytecode *pc, size_t index){    size_t next;    while ((next = index >> 7) != 0) {        *pc++ = (jsbytecode)(index | 0x80);        index = next;    }    *pc++ = (jsbytecode)index;    return pc;}static jsbytecode *ReadCompactIndex(jsbytecode *pc, size_t *result){    size_t nextByte;    nextByte = *pc++;    if ((nextByte & 0x80) == 0) {        /*         * Short-circuit the most common case when compact index <= 127.         */        *result = nextByte;    } else {        size_t shift = 7;        *result = 0x7F & nextByte;        do {            nextByte = *pc++;            *result |= (nextByte & 0x7F) << shift;            shift += 7;        } while ((nextByte & 0x80) != 0);    }    return pc;}typedef struct RECapture {    ptrdiff_t index;           /* start of contents, -1 for empty  */    size_t length;             /* length of capture */} RECapture;typedef struct REMatchState {    const jschar *cp;    RECapture parens[1];      /* first of 're->parenCount' captures,                                 allocated at end of this struct */} REMatchState;struct REBackTrackData;typedef struct REProgState {    jsbytecode *continue_pc;        /* current continuation data */    jsbytecode continue_op;    ptrdiff_t index;                /* progress in text */    size_t parenSoFar;              /* highest indexed paren started */    union {        struct {            uintN min;             /* current quantifier limits */            uintN max;        } quantifier;        struct {            size_t top;             /* backtrack stack state */            size_t sz;        } assertion;    } u;} REProgState;typedef struct REBackTrackData {    size_t sz;                      /* size of previous stack entry */    jsbytecode *backtrack_pc;       /* where to backtrack to */    jsbytecode backtrack_op;    const jschar *cp;               /* index in text of match at backtrack */    size_t parenIndex;              /* start index of saved paren contents */    size_t parenCount;              /* # of saved paren contents */    size_t saveStateStackTop;       /* number of parent states */    /* saved parent states follow */    /* saved paren contents follow */} REBackTrackData;#define INITIAL_STATESTACK  100#define INITIAL_BACKTRACK   8000typedef struct REGlobalData {    JSContext *cx;    JSRegExp *regexp;               /* the RE in execution */    JSBool ok;                      /* runtime error (out_of_memory only?) */    size_t start;                   /* offset to start at */    ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */    const jschar    *cpbegin;       /* text base address */    const jschar    *cpend;         /* text limit address */    REProgState *stateStack;        /* stack of state of current parents */    size_t stateStackTop;    size_t stateStackLimit;    REBackTrackData *backTrackStack;/* stack of matched-so-far positions */    REBackTrackData *backTrackSP;    size_t backTrackStackSize;    size_t cursz;                   /* size of current stack entry */    JSArenaPool     pool;           /* It's faster to use one malloc'd pool                                       than to malloc/free the three items                                       that are allocated from this pool */} REGlobalData;/* * 1. If IgnoreCase is false, return ch. * 2. Let u be ch converted to upper case as if by calling *    String.prototype.toUpperCase on the one-character string ch. * 3. If u does not consist of a single character, return ch. * 4. Let cu be u's character. * 5. If ch's code point value is greater than or equal to decimal 128 and cu's *    code point value is less than decimal 128, then return ch. * 6. Return cu. */static jscharupcase(jschar ch){    jschar cu = JS_TOUPPER(ch);    if (ch >= 128 && cu < 128)        return ch;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -