⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nativeregexp.java

📁 java中比较著名的js引擎当属mozilla开源的rhino
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Rhino code, released
 * May 6, 1998.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1997-1999
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Norris Boyd
 *   Igor Bukanov
 *   Brendan Eich
 *   Matthias Radestock
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License Version 2 or later (the "GPL"), in which
 * case the provisions of the GPL are applicable instead of those above. If
 * you wish to allow use of your version of this file only under the terms of
 * the GPL and not to allow others to use your version of this file under the
 * MPL, indicate your decision by deleting the provisions above and replacing
 * them with the notice and other provisions required by the GPL. If you do
 * not delete the provisions above, a recipient may use your version of this
 * file under either the MPL or the GPL.
 *
 * ***** END LICENSE BLOCK ***** */

package org.mozilla.javascript.regexp;

import java.io.Serializable;

import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.IdFunctionObject;
import org.mozilla.javascript.IdScriptableObject;
import org.mozilla.javascript.Kit;
import org.mozilla.javascript.ScriptRuntime;
import org.mozilla.javascript.Scriptable;
import org.mozilla.javascript.ScriptableObject;
import org.mozilla.javascript.Undefined;

/**
 * This class implements the RegExp native object.
 *
 * Revision History:
 * Implementation in C by Brendan Eich
 * Initial port to Java by Norris Boyd from jsregexp.c version 1.36
 * Merged up to version 1.38, which included Unicode support.
 * Merged bug fixes in version 1.39.
 * Merged JSFUN13_BRANCH changes up to 1.32.2.13
 *
 * @author Brendan Eich
 * @author Norris Boyd
 */
public class NativeRegExp extends IdScriptableObject implements Function
{
    static final long serialVersionUID = 4965263491464903264L;

    private static final Object REGEXP_TAG = new Object();

    // Bit flags stored in RECompiled.flags; set by compileRE() from the
    // characters of the flag string and rendered back by toString().
    public static final int JSREG_GLOB = 0x1;       // 'g' flag: global
    public static final int JSREG_FOLD = 0x2;       // 'i' flag: fold
    public static final int JSREG_MULTILINE = 0x4;  // 'm' flag: multiline

    //type of match to perform
    public static final int TEST = 0;
    public static final int MATCH = 1;
    public static final int PREFIX = 2;

    // Compile-time switch guarding the System.out tracing in compileRE().
    private static final boolean debug = false;

    // Regular-expression opcodes. compileRE() inspects the first program byte
    // against the FLAT / FLAT1 / UCFLAT1 families and terminates every
    // compiled program with REOP_END.
    private static final byte REOP_EMPTY         = 0;  /* match rest of input against rest of r.e. */
    private static final byte REOP_ALT           = 1;  /* alternative subexpressions in kid and next */
    private static final byte REOP_BOL           = 2;  /* beginning of input (or line if multiline) */
    private static final byte REOP_EOL           = 3;  /* end of input (or line if multiline) */
    private static final byte REOP_WBDRY         = 4;  /* match "" at word boundary */
    private static final byte REOP_WNONBDRY      = 5;  /* match "" at word non-boundary */
    private static final byte REOP_QUANT         = 6;  /* quantified atom: atom{1,2} */
    private static final byte REOP_STAR          = 7;  /* zero or more occurrences of kid */
    private static final byte REOP_PLUS          = 8;  /* one or more occurrences of kid */
    private static final byte REOP_OPT           = 9;  /* optional subexpression in kid */
    private static final byte REOP_LPAREN        = 10; /* left paren bytecode: kid is u.num'th sub-regexp */
    private static final byte REOP_RPAREN        = 11; /* right paren bytecode */
    private static final byte REOP_DOT           = 12; /* stands for any character */
    private static final byte REOP_CCLASS        = 13; /* character class: [a-f] */
    private static final byte REOP_DIGIT         = 14; /* match a digit char: [0-9] */
    private static final byte REOP_NONDIGIT      = 15; /* match a non-digit char: [^0-9] */
    private static final byte REOP_ALNUM         = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */
    private static final byte REOP_NONALNUM      = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
    private static final byte REOP_SPACE         = 18; /* match a whitespace char */
    private static final byte REOP_NONSPACE      = 19; /* match a non-whitespace char */
    private static final byte REOP_BACKREF       = 20; /* back-reference (e.g., \1) to a parenthetical */
    private static final byte REOP_FLAT          = 21; /* match a flat string */
    private static final byte REOP_FLAT1         = 22; /* match a single char */
    private static final byte REOP_JUMP          = 23; /* for deoptimized closure loops */
    private static final byte REOP_DOTSTAR       = 24; /* optimize .* to use a single opcode */
    private static final byte REOP_ANCHOR        = 25; /* like .* but skips left context to unanchored r.e. */
    private static final byte REOP_EOLONLY       = 26; /* $ not preceded by any pattern */
    private static final byte REOP_UCFLAT        = 27; /* flat Unicode string; len immediate counts chars */
    private static final byte REOP_UCFLAT1       = 28; /* single Unicode char */
    private static final byte REOP_UCCLASS       = 29; /* Unicode character class, vector of chars to match */
    private static final byte REOP_NUCCLASS      = 30; /* negated Unicode character class */
    private static final byte REOP_BACKREFi      = 31; /* case-independent REOP_BACKREF */
    private static final byte REOP_FLATi         = 32; /* case-independent REOP_FLAT */
    private static final byte REOP_FLAT1i        = 33; /* case-independent REOP_FLAT1 */
    private static final byte REOP_UCFLATi       = 34; /* case-independent REOP_UCFLAT */
    private static final byte REOP_UCFLAT1i      = 35; /* case-independent REOP_UCFLAT1 */
    private static final byte REOP_ANCHOR1       = 36; /* first-char discriminating REOP_ANCHOR */
    private static final byte REOP_NCCLASS       = 37; /* negated 8-bit character class */
    private static final byte REOP_DOTSTARMIN    = 38; /* ungreedy version of REOP_DOTSTAR */
    private static final byte REOP_LPARENNON     = 39; /* non-capturing version of REOP_LPAREN */
    private static final byte REOP_RPARENNON     = 40; /* non-capturing version of REOP_RPAREN */
    private static final byte REOP_ASSERT        = 41; /* zero width positive lookahead assertion */
    private static final byte REOP_ASSERT_NOT    = 42; /* zero width negative lookahead assertion */
    private static final byte REOP_ASSERTTEST    = 43; /* sentinel at end of assertion child */
    private static final byte REOP_ASSERTNOTTEST = 44; /* sentinel at end of !assertion child */
    private static final byte REOP_MINIMALSTAR   = 45; /* non-greedy version of * */
    private static final byte REOP_MINIMALPLUS   = 46; /* non-greedy version of + */
    private static final byte REOP_MINIMALOPT    = 47; /* non-greedy version of ? */
    private static final byte REOP_MINIMALQUANT  = 48; /* non-greedy version of {} */
    private static final byte REOP_ENDCHILD      = 49; /* sentinel at end of quantifier child */
    private static final byte REOP_CLASS         = 50; /* character class with index */
    private static final byte REOP_REPEAT        = 51; /* directs execution of greedy quantifier */
    private static final byte REOP_MINIMALREPEAT = 52; /* directs execution of non-greedy quantifier */
    private static final byte REOP_END           = 53;

    /**
     * Installs the RegExp constructor and its prototype object into a scope.
     *
     * Builds a prototype instance holding the compiled empty pattern, wires
     * it to a fresh NativeRegExpCtor, and defines the constructor in
     * <code>scope</code> under the name "RegExp" with the DONTENUM attribute.
     *
     * @param cx the current context (not referenced by this method)
     * @param scope the scope that receives the "RegExp" property and becomes
     *              the parent scope of the prototype
     * @param sealed when true, both the prototype and the constructor are
     *               sealed after setup
     */
    public static void init(Context cx, Scriptable scope, boolean sealed)
    {

        NativeRegExp proto = new NativeRegExp();
        // The prototype itself carries a compiled empty pattern.
        proto.re = (RECompiled)compileRE("", null, false);
        proto.activatePrototypeMap(MAX_PROTOTYPE_ID);
        proto.setParentScope(scope);
        proto.setPrototype(getObjectPrototype(scope));

        NativeRegExpCtor ctor = new NativeRegExpCtor();
        // Bug #324006: ECMA-262 15.10.6.1 says "The initial value of
        // RegExp.prototype.constructor is the builtin RegExp constructor."
        proto.put("constructor", proto, ctor);

        ScriptRuntime.setFunctionProtoAndParent(ctor, scope);

        ctor.setImmunePrototypeProperty(proto);

        if (sealed) {
            proto.sealObject();
            ctor.sealObject();
        }

        defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM);
    }

    /**
     * Creates a RegExp object around an already compiled regular expression.
     *
     * @param scope scope passed to ScriptRuntime.setObjectProtoAndParent to
     *              set up this object's prototype and parent
     * @param regexpCompiled the compiled program; it is cast to RECompiled,
     *                       so any other type fails with ClassCastException
     */
    NativeRegExp(Scriptable scope, Object regexpCompiled)
    {
        this.re = (RECompiled)regexpCompiled;
        this.lastIndex = 0;
        ScriptRuntime.setObjectProtoAndParent(this, scope);
    }

    /**
     * Returns the class name of this object, which is always "RegExp".
     */
    public String getClassName()
    {
        return "RegExp";
    }

    /**
     * Invokes this regular expression as a function by running a MATCH-type
     * execution over the arguments (see execSub). <code>thisObj</code> is not
     * used.
     */
    public Object call(Context cx, Scriptable scope, Scriptable thisObj,
                       Object[] args)
    {
        return execSub(cx, scope, args, MATCH);
    }

    /**
     * Invokes this regular expression as a constructor. Performs the same
     * MATCH-type execution as call() and casts the result to Scriptable.
     */
    public Scriptable construct(Context cx, Scriptable scope, Object[] args)
    {
        return (Scriptable)execSub(cx, scope, args, MATCH);
    }

    /**
     * Re-initializes this object from the given arguments.
     *
     * If the first argument is a NativeRegExp, its compiled program and
     * lastIndex are copied into this object; supplying a second argument
     * other than undefined in that case raises the
     * "msg.bad.regexp.compile" type error. Otherwise the first argument is
     * converted to a string and used as the pattern source (the empty
     * string when there are no arguments), the second argument, if present
     * and not undefined, is converted to a string and used as the flags,
     * and lastIndex is reset to 0.
     *
     * @param cx the current context (not referenced by this method)
     * @param scope the current scope (not referenced by this method)
     * @param args the pattern (or source RegExp) and the optional flags
     * @return this object
     */
    Scriptable compile(Context cx, Scriptable scope, Object[] args)
    {
        if (args.length > 0 && args[0] instanceof NativeRegExp) {
            if (args.length > 1 && args[1] != Undefined.instance) {
                // report error
                throw ScriptRuntime.typeError0("msg.bad.regexp.compile");
            }
            NativeRegExp thatObj = (NativeRegExp) args[0];
            // The compiled program is shared with the source object, not copied.
            this.re = thatObj.re;
            this.lastIndex = thatObj.lastIndex;
            return this;
        }
        String s = args.length == 0 ? "" : ScriptRuntime.toString(args[0]);
        String global = args.length > 1 && args[1] != Undefined.instance
            ? ScriptRuntime.toString(args[1])
            : null;
        // NOTE(review): compileRE can return null when parseDisjunction
        // fails; this.re would then be null - confirm whether that path is
        // reachable without an error having been thrown first.
        this.re = (RECompiled)compileRE(s, global, false);
        this.lastIndex = 0;
        return this;
    }

    /**
     * Renders this regular expression as "/source/flags".
     *
     * An empty source is written as "(?:)"; the flag letters are appended
     * in the fixed order g, i, m for whichever flag bits are set.
     */
    public String toString()
    {
        StringBuffer buf = new StringBuffer();
        buf.append('/');
        if (re.source.length != 0) {
            buf.append(re.source);
        } else {
            // See bugzilla 226045
            buf.append("(?:)");
        }
        buf.append('/');
        if ((re.flags & JSREG_GLOB) != 0)
            buf.append('g');
        if ((re.flags & JSREG_FOLD) != 0)
            buf.append('i');
        if ((re.flags & JSREG_MULTILINE) != 0)
            buf.append('m');
        return buf.toString();
    }

    /**
     * No-argument constructor; init() uses it to create the prototype
     * object, whose fields it then fills in itself.
     */
    NativeRegExp() {  }

    /**
     * Returns the regexp proxy registered for the context, cast to
     * RegExpImpl.
     */
    private static RegExpImpl getImpl(Context cx)
    {
        return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx);
    }

    /**
     * Shared execution path behind call() and construct().
     *
     * The subject string is the first argument converted to a string, or
     * the <code>input</code> field of the context's RegExpImpl when no
     * argument is given. Matching starts at lastIndex when the global flag
     * is set and at 0 otherwise. For a global expression lastIndex is
     * updated afterwards: to the index reported back by executeRegExp on
     * success, or to 0 when the result is null or undefined.
     *
     * @param cx the current context
     * @param scopeObj scope handed through to executeRegExp
     * @param args call arguments; only args[0] is examined
     * @param matchType one of TEST, MATCH or PREFIX
     * @return the result of executeRegExp, or null when the start index
     *         lies outside the subject string
     */
    private Object execSub(Context cx, Scriptable scopeObj,
                           Object[] args, int matchType)
    {
        RegExpImpl reImpl = getImpl(cx);
        String str;
        if (args.length == 0) {
            str = reImpl.input;
            if (str == null) {
                // NOTE(review): str is dereferenced below, so this relies on
                // reportError not returning normally - confirm.
                reportError("msg.no.re.input.for", toString());
            }
        } else {
            str = ScriptRuntime.toString(args[0]);
        }
        double d = ((re.flags & JSREG_GLOB) != 0) ? lastIndex : 0;

        Object rval;
        if (d < 0 || str.length() < d) {
            // Start position is outside the subject: reset and report no match.
            lastIndex = 0;
            rval = null;
        }
        else {
            // One-element array so that executeRegExp can hand back an index.
            int indexp[] = { (int)d };
            rval = executeRegExp(cx, scopeObj, reImpl, str, indexp, matchType);
            if ((re.flags & JSREG_GLOB) != 0) {
                lastIndex = (rval == null || rval == Undefined.instance)
                            ? 0 : indexp[0];
            }
        }
        return rval;
    }

    /**
     * Compiles a pattern string into an RECompiled program.
     *
     * @param str the pattern source
     * @param global the flag string, or null for no flags; each character
     *               must be 'g', 'i' or 'm', any other character is
     *               reported through reportError("msg.invalid.re.flag", ...)
     * @param flat when true and the pattern is non-empty, the pattern is
     *             not parsed: a single REOP_FLAT node covering the whole
     *             source is built instead
     * @return the compiled RECompiled instance (typed as Object), or null
     *         when parseDisjunction returns false
     */
    static Object compileRE(String str, String global, boolean flat)
    {
        RECompiled regexp = new RECompiled();
        regexp.source = str.toCharArray();
        int length = str.length();

        int flags = 0;
        if (global != null) {
            for (int i = 0; i < global.length(); i++) {
                char c = global.charAt(i);
                if (c == 'g') {
                    flags |= JSREG_GLOB;
                } else if (c == 'i') {
                    flags |= JSREG_FOLD;
                } else if (c == 'm') {
                    flags |= JSREG_MULTILINE;
                } else {
                    reportError("msg.invalid.re.flag", String.valueOf(c));
                }
            }
        }
        regexp.flags = flags;

        CompilerState state = new CompilerState(regexp.source, length, flags);
        if (flat && length > 0) {
if (debug) {
System.out.println("flat = \"" + str + "\"");
}
            state.result = new RENode(REOP_FLAT);
            state.result.chr = state.cpbegin[0];
            state.result.length = length;
            state.result.flatIndex = 0;
            // NOTE(review): 5 is presumably the encoded size of this flat
            // node - confirm against emitREBytecode.
            state.progLength += 5;
        }
        else
            if (!parseDisjunction(state))
                return null;

        // One byte beyond progLength is allocated; REOP_END is written at the
        // position returned by emitREBytecode below.
        regexp.program = new byte[state.progLength + 1];
        if (state.classCount != 0) {
            regexp.classList = new RECharSet[state.classCount];
            regexp.classCount = state.classCount;
        }
        int endPC = emitREBytecode(state, regexp, 0, state.result);
        regexp.program[endPC++] = REOP_END;

if (debug) {
System.out.println("Prog. length = " + endPC);
for (int i = 0; i < endPC; i++) {
    System.out.print(regexp.program[i]);
    if (i < (endPC - 1)) System.out.print(", ");
}
System.out.println();
}
        regexp.parenCount = state.parenCount;

        // If re starts with literal, init anchorCh accordingly
        switch (regexp.program[0]) {
        case REOP_UCFLAT1:
        case REOP_UCFLAT1i:
            // Operand is read through getIndex starting at program offset 1.
            regexp.anchorCh = (char)getIndex(regexp.program, 1);
            break;
        case REOP_FLAT1:
        case REOP_FLAT1i:
            // Operand is the single byte at offset 1, masked to 0..255.
            regexp.anchorCh = (char)(regexp.program[1] & 0xFF);
            break;
        case REOP_FLAT:
        case REOP_FLATi:
            // Operand is an index into the pattern source; the anchor is the
            // character found there.
            int k = getIndex(regexp.program, 1);
            regexp.anchorCh = regexp.source[k];
            break;
        }

if (debug) {
if (regexp.anchorCh >= 0) {
    System.out.println("Anchor ch = '" + (char)regexp.anchorCh + "'");
}
}
        return regexp;
    }

    static boolean isDigit(char c)
    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -