📄 regexpnode.java
字号:
/* * Copyright (c) 1998-2007 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the * * Free Software Foundation, Inc. * 59 Temple Place, Suite 330 * Boston, MA 02111-1307 USA * * @author Scott Ferguson */package com.caucho.quercus.lib.regexp;import java.util.*;import com.caucho.util.*;import com.caucho.quercus.env.StringValue;class RegexpNode { private static final L10N L = new L10N(RegexpNode.class); static final int RC_END = 0; static final int RC_NULL = 1; static final int RC_STRING = 2; static final int RC_SET = 3; static final int RC_NSET = 4; static final int RC_BEG_GROUP = 5; static final int RC_END_GROUP = 6; static final int RC_GROUP_REF = 7; static final int RC_LOOP = 8; static final int RC_LOOP_INIT = 9; static final int RC_LOOP_SHORT = 10; static final int RC_LOOP_UNIQUE = 11; static final int RC_LOOP_SHORT_UNIQUE = 12; static final int RC_LOOP_LONG = 13; static final int RC_OR = 64; static final int RC_OR_UNIQUE = 65; static final int RC_POS_LOOKAHEAD = 66; static final int RC_NEG_LOOKAHEAD = 67; static final int RC_POS_LOOKBEHIND = 68; static final int RC_NEG_LOOKBEHIND = 69; static final int RC_LOOKBEHIND_OR = 70; static final int RC_WORD = 73; static final int RC_NWORD = 74; static final int RC_BLINE = 75; static final int RC_ELINE = 76; static final int RC_BSTRING = 77; static final int RC_ESTRING = 78; static final int RC_ENSTRING = 79; static final int RC_GSTRING = 80; // conditionals static final int RC_COND = 81; // ignore case static final int RC_STRING_I = 128; static final int RC_SET_I = 129; static final int RC_NSET_I = 130; static final int RC_GROUP_REF_I = 131; static final int RC_LEXEME = 256; // unicode properties static final int RC_UNICODE = 512; static final int RC_NUNICODE = 513; // unicode properties sets static final int RC_C = 1024; static final int RC_L = 1025; static final int RC_M = 1026; static final int RC_N = 1027; static final int RC_P = 1028; static final int RC_S = 1029; static final int RC_Z = 1030; // negated unicode properties sets static final int RC_NC = 1031; static final int RC_NL = 1032; static final int RC_NM = 1033; static final int RC_NN = 1034; static final int RC_NP = 1035; // POSIX character classes static final int RC_CHAR_CLASS = 2048; static final int RC_ALNUM = 1; static final int RC_ALPHA = 2; static final int RC_BLANK = 3; static final int RC_CNTRL = 4; static final int RC_DIGIT = 5; static final int RC_GRAPH = 6; static final int RC_LOWER = 7; static final int RC_PRINT = 8; static final int RC_PUNCT = 9; static final int RC_SPACE = 10; static final int RC_UPPER = 11; static final int RC_XDIGIT = 12; // #2526, possible JIT/OS issue with Integer.MAX_VALUE private static final int INTEGER_MAX = Integer.MAX_VALUE - 1; public static final int FAIL = -1; public static final int SUCCESS = 0; static final RegexpNode N_END = new End(); static final RegexpNode ANY_CHAR; /** * Creates a node with a code */ protected RegexpNode() { } // // parsing constructors // RegexpNode concat(RegexpNode next) { return new Concat(this, next); } /** * '?' operator */ RegexpNode createOptional(Regcomp parser) { return createLoop(parser, 0, 1); } /** * '*' operator */ RegexpNode createStar(Regcomp parser) { return createLoop(parser, 0, INTEGER_MAX); } /** * '+' operator */ RegexpNode createPlus(Regcomp parser) { return createLoop(parser, 1, INTEGER_MAX); } /** * Any loop */ RegexpNode createLoop(Regcomp parser, int min, int max) { return new LoopHead(parser, this, min, max); } /** * Any loop */ RegexpNode createLoopUngreedy(Regcomp parser, int min, int max) { return new LoopHeadUngreedy(parser, this, min, max); } /** * Possessive loop */ RegexpNode createPossessiveLoop(int min, int max) { return new PossessiveLoop(getHead(), min, max); } /** * Create an or expression */ RegexpNode createOr(RegexpNode node) { return Or.create(this, node); } // // optimization functions // int minLength() { return 0; } String prefix() { return ""; } int firstChar() { return -1; } boolean isNullable() { return false; } boolean []firstSet(boolean []firstSet) { return null; } RegexpNode getTail() { return this; } RegexpNode getHead() { return this; } // // matching // int match(StringValue string, int length, int offset, RegexpState state) { throw new UnsupportedOperationException(getClass().getName()); } @Override public String toString() { String name = getClass().getName(); int p = name.lastIndexOf('$'); if (p < 0) p = name.lastIndexOf('.'); return name.substring(p + 1) + "[]"; } /** * A node with exactly one character matches. */ static class AbstractCharNode extends RegexpNode { @Override RegexpNode createLoop(Regcomp parser, int min, int max) { return new CharLoop(this, min, max); } @Override RegexpNode createLoopUngreedy(Regcomp parser, int min, int max) { return new CharUngreedyLoop(this, min, max); } @Override int minLength() { return 1; } } static class CharNode extends AbstractCharNode { private char _ch; CharNode(char ch) { _ch = ch; } @Override int firstChar() { return _ch; } @Override boolean []firstSet(boolean []firstSet) { if (firstSet != null && _ch < firstSet.length) { firstSet[_ch] = true; return firstSet; } else return null; } @Override int match(StringValue string, int length, int offset, RegexpState state) { if (offset < length && string.charAt(offset) == _ch) return offset + 1; else return -1; } } static final AnchorBegin ANCHOR_BEGIN = new AnchorBegin(); static final AnchorBeginOrNewline ANCHOR_BEGIN_OR_NEWLINE = new AnchorBeginOrNewline(); static final AnchorEnd ANCHOR_END = new AnchorEnd(); static final AnchorEndOnly ANCHOR_END_ONLY = new AnchorEndOnly(); static final AnchorEndOrNewline ANCHOR_END_OR_NEWLINE = new AnchorEndOrNewline(); private static class AnchorBegin extends NullableNode { @Override int match(StringValue string, int length, int offset, RegexpState state) { if (offset == 0) return offset; else return -1; } } private static class AnchorBeginOrNewline extends NullableNode { @Override int match(StringValue string, int strlen, int offset, RegexpState state) { if (offset == 0 || string.charAt(offset - 1) == '\n') return offset; else return -1; } } private static class AnchorEnd extends NullableNode { @Override int match(StringValue string, int strlen, int offset, RegexpState state) { if (offset == strlen || offset + 1 == strlen && string.charAt(offset) == '\n') return offset; else return -1; } } private static class AnchorEndOnly extends NullableNode { @Override int match(StringValue string, int length, int offset, RegexpState state) { if (offset == length) return offset; else return -1; } } private static class AnchorEndOrNewline extends NullableNode { @Override int match(StringValue string, int length, int offset, RegexpState state) { if (offset == length || string.charAt(offset) == '\n') return offset; else return -1; } } static final RegexpNode DIGIT = RegexpSet.DIGIT.createNode(); static final RegexpNode NOT_DIGIT = RegexpSet.DIGIT.createNotNode(); static final RegexpNode DOT = RegexpSet.DOT.createNotNode(); static final RegexpNode NOT_DOT = RegexpSet.DOT.createNode(); static final RegexpNode SPACE = RegexpSet.SPACE.createNode(); static final RegexpNode NOT_SPACE = RegexpSet.SPACE.createNotNode(); static final RegexpNode S_WORD = RegexpSet.WORD.createNode(); static final RegexpNode NOT_S_WORD = RegexpSet.WORD.createNotNode(); static class AsciiSet extends AbstractCharNode { private final boolean []_set; AsciiSet() { _set = new boolean[128]; } AsciiSet(boolean []set) { _set = set; } @Override boolean []firstSet(boolean []firstSet) { if (firstSet == null) return null; for (int i = 0; i < _set.length; i++) { if (_set[i]) firstSet[i] = true; } return firstSet; } void setChar(char ch) { _set[ch] = true; } void clearChar(char ch) { _set[ch] = false; } @Override int match(StringValue string, int length, int offset, RegexpState state) { if (length <= offset) return -1; char ch = string.charAt(offset); if (ch < 128 && _set[ch]) return offset + 1; else return -1; } } static class AsciiNotSet extends AbstractCharNode { private final boolean []_set; AsciiNotSet() { _set = new boolean[128]; } AsciiNotSet(boolean []set) { _set = set; } void setChar(char ch) { _set[ch] = true; } void clearChar(char ch) { _set[ch] = false; } @Override int match(StringValue string, int length, int offset, RegexpState state) { if (length <= offset) return -1; char ch = string.charAt(offset); if (ch < 128 && _set[ch]) return -1; else return offset + 1; } } static class CharLoop extends RegexpNode { private final RegexpNode _node; private RegexpNode _next = N_END; private int _min; private int _max; CharLoop(RegexpNode node, int min, int max) { _node = node.getHead(); _min = min; _max = max; if (_min < 0) throw new IllegalStateException(); } @Override RegexpNode concat(RegexpNode next) { if (next == null) throw new NullPointerException(); if (_next != null) _next = _next.concat(next); else _next = next.getHead(); return this; } @Override RegexpNode createLoop(Regcomp parser, int min, int max) { if (min == 0 && max == 1) { _min = 0; return this; } else return new LoopHead(parser, this, min, max); } @Override int minLength() { return _min; } @Override boolean []firstSet(boolean []firstSet) { firstSet = _node.firstSet(firstSet); if (_min > 0 && ! _node.isNullable()) return firstSet; firstSet = _next.firstSet(firstSet); return firstSet; } // // match functions // @Override int match(StringValue string, int length, int offset, RegexpState state) { RegexpNode next = _next; RegexpNode node = _node; int min = _min; int max = _max; int i; int tail; for (i = 0; i < min; i++) { tail = node.match(string, length, offset + i, state); if (tail < 0) return tail; } for (; i < max; i++) { if (node.match(string, length, offset + i, state) < 0) { break; } } for (; min <= i; i--) { tail = next.match(string, length, offset + i, state); if (tail >= 0) return tail; } return -1; } @Override public String toString() { return "CharLoop[" + _min + ", " + _max + ", " + _node + ", " + _next + "]"; } } static class CharUngreedyLoop extends RegexpNode { private final RegexpNode _node; private RegexpNode _next = N_END; private int _min; private int _max; CharUngreedyLoop(RegexpNode node, int min, int max) { _node = node.getHead(); _min = min; _max = max; if (_min < 0) throw new IllegalStateException(); } @Override RegexpNode concat(RegexpNode next) { if (next == null) throw new NullPointerException(); if (_next != null) _next = _next.concat(next); else _next = next.getHead(); return this; } @Override RegexpNode createLoop(Regcomp parser, int min, int max) { if (min == 0 && max == 1) { _min = 0; return this; } else return new LoopHead(parser, this, min, max); } @Override int minLength() { return _min; } @Override boolean []firstSet(boolean []firstSet) { firstSet = _node.firstSet(firstSet); if (_min > 0 && ! _node.isNullable()) return firstSet; firstSet = _next.firstSet(firstSet); return firstSet; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -