📄 regexp.java
字号:
/*
 * RegExp.java
 *
 * This work is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This work is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 * As a special exception, the copyright holders of this library give
 * you permission to link this library with independent modules to
 * produce an executable, regardless of the license terms of these
 * independent modules, and to copy and distribute the resulting
 * executable under terms of your choice, provided that you also meet,
 * for each linked independent module, the terms and conditions of the
 * license of that module. An independent module is a module which is
 * not derived from or based on this library. If you modify this
 * library, you may extend this exception to your version of the
 * library, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 *
 * Copyright (c) 2003 Per Cederberg. All rights reserved.
 */

package net.percederberg.grammatica.parser.re;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;

/**
 * A regular expression. This class creates and holds an internal
 * data structure representing a regular expression. It also allows
 * creating matchers. This class is thread-safe. Multiple matchers may
 * operate simultaneously on the same regular expression.
* * @author Per Cederberg, <per at percederberg dot net> * @version 1.0 */public class RegExp { /** * The base regular expression element. */ private Element element; /** * The regular expression pattern. */ private String pattern; /** * The current position in the pattern. This variable is used by * the parsing methods. */ private int pos; /** * Creates a new regular expression. * * @param pattern the regular expression pattern * * @throws RegExpException if the regular expression couldn't be * parsed correctly */ public RegExp(String pattern) throws RegExpException { this.pattern = pattern; this.pos = 0; this.element = parseExpr(); if (pos < pattern.length()) { throw new RegExpException( RegExpException.UNEXPECTED_CHARACTER, pos, pattern); } } /** * Creates a new matcher for the specified string. * * @param str the string to work with * * @return the regular expresion matcher */ public Matcher matcher(CharBuffer str) { return new Matcher((Element) element.clone(), str); } /** * Creates a new matcher for the specified string. * * @param str the string to work with * * @return the regular expresion matcher */ public Matcher matcher(String str) { return matcher(new CharBuffer(str)); } /** * Creates a new matcher for the specified string. * * @param str the string to work with * * @return the regular expresion matcher */ public Matcher matcher(StringBuffer str) { return matcher(new CharBuffer(str)); } /** * Returns a string representation of the regular expression. * * @return a string representation of the regular expression */ public String toString() { StringWriter str; str = new StringWriter(); str.write(" Regexp Pattern: " + pattern + "\n"); str.write(" Regexp Compiled:\n"); element.printTo(new PrintWriter(str), " "); return str.toString(); } /** * Parses a regular expression. This method handles the Expr * production in the grammar (see regexp.grammar). 
* * @return the element representing this expression * * @throws RegExpException if an error was encountered in the * pattern string */ private Element parseExpr() throws RegExpException { Element first; Element second; first = parseTerm(); if (peekChar(0) != '|') { return first; } else { readChar('|'); second = parseExpr(); return new AlternativeElement(first, second); } } /** * Parses a regular expression term. This method handles the * Term production in the grammar (see regexp.grammar). * * @return the element representing this term * * @throws RegExpException if an error was encountered in the * pattern string */ private Element parseTerm() throws RegExpException { ArrayList list = new ArrayList(); list.add(parseFact()); while (true) { switch (peekChar(0)) { case -1: case ')': case ']': case '{': case '}': case '?': case '+': case '|': return combineElements(list); default: list.add(parseFact()); } } } /** * Parses a regular expression factor. This method handles the * Fact production in the grammar (see regexp.grammar). * * @return the element representing this factor * * @throws RegExpException if an error was encountered in the * pattern string */ private Element parseFact() throws RegExpException { Element elem; elem = parseAtom(); switch (peekChar(0)) { case '?': case '*': case '+': case '{': return parseAtomModifier(elem); default: return elem; } } /** * Parses a regular expression atom. This method handles the * Atom production in the grammar (see regexp.grammar). 
* * @return the element representing this atom * * @throws RegExpException if an error was encountered in the * pattern string */ private Element parseAtom() throws RegExpException { Element elem; switch (peekChar(0)) { case '.': readChar('.'); return CharacterSetElement.DOT; case '(': readChar('('); elem = parseExpr(); readChar(')'); return elem; case '[': readChar('['); elem = parseCharSet(); readChar(']'); return elem; case -1: case ')': case ']': case '{': case '}': case '?': case '*': case '+': case '|': throw new RegExpException( RegExpException.UNEXPECTED_CHARACTER, pos, pattern); default: return parseChar(); } } /** * Parses a regular expression atom modifier. This method handles * the AtomModifier production in the grammar (see regexp.grammar). * * @param elem the element to modify * * @return the modified element * * @throws RegExpException if an error was encountered in the * pattern string */ private Element parseAtomModifier(Element elem) throws RegExpException { int min = 0; int max = -1; int type = RepeatElement.GREEDY; int firstPos; // Read min and max switch (readChar()) { case '?': min = 0; max = 1; break; case '*': min = 0; max = -1; break; case '+': min = 1; max = -1; break; case '{': firstPos = pos - 1; min = readNumber(); max = min; if (peekChar(0) == ',') { readChar(','); max = -1; if (peekChar(0) != '}') { max = readNumber(); } } readChar('}'); if (max == 0 || (max > 0 && min > max)) { throw new RegExpException( RegExpException.INVALID_REPEAT_COUNT, firstPos, pattern); } break; default: throw new RegExpException( RegExpException.UNEXPECTED_CHARACTER, pos - 1, pattern); } // Read operator mode if (peekChar(0) == '?') { readChar('?'); type = RepeatElement.RELUCTANT; } else if (peekChar(0) == '+') { readChar('+'); type = RepeatElement.POSSESSIVE; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -