📄 regexp.cs
字号:
/* * RegExp.cs * * This work is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published * by the Free Software Foundation; either version 2 of the License, * or (at your option) any later version. * * This work is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * * As a special exception, the copyright holders of this library give * you permission to link this library with independent modules to * produce an executable, regardless of the license terms of these * independent modules, and to copy and distribute the resulting * executable under terms of your choice, provided that you also meet, * for each linked independent module, the terms and conditions of the * license of that module. An independent module is a module which is * not derived from or based on this library. If you modify this * library, you may extend this exception to your version of the * library, but you are not obligated to do so. If you do not wish to * do so, delete this exception statement from your version. * * Copyright (c) 2003 Per Cederberg. All rights reserved. */using System;using System.Collections;using System.IO;using System.Globalization;using System.Text;namespace PerCederberg.Grammatica.Parser.RE { /** * A regular expression. This class creates and holds an internal * data structure representing a regular expression. It also * allows creating matchers. This class is thread-safe. Multiple * matchers may operate simultanously on the same regular * expression. 
* * @author Per Cederberg, <per at percederberg dot net> * @version 1.0 */
    public class RegExp {

        /**
         * The base regular expression element. It is assigned by the
         * constructor from the result of parsing the pattern.
         */
        private Element element;

        /**
         * The regular expression pattern.
         */
        private string pattern;

        /**
         * The current position in the pattern. This variable is used by
         * the parsing methods.
         */
        private int pos;

        /**
         * Creates a new regular expression by parsing the specified
         * pattern. The whole pattern must be consumed by the parser;
         * any trailing unparsed character is reported as an error.
         *
         * @param pattern        the regular expression pattern
         *
         * @throws ArgumentNullException if the pattern is null
         * @throws RegExpException if the regular expression couldn't be
         *             parsed correctly
         */
        public RegExp(string pattern) {
            // Fail fast with a descriptive exception instead of letting
            // a null pattern surface as a NullReferenceException from
            // somewhere inside the parsing methods.
            if (pattern == null) {
                throw new ArgumentNullException("pattern");
            }
            this.pattern = pattern;
            this.pos = 0;
            this.element = ParseExpr();
            if (pos < pattern.Length) {
                // The parser stopped before the end of the pattern
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos,
                    pattern);
            }
        }

        /**
         * Creates a new matcher for the specified string. The matcher
         * is given a clone of the parsed element rather than the
         * element held by this object.
         *
         * @param str            the string to work with
         *
         * @return the regular expression matcher
         */
        public Matcher Matcher(string str) {
            return new Matcher((Element) element.Clone(), str);
        }

        /**
         * Returns a string representation of the regular expression.
         * The result contains a header line, the pattern and a dump of
         * the parsed element as written by its PrintTo method.
         *
         * @return a string representation of the regular expression
         */
        public override string ToString() {
            StringWriter  str = new StringWriter();

            // NOTE(review): the leading whitespace inside these literals
            // may have been collapsed when this listing was extracted;
            // verify against the upstream source.
            str.WriteLine("Regular Expression");
            str.WriteLine(" Pattern: " + pattern);
            str.WriteLine(" Compiled:");
            element.PrintTo(str, " ");
            return str.ToString();
        }

        /**
         * Parses a regular expression. This method handles the Expr
         * production in the grammar (see regexp.grammar). An expression
         * is a term, optionally followed by '|' and another expression.
         *
         * @return the element representing this expression
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseExpr() {
            Element  first = ParseTerm();

            if (PeekChar(0) != '|') {
                return first;
            }
            ReadChar('|');
            // Right-recursive: the rest of the alternation is parsed as
            // a nested expression.
            return new AlternativeElement(first, ParseExpr());
        }

        /**
         * Parses a regular expression term. This method handles the
         * Term production in the grammar (see regexp.grammar).
* * @return the element representing this term * * @throws RegExpException if an error was encountered in the * pattern string */
        private Element ParseTerm() {
            ArrayList  factors = new ArrayList();
            bool       finished = false;

            // A term is one or more factors. After each factor the next
            // character decides whether the term has ended.
            while (!finished) {
                factors.Add(ParseFact());
                switch (PeekChar(0)) {
                case -1:
                case ')':
                case ']':
                case '{':
                case '}':
                case '?':
                case '+':
                case '|':
                    // Note: '*' is not in this list, so a stray '*'
                    // leads to another ParseFact() call, where
                    // ParseAtom() rejects it as an unexpected character.
                    finished = true;
                    break;
                }
            }
            return CombineElements(factors);
        }

        /**
         * Parses a regular expression factor. This method handles the
         * Fact production in the grammar (see regexp.grammar). A factor
         * is an atom, optionally followed by an atom modifier.
         *
         * @return the element representing this factor
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseFact() {
            Element  result = ParseAtom();

            switch (PeekChar(0)) {
            case '?':
            case '*':
            case '+':
            case '{':
                // A modifier follows, wrap the atom accordingly
                result = ParseAtomModifier(result);
                break;
            }
            return result;
        }

        /**
         * Parses a regular expression atom. This method handles the
         * Atom production in the grammar (see regexp.grammar). An atom
         * is either '.', a parenthesized expression, a bracketed
         * character set or a single character.
         *
         * @return the element representing this atom
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private Element ParseAtom() {
            Element  atom;

            switch (PeekChar(0)) {
            case '.':
                ReadChar('.');
                atom = CharacterSetElement.DOT;
                break;
            case '(':
                ReadChar('(');
                atom = ParseExpr();
                ReadChar(')');
                break;
            case '[':
                ReadChar('[');
                atom = ParseCharSet();
                ReadChar(']');
                break;
            case -1:
            case ')':
            case ']':
            case '{':
            case '}':
            case '?':
            case '*':
            case '+':
            case '|':
                // None of these can start an atom
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos,
                    pattern);
            default:
                atom = ParseChar();
                break;
            }
            return atom;
        }

        /**
         * Parses a regular expression atom modifier. This method handles
         * the AtomModifier production in the grammar (see regexp.grammar).
* * @param elem the element to modify * * @return the modified element * * @throws RegExpException if an error was encountered in the * pattern string */
        private Element ParseAtomModifier(Element elem) {
            int                       lower = 0;
            int                       upper = -1;
            int                       bracePos;
            RepeatElement.RepeatType  mode = RepeatElement.RepeatType.GREEDY;

            // Read the repeat bounds; an upper bound of -1 is used when
            // no maximum was given.
            switch (ReadChar()) {
            case '?':
                upper = 1;
                break;
            case '*':
                // Defaults already are min 0 and no maximum
                break;
            case '+':
                lower = 1;
                break;
            case '{':
                // Remember where the '{' was for error reporting
                bracePos = pos - 1;
                lower = ReadNumber();
                if (PeekChar(0) != ',') {
                    // "{n}": exactly n repetitions
                    upper = lower;
                } else {
                    ReadChar(',');
                    if (PeekChar(0) == '}') {
                        // "{n,}": no maximum
                        upper = -1;
                    } else {
                        // "{n,m}"
                        upper = ReadNumber();
                    }
                }
                ReadChar('}');
                if (upper == 0 || (upper > 0 && upper < lower)) {
                    throw new RegExpException(
                        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                        bracePos,
                        pattern);
                }
                break;
            default:
                // The character just read was not a modifier
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos - 1,
                    pattern);
            }

            // Read the optional operator mode suffix
            if (PeekChar(0) == '?') {
                ReadChar('?');
                mode = RepeatElement.RepeatType.RELUCTANT;
            } else if (PeekChar(0) == '+') {
                ReadChar('+');
                mode = RepeatElement.RepeatType.POSSESSIVE;
            }

            return new RepeatElement(elem, lower, upper, mode);
        }

        /**
         * Parses a regular expression character set. This method handles
         * the contents of the '[...]' construct in a regular expression.
         *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -