📄 perl5matcher.java
字号:
package org.apache.oro.text.regex;/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2000 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" * must not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache" * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their * name, without prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. * * Portions of this software are based upon software originally written * by Daniel F. Savarese. We appreciate his contributions. */import java.io.IOException;import java.util.*;/** * The Perl5Matcher class is used to match regular expressions * (conforming to the Perl5 regular expression syntax) generated by * Perl5Compiler. @author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a> @version $Id: Perl5Matcher.java,v 1.1 2004/01/10 00:58:23 mikedemmer Exp $ * @see PatternMatcher * @see Perl5Compiler */public final class Perl5Matcher implements PatternMatcher { private static final char __EOS = Character.MAX_VALUE; private static final int __INITIAL_NUM_OFFSETS = 20; private boolean __multiline = false, __lastSuccess = false; private char __previousChar, __input[], __originalInput[]; private Perl5Repetition __currentRep; private int __numParentheses, __bol, __eol, __currentOffset, __endOffset; private char[] __program; private int __expSize, __inputOffset, __lastParen; private int[] __beginMatchOffsets, __endMatchOffsets; private Stack __stack = new Stack(); private Perl5MatchResult __lastMatchResult = null; private static boolean __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n) { int cnt; for(cnt = 0; cnt < n; cnt++, s1Offs++, s2Offs++) { if(s1Offs >= s1.length) return false; if(s2Offs >= s2.length) return false; if(s1[s1Offs] != s2[s2Offs]) return false; } return true; } private static int __findFirst(char[] input, int current, int endOffset, char[] mustString) { int count, saveCurrent; char ch; if(input.length == 0) return endOffset; ch = mustString[0]; // Find the offset of the first character of the must string while(current < endOffset) { if(ch == input[current]){ saveCurrent = current; count = 0; while(current < endOffset && count < mustString.length) { if(mustString[count] != input[current]) break; ++count; ++current; } current = saveCurrent; if(count >= mustString.length) break; } ++current; } return current; } private void __pushState(int parenFloor) { int[] state; int stateEntries, paren; stateEntries = 3*(__expSize - parenFloor); if(stateEntries <= 0) state = new int[3]; else state = new int[stateEntries + 3]; state[0] = __expSize; state[1] = __lastParen; state[2] = __inputOffset; for(paren = __expSize; paren > parenFloor; paren-=3, stateEntries-=3) { state[stateEntries] = __endMatchOffsets[paren]; state[stateEntries + 1] = __beginMatchOffsets[paren]; state[stateEntries + 2] = paren; } __stack.push(state); } private void __popState() { int[] state; int entry, paren; state = (int[])__stack.pop(); __expSize = state[0]; __lastParen = state[1]; __inputOffset = state[2]; for(entry = 3; entry < state.length; entry+=3) { paren = state[entry + 2]; __beginMatchOffsets[paren] = state[entry + 1]; if(paren <= __lastParen) __endMatchOffsets[paren] = state[entry]; } for(paren = __lastParen + 1; paren <= __numParentheses; paren++) { if(paren > __expSize) __beginMatchOffsets[paren] = OpCode._NULL_OFFSET; __endMatchOffsets[paren] = OpCode._NULL_OFFSET; } } // Initialize globals needed before calling __tryExpression for first time private void __initInterpreterGlobals(Perl5Pattern expression, char[] input, int beginOffset, int endOffset) { __input = input; __endOffset = endOffset; __currentRep = new Perl5Repetition(); __currentRep._numInstances = 0; __currentRep._lastRepetition = null; __program = expression._program; __stack.setSize(0); if(beginOffset == 0) __previousChar = '\n'; else { __previousChar = input[beginOffset - 1]; if(!__multiline && __previousChar == '\n') __previousChar = '\0'; } __numParentheses = expression._numParentheses; __currentOffset = beginOffset; __bol = beginOffset; __eol = endOffset; // Ok, here we're using endOffset as a temporary variable. endOffset = __numParentheses + 1; if(__beginMatchOffsets == null || endOffset > __beginMatchOffsets.length) { if(endOffset < __INITIAL_NUM_OFFSETS) endOffset = __INITIAL_NUM_OFFSETS; __beginMatchOffsets = new int[endOffset]; __endMatchOffsets = new int[endOffset]; } } // Set the match result information. Only call this if we successfully // matched. private void __setLastMatchResult() { int offs; //endOffset+=dontTry; __lastMatchResult = new Perl5MatchResult(__numParentheses + 1); // This can happen when using Perl5StreamInput if(__endMatchOffsets[0] > __originalInput.length) throw new ArrayIndexOutOfBoundsException(); __lastMatchResult._match = new String(__originalInput, __beginMatchOffsets[0], __endMatchOffsets[0] - __beginMatchOffsets[0]); __lastMatchResult._matchBeginOffset = __beginMatchOffsets[0]; while(__numParentheses >= 0) { offs = __beginMatchOffsets[__numParentheses]; if(offs >= 0) __lastMatchResult._beginGroupOffset[__numParentheses] = offs - __lastMatchResult._matchBeginOffset; else __lastMatchResult._beginGroupOffset[__numParentheses] = OpCode._NULL_OFFSET; offs = __endMatchOffsets[__numParentheses]; if(offs >= 0) __lastMatchResult._endGroupOffset[__numParentheses] = offs - __lastMatchResult._matchBeginOffset; else __lastMatchResult._endGroupOffset[__numParentheses] = OpCode._NULL_OFFSET; --__numParentheses; } // Free up for garbage collection __originalInput = null; } // Expects to receive a valid regular expression program. No checking // is done to ensure validity. // __originalInput must be set before calling this method for // __lastMatchResult to be set correctly. private boolean __interpret(Perl5Pattern expression, char[] input, int beginOffset, int endOffset) { boolean success; int minLength = 0, dontTry = 0, offset; char ch, mustString[]; __initInterpreterGlobals(expression, input, beginOffset, endOffset); success = false; mustString = expression._mustString; _mainLoop: while(true) { if(mustString != null && ((expression._anchor & Perl5Pattern._OPT_ANCH) == 0 || (__multiline && expression._back >= 0))) { __currentOffset = __findFirst(__input, __currentOffset, endOffset, mustString); if(__currentOffset >= endOffset) { if((expression._options & Perl5Compiler.READ_ONLY_MASK) == 0) expression._mustUtility++; success = false; break _mainLoop; } else if(expression._back >= 0) { __currentOffset-=expression._back; if(__currentOffset < beginOffset) __currentOffset = beginOffset; minLength = expression._back + mustString.length; } else if(!expression._isExpensive && (expression._options & Perl5Compiler.READ_ONLY_MASK) == 0 && (--expression._mustUtility < 0)) { // Be careful! The preceding logical expression is constructed // so that mustUtility is only decremented if the expression is // compiled without READ_ONLY_MASK. mustString = expression._mustString = null; __currentOffset = beginOffset; } else { __currentOffset = beginOffset; minLength = mustString.length; } } if((expression._anchor & Perl5Pattern._OPT_ANCH) != 0) { if(__tryExpression(expression, beginOffset)) { success = true; break _mainLoop; } else if(__multiline || (expression._anchor & Perl5Pattern._OPT_IMPLICIT) != 0) { if(minLength > 0) dontTry = minLength - 1; endOffset-=dontTry; if(__currentOffset > beginOffset) --__currentOffset; while(__currentOffset < endOffset) { if(__input[__currentOffset++] == '\n') { if(__currentOffset < endOffset && __tryExpression(expression, __currentOffset)) { success = true; break _mainLoop; } } } } break _mainLoop; } if(expression._startString != null) { mustString = expression._startString; if((expression._anchor & Perl5Pattern._OPT_SKIP) != 0) { ch = mustString[0]; while(__currentOffset < endOffset) { if(ch == __input[__currentOffset]) { if(__tryExpression(expression, __currentOffset)){ success = true; break _mainLoop; } ++__currentOffset; while(__currentOffset < endOffset && __input[__currentOffset] == ch) ++__currentOffset; } ++__currentOffset; } } else { while((__currentOffset = __findFirst(__input, __currentOffset, endOffset, mustString)) < endOffset){ if(__tryExpression(expression, __currentOffset)) { success = true; break _mainLoop; } ++__currentOffset; } } break _mainLoop; } if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) { boolean doEvery, tmp; doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0); if(minLength > 0) dontTry = minLength - 1; endOffset -= dontTry; tmp = true; switch(__program[offset]) { case OpCode._ANYOF: offset = OpCode._getOperand(offset); while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(ch < 256 && (__program[offset + (ch >> 4)] & (1 << (ch & 0xf))) == 0) { if(tmp && __tryExpression(expression, __currentOffset)) { success = true; break _mainLoop; } else tmp = doEvery; } else tmp = true; ++__currentOffset; } break; case OpCode._BOUND: if(minLength > 0) { ++dontTry; --endOffset; } if(__currentOffset != beginOffset) { ch = __input[__currentOffset - 1]; tmp = OpCode._isWordCharacter(ch); } else tmp = OpCode._isWordCharacter(__previousChar); while(__currentOffset < endOffset) { ch = __input[__currentOffset]; if(tmp != OpCode._isWordCharacter(ch)){ tmp = !tmp; if(__tryExpression(expression, __currentOffset)) { success = true; break _mainLoop; } } ++__currentOffset; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -