📄 awkmatcher.java
字号:
* an AwkPattern as the pattern parameter. * <p> * @param input The char[] to test for a match. * @param pattern The AwkPattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than an * AwkPattern is passed as the pattern parameter. */ public boolean contains(char[] input, Pattern pattern) { __awkPattern = (AwkPattern)pattern; // Begin anchor requires match occur at beginning of input if(__awkPattern._hasBeginAnchor && !__awkPattern._fastMap[input[0]]){ __lastMatchResult = null; return false; } __scratchBuffer._buffer = input; __scratchBuffer._bufferSize = input.length; __scratchBuffer._bufferOffset = __beginOffset = 0; __scratchBuffer._endOfStreamReached = true; __streamSearchBuffer = __scratchBuffer; __lastMatchedBufferOffset = 0; try { _search(); } catch(IOException e) { // do nothing } return (__lastMatchResult != null); } /** * Determines if a string contains a pattern. If the pattern is * matched by some substring of the input, a MatchResult instance * representing the <b> first </b> such match is made acessible via * {@link #getMatch()}. If you want to access * subsequent matches you should either use a PatternMatcherInput object * or use the offset information in the MatchResult to create a substring * representing the remaining input. Using the MatchResult offset * information is the recommended method of obtaining the parts of the * string preceeding the match and following the match. * <p> * The pattern must be an AwkPattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * an AwkPattern as the pattern parameter. * <p> * @param input The String to test for a match. * @param pattern The AwkPattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than an * AwkPattern is passed as the pattern parameter. */ public boolean contains(String input, Pattern pattern){ return contains(input.toCharArray(), pattern); } /** * Determines if the contents of a PatternMatcherInput, starting from the * current offset of the input contains a pattern. * If a pattern match is found, a MatchResult * instance representing the <b>first</b> such match is made acessible via * {@link #getMatch()}. The current offset of the * PatternMatcherInput is set to the offset corresponding to the end * of the match, so that a subsequent call to this method will continue * searching where the last call left off. You should remember that the * region between the begin and end offsets of the PatternMatcherInput are * considered the input to be searched, and that the current offset * of the PatternMatcherInput reflects where a search will start from. * Matches extending beyond the end offset of the PatternMatcherInput * will not be matched. In other words, a match must occur entirely * between the begin and end offsets of the input. See * {@link org.apache.oro.text.regex.PatternMatcherInput PatternMatcherInput} * for more details. * <p> * As a side effect, if a match is found, the PatternMatcherInput match * offset information is updated. See the PatternMatcherInput * {@link org.apache.oro.text.regex.PatternMatcherInput#setMatchOffsets * setMatchOffsets(int, int)} method for more details. * <p> * The pattern must be an AwkPattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * an AwkPattern as the pattern parameter. * <p> * This method is usually used in a loop as follows: * <blockquote><pre> * PatternMatcher matcher; * PatternCompiler compiler; * Pattern pattern; * PatternMatcherInput input; * MatchResult result; * * compiler = new AwkCompiler(); * matcher = new AwkMatcher(); * * try { * pattern = compiler.compile(somePatternString); * } catch(MalformedPatternException e) { * System.err.println("Bad pattern."); * System.err.println(e.getMessage()); * return; * } * * input = new PatternMatcherInput(someStringInput); * * while(matcher.contains(input, pattern)) { * result = matcher.getMatch(); * // Perform whatever processing on the result you want. * } * * </pre></blockquote> * <p> * @param input The PatternMatcherInput to test for a match. * @param pattern The Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than an * AwkPattern is passed as the pattern parameter. */ public boolean contains(PatternMatcherInput input, Pattern pattern) { __awkPattern = (AwkPattern)pattern; __scratchBuffer._buffer = input.getBuffer(); __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset(); __lastMatchedBufferOffset = input.getCurrentOffset(); // Begin anchor requires match occur at beginning of input // No need to adjust current offset if no match found. if(__awkPattern._hasBeginAnchor) { if(__beginOffset != __lastMatchedBufferOffset || !__awkPattern._fastMap[__scratchBuffer._buffer[__beginOffset]]) { __lastMatchResult = null; return false; } } __scratchBuffer._bufferSize = input.length(); __scratchBuffer._endOfStreamReached = true; __streamSearchBuffer = __scratchBuffer; try { _search(); } catch(IOException e) { // do nothing } input.setCurrentOffset(__lastMatchedBufferOffset); if(__lastMatchResult == null) return false; input.setMatchOffsets(__lastMatchResult.beginOffset(0), __lastMatchResult.endOffset(0)); return true; } /** * Determines if the contents of an AwkStreamInput, starting from the * current offset of the input contains a pattern. * If a pattern match is found, a MatchResult * instance representing the <b>first</b> such match is made acessible via * {@link #getMatch()}. The current offset of the * input stream is advanced to the end offset corresponding to the end * of the match. Consequently a subsequent call to this method will continue * searching where the last call left off. * See {@link AwkStreamInput} for more details. * <p> * Note, patterns matching the null string do NOT match at end of input * stream. This is different from the behavior you get from the other * contains() methods. * <p> * The pattern must be an AwkPattern instance, otherwise a * ClassCastException will be thrown. You are not required to, and * indeed should NOT try to (for performance reasons), catch a * ClassCastException because it will never be thrown as long as you use * an AwkPattern as the pattern parameter. * <p> * This method is usually used in a loop as follows: * <blockquote><pre> * PatternMatcher matcher; * PatternCompiler compiler; * Pattern pattern; * AwkStreamInput input; * MatchResult result; * * compiler = new AwkCompiler(); * matcher = new AwkMatcher(); * * try { * pattern = compiler.compile(somePatternString); * } catch(MalformedPatternException e) { * System.err.println("Bad pattern."); * System.err.println(e.getMessage()); * return; * } * * input = new AwkStreamInput( * new BufferedInputStream(new FileInputStream(someFileName))); * * while(matcher.contains(input, pattern)) { * result = matcher.getMatch(); * // Perform whatever processing on the result you want. * } * * </pre></blockquote> * <p> * @param input The PatternStreamInput to test for a match. * @param pattern The Pattern to be matched. * @return True if the input contains a pattern match, false otherwise. * @exception ClassCastException If a Pattern instance other than an * AwkPattern is passed as the pattern parameter. */ public boolean contains(AwkStreamInput input, Pattern pattern) throws IOException { __awkPattern = (AwkPattern)pattern; // Begin anchor requires match occur at beginning of input if(__awkPattern._hasBeginAnchor) { // Do read here instead of in _search() so we can test first char if(input._bufferOffset == 0) { if(input.read() && !__awkPattern._fastMap[input._buffer[0]]) { __lastMatchResult = null; return false; } } else { __lastMatchResult = null; return false; } } __lastMatchedBufferOffset = input._currentOffset; __streamSearchBuffer = input; __beginOffset = 0; _search(); input._currentOffset = __lastMatchedBufferOffset; if(__lastMatchResult != null) { // Adjust match begin offset to be relative to beginning of stream. __lastMatchResult._incrementMatchBeginOffset(input._bufferOffset); return true; } return false; } private int __streamMatchPrefix() throws IOException { int token, current = AwkPattern._START_STATE, lastState; int offset, initialOffset, maxOffset; int lastMatchedOffset = -1; int[] tstateArray; offset = initialOffset = __offsets[0]; maxOffset = __streamSearchBuffer._bufferSize + __beginOffset; test: while(offset < maxOffset) { token = __streamSearchBuffer._buffer[offset++]; if(current < __awkPattern._numStates) { lastState = current; tstateArray = __awkPattern._getStateArray(current); current = tstateArray[token]; if(current == 0){ __awkPattern._createNewState(lastState, token, tstateArray); current = tstateArray[token]; } if(current == AwkPattern._INVALID_STATE){ break test; } else if(__awkPattern._endStates.get(current)){ lastMatchedOffset = offset; } if(offset == maxOffset){ offset = __streamSearchBuffer._reallocate(initialOffset) + __beginOffset; maxOffset = __streamSearchBuffer._bufferSize + __beginOffset; // If we're at the end of the stream, don't reset values if(offset != maxOffset){ if(lastMatchedOffset != -1) lastMatchedOffset-=initialOffset; initialOffset = 0; } } } else break; } __offsets[0] = initialOffset; __offsets[1] = lastMatchedOffset - 1; if(lastMatchedOffset == -1 && __awkPattern._matchesNullString) return 0; // End anchor requires match occur at end of input if(__awkPattern._hasEndAnchor && (!__streamSearchBuffer._endOfStreamReached || lastMatchedOffset < __streamSearchBuffer._bufferSize + __beginOffset)) return -1; return (lastMatchedOffset - initialOffset); } void _search() throws IOException { int position, tokensMatched; __lastMatchResult = null; while(true){ if(__lastMatchedBufferOffset >= __streamSearchBuffer._bufferSize + __beginOffset) { if(__streamSearchBuffer._endOfStreamReached){ // Get rid of reference now that it should no longer be used. __streamSearchBuffer = null; return; } else { if(!__streamSearchBuffer.read()) return; __lastMatchedBufferOffset = 0; } } for(position = __lastMatchedBufferOffset; position < __streamSearchBuffer._bufferSize + __beginOffset; position = __offsets[0] + 1) { __offsets[0] = position; if(__awkPattern._fastMap[__streamSearchBuffer._buffer[position]] && (tokensMatched = __streamMatchPrefix()) > -1) { __lastMatchResult = new AwkMatchResult( new String(__streamSearchBuffer._buffer, __offsets[0], tokensMatched), __offsets[0]); __lastMatchedBufferOffset = (tokensMatched > 0 ? __offsets[1] + 1 : __offsets[0] + 1); return; } else if(__awkPattern._matchesNullString) { __lastMatchResult = new AwkMatchResult(new String(), position); __lastMatchedBufferOffset = position + 1; return; } } __lastMatchedBufferOffset = position; } } /** * Fetches the last match found by a call to a matches() or contains() * method. * <p> * @return A MatchResult instance containing the pattern match found * by the last call to any one of the matches() or contains() * methods. If no match was found by the last call, returns * null. */ public MatchResult getMatch() { return __lastMatchResult; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -