📄 like.java
字号:
/* Derby - Class org.apache.derby.iapi.types.Like Copyright 1999, 2004 The Apache Software Foundation or its licensors, as applicable. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */package org.apache.derby.iapi.types;// RESOLVE: MOVE THIS CLASS TO PROTOCOL (See LikeOperatorNode)import org.apache.derby.iapi.services.sanity.SanityManager;import org.apache.derby.iapi.error.StandardException;import org.apache.derby.iapi.reference.SQLState;import java.text.CollationElementIterator;import java.text.Collator;import java.text.RuleBasedCollator;import java.util.Locale;/** Like matching algorithm. Not too speedy for %s. SQL92 says the escape character can only and must be followed by itself, %, or _. So if you choose % or _ as the escape character, you can no longer do that sort of matching. Not the most recent Like -- missing the unit tests @author ames */public class Like { private static final char anyChar = '_'; private static final char anyString = '%'; private static final String SUPER_STRING = "\uffff"; private Like() { // do not instantiate } /** @param val value to compare. if null, result is null. @param valLength length of val @param pat pattern to compare. if null, result is null. @param patLength length of pat @param escape escape character. Must be 1 char long. if null, no escape character is used. @param escapeLength length of escape @return null if val or pat null, otherwise true if match and false if not. @exception StandardException thrown if data invalid */ public static Boolean like ( char[] val, int valLength, char[] pat, int patLength, char[] escape, int escapeLength ) throws StandardException { return like(val, 0, valLength, pat, 0, patLength, escape, escapeLength); } /** For national chars. @param val value to compare. if null, result is null. @param valLength length of val @param pat pattern to compare. if null, result is null. @param patLength length of pat @param escape escape character. Must be 1 char long. if null, no escape character is used. @param escapeLength length of escape @param collator The collator to use. @return null if val or pat null, otherwise true if match and false if not. @exception StandardException thrown if data invalid */ public static Boolean like ( int[] val, int valLength, int[] pat, int patLength, int[] escape, int escapeLength, RuleBasedCollator collator ) throws StandardException { return like(val, 0, valLength, pat, 0, patLength, escape, escapeLength, collator); } /* non-national chars */ private static Boolean like ( char[] val, int vLoc, // start at val[vLoc] int vEnd, // end at val[vEnd] char[] pat, int pLoc, // start at pat[pLoc] int pEnd, // end at pat[pEnd] char[] escape, int escapeLength ) throws StandardException { char escChar = ' '; boolean haveEsc = true; if (val == null) return null; if (pat == null) return null; if (escape == null) { haveEsc = false; } else { escChar = escape[0]; } Boolean result; while (true) { if ((result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd)) != null) { return result; } // go until we get a special char in the pattern or hit EOS while (pat[pLoc] != anyChar && pat[pLoc] != anyString && ((! haveEsc) || pat[pLoc] != escChar)) { if (val[vLoc] == pat[pLoc]) { vLoc++; pLoc++; result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd); if (result != null) return result; } else { return Boolean.FALSE; } } // deal with escChar first, as it can be escaping a special char // and can be a special char itself. if (haveEsc && pat[pLoc] == escChar) { pLoc++; if (pLoc == pEnd) { throw StandardException.newException(SQLState.LANG_INVALID_ESCAPE_SEQUENCE); } if (pat[pLoc] != escChar && pat[pLoc] != anyChar && pat[pLoc] != anyString) { throw StandardException.newException(SQLState.LANG_INVALID_ESCAPE_SEQUENCE); } // regardless of the char in pat, it must match exactly: if (val[vLoc] == pat[pLoc]) { vLoc++; pLoc++; result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd); if (result != null) return result; } else return Boolean.FALSE; } else if (pat[pLoc] == anyChar) { // regardless of the char, it matches vLoc++; pLoc++; result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd); if (result != null) return result; } else if (pat[pLoc] == anyString) { // catch the simple cases -- end of the pattern or of the string if (pLoc+1 == pEnd) return Boolean.TRUE; // would return true, but caught in checkLengths above if (SanityManager.DEBUG) SanityManager.ASSERT(vLoc!=vEnd, "Should have been found already"); //if (vLoc == vEnd) // caught in checkLengths //return Boolean.TRUE; // check if remainder of pattern is anyString's // if escChar == anyString, we couldn't be here boolean anys = true; for (int i=pLoc+1;i<pEnd;i++) if (pat[i]!=anyString) { anys=false; break; } if (anys) return Boolean.TRUE; // pattern can match 0 or more chars in value. // to test that, we take the remainder of pattern and // apply it to ever-shorter remainders of value until // we hit a match. // the loop never continues from this point -- we will // always generate an answer here. // REMIND: there are smarter ways to pick the remainders // and do this matching. // num chars left in value includes current char int vRem = vEnd - vLoc; int n=0; // num chars left in pattern excludes the anychar int minLen = getMinLen(pat, pLoc+1, pEnd, haveEsc, escChar); for (int i=vRem; i>=minLen; i--) { Boolean restResult = Like.like(val,vLoc+n,vLoc+n+i,pat,pLoc+1,pEnd,escape,escapeLength); if (SanityManager.DEBUG) { if (restResult == null) { String vStr = new String(val,vLoc+n,i); String pStr = new String(pat,pLoc+1,pEnd-(pLoc+1)); SanityManager.THROWASSERT("null result on like(value = "+vStr+", pat = "+pStr+")"); } } if (restResult.booleanValue()) return restResult; n++; } // none of the possibilities worked return Boolean.FALSE; } } } /* national chars */ private static Boolean like ( int[] val, int vLoc, // start at val[vLoc] int vEnd, // end at val[vEnd] int[] pat, int pLoc, // start at pat[pLoc] int pEnd, // end at pat[pEnd] int[] escape, int escapeLength, RuleBasedCollator collator ) throws StandardException { int[] escCharInts = null; boolean haveEsc = true; int[] anyCharInts = new int[1]; // assume only 1 int int[] anyStringInts = new int[1]; // assume only 1 int if (val == null) return null; if (pat == null) return null; if (escape == null) { haveEsc = false; } else { escCharInts = escape; } Boolean result; // get the collation integer representing "_" CollationElementIterator cei = collator.getCollationElementIterator("_"); anyCharInts[0] = cei.next(); { int nextInt; // There may be multiple ints representing this character while ((nextInt = cei.next()) != CollationElementIterator.NULLORDER) { int[] temp = new int[anyCharInts.length + 1]; for (int index = 0; index < anyCharInts.length; index++) { temp[index] = anyCharInts[index]; } temp[anyCharInts.length] = nextInt; anyCharInts = temp; } } // get the collation integer representing "%" cei = collator.getCollationElementIterator("%"); anyStringInts[0] = cei.next(); { int nextInt; // There may be multiple ints representing this character while ((nextInt = cei.next()) != CollationElementIterator.NULLORDER) { int[] temp = new int[anyStringInts.length + 1]; for (int index = 0; index < anyStringInts.length; index++) { temp[index] = anyStringInts[index]; } temp[anyStringInts.length] = nextInt; anyStringInts = temp; } } while (true) { // returns null if more work to do, otherwise match Boolean result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd, anyStringInts); if (result != null) return result; // go until we get a special char in the pattern or hit EOS while ( (! matchSpecial(pat, pLoc, pEnd, anyCharInts)) && (! matchSpecial(pat, pLoc, pEnd, anyStringInts)) && ((! haveEsc) || (! matchSpecial(pat, pLoc, pEnd, escCharInts)))) { if (val[vLoc] == pat[pLoc]) { vLoc++; pLoc++; result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd, anyStringInts); if (result != null) { return result; } } else { return Boolean.FALSE; } } // deal with escCharInt first, as it can be escaping a special char // and can be a special char itself. if (haveEsc && matchSpecial(pat, pLoc, pEnd, escCharInts)) { pLoc += escCharInts.length; if (pLoc == pEnd) { throw StandardException.newException( SQLState.LANG_INVALID_ESCAPE_SEQUENCE); } int[] specialInts = null; if (matchSpecial(pat, pLoc, pEnd, escCharInts)) { specialInts = escCharInts; } if (matchSpecial(pat, pLoc, pEnd, anyCharInts)) { specialInts = anyCharInts; } if (matchSpecial(pat, pLoc, pEnd, anyStringInts)) { specialInts = anyStringInts; } if (specialInts == null) { throw StandardException.newException(SQLState.LANG_INVALID_ESCAPE_SEQUENCE); } // regardless of the char in pat, it must match exactly: for (int index = 0; index < specialInts.length; index++) { if (val[vLoc + index] != pat[pLoc + index]) { return Boolean.FALSE; } } vLoc += specialInts.length; pLoc += specialInts.length; // returns null if more work to do, otherwise match Boolean result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd, anyStringInts); if (result != null) return result; } else if (matchSpecial(pat, pLoc, pEnd, anyCharInts)) { // regardless of the char, it matches vLoc += anyCharInts.length; pLoc += anyCharInts.length; result = checkLengths(vLoc, vEnd, pLoc, pat, pEnd, anyStringInts); if (result != null) return result; } else if (matchSpecial(pat, pLoc, pEnd, anyStringInts)) { // catch the simple cases -- end of the pattern or of the string if (pLoc+1 == pEnd) return Boolean.TRUE; // would return true, but caught in checkLengths above if (SanityManager.DEBUG) SanityManager.ASSERT(vLoc!=vEnd, "Should have been found already"); if (vLoc == vEnd) return Boolean.TRUE; // check if remainder of pattern is anyString's // if escChar == anyString, we couldn't be here // If there is an escape in the pattern we break boolean allPercentChars = true; for (int i=pLoc+1;i<pEnd;i++) { if (! matchSpecial(pat, i, pEnd, anyStringInts)) { allPercentChars=false; break; } } if (allPercentChars) return Boolean.TRUE; // pattern can match 0 or more chars in value. // to test that, we take the remainder of pattern and // apply it to ever-shorter remainders of value until // we hit a match. // the loop never continues from this point -- we will // always generate an answer here. // REMIND: there are smarter ways to pick the remainders // and do this matching. // num chars left in value includes current char int vRem = vEnd - vLoc; int n=0; // num chars left in pattern excludes the anyString int minLen = getMinLen(pat, pLoc+1, pEnd, haveEsc, escCharInts, anyStringInts); for (int i=vRem; i>=minLen; i--) { Boolean restResult = Like.like(val,vLoc+n,vLoc+n+i,pat,pLoc+1,pEnd,escape,escapeLength, collator); if (SanityManager.DEBUG) { if (restResult == null) { SanityManager.THROWASSERT("null result on like(vLoc+n = "+(vLoc+n)+", i = "+i+ ", pLoc+1 = " + (pLoc+1) + ", pEnd-(pLoc+1) = " + (pEnd-(pLoc+1)) + ")"); } } if (restResult.booleanValue()) return restResult; n++; } // none of the possibilities worked return Boolean.FALSE; } } } /** Calculate the shortest length string that could match this pattern for non-national chars */ static int getMinLen(char[] pattern, int pStart, int pEnd, boolean haveEsc, char escChar) { int m=0; for (int l = pStart; l<pEnd; ) { if (haveEsc && pattern[l] == escChar) { // need one char l+=2; m++; } else if (pattern[l] == anyString) { l++; // anyString, nothing needed } else { // anyChar or other chars, need one char l++; m++; } } return m; } /** Calculate the shortest length string that could match this pattern for national chars */ static int getMinLen(int[] pattern, int pStart, int pEnd, boolean haveEsc, int[] escCharInts, int[] anyStringInts) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -