📄 bm.java
字号:
* @param pattern String to search for. May be "" but not null.
*
* @return 0-based offset in text, just like String.indexOf.
* -1 means not found.
*/
public int indexOf( String pattern )
{
return indexOf(pattern, 0);
} // end indexOf
/**
* Search for given pattern in String or char array.
* Presume Pattern and Text have been previously set.
*
* @param index index at which to start search
*
* @return 0-based offset in text, just like String.indexOf.
* -1 means not found.
*/
protected final int indexOf(int index)
{
if ( text != null )
{
return indexOfViaText(index);
}
else
{
return indexOfViaTextArray(index);
}
} // end indexOf
/**
* Search for given pattern in String.
* Presume Pattern and Text have been previously set.
*
* @param index index at which to start search
*
* @return 0-based offset in text, just like String.indexOf.
* -1 means not found.
*/
private final int indexOfViaText(int index)
{
// Deal with cases that don't rate the full
// Boyer-Moore treatment.
if ( (lenText <= breakEvenLenText/2 || lenPat <= breakEvenLenPat ) )
{
// this way we are consistent with
// String.indexOf for "".indexOf("")
// which is -1 in JDK 1.1
// and 0 in JDK 1.2. See Bug Parade entry 4096273.
// "".indexOf("abc") is always -1
return text.indexOf(pattern, index);
} // end if
analysePattern();
// At this point we have the pattern, and have skip[] calculated
// We are commited to calculating the indexOf via Boyer-Moore.
// tforward works left to right through the text, skipping depending
// on what char it found in the text corresponding to the end of the pattern,
// not to the place of the mismatch.
char testChar = 0;
final int lastPatChar = patternArray[lenPat-1];
outer:
for ( int tforward=index+lenPat-1; tforward<lenText; tforward+=skip[testChar & 0xff ] )
{
// compare working right to left through both pattern and text
testChar = text.charAt(tforward);
if ( testChar != lastPatChar )
{
continue outer;
}
// step back through pattern
// step back through text
inner:
for ( int tback=tforward-1,pback=lenPat-2 ; pback>=0 ; tback--,pback-- )
{
if ( text.charAt(tback) != patternArray[pback] ) continue outer;
} // end inner for
// we stepped all the way back through the pattern comparing
// without finding a mismatch. We found it!
return tforward-lenPat+1;
} // end outer for
// stepped through entire text without finding it.
return -1;
} // end indexOf
/**
* Search for given pattern in charArray.
* presume Pattern and Text have been previously set.
*
* @param index index at which to start search
*
* @return 0-based offset in text, just like String.indexOf.
* -1 means not found.
*/
private final int indexOfViaTextArray(int index)
{
// Deal with cases that don't rate the full
// Boyer-Moore treatment.
// !! Actually this has a different break even point to cover the overhead
// of going back to String. We probably should adjust slightly.
if ( (lenText <= breakEvenLenText/2 || lenPat <= breakEvenLenPat ) )
{
// this way we are consistent with
// String.indexOf for "".indexOf("")
// which is -1 in JDK 1.1
// and 0 in JDK 1.2
// "".indexOf("abc") is always -1
return new String (textArray).indexOf(pattern, index);
} // end if
analysePattern();
// At this point we have the pattern, and have skip[] calculated
// We are commited to calculating the indexOf via Boyer-Moore.
// tforward works left to right through the text, skipping depending
// on what char it found in the text corresponding to the end of the pattern,
// not to the place of the mismatch.
char testChar = 0;
final int lastPatChar = patternArray[lenPat-1];
outer:
for ( int tforward=index+lenPat-1; tforward<lenText; tforward+=skip[testChar & 0xff ] )
{
// compare working right to left through both pattern and text
testChar = textArray[tforward];
if ( testChar != lastPatChar )
{
continue outer;
}
// step back through pattern
// step back through text
inner:
for ( int tback=tforward-1,pback=lenPat-2 ; pback>=0 ; tback--,pback-- )
{
if ( textArray[tback] != patternArray[pback] ) continue outer;
} // end inner for
// we stepped all the way back through the pattern comparing
// without finding a mismatch. We found it!
return tforward-lenPat+1;
} // end outer for
// stepped through entire text without finding it.
return -1;
} // end indexOf
/**
* Set pattern to use for subsequent indexOf searches.
* Note this is protected. Clients of this package
* set the pattern using indexOf.
*
* @param pattern String to search for. May be "" but not null..
*/
protected void setPattern (String pattern)
{
if ( pattern == null )
{
throw new NullPointerException();
}
this.pattern = pattern;
lenPat = pattern.length();
}
/**
* Set text to search in for subsequent indexOf searches.
*
* @param text String to search in. May be "" but not null.
*/
public void setText (String text)
{
if ( text == null )
{
throw new NullPointerException();
}
this.text = text;
lenText = text.length();
// we don't yet have this is efficient char[] form.
this.textArray = null;
}
/**
* Set text to search in for subsequent indexOf searches.
*
* @param text char array to search in. May be empty but not null.
* This is more efficient that providing the equivalent String.
*/
public void setText (char[] text)
{
if ( text == null )
{
throw new NullPointerException();
}
// we have efficient char[] form, but not the string form.
this.textArray = text;
lenText = textArray.length;
this.text = null;
}
/**
* test harness
*
* @param args command line arguments. Not used.
*/
public static void main( String[] args)
{
if ( DEBUGGING )
{
System.out.println(BM.indexOf("dogcatwombat","cat"));
System.out.println("dogcatwombat".indexOf("cat"));
System.out.println(BM.indexOf("crtcamccmcarogcatwombat","cat"));
System.out.println("crtcamccmcarogcatwombat".indexOf("cat"));
System.out.println(BM.indexOf("dogcatwombat",""));
System.out.println("dogcatwombat".indexOf(""));
System.out.println(BM.indexOf("",""));
System.out.println("".indexOf(""));
System.out.println(BM.indexOf("","abcde"));
System.out.println("".indexOf("abcde"));
System.out.println(BM.indexOf("dogcatwombat","cow"));
System.out.println("dogcatwombat".indexOf("cow"));
String s = "create table foo (created_date datetime default sysdate not null)";
System.out.println(s.indexOf("create", 10));
System.out.println(BM.indexOf(s, "create", 10));
try
{
// O P E N
// Any file of sample characters
File f = new File ("C:/temp/temp.txt");
int size = (int) f.length();
FileReader fr;
fr = new FileReader(f);
// R E A D
char[] ca = new char[size];
int charsRead = fr.read(ca);
s = new String(ca);
long start;
long stop;
int result = 0;
start = System.currentTimeMillis();
for ( int i=0; i<1000; i++ )
{
// Need to make different so optimiser will actually do
// the work repeatedly.
// search for strings like "efficiency9" that probably won't be there.
result = BM.indexOf(ca,"efficiency"+i%10);
}
System.out.println("Boyer.indexOf(char[]): " + result);
stop = System.currentTimeMillis();
System.out.println("Elapsed:"
+ (stop-start));
// benchmark Boyer.indexOf
start = System.currentTimeMillis();
System.out.println("Boyer.indexOf(String): " + result);
stop = System.currentTimeMillis();
System.out.println("Elapsed:"
+ (stop-start));
// Benchmark String.indexOf
start = System.currentTimeMillis();
for ( int i=0; i<1000; i++ )
{
result = s.indexOf("efficiency"+i%10);
}
System.out.println("String.indexOf: " + result);
stop = System.currentTimeMillis();
System.out.println("Elapsed:"
+ (stop-start));
// C L O S E
fr.close();
}
catch ( IOException e )
{
return;
}
} // end if debugging
} // end main
} // end class Boyer
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -