boyermooresearchmatcher.java

来自「用java 编写的源码开放的文本编辑器。有很多有用的特性」· Java 代码 · 共 349 行
JAVA
349 行
/* * BoyerMooreSearchMatcher.java - Literal pattern String matcher utilizing the *         Boyer-Moore algorithm * :tabSize=8:indentSize=8:noTabs=false: * :folding=explicit:collapseFolds=1: * * Copyright (C) 1999, 2000 mike dillon * Portions copyright (C) 2001 Tom Locke * Portions copyright (C) 2001, 2002 Slava Pestov * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. */package org.gjt.sp.jedit.search;//{{{ Importsimport bsh.BshMethod;import bsh.NameSpace;import gnu.regexp.CharIndexed;import org.gjt.sp.jedit.BeanShell;//}}}/** * Implements literal search using the Boyer-Moore algorithm. */public class BoyerMooreSearchMatcher implements SearchMatcher{	//{{{ BoyerMooreSearchMatcher constructor	/**	 * Creates a new string literal matcher.	 */	public BoyerMooreSearchMatcher(String pattern, String replace,		boolean ignoreCase, boolean beanshell, BshMethod replaceMethod)	{		if (ignoreCase)			this.pattern = pattern.toUpperCase().toCharArray();		else			this.pattern = pattern.toCharArray();		this.replace = replace;		this.ignoreCase = ignoreCase;		if(beanshell && replaceMethod != null && replace.length() != 0)		{			this.beanshell = true;			this.replaceMethod = replaceMethod;			replaceNS = new NameSpace(BeanShell.getNameSpace(),				"search and replace");		}		pattern_end = this.pattern.length - 1;		returnValue = new int[2];	} //}}}	//{{{ nextMatch() method	/**	 * Returns the offset of the first match of the specified text	 * within this matcher.	 * @param text The text to search in	 * @param start True if the start of the segment is the beginning of the	 * buffer	 * @param end True if the end of the segment is the end of the buffer	 * @param firstTime If false and the search string matched at the start	 * offset with length zero, automatically find next match	 * @param reverse If true, searching will be performed in a backward	 * direction.	 * @return an array where the first element is the start offset	 * of the match, and the second element is the end offset of	 * the match	 * @since jEdit 4.1pre7	 */	public int[] nextMatch(CharIndexed text, boolean start, boolean end,		boolean firstTime, boolean reverse)	{		int pos = match(text,reverse);		if (pos == -1)		{			return null;		}		else		{			returnValue[0] = pos;			returnValue[1] = pos + pattern.length;			return returnValue;		}	} //}}}	//{{{ substitute() method	/**	 * Returns the specified text, with any substitution specified	 * within this matcher performed.	 * @param text The text	 */	public String substitute(String text) throws Exception	{		if(beanshell)		{			replaceNS.setVariable("_0",text);			Object obj = BeanShell.runCachedBlock(replaceMethod,				null,replaceNS);			if(obj == null)				return "";			else				return obj.toString();		}		else			return replace;	} //}}}	//{{{ match() method	/*	 *  a good introduction to the Boyer-Moore fast string matching	 *  algorithm may be found on Moore's website at:	 *	 *   http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/	 *	 */	public int match(CharIndexed text, boolean reverse)	{		//{{{		// lazily create skip and suffix arrays for either the		// search pattern, or the reversed search pattern		int[] skip, suffix;		if(reverse)		{			if(back_skip == null)			{				back_skip = generateSkipArray(true);				back_suffix = generateSuffixArray(true);			}			skip = back_skip;			suffix = back_suffix;		}		else		{			if(fwd_skip == null)			{				fwd_skip = generateSkipArray(false);				fwd_suffix = generateSuffixArray(false);			}			skip = fwd_skip;			suffix = fwd_suffix;		} //}}}		// position variable for pattern test position		int pos;		// position variable for pattern start		int anchor = 0;		// last possible start position of a match with this pattern;		// this is negative if the pattern is longer than the text		// causing the search loop below to immediately fail		//int last_anchor = reverseSearch		//	? offset + pattern.length - 1		//	: length - pattern.length;		char ch = 0;		int bad_char;		int good_suffix;		// the search works by starting the anchor (first character		// of the pattern) at the initial offset. as long as the		// anchor is far enough from the enough of the text for the		// pattern to match, and until the pattern matches, we		// compare the pattern to the text from the last character		// to the first character in reverse order. where a character		// in the pattern mismatches, we use the two heuristics		// based on the mismatch character and its position in the		// pattern to determine the furthest we can move the anchor		// without missing any potential pattern matches.SEARCH:		while (text.isValid())		{			for (pos = pattern_end; pos >= 0; --pos)			{				ch = text.charAt(pos);				if(ignoreCase)					ch = Character.toUpperCase(ch);				// pattern test				if ((reverse ? ch != pattern[pattern_end - pos]					: ch != pattern[pos]))				{					// character mismatch, determine how many characters to skip					// heuristic #1					bad_char = pos - skip[getSkipIndex(ch)];					// heuristic #2					good_suffix = suffix[pos];					// skip the greater of the two distances provided by the					// heuristics					int skip_index = (bad_char > good_suffix) ? bad_char : good_suffix;					anchor += skip_index;					text.move(skip_index);					// go back to the while loop					continue SEARCH;				}			}			// MATCH: return the position of its first character			return anchor;		}		// MISMATCH: return -1 as defined by API		return -1;	} //}}}	//{{{ Private members	private char[] pattern;	private int pattern_end;	private String replace;	private boolean ignoreCase;	private boolean beanshell;	private BshMethod replaceMethod;	private NameSpace replaceNS;	// Boyer-Moore member fields	private int[] fwd_skip;	private int[] fwd_suffix;	private int[] back_skip;	private int[] back_suffix;	private int[] returnValue;	//}}}	// Boyer-Moore helper methods	//{{{ generateSkipArray() method	/*	 *  the 'skip' array is used to determine for each index in the	 *  hashed alphabet how many characters can be skipped if	 *  a mismatch occurs on a characater hashing to that index.	 */	private int[] generateSkipArray(boolean reverse)	{		// initialize the skip array to all zeros		int[] skip = new int[256];		// leave the table cleanly-initialized for an empty pattern		if (pattern.length == 0)			return skip;		int pos = 0;		do		{			skip[getSkipIndex(pattern[reverse ? pattern_end - pos : pos])] = pos;		}		while (++pos < pattern.length);		return skip;	} //}}}	//{{{ getSkipIndex() method	/*	 *  to avoid our skip table having a length of 2 ^ 16, we hash each	 *  character of the input into a character in the alphabet [\x00-\xFF]	 *  using the lower 8 bits of the character's value (resulting in	 *  a more reasonable skip table of length 2 ^ 8).	 *	 *  the result of this is that more than one character can hash to the	 *  same index, but since the skip table encodes the position of	 *  occurence of the character furthest into the string with a particular	 *  index (whether or not it is the only character with that index), an	 *  index collision only means that that this heuristic will give a	 *  sub-optimal skip (i.e. a complete skip table could use the differences	 *  between colliding characters to maximal effect, at the expense of	 *  building a table that is over 2 orders of magnitude larger and very	 *  sparse).	 */	private static final int getSkipIndex(char ch)	{		return ((int) ch) & 0x000000FF;	} //}}}	//{{{ generateSuffixArray() method	/*	 *  XXX: hairy code that is basically just a functional(?) port of some	 *  other code i barely understood	 */	private int[] generateSuffixArray(boolean reverse)	{		int m = pattern.length;		int j = m + 1;		int[] suffix = new int[j];		int[] tmp = new int[j];		tmp[m] = j;		for (int i = m; i > 0; --i)		{			while (j <= m && pattern[reverse ? pattern_end - i + 1 : i - 1]				!= pattern[reverse ? pattern_end - j + 1 : j - 1])			{				if (suffix[j] == 0)				{					suffix[j] = j - i;				}				j = tmp[j];			}			tmp[i - 1] = --j;		}		int k = tmp[0];		for (j = 0; j <= m; j++)		{			// the code above builds a 1-indexed suffix array,			// but we shift it to be 0-indexed, ignoring the			// original 0-th element			if (j > 0)			{				suffix[j - 1] = (suffix[j] == 0) ? k : suffix[j];			}			if (j == k)			{				k = tmp[k];			}		}		return suffix;	} //}}}	//}}}}
boyermooresearchmatcher.java - 源码说明

本页面展示了「用java 编写的源码开放的文本编辑器。有很多有用的特性」中的 boyermooresearchmatcher.java 源码文件，采用 Java 编程语言编写，共 349 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?