frenchstemmer.java

来自「一套java版本的搜索引擎源码」· Java 代码 · 共 710 行 · 第 1/2 页
JAVA
710 行
					if (from!=null && from.endsWith( prefix + search[i] ))					{						sb.delete( sb.length() - search[i].length(), sb.length());						found = true;						setStrings();						break;					}				}			}		}		return found;	}	/**	 * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel	 *	 * @param source java.lang.String - the primary source zone for search	 * @param search java.lang.String[] - the strings to search for suppression	 * @param vowel boolean - true if we need a vowel before the search string	 * @param from java.lang.String - the secondary source zone for search (where vowel could be)	 * @return boolean - true if modified	 */	private boolean deleteFromIfTestVowelBeforeIn( String source, String[] search, boolean vowel, String from ) {		boolean found = false;		if (source!=null && from!=null)		{			for (int i = 0; i < search.length; i++) {				if ( source.endsWith( search[i] ))				{					if ((search[i].length() + 1) <= from.length())					{						boolean test = isVowel(sb.charAt(sb.length()-(search[i].length()+1)));						if (test == vowel)						{							sb.delete( sb.length() - search[i].length(), sb.length());							modified = true;							found = true;							setStrings();							break;						}					}				}			}		}		return found;	}	/**	 * Delete a suffix searched in zone "source" if preceded by the prefix	 *	 * @param source java.lang.String - the primary source zone for search	 * @param search java.lang.String[] - the strings to search for suppression	 * @param prefix java.lang.String - the prefix to add to the search string to test	 * @param without boolean - true if it will be deleted even without prefix found	 */	private void deleteButSuffixFrom( String source, String[] search, String prefix, boolean without ) {		if (source!=null)		{			for (int i = 0; i < search.length; i++) {				if ( source.endsWith( prefix + search[i] ))				{					sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );					modified = true;					setStrings();					break;				}				else if ( without && source.endsWith( search[i] ))				{					sb.delete( sb.length() - search[i].length(), sb.length() );					modified = true;					setStrings();					break;				}			}		}	}	/**	 * Delete a suffix searched in zone "source" if preceded by prefix<br>	 * or replace it with the replace string if preceded by the prefix in the zone "from"<br>	 * or delete the suffix if specified	 *	 * @param source java.lang.String - the primary source zone for search	 * @param search java.lang.String[] - the strings to search for suppression	 * @param prefix java.lang.String - the prefix to add to the search string to test	 * @param without boolean - true if it will be deleted even without prefix found	 */	private void deleteButSuffixFromElseReplace( String source, String[] search, String prefix, boolean without, String from, String replace ) {		if (source!=null)		{			for (int i = 0; i < search.length; i++) {				if ( source.endsWith( prefix + search[i] ))				{					sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );					modified = true;					setStrings();					break;				}				else if ( from!=null && from.endsWith( prefix + search[i] ))				{					sb.replace( sb.length() - (prefix.length() + search[i].length()), sb.length(), replace );					modified = true;					setStrings();					break;				}				else if ( without && source.endsWith( search[i] ))				{					sb.delete( sb.length() - search[i].length(), sb.length() );					modified = true;					setStrings();					break;				}			}		}	}	/**	 * Replace a search string with another within the source zone	 *	 * @param source java.lang.String - the source zone for search	 * @param search java.lang.String[] - the strings to search for replacement	 * @param replace java.lang.String - the replacement string	 */	private boolean replaceFrom( String source, String[] search, String replace ) {		boolean found = false;		if (source!=null)		{			for (int i = 0; i < search.length; i++) {				if ( source.endsWith( search[i] ))				{					sb.replace( sb.length() - search[i].length(), sb.length(), replace );					modified = true;					found = true;					setStrings();					break;				}			}		}		return found;	}	/**	 * Delete a search string within the source zone	 *	 * @param source the source zone for search	 * @param suffix the strings to search for suppression	 */	private void deleteFrom(String source, String[] suffix ) {		if (source!=null)		{			for (int i = 0; i < suffix.length; i++) {				if (source.endsWith( suffix[i] ))				{					sb.delete( sb.length() - suffix[i].length(), sb.length());					modified = true;					setStrings();					break;				}			}		}	}	/**	 * Test if a char is a french vowel, including accentuated ones	 *	 * @param ch the char to test	 * @return boolean - true if the char is a vowel	 */	private boolean isVowel(char ch) {		switch (ch)		{			case 'a':			case 'e':			case 'i':			case 'o':			case 'u':			case 'y':			case 'â':			case 'à':			case 'ë':			case 'é':			case 'ê':			case 'è':			case 'ï':			case 'î':			case 'ô':			case 'ü':			case 'ù':			case 'û':				return true;			default:				return false;		}	}	/**	 * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>	 * "R is the region after the first non-vowel following a vowel	 * or is the null region at the end of the word if there is no such non-vowel"<br>	 * @param buffer java.lang.StringBuffer - the in buffer	 * @return java.lang.String - the resulting string	 */	private String retrieveR( StringBuffer buffer ) {		int len = buffer.length();		int pos = -1;		for (int c = 0; c < len; c++) {			if (isVowel( buffer.charAt( c )))			{				pos = c;				break;			}		}		if (pos > -1)		{			int consonne = -1;			for (int c = pos; c < len; c++) {				if (!isVowel(buffer.charAt( c )))				{					consonne = c;					break;				}			}			if (consonne > -1 && (consonne+1) < len)				return buffer.substring( consonne+1, len );			else				return null;		}		else			return null;	}	/**	 * Retrieve the "RV zone" from a buffer an return the corresponding string<br>	 * "If the word begins with two vowels, RV is the region after the third letter,	 * otherwise the region after the first vowel not at the beginning of the word,	 * or the end of the word if these positions cannot be found."<br>	 * @param buffer java.lang.StringBuffer - the in buffer	 * @return java.lang.String - the resulting string	 */	private String retrieveRV( StringBuffer buffer ) {		int len = buffer.length();		if ( buffer.length() > 3)		{			if ( isVowel(buffer.charAt( 0 )) && isVowel(buffer.charAt( 1 ))) {				return buffer.substring(3,len);			}			else			{				int pos = 0;				for (int c = 1; c < len; c++) {					if (isVowel( buffer.charAt( c )))					{						pos = c;						break;					}				}				if ( pos+1 < len )					return buffer.substring( pos+1, len );				else					return null;			}		}		else			return null;	}    /**	 * Turns u and i preceded AND followed by a vowel to UpperCase<br>	 * Turns y preceded OR followed by a vowel to UpperCase<br>	 * Turns u preceded by q to UpperCase<br>     *     * @param buffer java.util.StringBuffer - the buffer to treat     * @return java.util.StringBuffer - the treated buffer     */    private StringBuffer treatVowels( StringBuffer buffer ) {		for ( int c = 0; c < buffer.length(); c++ ) {			char ch = buffer.charAt( c );			if (c == 0) // first char			{				if (buffer.length()>1)				{					if (ch == 'y' && isVowel(buffer.charAt( c + 1 )))						buffer.setCharAt( c, 'Y' );				}			}			else if (c == buffer.length()-1) // last char			{				if (ch == 'u' && buffer.charAt( c - 1 ) == 'q')					buffer.setCharAt( c, 'U' );				if (ch == 'y' && isVowel(buffer.charAt( c - 1 )))					buffer.setCharAt( c, 'Y' );			}			else // other cases			{				if (ch == 'u')				{					if (buffer.charAt( c - 1) == 'q')						buffer.setCharAt( c, 'U' );					else if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))						buffer.setCharAt( c, 'U' );				}				if (ch == 'i')				{					if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))						buffer.setCharAt( c, 'I' );				}				if (ch == 'y')				{					if (isVowel(buffer.charAt( c - 1 )) || isVowel(buffer.charAt( c + 1 )))						buffer.setCharAt( c, 'Y' );				}			}		}		return buffer;    }    /**     * Checks a term if it can be processed correctly.     *     * @return boolean - true if, and only if, the given term consists in letters.     */    private boolean isStemmable( String term ) {		boolean upper = false;		int first = -1;		for ( int c = 0; c < term.length(); c++ ) {			// Discard terms that contain non-letter characters.			if ( !Character.isLetter( term.charAt( c ) ) ) {				return false;			}			// Discard terms that contain multiple uppercase letters.			if ( Character.isUpperCase( term.charAt( c ) ) ) {				if ( upper ) {					return false;				}			// First encountered uppercase letter, set flag and save			// position.				else {					first = c;					upper = true;				}			}		}		// Discard the term if it contains a single uppercase letter that		// is not starting the term.		if ( first > 0 ) {			return false;		}		return true;    }}
frenchstemmer.java - 源码说明

本页面展示了「一套java版本的搜索引擎源码」中的 frenchstemmer.java 源码文件，采用 Java 编程语言编写，共 710 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?