📄 frenchstemmer.java
字号:
if (from!=null && from.endsWith( prefix + search[i] )) { sb.delete( sb.length() - search[i].length(), sb.length()); found = true; setStrings(); break; } } } } return found; }

  /**
   * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel.
   * <p>
   * Only the first matching entry of <code>search</code> is considered for deletion; on
   * deletion the suffix is removed from the end of the working buffer, the
   * <code>modified</code> flag is set and <code>setStrings()</code> is called.
   *
   * @param source java.lang.String - the primary source zone for search (may be null)
   * @param search java.lang.String[] - the strings to search for suppression
   * @param vowel boolean - true if we need a vowel before the search string
   * @param from java.lang.String - the secondary source zone for search (where vowel could be)
   * @return boolean - true if modified
   */
  private boolean deleteFromIfTestVowelBeforeIn( String source, String[] search, boolean vowel, String from ) {
    boolean found = false;
    if (source != null && from != null) {
      for (int i = 0; i < search.length; i++) {
        if (source.endsWith( search[i] )) {
          // The tested letter (the one just before the suffix) must fit inside "from".
          if ((search[i].length() + 1) <= from.length()) {
            boolean test = isVowel( sb.charAt( sb.length() - (search[i].length() + 1) ) );
            if (test == vowel) {
              sb.delete( sb.length() - search[i].length(), sb.length() );
              modified = true;
              found = true;
              setStrings();
              break;
            }
          }
        }
      }
    }
    return found;
  }

  /**
   * Delete a suffix searched in zone "source" if preceded by the prefix.
   * <p>
   * When the prefix is found, prefix and suffix are both removed from the end of the
   * working buffer. When it is not found and <code>without</code> is true, the suffix
   * alone is removed. Processing stops after the first deletion.
   *
   * @param source java.lang.String - the primary source zone for search (may be null)
   * @param search java.lang.String[] - the strings to search for suppression
   * @param prefix java.lang.String - the prefix to add to the search string to test
   * @param without boolean - true if it will be deleted even without prefix found
   */
  private void deleteButSuffixFrom( String source, String[] search, String prefix, boolean without ) {
    if (source != null) {
      for (int i = 0; i < search.length; i++) {
        if (source.endsWith( prefix + search[i] )) {
          sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
          modified = true;
          setStrings();
          break;
        } else if (without && source.endsWith( search[i] )) {
          sb.delete( sb.length() - search[i].length(), sb.length() );
          modified = true;
          setStrings();
          break;
        }
      }
    }
  }

  /**
   * Delete a suffix searched in zone "source" if preceded by prefix<br>
   * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
   * or delete the suffix if specified.
   * <p>
   * The three cases are tested in that order for each entry of <code>search</code>;
   * processing stops after the first change to the working buffer.
   *
   * @param source java.lang.String - the primary source zone for search (may be null)
   * @param search java.lang.String[] - the strings to search for suppression
   * @param prefix java.lang.String - the prefix to add to the search string to test
   * @param without boolean - true if it will be deleted even without prefix found
   * @param from java.lang.String - the secondary source zone for search (may be null)
   * @param replace java.lang.String - the string substituted for prefix + suffix when
   *        the match is found in "from" only
   */
  private void deleteButSuffixFromElseReplace( String source, String[] search, String prefix, boolean without, String from, String replace ) {
    if (source != null) {
      for (int i = 0; i < search.length; i++) {
        if (source.endsWith( prefix + search[i] )) {
          sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
          modified = true;
          setStrings();
          break;
        } else if (from != null && from.endsWith( prefix + search[i] )) {
          sb.replace( sb.length() - (prefix.length() + search[i].length()), sb.length(), replace );
          modified = true;
          setStrings();
          break;
        } else if (without && source.endsWith( search[i] )) {
          sb.delete( sb.length() - search[i].length(), sb.length() );
          modified = true;
          setStrings();
          break;
        }
      }
    }
  }

  /**
   * Replace a search string with another within the source zone.
   * <p>
   * Only the first entry of <code>search</code> that ends the source zone is replaced.
   *
   * @param source java.lang.String - the source zone for search (may be null)
   * @param search java.lang.String[] - the strings to search for replacement
   * @param replace java.lang.String - the replacement string
   * @return boolean - true if a replacement was made
   */
  private boolean replaceFrom( String source, String[] search, String replace ) {
    boolean found = false;
    if (source != null) {
      for (int i = 0; i < search.length; i++) {
        if (source.endsWith( search[i] )) {
          sb.replace( sb.length() - search[i].length(), sb.length(), replace );
          modified = true;
          found = true;
          setStrings();
          break;
        }
      }
    }
    return found;
  }

  /**
   * Delete a search string within the source zone.
   * <p>
   * Only the first entry of <code>suffix</code> that ends the source zone is deleted.
   *
   * @param source the source zone for search (may be null)
   * @param suffix the strings to search for suppression
   */
  private void deleteFrom( String source, String[] suffix ) {
    if (source != null) {
      for (int i = 0; i < suffix.length; i++) {
        if (source.endsWith( suffix[i] )) {
          sb.delete( sb.length() - suffix[i].length(), sb.length() );
          modified = true;
          setStrings();
          break;
        }
      }
    }
  }

  /**
   * Test if a char is a french vowel, including accentuated ones.
   * <p>
   * Only lower-case letters are recognised; any upper-case letter is reported
   * as a non-vowel.
   *
   * @param ch the char to test
   * @return boolean - true if the char is a vowel
   */
  private boolean isVowel( char ch ) {
    switch (ch) {
      case 'a':
      case 'e':
      case 'i':
      case 'o':
      case 'u':
      case 'y':
      case 'â':
      case 'à':
      case 'ë':
      case 'é':
      case 'ê':
      case 'è':
      case 'ï':
      case 'î':
      case 'ô':
      case 'ü':
      case 'ù':
      case 'û':
        return true;
      default:
        return false;
    }
  }

  /**
   * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
   * "R is the region after the first non-vowel following a vowel
   * or is the null region at the end of the word if there is no such non-vowel"<br>
   *
   * @param buffer java.lang.StringBuffer - the in buffer
   * @return java.lang.String - the resulting string, or null when the region is empty
   *         or cannot be found
   */
  private String retrieveR( StringBuffer buffer ) {
    int len = buffer.length();

    // Locate the first vowel.
    int pos = -1;
    for (int c = 0; c < len; c++) {
      if (isVowel( buffer.charAt( c ) )) {
        pos = c;
        break;
      }
    }
    if (pos < 0) {
      return null;
    }

    // Locate the first non-vowel after that vowel.
    int consonne = -1;
    for (int c = pos; c < len; c++) {
      if (!isVowel( buffer.charAt( c ) )) {
        consonne = c;
        break;
      }
    }
    if (consonne > -1 && (consonne + 1) < len) {
      return buffer.substring( consonne + 1, len );
    }
    return null;
  }

  /**
   * Retrieve the "RV zone" from a buffer and return the corresponding string<br>
   * "If the word begins with two vowels, RV is the region after the third letter,
   * otherwise the region after the first vowel not at the beginning of the word,
   * or the end of the word if these positions cannot be found."<br>
   * Buffers of three characters or fewer have no RV zone.
   *
   * @param buffer java.lang.StringBuffer - the in buffer
   * @return java.lang.String - the resulting string, or null when the region is empty
   *         or cannot be found
   */
  private String retrieveRV( StringBuffer buffer ) {
    int len = buffer.length();
    if (len <= 3) {
      return null;
    }

    if (isVowel( buffer.charAt( 0 ) ) && isVowel( buffer.charAt( 1 ) )) {
      return buffer.substring( 3, len );
    }

    // Search for the first vowel that is not the first letter. The sentinel must be
    // -1 (not 0): with 0, a word without any such vowel was treated as if a vowel had
    // been found at index 0 and everything after the first letter was returned,
    // instead of the empty end-of-word region.
    int pos = -1;
    for (int c = 1; c < len; c++) {
      if (isVowel( buffer.charAt( c ) )) {
        pos = c;
        break;
      }
    }
    if (pos > -1 && pos + 1 < len) {
      return buffer.substring( pos + 1, len );
    }
    return null;
  }

  /**
   * Turns u and i preceded AND followed by a vowel to UpperCase<br>
   * Turns y preceded OR followed by a vowel to UpperCase<br>
   * Turns u preceded by q to UpperCase<br>
   * The buffer is modified in place, left to right, so a letter already turned to
   * upper case no longer counts as a vowel for its right-hand neighbour.
   *
   * @param buffer java.lang.StringBuffer - the buffer to treat
   * @return java.lang.StringBuffer - the treated buffer (the same instance)
   */
  private StringBuffer treatVowels( StringBuffer buffer ) {
    for (int c = 0; c < buffer.length(); c++) {
      char ch = buffer.charAt( c );

      if (c == 0) {
        // first char: only a following letter exists
        if (buffer.length() > 1) {
          if (ch == 'y' && isVowel( buffer.charAt( c + 1 ) )) {
            buffer.setCharAt( c, 'Y' );
          }
        }
      } else if (c == buffer.length() - 1) {
        // last char: only a preceding letter exists
        if (ch == 'u' && buffer.charAt( c - 1 ) == 'q') {
          buffer.setCharAt( c, 'U' );
        }
        if (ch == 'y' && isVowel( buffer.charAt( c - 1 ) )) {
          buffer.setCharAt( c, 'Y' );
        }
      } else {
        // other cases: both neighbours exist
        if (ch == 'u') {
          if (buffer.charAt( c - 1 ) == 'q') {
            buffer.setCharAt( c, 'U' );
          } else if (isVowel( buffer.charAt( c - 1 ) ) && isVowel( buffer.charAt( c + 1 ) )) {
            buffer.setCharAt( c, 'U' );
          }
        }
        if (ch == 'i') {
          if (isVowel( buffer.charAt( c - 1 ) ) && isVowel( buffer.charAt( c + 1 ) )) {
            buffer.setCharAt( c, 'I' );
          }
        }
        if (ch == 'y') {
          if (isVowel( buffer.charAt( c - 1 ) ) || isVowel( buffer.charAt( c + 1 ) )) {
            buffer.setCharAt( c, 'Y' );
          }
        }
      }
    }

    return buffer;
  }

  /**
   * Checks a term if it can be processed correctly.
   * <p>
   * A term is rejected when it contains a non-letter character, more than one
   * upper-case letter, or a single upper-case letter anywhere but at the start.
   * An empty term is accepted.
   *
   * @param term the term to check
   * @return boolean - true if, and only if, the given term consists in letters.
   */
  private boolean isStemmable( String term ) {
    boolean upper = false;
    int first = -1;
    for (int c = 0; c < term.length(); c++) {
      // Discard terms that contain non-letter characters.
      if (!Character.isLetter( term.charAt( c ) )) {
        return false;
      }
      // Discard terms that contain multiple uppercase letters.
      if (Character.isUpperCase( term.charAt( c ) )) {
        if (upper) {
          return false;
        } else {
          // First encountered uppercase letter, set flag and save position.
          first = c;
          upper = true;
        }
      }
    }
    // Discard the term if it contains a single uppercase letter that
    // is not starting the term.
    if (first > 0) {
      return false;
    }
    return true;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -