📄 lovinsstemmer.java
字号:
break; case 'G': if ((length - el > 2) && (word.charAt(length - el - 1) == 'f')) { return word.substring(0, length - el); } break; case 'H': if ((word.charAt(length - el - 1) == 't') || ((word.charAt(length - el - 1) == 'l') && (word.charAt(length - el - 2) == 'l'))) { return word.substring(0, length - el); } break; case 'I': if ((word.charAt(length - el - 1) != 'o') && (word.charAt(length - el - 1) != 'e')) { return word.substring(0, length - el); } break; case 'J': if ((word.charAt(length - el - 1) != 'a') && (word.charAt(length - el - 1) != 'e')) { return word.substring(0, length - el); } break; case 'K': if ((length - el > 2) && ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'i') || ((word.charAt(length - el - 1) == 'e') && (word.charAt(length - el - 3) == 'u')))) { return word.substring(0, length - el); } break; case 'L': if ((word.charAt(length - el - 1) != 'u') && (word.charAt(length - el - 1) != 'x') && ((word.charAt(length - el - 1) != 's') || (word.charAt(length - el - 2) == 'o'))) { return word.substring(0, length - el); } break; case 'M': if ((word.charAt(length - el - 1) != 'a') && (word.charAt(length - el - 1) != 'c') && (word.charAt(length - el - 1) != 'e') && (word.charAt(length - el - 1) != 'm')) { return word.substring(0, length - el); } break; case 'N': if ((length - el > 3) || ((length - el == 3) && ((word.charAt(length - el - 3) != 's')))) { return word.substring(0, length - el); } break; case 'O': if ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'i')) { return word.substring(0, length - el); } break; case 'P': if (word.charAt(length - el - 1) != 'c') { return word.substring(0, length - el); } break; case 'Q': if ((length - el > 2) && (word.charAt(length - el - 1) != 'l') && (word.charAt(length - el - 1) != 'n')) { return word.substring(0, length - el); } break; case 'R': if ((word.charAt(length - el - 1) == 'n') || (word.charAt(length - el - 1) == 'r')) { return word.substring(0, length - el); } break; case 'S': if (((word.charAt(length - el - 1) == 'r') && (word.charAt(length - el - 2) == 'd')) || ((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) != 't'))) { return word.substring(0, length - el); } break; case 'T': if ((word.charAt(length - el - 1) == 's') || ((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) != 'o'))) { return word.substring(0, length - el); } break; case 'U': if ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'm') || (word.charAt(length - el - 1) == 'n') || (word.charAt(length - el - 1) == 'r')) { return word.substring(0, length - el); } break; case 'V': if (word.charAt(length - el - 1) == 'c') { return word.substring(0, length - el); } break; case 'W': if ((word.charAt(length - el - 1) != 's') && (word.charAt(length - el - 1) != 'u')) { return word.substring(0, length - el); } break; case 'X': if ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'i') || ((length - el > 2) && (word.charAt(length - el - 1) == 'e') && (word.charAt(length - el - 3) == 'u'))) { return word.substring(0, length - el); } break; case 'Y': if ((word.charAt(length - el - 1) == 'n') && (word.charAt(length - el - 2) == 'i')) { return word.substring(0, length - el); } break; case 'Z': if (word.charAt(length - el - 1) != 'f') { return word.substring(0, length - el); } break; case 'a': if ((word.charAt(length - el - 1) == 'd') || (word.charAt(length - el - 1) == 'f') || (((word.charAt(length - el - 1) == 'h') && (word.charAt(length - el - 2) == 'p'))) || (((word.charAt(length - el - 1) == 'h') && (word.charAt(length - el - 2) == 't'))) || (word.charAt(length - el - 1) == 'l') || (((word.charAt(length - el - 1) == 'r') && (word.charAt(length - el - 2) == 'e'))) || (((word.charAt(length - el - 1) == 'r') && (word.charAt(length - el - 2) == 'o'))) || (((word.charAt(length - el - 1) == 's') && (word.charAt(length - el - 2) == 'e'))) || (word.charAt(length - el - 1) == 't')) { return word.substring(0, length - el); } break; case 'b': if (m_CompMode) { if (((length - el == 3 ) && (!((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) == 'e') && (word.charAt(length - el - 3) == 'm')))) || ((length - el > 3) && (!((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) == 's') && (word.charAt(length - el - 3) == 'y') && (word.charAt(length - el - 4) == 'r'))))) { return word.substring(0, length - el); } } else { if ((length - el > 2) && (!((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) == 'e') && (word.charAt(length - el - 3) == 'm'))) && ((length - el < 4) || (!((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) == 's') && (word.charAt(length - el - 3) == 'y') && (word.charAt(length - el - 4) == 'r'))))) { return word.substring(0, length - el); } } break; case 'c': if (word.charAt(length - el - 1) == 'l') { return word.substring(0, length - el); } break; default: throw new IllegalArgumentException("Fatal error."); } } } el--; } return word; } /** * Recodes ending of given word. * * @param word the word to work on * @return the processed word */ private String recodeEnding(String word) { int lastPos = word.length() - 1; // Rule 1 if (word.endsWith("bb") || word.endsWith("dd") || word.endsWith("gg") || word.endsWith("ll") || word.endsWith("mm") || word.endsWith("nn") || word.endsWith("pp") || word.endsWith("rr") || word.endsWith("ss") || word.endsWith("tt")) { word = word.substring(0, lastPos); lastPos--; } // Rule 2 if (word.endsWith("iev")) { word = word.substring(0, lastPos - 2).concat("ief"); } // Rule 3 if (word.endsWith("uct")) { word = word.substring(0, lastPos - 2).concat("uc"); lastPos--; } // Rule 4 if (word.endsWith("umpt")) { word = word.substring(0, lastPos - 3).concat("um"); lastPos -= 2; } // Rule 5 if (word.endsWith("rpt")) { word = word.substring(0, lastPos - 2).concat("rb"); lastPos--; } // Rule 6 if (word.endsWith("urs")) { word = word.substring(0, lastPos - 2).concat("ur"); lastPos--; } // Rule 7 if (word.endsWith("istr")) { word = word.substring(0, lastPos - 3).concat("ister"); lastPos++; } // Rule 7a if (word.endsWith("metr")) { word = word.substring(0, lastPos - 3).concat("meter"); lastPos++; } // Rule 8 if (word.endsWith("olv")) { word = word.substring(0, lastPos - 2).concat("olut"); lastPos++; } // Rule 9 if (word.endsWith("ul")) { if ((lastPos - 2 < 0) || ((word.charAt(lastPos - 2) != 'a') && (word.charAt(lastPos - 2) != 'i') && (word.charAt(lastPos - 2) != 'o'))) { word = word.substring(0, lastPos - 1).concat("l"); lastPos--; } } // Rule 10 if (word.endsWith("bex")) { word = word.substring(0, lastPos - 2).concat("bic"); } // Rule 11 if (word.endsWith("dex")) { word = word.substring(0, lastPos - 2).concat("dic"); } // Rule 12 if (word.endsWith("pex")) { word = word.substring(0, lastPos - 2).concat("pic"); } // Rule 13 if (word.endsWith("tex")) { word = word.substring(0, lastPos - 2).concat("tic"); } // Rule 14 if (word.endsWith("ax")) { word = word.substring(0, lastPos - 1).concat("ac"); } // Rule 15 if (word.endsWith("ex")) { word = word.substring(0, lastPos - 1).concat("ec"); } // Rule 16 if (word.endsWith("ix")) { word = word.substring(0, lastPos - 1).concat("ic"); } // Rule 17 if (word.endsWith("lux")) { word = word.substring(0, lastPos - 2).concat("luc"); } // Rule 18 if (word.endsWith("uad")) { word = word.substring(0, lastPos - 2).concat("uas"); } // Rule 19 if (word.endsWith("vad")) { word = word.substring(0, lastPos - 2).concat("vas"); } // Rule 20 if (word.endsWith("cid")) { word = word.substring(0, lastPos - 2).concat("cis"); } // Rule 21 if (word.endsWith("lid")) { word = word.substring(0, lastPos - 2).concat("lis"); } // Rule 22 if (word.endsWith("erid")) { word = word.substring(0, lastPos - 3).concat("eris"); } // Rule 23 if (word.endsWith("pand")) { word = word.substring(0, lastPos - 3).concat("pans"); } // Rule 24 if (word.endsWith("end")) { if ((lastPos - 3 < 0) || (word.charAt(lastPos - 3) != 's')) { word = word.substring(0, lastPos - 2).concat("ens"); } } // Rule 25 if (word.endsWith("ond")) { word = word.substring(0, lastPos - 2).concat("ons"); } // Rule 26 if (word.endsWith("lud")) { word = word.substring(0, lastPos - 2).concat("lus"); } // Rule 27 if (word.endsWith("rud")) { word = word.substring(0, lastPos - 2).concat("rus"); } // Rule 28 if (word.endsWith("her")) { if ((lastPos - 3 < 0) || ((word.charAt(lastPos - 3) != 'p') && (word.charAt(lastPos - 3) != 't'))) { word = word.substring(0, lastPos - 2).concat("hes"); } } // Rule 29 if (word.endsWith("mit")) { word = word.substring(0, lastPos - 2).concat("mis"); } // Rule 30 if (word.endsWith("end")) { if ((lastPos - 3 < 0) || (word.charAt(lastPos - 3) != 'm')) { word = word.substring(0, lastPos - 2).concat("ens"); } } // Rule 31 if (word.endsWith("ert")) { word = word.substring(0, lastPos - 2).concat("ers"); } // Rule 32 if (word.endsWith("et")) { if ((lastPos - 2 < 0) || (word.charAt(lastPos - 2) != 'n')) { word = word.substring(0, lastPos - 1).concat("es"); } } // Rule 33 if (word.endsWith("yt")) { word = word.substring(0, lastPos - 1).concat("ys"); } // Rule 34 if (word.endsWith("yz")) { word = word.substring(0, lastPos - 1).concat("ys"); } return word; } /** * Returns the stemmed version of the given word. * Word is converted to lower case before stemming. * * @param word a string consisting of a single word * @return the stemmed word */ public String stem(String word) { if (word.length() > 2) { return recodeEnding(removeEnding(word.toLowerCase())); } else { return word.toLowerCase(); } } /** * Stems everything in the given string. String * is converted to lower case before stemming. * * @param str the string to stem * @return the processed string */ public String stemString(String str) { StringBuffer result = new StringBuffer(); int start = -1; for (int j = 0; j < str.length(); j++) { char c = str.charAt(j); if (Character.isLetterOrDigit(c)) { if (start == -1) { start = j; } } else if (c == '\'') { if (start == -1) { result.append(c); } } else { if (start != -1) { result.append(stem(str.substring(start, j))); start = -1; } result.append(c); } } if (start != -1) { result.append(stem(str.substring(start, str.length()))); } return result.toString(); } /** * returns a string representation of the stemmer * * @return a string representation of the stemmer */ public String toString() { return getClass().getName(); } /** * Runs the stemmer with the given options * * @param args the options */ public static void main(String[] args) { try { Stemming.useStemmer(new LovinsStemmer(), args); } catch (Exception e) { e.printStackTrace(); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -