lovinsstemmer.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 993 行 · 第 1/2 页
JAVA
993 行
              break;            case 'G':              if ((length - el > 2) &&                  (word.charAt(length - el - 1) == 'f')) {                return word.substring(0, length - el);                  }              break;            case 'H':              if ((word.charAt(length - el - 1) == 't') ||                  ((word.charAt(length - el - 1) == 'l') &&                   (word.charAt(length - el - 2) == 'l'))) {                return word.substring(0, length - el);                   }              break;            case 'I':              if ((word.charAt(length - el - 1) != 'o') &&                  (word.charAt(length - el - 1) != 'e')) {                 return word.substring(0, length - el);                  }              break;            case 'J':              if ((word.charAt(length - el - 1) != 'a') &&                  (word.charAt(length - el - 1) != 'e')) {                 return word.substring(0, length - el);                  }              break;            case 'K':              if ((length - el > 2) &&                  ((word.charAt(length - el - 1) == 'l') ||                   (word.charAt(length - el - 1) == 'i') ||                   ((word.charAt(length - el - 1) == 'e') &&                    (word.charAt(length - el - 3) == 'u')))) {                return word.substring(0, length - el);                    }              break;            case 'L':              if ((word.charAt(length - el - 1) != 'u') &&                  (word.charAt(length - el - 1) != 'x') &&                  ((word.charAt(length - el - 1) != 's') ||                   (word.charAt(length - el - 2) == 'o'))) {                return word.substring(0, length - el);                   }              break;            case 'M':              if ((word.charAt(length - el - 1) != 'a') &&                  (word.charAt(length - el - 1) != 'c') &&                  (word.charAt(length - el - 1) != 'e') &&                  (word.charAt(length - el - 1) != 'm')) {                
return word.substring(0, length - el);                  }              break;            case 'N':              if ((length - el > 3) ||                   ((length - el == 3) &&                   ((word.charAt(length - el - 3) != 's')))) {                return word.substring(0, length - el);                   }              break;            case 'O':              if ((word.charAt(length - el - 1) == 'l') ||                  (word.charAt(length - el - 1) == 'i')) {                return word.substring(0, length - el);                  }              break;            case 'P':              if (word.charAt(length - el - 1) != 'c') {                return word.substring(0, length - el);              }              break;            case 'Q':              if ((length - el > 2) &&                  (word.charAt(length - el - 1) != 'l') &&                  (word.charAt(length - el - 1) != 'n')) {                return word.substring(0, length - el);                  }              break;            case 'R':              if ((word.charAt(length - el - 1) == 'n') ||                  (word.charAt(length - el - 1) == 'r')) {                return word.substring(0, length - el);                  }              break;            case 'S':              if (((word.charAt(length - el - 1) == 'r') &&                    (word.charAt(length - el - 2) == 'd')) ||                  ((word.charAt(length - el - 1) == 't') &&                   (word.charAt(length - el - 2) != 't'))) {                return word.substring(0, length - el);                   }              break;            case 'T':              if ((word.charAt(length - el - 1) == 's') ||                  ((word.charAt(length - el - 1) == 't') &&                   (word.charAt(length - el - 2) != 'o'))) {                return word.substring(0, length - el);                   }              break;            case 'U':              if ((word.charAt(length - el - 1) == 'l') ||                  (word.charAt(length - el - 1) 
== 'm') ||                  (word.charAt(length - el - 1) == 'n') ||                  (word.charAt(length - el - 1) == 'r')) {                return word.substring(0, length - el);                  }              break;            case 'V':              if (word.charAt(length - el - 1) == 'c') {                return word.substring(0, length - el);              }              break;            case 'W':              if ((word.charAt(length - el - 1) != 's') &&                  (word.charAt(length - el - 1) != 'u')) {                return word.substring(0, length - el);                  }              break;            case 'X':              if ((word.charAt(length - el - 1) == 'l') ||                  (word.charAt(length - el - 1) == 'i') ||                  ((length - el > 2) &&                   (word.charAt(length - el - 1) == 'e') &&                   (word.charAt(length - el - 3) == 'u'))) {                return word.substring(0, length - el);                   }              break;            case 'Y':              if ((word.charAt(length - el - 1) == 'n') &&                  (word.charAt(length - el - 2) == 'i')) {                return word.substring(0, length - el);                  }              break;            case 'Z':              if (word.charAt(length - el - 1) != 'f') {                return word.substring(0, length - el);              }              break;            case 'a':              if ((word.charAt(length - el - 1) == 'd') ||                  (word.charAt(length - el - 1) == 'f') ||                  (((word.charAt(length - el - 1) == 'h') &&                    (word.charAt(length - el - 2) == 'p'))) ||                  (((word.charAt(length - el - 1) == 'h') &&                    (word.charAt(length - el - 2) == 't'))) ||                  (word.charAt(length - el - 1) == 'l') ||                  (((word.charAt(length - el - 1) == 'r') &&                    (word.charAt(length - el - 2) == 'e'))) ||                  
(((word.charAt(length - el - 1) == 'r') &&                    (word.charAt(length - el - 2) == 'o'))) ||                  (((word.charAt(length - el - 1) == 's') &&                    (word.charAt(length - el - 2) == 'e'))) ||                  (word.charAt(length - el - 1) == 't')) {                return word.substring(0, length - el);                  }              break;            case 'b':              if (m_CompMode) {                if (((length - el == 3 ) &&                      (!((word.charAt(length - el - 1) == 't') &&                         (word.charAt(length - el - 2) == 'e') &&                         (word.charAt(length - el - 3) == 'm')))) ||                    ((length - el > 3) &&                     (!((word.charAt(length - el - 1) == 't') &&                        (word.charAt(length - el - 2) == 's') &&                        (word.charAt(length - el - 3) == 'y') &&                        (word.charAt(length - el - 4) == 'r'))))) {                  return word.substring(0, length - el);                        }              } else {                if ((length - el > 2) &&                    (!((word.charAt(length - el - 1) == 't') &&                       (word.charAt(length - el - 2) == 'e') &&                       (word.charAt(length - el - 3) == 'm'))) &&                    ((length - el < 4) ||                     (!((word.charAt(length - el - 1) == 't') &&                        (word.charAt(length - el - 2) == 's') &&                        (word.charAt(length - el - 3) == 'y') &&                        (word.charAt(length - el - 4) == 'r'))))) {                  return word.substring(0, length - el);                        }              }               break;            case 'c':              if (word.charAt(length - el - 1) == 'l') {                return word.substring(0, length - el);              }              break;            default:              throw new IllegalArgumentException("Fatal error.");          }        }      } 
     el--;    }    return word;  }  /**   * Recodes ending of given word.   *    * @param word	the word to work on   * @return		the processed word   */  private String recodeEnding(String word) {    int lastPos = word.length() - 1;    // Rule 1    if (word.endsWith("bb") ||        word.endsWith("dd") ||        word.endsWith("gg") ||        word.endsWith("ll") ||        word.endsWith("mm") ||        word.endsWith("nn") ||        word.endsWith("pp") ||        word.endsWith("rr") ||        word.endsWith("ss") ||        word.endsWith("tt")) {      word = word.substring(0, lastPos);      lastPos--;        }    // Rule 2    if (word.endsWith("iev")) {      word = word.substring(0, lastPos - 2).concat("ief");    }    // Rule 3    if (word.endsWith("uct")) {      word = word.substring(0, lastPos - 2).concat("uc");      lastPos--;    }    // Rule 4    if (word.endsWith("umpt")) {      word = word.substring(0, lastPos - 3).concat("um");      lastPos -= 2;    }    // Rule 5    if (word.endsWith("rpt")) {      word = word.substring(0, lastPos - 2).concat("rb");      lastPos--;    }    // Rule 6    if (word.endsWith("urs")) {      word = word.substring(0, lastPos - 2).concat("ur");      lastPos--;    }    // Rule 7    if (word.endsWith("istr")) {      word = word.substring(0, lastPos - 3).concat("ister");      lastPos++;    }    // Rule 7a    if (word.endsWith("metr")) {      word = word.substring(0, lastPos - 3).concat("meter");      lastPos++;    }    // Rule 8    if (word.endsWith("olv")) {      word = word.substring(0, lastPos - 2).concat("olut");      lastPos++;    }    // Rule 9    if (word.endsWith("ul")) {      if ((lastPos - 2 < 0) ||          ((word.charAt(lastPos - 2) != 'a') &&           (word.charAt(lastPos - 2) != 'i') &&           (word.charAt(lastPos - 2) != 'o'))) {        word = word.substring(0, lastPos - 1).concat("l");        lastPos--;           }    }    // Rule 10    if (word.endsWith("bex")) {      word = word.substring(0, lastPos - 2).concat("bic");    
}    // Rule 11    if (word.endsWith("dex")) {      word = word.substring(0, lastPos - 2).concat("dic");    }    // Rule 12    if (word.endsWith("pex")) {      word = word.substring(0, lastPos - 2).concat("pic");    }    // Rule 13    if (word.endsWith("tex")) {      word = word.substring(0, lastPos - 2).concat("tic");    }    // Rule 14    if (word.endsWith("ax")) {      word = word.substring(0, lastPos - 1).concat("ac");    }    // Rule 15    if (word.endsWith("ex")) {      word = word.substring(0, lastPos - 1).concat("ec");    }    // Rule 16    if (word.endsWith("ix")) {      word = word.substring(0, lastPos - 1).concat("ic");    }    // Rule 17    if (word.endsWith("lux")) {      word = word.substring(0, lastPos - 2).concat("luc");    }    // Rule 18    if (word.endsWith("uad")) {      word = word.substring(0, lastPos - 2).concat("uas");    }    // Rule 19    if (word.endsWith("vad")) {      word = word.substring(0, lastPos - 2).concat("vas");    }    // Rule 20    if (word.endsWith("cid")) {      word = word.substring(0, lastPos - 2).concat("cis");    }    // Rule 21    if (word.endsWith("lid")) {      word = word.substring(0, lastPos - 2).concat("lis");    }    // Rule 22    if (word.endsWith("erid")) {      word = word.substring(0, lastPos - 3).concat("eris");    }    // Rule 23    if (word.endsWith("pand")) {      word = word.substring(0, lastPos - 3).concat("pans");    }    // Rule 24    if (word.endsWith("end")) {      if ((lastPos - 3 < 0) ||          (word.charAt(lastPos - 3) != 's')) {        word = word.substring(0, lastPos - 2).concat("ens");          }    }    // Rule 25    if (word.endsWith("ond")) {      word = word.substring(0, lastPos - 2).concat("ons");    }    // Rule 26    if (word.endsWith("lud")) {      word = word.substring(0, lastPos - 2).concat("lus");    }    // Rule 27    if (word.endsWith("rud")) {      word = word.substring(0, lastPos - 2).concat("rus");    }    // Rule 28    if (word.endsWith("her")) {      if ((lastPos - 3 < 0) || 
         ((word.charAt(lastPos - 3) != 'p') &&           (word.charAt(lastPos - 3) != 't'))) {        word = word.substring(0, lastPos - 2).concat("hes");           }    }    // Rule 29    if (word.endsWith("mit")) {      word = word.substring(0, lastPos - 2).concat("mis");    }    // Rule 30    if (word.endsWith("end")) {      if ((lastPos - 3 < 0) ||          (word.charAt(lastPos - 3) != 'm')) {        word = word.substring(0, lastPos - 2).concat("ens");          }    }    // Rule 31    if (word.endsWith("ert")) {      word = word.substring(0, lastPos - 2).concat("ers");    }    // Rule 32    if (word.endsWith("et")) {      if ((lastPos - 2 < 0) ||          (word.charAt(lastPos - 2) != 'n')) {        word = word.substring(0, lastPos - 1).concat("es");          }    }    // Rule 33    if (word.endsWith("yt")) {      word = word.substring(0, lastPos - 1).concat("ys");    }    // Rule 34    if (word.endsWith("yz")) {      word = word.substring(0, lastPos - 1).concat("ys");    }    return word;  }  /**   * Returns the stemmed version of the given word.   * Word is converted to lower case before stemming.   *    * @param word 	a string consisting of a single word   * @return 		the stemmed word   */  public String stem(String word) {    if (word.length() > 2) {      return recodeEnding(removeEnding(word.toLowerCase()));    } else {      return word.toLowerCase();    }  }  /**   * Stems everything in the given string. String   * is converted to lower case before stemming.   
*    * @param str		the string to stem   * @return 		the processed string   */  public String stemString(String str) {    StringBuffer result = new StringBuffer();    int start = -1;    for (int j = 0; j < str.length(); j++) {      char c = str.charAt(j);      if (Character.isLetterOrDigit(c)) {        if (start == -1) {          start = j;        }      } else if (c == '\'') {        if (start == -1) {          result.append(c);        }      } else {        if (start != -1) {          result.append(stem(str.substring(start, j)));          start = -1;        }        result.append(c);      }    }    if (start != -1) {      result.append(stem(str.substring(start, str.length())));    }    return result.toString();    }  /**   * returns a string representation of the stemmer   *    * @return a string representation of the stemmer   */  public String toString() {    return getClass().getName();  }  /**   * Runs the stemmer with the given options   *   * @param args      the options   */  public static void main(String[] args) {    try {      Stemming.useStemmer(new LovinsStemmer(), args);    }    catch (Exception e) {      e.printStackTrace();    }  }}
lovinsstemmer.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 lovinsstemmer.java 源码文件，采用 Java 编程语言编写，共 993 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?