⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lovinsstemmer.java

📁 一个很不错的词频统计程序,目前只支持英文,中文的本人正在修改中.改好后上传给大家分享
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
        m_l3.put("age", "B");
        m_l3.put("aic", "A");
        m_l3.put("als", "b");
        m_l3.put("ant", "B");
        m_l3.put("ars", "O");
        m_l3.put("ary", "F");
        m_l3.put("ata", "A");
        m_l3.put("ate", "A");
        m_l3.put("eal", "Y");
        m_l3.put("ear", "Y");
        m_l3.put("ely", "E");
        m_l3.put("ene", "E");
        m_l3.put("ent", "C");
        m_l3.put("ery", "E");
        m_l3.put("ese", "A");
        m_l3.put("ful", "A");
        m_l3.put("ial", "A");
        m_l3.put("ian", "A");
        m_l3.put("ics", "A");
        m_l3.put("ide", "L");
        m_l3.put("ied", "A");
        m_l3.put("ier", "A");
        m_l3.put("ies", "P");
        m_l3.put("ily", "A");
        m_l3.put("ine", "M");
        m_l3.put("ing", "N");
        m_l3.put("ion", "Q");
        m_l3.put("ish", "C");
        m_l3.put("ism", "B");
        m_l3.put("ist", "A");
        m_l3.put("ite", "a");
        m_l3.put("ity", "A");
        m_l3.put("ium", "A");
        m_l3.put("ive", "A");
        m_l3.put("ize", "F");
        m_l3.put("oid", "A");
        m_l3.put("one", "R");
        m_l3.put("ous", "A");
        m_l2 = new HashMap();
        m_l2.put("ae", "A");
        m_l2.put("al", "b");
        m_l2.put("ar", "X");
        m_l2.put("as", "B");
        m_l2.put("ed", "E");
        m_l2.put("en", "F");
        m_l2.put("es", "E");
        m_l2.put("ia", "A");
        m_l2.put("ic", "A");
        m_l2.put("is", "A");
        m_l2.put("ly", "B");
        m_l2.put("on", "S");
        m_l2.put("or", "T");
        m_l2.put("um", "U");
        m_l2.put("us", "V");
        m_l2.put("yl", "R");
        m_l2.put("s\'", "A");
        m_l2.put("\'s", "A");
        m_l1 = new HashMap();
        m_l1.put("a", "A");
        m_l1.put("e", "A");
        m_l1.put("i", "A");
        m_l1.put("o", "A");
        m_l1.put("s", "W");
        m_l1.put("y", "B");
    }

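    /**
     * Finds and removes the ending from the given word.
     * <p>
     * Candidate endings are tried starting at length 11 and are looked up in the
     * per-length tables ({@code m_l11} ... {@code m_l1}). An ending is only
     * considered when more than one character of the word would remain, and it is
     * only stripped when the condition code stored for that ending is satisfied
     * by the remaining characters.
     */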
    private String removeEnding(String word) {

        int length = word.length();
        int el = 11;

        while (el > 0) {
            if (length - el > 1) {
                String ending = word.substring(length - el);
                String conditionCode = null;
                switch (el) {
                case 11:
                    conditionCode = (String) m_l11.get(ending);
                    break;
                case 10:
                    conditionCode = (String) m_l10.get(ending);
                    break;
                case 9:
                    conditionCode = (String) m_l9.get(ending);
                    break;
                case 8:
                    conditionCode = (String) m_l8.get(ending);
                    break;
                case 7:
                    conditionCode = (String) m_l7.get(ending);
                    break;
                case 6:
                    conditionCode = (String) m_l6.get(ending);
                    break;
                case 5:
                    conditionCode = (String) m_l5.get(ending);
                    break;
                case 4:
                    conditionCode = (String) m_l4.get(ending);
                    break;
                case 3:
                    conditionCode = (String) m_l3.get(ending);
                    break;
                case 2:
                    conditionCode = (String) m_l2.get(ending);
                    break;
                case 1:
                    conditionCode = (String) m_l1.get(ending);
                    break;
                default:
                }
                if (conditionCode != null) {
                    switch (conditionCode.charAt(0)) {
                    case 'A':
                        return word.substring(0, length - el);
                    case 'B':
                        if (length - el > 2) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'C':
                        if (length - el > 3) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'D':
                        if (length - el > 4) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'E':
                        if (word.charAt(length - el - 1) != 'e') {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'F':
                        if ((length - el > 2) && (word.charAt(length - el - 1) != 'e')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'G':
                        if ((length - el > 2) && (word.charAt(length - el - 1) == 'f')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'H':
                        if ((word.charAt(length - el - 1) == 't')
                                || ((word.charAt(length - el - 1) == 'l') && (word.charAt(length - el - 2) == 'l'))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'I':
                        if ((word.charAt(length - el - 1) != 'o') && (word.charAt(length - el - 1) != 'e')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'J':
                        if ((word.charAt(length - el - 1) != 'a') && (word.charAt(length - el - 1) != 'e')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'K':
                        if ((length - el > 2)
                                && ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'i') || ((word
                                        .charAt(length - el - 1) == 'e') && (word.charAt(length - el - 3) == 'u')))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'L':
                        if ((word.charAt(length - el - 1) != 'u') && (word.charAt(length - el - 1) != 'x')
                                && ((word.charAt(length - el - 1) != 's') || (word.charAt(length - el - 2) == 'o'))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'M':
                        if ((word.charAt(length - el - 1) != 'a') && (word.charAt(length - el - 1) != 'c')
                                && (word.charAt(length - el - 1) != 'e') && (word.charAt(length - el - 1) != 'm')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'N':
                        if ((length - el > 3) || ((length - el == 3) && ((word.charAt(length - el - 3) != 's')))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'O':
                        if ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'i')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'P':
                        if (word.charAt(length - el - 1) != 'c') {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'Q':
                        if ((length - el > 2) && (word.charAt(length - el - 1) != 'l')
                                && (word.charAt(length - el - 1) != 'n')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'R':
                        if ((word.charAt(length - el - 1) == 'n') || (word.charAt(length - el - 1) == 'r')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'S':
                        if (((word.charAt(length - el - 1) == 'r') && (word.charAt(length - el - 2) == 'd'))
                                || ((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) != 't'))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'T':
                        if ((word.charAt(length - el - 1) == 's')
                                || ((word.charAt(length - el - 1) == 't') && (word.charAt(length - el - 2) != 'o'))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'U':
                        if ((word.charAt(length - el - 1) == 'l') || (word.charAt(length - el - 1) == 'm')
                                || (word.charAt(length - el - 1) == 'n') || (word.charAt(length - el - 1) == 'r')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'V':
                        if (word.charAt(length - el - 1) == 'c') {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'W':
                        if ((word.charAt(length - el - 1) != 's') && (word.charAt(length - el - 1) != 'u')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'X':
                        if ((word.charAt(length - el - 1) == 'l')
                                || (word.charAt(length - el - 1) == 'i')
                                || ((length - el > 2) && (word.charAt(length - el - 1) == 'e') && (word.charAt(length
                                        - el - 3) == 'u'))) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'Y':
                        if ((word.charAt(length - el - 1) == 'n') && (word.charAt(length - el - 2) == 'i')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'Z':
                        if (word.charAt(length - el - 1) != 'f') {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'a':
                        if ((word.charAt(length - el - 1) == 'd') || (word.charAt(length - el - 1) == 'f')
                                || (((word.charAt(length - el - 1) == 'h') && (word.charAt(length - el - 2) == 'p')))
                                || (((word.charAt(length - el - 1) == 'h') && (word.charAt(length - el - 2) == 't')))
                                || (word.charAt(length - el - 1) == 'l')
                                || (((word.charAt(length - el - 1) == 'r') && (word.charAt(length - el - 2) == 'e')))
                                || (((word.charAt(length - el - 1) == 'r') && (word.charAt(length - el - 2) == 'o')))
                                || (((word.charAt(length - el - 1) == 's') && (word.charAt(length - el - 2) == 'e')))
                                || (word.charAt(length - el - 1) == 't')) {
                            return word.substring(0, length - el);
                        }
                        break;
                    case 'b':
                        if (m_CompMode) {
                            if (((length - el == 3) && (!((word.charAt(length - el - 1) == 't')
                                    && (word.charAt(length - el - 2) == 'e') && (word.charAt(length - el - 3) == 'm'))))
                                    || ((length - el > 3) && (!((word.charAt(length - el - 1) == 't')
                                            && (word.charAt(length - el - 2) == 's')
                                            && (word.charAt(length - el - 3) == 'y') && (word.charAt(length - el - 4) == 'r'))))) {
                                return word.substring(0, length - el);
                            }
                        } else {
                            if ((length - el > 2)
                                    && (!((word.charAt(length - el - 1) == 't')
                                            && (word.charAt(length - el - 2) == 'e') && (word.charAt(length - el - 3) == 'm')))
                                    && ((length - el < 4) || (!((word.charAt(length - el - 1) == 't')
                                            && (word.charAt(length - el - 2) == 's')
                                            && (word.charAt(length - el - 3) == 'y') && (word.charAt(length - el - 4) == 'r'))))) {
                                return word.substring(0, length - el);
                            }
                        }
                        break;
                    case 'c':
                        if (word.charAt(length - el - 1) == 'l') {
                            return word.substring(0, length - el);
                        }
                        break;
                    default:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -