⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 russianstemmer.java

📁 Lucene 2.1的源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    {        boolean match = false;        for (int i = theEndingClass.length - 1; i >= 0; i--)        {            char[] theEnding = theEndingClass[i];            // check if the ending is bigger than stemming zone            if (startIndex < theEnding.length - 1)            {                match = false;                continue;            }            match = true;            int stemmingIndex = startIndex;            for (int j = theEnding.length - 1; j >= 0; j--)            {                if (stemmingZone.charAt(stemmingIndex--) != charset[theEnding[j]])                {                    match = false;                    break;                }            }            // check if ending was found            if (match)            {                return theEndingClass[i].length; // cut ending            }        }        return 0;    }    private int findEnding(StringBuffer stemmingZone, char[][] theEndingClass)    {        return findEnding(stemmingZone, stemmingZone.length() - 1, theEndingClass);    }    /**     * Finds the ending among the given class of endings and removes it from stemming zone.     * Creation date: (17/03/2002 8:18:34 PM)     */    private boolean findAndRemoveEnding(StringBuffer stemmingZone, char[][] theEndingClass)    {        int endingLength = findEnding(stemmingZone, theEndingClass);        if (endingLength == 0)            // not found            return false;        else {            stemmingZone.setLength(stemmingZone.length() - endingLength);            // cut the ending found            return true;        }    }    /**     * Finds the ending among the given class of endings, then checks if this ending was     * preceded by any of given predessors, and if so, removes it from stemming zone.     * Creation date: (17/03/2002 8:18:34 PM)     */    private boolean findAndRemoveEnding(StringBuffer stemmingZone,        char[][] theEndingClass, char[][] thePredessors)    {        int endingLength = findEnding(stemmingZone, theEndingClass);        if (endingLength == 0)            // not found            return false;        else        {            int predessorLength =                findEnding(stemmingZone,                    stemmingZone.length() - endingLength - 1,                    thePredessors);            if (predessorLength == 0)                return false;            else {                stemmingZone.setLength(stemmingZone.length() - endingLength);                // cut the ending found                return true;            }        }    }    /**     * Marks positions of RV, R1 and R2 in a given word.     * Creation date: (16/03/2002 3:40:11 PM)     */    private void markPositions(String word)    {        RV = 0;        R1 = 0;        R2 = 0;        int i = 0;        // find RV        while (word.length() > i && !isVowel(word.charAt(i)))        {            i++;        }        if (word.length() - 1 < ++i)            return; // RV zone is empty        RV = i;        // find R1        while (word.length() > i && isVowel(word.charAt(i)))        {            i++;        }        if (word.length() - 1 < ++i)            return; // R1 zone is empty        R1 = i;        // find R2        while (word.length() > i && !isVowel(word.charAt(i)))        {            i++;        }        if (word.length() - 1 < ++i)            return; // R2 zone is empty        while (word.length() > i && isVowel(word.charAt(i)))        {            i++;        }        if (word.length() - 1 < ++i)            return; // R2 zone is empty        R2 = i;    }    /**     * Checks if character is a vowel..     * Creation date: (16/03/2002 10:47:03 PM)     * @return boolean     * @param letter char     */    private boolean isVowel(char letter)    {        for (int i = 0; i < vowels.length; i++)        {            if (letter == charset[vowels[i]])                return true;        }        return false;    }    /**     * Noun endings.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean noun(StringBuffer stemmingZone)    {        return findAndRemoveEnding(stemmingZone, nounEndings);    }    /**     * Perfective gerund endings.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean perfectiveGerund(StringBuffer stemmingZone)    {        return findAndRemoveEnding(            stemmingZone,            perfectiveGerundEndings1,            perfectiveGerund1Predessors)            || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);    }    /**     * Reflexive endings.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean reflexive(StringBuffer stemmingZone)    {        return findAndRemoveEnding(stemmingZone, reflexiveEndings);    }    /**     * Insert the method's description here.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean removeI(StringBuffer stemmingZone)    {        if (stemmingZone.length() > 0            && stemmingZone.charAt(stemmingZone.length() - 1) == charset[I])        {            stemmingZone.setLength(stemmingZone.length() - 1);            return true;        }        else        {            return false;        }    }    /**     * Insert the method's description here.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean removeSoft(StringBuffer stemmingZone)    {        if (stemmingZone.length() > 0            && stemmingZone.charAt(stemmingZone.length() - 1) == charset[SOFT])        {            stemmingZone.setLength(stemmingZone.length() - 1);            return true;        }        else        {            return false;        }    }    /**     * Insert the method's description here.     * Creation date: (16/03/2002 10:58:42 PM)     * @param newCharset char[]     */    public void setCharset(char[] newCharset)    {        charset = newCharset;    }    /**     * Finds the stem for given Russian word.     * Creation date: (16/03/2002 3:36:48 PM)     * @return java.lang.String     * @param input java.lang.String     */    public String stem(String input)    {        markPositions(input);        if (RV == 0)            return input; //RV wasn't detected, nothing to stem        StringBuffer stemmingZone = new StringBuffer(input.substring(RV));        // stemming goes on in RV        // Step 1        if (!perfectiveGerund(stemmingZone))        {            reflexive(stemmingZone);            // variable r is unused, we are just interested in the flow that gets            // created by logical expression: apply adjectival(); if that fails,            // apply verb() etc            boolean r =                adjectival(stemmingZone)                || verb(stemmingZone)                || noun(stemmingZone);        }        // Step 2        removeI(stemmingZone);        // Step 3        derivational(stemmingZone);        // Step 4        superlative(stemmingZone);        undoubleN(stemmingZone);        removeSoft(stemmingZone);        // return result        return input.substring(0, RV) + stemmingZone.toString();    }    /**     * Superlative endings.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean superlative(StringBuffer stemmingZone)    {        return findAndRemoveEnding(stemmingZone, superlativeEndings);    }    /**     * Undoubles N.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean undoubleN(StringBuffer stemmingZone)    {        char[][] doubleN = {            { N, N }        };        if (findEnding(stemmingZone, doubleN) != 0)        {            stemmingZone.setLength(stemmingZone.length() - 1);            return true;        }        else        {            return false;        }    }    /**     * Verb endings.     * Creation date: (17/03/2002 12:14:58 AM)     * @param stemmingZone java.lang.StringBuffer     */    private boolean verb(StringBuffer stemmingZone)    {        return findAndRemoveEnding(            stemmingZone,            verbEndings1,            verb1Predessors)            || findAndRemoveEnding(stemmingZone, verbEndings2);    }    /**     * Static method for stemming with different charsets     */    public static String stem(String theWord, char[] charset)    {        RussianStemmer stemmer = new RussianStemmer();        stemmer.setCharset(charset);        return stemmer.stem(theWord);    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -