⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compiledspellchecker.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
                    }                }            }            if (c == ' ') {                charPosition = 0;                // this'll also affect first by making it non-first                penalties[i] += mFirstCharEditCost;            } else if (charPosition == 0) {                penalties[i] += mFirstCharEditCost;                ++charPosition;            } else if (charPosition == 1) {                penalties[i] += mSecondCharEditCost;                ++charPosition;            }        }        // for (int i = 0; i < penalties.length; ++i)        //   System.out.println(" " + msg.charAt(i) + "=" + penalties[i]);        //           return penalties;    }    /**     * Returns a string-based representation of the parameters of     * this compiled spell checker.     *     * @return A string representing the parameters of this spell     * checker.     */    public String parametersToString() {        StringBuffer sb = new StringBuffer();        sb.append("SEARCH");        sb.append("\n  N-best size=" + mNBestSize);        sb.append("\n\nTOKEN SENSITIVITY");        sb.append("\n  Token sensitive=" + (mTokenSet != null));        if (mTokenSet != null) {            sb.append("\n  # Known Tokens=" + mTokenSet.size());        }        sb.append("\n\nEDITS ALLOWED");        sb.append("\n  Allow insert=" + mAllowInsert);        sb.append("\n  Allow delete=" + mAllowDelete);        sb.append("\n  Allow match=" + mAllowMatch);        sb.append("\n  Allow substitute=" + mAllowSubstitute);        sb.append("\n  Allow transpose=" + mAllowTranspose);        sb.append("\n  Num consecutive insertions allowed="                  + mNumConsecutiveInsertionsAllowed);        sb.append("\n  Minimum Length Token Edit="                  + mMinimumTokenLengthToCorrect);        sb.append("\n  # of do-not-Edit Tokens="                  + mDoNotEditTokens.size());        sb.append("\n\nEDIT COSTS");        sb.append("\n  Edit Distance=" + mEditDistance);        sb.append("\n  Known Token Edit Cost=" + mKnownTokenEditCost);        sb.append("\n  First Char Edit Cost=" + mFirstCharEditCost);        sb.append("\n  Second Char Edit Cost=" + mSecondCharEditCost);        sb.append("\n\nEDIT DISTANCE\n");        sb.append(mEditDistance);        sb.append("\n\nTOKENIZER FACTORY\n");        sb.append(mTokenizerFactory);        return sb.toString();    }    String normalizeQuery(String query) {        StringBuffer sb = new StringBuffer();        if (mTokenizerFactory == null) {            Strings.normalizeWhitespace(query,sb);        } else {            char[] cs = query.toCharArray();            Tokenizer tokenizer = mTokenizerFactory.tokenizer(cs,0,cs.length);            String nextToken;            for (int i = 0; (nextToken = tokenizer.nextToken()) != null; ++i) {                if (i > 0) sb.append(' ');                sb.append(nextToken);            }        }        return sb.toString();    }    // set this locally if tokenset == null    char[] observedCharacters() {        return mLM.observedCharacters();    }    void extendToFinalSpace(StateQueue queue,                            StateQueue finalQueue) {        Iterator stateIt = queue.iterator();        while (stateIt.hasNext()) {            State state = (State) stateIt.next();            if (state.mTokenEdited && !state.tokenComplete()) {                continue; // delete if final edited to non-token            }            double nextScore = state.mScore                + mLM.log2Estimate(state.mContextIndex,' ');            if (nextScore == Double.NEGATIVE_INFINITY) {                continue;            }            State nextState = new State(nextScore,false,                                        null,                                        state,                                        -1);            finalQueue.addState(nextState);        }    }    void extend2(char c1, char c2, State state,                 DpSpellQueue nextQ, DpSpellQueue nextQ2,                 double positionalEditPenalty) {        extend1(c1,state,nextQ,positionalEditPenalty);        if (positionalEditPenalty == Double.NEGATIVE_INFINITY) return;        if (allowTranspose())            transpose(c1,c2,state,nextQ2,positionalEditPenalty);    }    void extend1(char c, State state, DpSpellQueue nextQ,                 double positionalEditPenalty) {        if (allowMatch())            match(c,state,nextQ,positionalEditPenalty);        if (positionalEditPenalty == Double.NEGATIVE_INFINITY) return;        if (allowSubstitute())            substitute(c,state,nextQ,positionalEditPenalty);        if (allowDelete())            delete(c,state,nextQ,positionalEditPenalty);    }    void addToQueue(StateQueue queue, State state,                    double positionalEditPenalty) {        addToQueue(queue,state,0,positionalEditPenalty);    }    void addToQueue(StateQueue queue, State state, int numInserts,                    double positionalEditPenalty) {        if (!queue.addState(state)) return;        if (numInserts >= mNumConsecutiveInsertionsAllowed) return;        if (positionalEditPenalty == Double.NEGATIVE_INFINITY) return;        insert(state,queue,numInserts,positionalEditPenalty);    }    TokenTrieNode daughter(TokenTrieNode node,                           char c) {        return node == null ? null : node.daughter(c);    }    void match(char c, State state, DpSpellQueue nextQ,               double positionalEditPenalty) {        if (state.mTokenEdited) {            if (c == ' ') {                if (!state.tokenComplete()) {                    return;                }            } else if (!state.continuedBy(c)) {                return;            }        }        double score = state.mScore            + mLM.log2Estimate(state.mContextIndex,c)            + mEditDistance.matchWeight(c);        if (score == Double.NEGATIVE_INFINITY) return;        TokenTrieNode tokenTrieNode =            (c == ' ') ? mTokenPrefixTrie : daughter(state.mTokenTrieNode,c);        addToQueue(nextQ,                   new State1(score,                              (c != ' ') && state.mTokenEdited,                              tokenTrieNode,                              state,c,                              mLM.nextContext(state.mContextIndex,c)),                   positionalEditPenalty);    }    void delete(char c, State state, DpSpellQueue nextQ,                double positionalEditPenalty) {        double deleteWeight = mEditDistance.deleteWeight(c);        if (deleteWeight == Double.NEGATIVE_INFINITY) return;        double score = state.mScore + deleteWeight + positionalEditPenalty;        addToQueue(nextQ,                   new State(score, true,                             state.mTokenTrieNode,                             state,                             state.mContextIndex),                   positionalEditPenalty);    }    void insert(State state, StateQueue nextQ, int numInserts,                double positionalEditPenalty) {        if (state.tokenComplete()) {            double score = state.mScore                + mLM.log2Estimate(state.mContextIndex,' ')                + mEditDistance.insertWeight(' ')                + positionalEditPenalty;            if (score != Double.NEGATIVE_INFINITY)                addToQueue(nextQ,                           new State1(score,true,                                      mTokenPrefixTrie,                                      state,' ',                                      mLM.nextContext(state.mContextIndex,                                                      ' ')),                           numInserts+1,positionalEditPenalty);        }        char[] followers = state.getContinuations();        if (followers == null) return;        for (int i = 0; i < followers.length; ++i) {            char c = followers[i];            double insertWeight = mEditDistance.insertWeight(c);            if (insertWeight == Double.NEGATIVE_INFINITY) continue;            double score = state.mScore                + mLM.log2Estimate(state.mContextIndex,c)                + insertWeight                + positionalEditPenalty;            if (score == Double.NEGATIVE_INFINITY) continue;            addToQueue(nextQ,                       new State1(score,true,                                  state.followingNode(i),                                  state,c,                                  mLM.nextContext(state.mContextIndex,c)),                       numInserts+1, positionalEditPenalty);        }    }    void substitute(char c, State state, StateQueue nextQ,                    double positionalEditPenalty) {        if (state.tokenComplete() && c != ' ') {            double score = state.mScore                + mLM.log2Estimate(state.mContextIndex,' ')                + mEditDistance.substituteWeight(c,' ')                + positionalEditPenalty;            if (score != Double.NEGATIVE_INFINITY)                addToQueue(nextQ,                           new State1(score,true,                                      mTokenPrefixTrie,                                      state,' ',                                      mLM.nextContext(state.mContextIndex,                                                      ' ')),                           positionalEditPenalty);        }        char[] followers = state.getContinuations();        if (followers == null) return;        for (int i = 0; i < followers.length; ++i) {            char c2 = followers[i];            if (c == c2) continue; // don't match            double substWeight                = mEditDistance.substituteWeight(c,c2);            if (substWeight == Double.NEGATIVE_INFINITY) continue;            double score = state.mScore                + mLM.log2Estimate(state.mContextIndex,c2)                + substWeight                + positionalEditPenalty;            if (score == Double.NEGATIVE_INFINITY) continue;            addToQueue(nextQ,                       new State1(score,true,                                  state.followingNode(i),                                  state,c2,                                  mLM.nextContext(state.mContextIndex,                                                  c2)),                       positionalEditPenalty);        }    }    void transpose(char c1, char c2, State state, StateQueue nextQ,                   double positionalEditPenalty) {        double transposeWeight = mEditDistance.transposeWeight(c1,c2);        if (transposeWeight == Double.NEGATIVE_INFINITY) return;        if (c2 == ' ' && !state.tokenComplete()) return;        TokenTrieNode midNode            = (c2 == ' ')            ? mTokenPrefixTrie            : daughter(state.mTokenTrieNode,c2);        if (c1 == ' ' && midNode != null && !midNode.mIsToken) return;        int nextContextIndex = mLM.nextContext(state.mContextIndex,c2);        int nextContextIndex2 = mLM.nextContext(nextContextIndex,c1);        double score = state.mScore            + mLM.log2Estimate(state.mContextIndex,c2)            + mLM.log2Estimate(nextContextIndex,c1)            + mEditDistance.transposeWeight(c1,c2)            + positionalEditPenalty;        if (score == Double.NEGATIVE_INFINITY) return;        TokenTrieNode nextNode            = (c1 == ' ')            ? mTokenPrefixTrie            : daughter(midNode,c1);        addToQueue(nextQ,                   new State2(score,true,nextNode,                              state,c2,c1,                              nextContextIndex2),                   positionalEditPenalty);    }    /**     * A weighted edit distance ordered by similarity that treats case     * variants as zero cost and all other edits as infinite cost.     * The inifite cost is {@link Double#NEGATIVE_INFINITY}.  See     * {@link WeightedEditDistance} for more information on     * similarity-based distances.     *     * <P>If this model is used for spelling correction, the result is     * a system that simply chooses the most likely case for output     * characters given an input character and does not change anything     * else.     *     * <P>Case here is based on the methods     * {@link Character#isUpperCase(char)}, {@link Character#isLowerCase(char)}     * and equality is tested by converting the upper case character to     * lower case using {@link Character#toLowerCase(char)}.     *     * <P>This edit distance is compilable and the result of writing it     * and reading it is referentially equal to this instance.     */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -