📄 compiledspellchecker.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
     * language model and edit distance, tokenizer factory, the     * set of valid output tokens, and maximum n-best size, with     * default known token and first and second character edit costs.     * The set of do-not-edit tokens     * is initiall empty; set it using {@link #setDoNotEditTokens(Set)}.     *     * @param lm Source language model.     * @param editDistance Channel edit distance model.     * @param factory Tokenizer factory for tokenizing inputs.     * @param tokenSet Set of valid tokens for outputs or     * <code>null</code> if output is not token sensitive.     * @param nBestSize Size of n-best list for spell checking.     * hypothesis is pruned.     * @throws IllegalArgumentException If the edit distance is not a     * similarity measure.     */    public CompiledSpellChecker(CompiledNGramProcessLM lm,                                WeightedEditDistance editDistance,                                TokenizerFactory factory,                                Set<String> tokenSet,                                int nBestSize) {        this(lm,editDistance,factory,tokenSet,nBestSize,             DEFAULT_KNOWN_TOKEN_EDIT_COST,             DEFAULT_FIRST_CHAR_EDIT_COST,             DEFAULT_SECOND_CHAR_EDIT_COST);    }    /**     * Construct a compiled spell checker based on the specified     * language model and edit distance, a null tokenizer factory, the     * set of valid output tokens, and maximum n-best size, with     * default known token and first and second character edit costs.     * The set of do-not-edit tokens     * is initiall empty; set it using {@link #setDoNotEditTokens(Set)}.     *     * @param lm Source language model.     * @param editDistance Channel edit distance model.     * @param tokenSet Set of valid tokens for outputs or     * <code>null</code> if output is not token sensitive.     * @param nBestSize Size of n-best list for spell checking.     * hypothesis is pruned.     
*/    public CompiledSpellChecker(CompiledNGramProcessLM lm,                                WeightedEditDistance editDistance,                                Set<String> tokenSet,                                int nBestSize) {        this(lm,editDistance,null,tokenSet,nBestSize);    }    /**     * Construct a compiled spell checker based on the specified     * language model and edit distance, with a null tokenizer     * factory, the specified set of valid output tokens, with default     * value for n-best size, known token edit cost and first and     * second character edit costs.     * The set of do-not-edit tokens     * is initiall empty; set it using {@link #setDoNotEditTokens(Set)}.     *     * @param lm Source language model.     * @param editDistance Channel edit distance model.     * @param tokenSet Set of valid tokens for outputs or     * <code>null</code> if output is not token sensitive.     */    public CompiledSpellChecker(CompiledNGramProcessLM lm,                                WeightedEditDistance editDistance,                                Set<String> tokenSet) {        this(lm,editDistance,tokenSet,DEFAULT_N_BEST_SIZE);    }    /**     * Returns the compiled language model for this spell checker.     * Compiled language models are themselves immutable, and the     * language model for a spell checker may not be changed, but     * the result returned by this method may be used to construct     * a new compiled spell checker.     *     * @return The language model for this spell checker.     */    public CompiledNGramProcessLM languageModel() {        return mLM;    }    /**     * Returns the weighted edit distance for this compiled spell     * checker.     *     * @return The edit distance for this spell checker.     */    public WeightedEditDistance editDistance() {        return mEditDistance;    }    /**     * Returns the tokenizer factory for this spell checker.     *     * @return The tokenizer factory for this spell checker.     
*/    public TokenizerFactory tokenizerFactory() {        return mTokenizerFactory;    }    /**     * Returns an unmodifiable view the set of tokens for this spell     * checker.  In order to change the token set, construct a new     * set and use {@link #setTokenSet(Set)}.     *     * @return The set of tokens for this spell checker.     */    public Set<String> tokenSet() {        return Collections.<String>unmodifiableSet(mTokenSet);    }    /**     * Returns an unmodifiable view of the set of tokens that will     * never be edited in this compiled spell checker.  To change the     * value of this set, use {@link #setDoNotEditTokens(Set)}.     *     * @return The set of tokens that will not be edited.     */    public Set<String> doNotEditTokens() {        return Collections.<String>unmodifiableSet(mDoNotEditTokens);    }    /**     * Updates the set of do-not-edit tokens to be the specified     * value.  If one of these tokens shows up in the input, it will     * also show up in any correction supplied.     *     * @param tokens Set of tokens not to edit.     */    public void setDoNotEditTokens(Set<String> tokens) {        mDoNotEditTokens = tokens;    }    /**     * Returns the n-best size for this spell checker. See the class     * documentation above and the documentation for the method {@link     * #setNBest(int)} for more information.     *     * @return The n-best size for this spell checker.     */    public int nBestSize() {        return mNBestSize;    }    /**     * Returns the cost penalty for editing a character in a known     * token.  This penalty is added to each edit within a known     * token.     *     * @return Known token edit penalty.     */    public double knownTokenEditCost() {        return mKnownTokenEditCost;    }    /**     * Returns the cost penalty for editing the first character in a     * token.  This penalty is added to each edit while scanning the     * first character of a token in the input.     
*     * <P>As a special case, transposition only pays a single     * penalty based on the penalty of the first character in     * the transposition.     *     * @return First character edit penalty.     */    public double firstCharEditCost() {        return mFirstCharEditCost;    }    /**     * Returns the cost penalty for editing the second character     * in a token.  This penalty is added for each edit while     * scanning the second character in an input.     *     * @return Second character edit penalty.     */    public double secondCharEditCost() {        return mSecondCharEditCost;    }    /**     * Set the known token edit cost to the specified value.     *     * @param cost New value for known token edit cost.     */    public void setKnownTokenEditCost(double cost) {        mKnownTokenEditCost = cost;    }    /**     * Set the first character edit cost to the specified value.     *     * @param cost New value for the first character edit cost.     */    public void setFirstCharEditCost(double cost) {        mFirstCharEditCost = cost;    }    /**     * Set the second character edit cost to the specified value.     *     * @param cost New value for the second character edit cost.     */    public void setSecondCharEditCost(double cost) {        mSecondCharEditCost = cost;    }    /**     * Returns the number of consecutive insertions allowed.     * This will be zero if insertions are not allowed.     */    public int numConsecutiveInsertionsAllowed() {        return mNumConsecutiveInsertionsAllowed;    }    /**     * Returns <code>true</code> if this spell checker allows     * insertions.     *     * @return <code>true</code> if this spell checker allows     * insertions.     */    public boolean allowInsert() {        return mAllowInsert;    }    /**     * Returns <code>true</code> if this spell checker allows     * deletions.     *     * @return <code>true</code> if this spell checker allows     * deletions.     
*/
    public boolean allowDelete() {
        return mAllowDelete;
    }

    /**
     * Returns <code>true</code> if this spell checker allows
     * matches.
     *
     * @return <code>true</code> if this spell checker allows
     * matches.
     */
    public boolean allowMatch() {
        return mAllowMatch;
    }

    /**
     * Returns <code>true</code> if this spell checker allows
     * substitutions.
     *
     * @return <code>true</code> if this spell checker allows
     * substitutions.
     */
    public boolean allowSubstitute() {
        return mAllowSubstitute;
    }

    /**
     * Returns <code>true</code> if this spell checker allows
     * transpositions.
     *
     * @return <code>true</code> if this spell checker allows
     * transpositions.
     */
    public boolean allowTranspose() {
        return mAllowTranspose;
    }

    /**
     * Sets the edit distance for this spell checker to the
     * specified value.
     *
     * @param editDistance Edit distance to use for spell checking.
     */
    public void setEditDistance(WeightedEditDistance editDistance) {
        mEditDistance = editDistance;
    }

    /**
     * Sets a minimum character length for tokens to be eligible for
     * editing.
     *
     * @param tokenCharLength Minimum length, in characters, of tokens
     * that may be corrected.
     * @throws IllegalArgumentException If the character length
     * specified is less than <code>0</code>.
     */
    public void setMinimumTokenLengthToCorrect(int tokenCharLength) {
        // Accept any non-negative length; everything else is rejected below.
        if (tokenCharLength >= 0) {
            mMinimumTokenLengthToCorrect = tokenCharLength;
            return;
        }
        throw new IllegalArgumentException(
            "Minimum token length to correct must be >= 0."
            + " Found tokenCharLength=" + tokenCharLength);
    }

    /**
     * Returns the minimum length of token that will be corrected.
     * This value is initially <code>0</code>, but may be set
     * using {@link #setMinimumTokenLengthToCorrect(int)}.
     *
     * @return The minimum token length to correct.
     */
    public int minimumTokenLengthToCorrect() {
        return mMinimumTokenLengthToCorrect;
    }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -