⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compiledspellcheckertest.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
字号:
package com.aliasi.test.unit.spell;

import com.aliasi.lm.NGramProcessLM;
import com.aliasi.lm.CompiledNGramProcessLM;

import com.aliasi.spell.CompiledSpellChecker;
import com.aliasi.spell.FixedWeightEditDistance;

import com.aliasi.test.unit.BaseTestCase;

import com.aliasi.util.AbstractExternalizable;

import java.util.HashSet;

import java.io.IOException;

/**
 * Unit tests for {@link CompiledSpellChecker#didYouMean(String)}.
 *
 * <p>Each test trains a 5-gram {@link NGramProcessLM} on a single
 * repeated phrase, compiles it through
 * {@link AbstractExternalizable#compile}, wraps the compiled model in
 * a spell checker and then compares the suggested correction for a
 * query against a fixed expected string.
 */
public class CompiledSpellCheckerTest extends BaseTestCase {

    /**
     * Checks that the misspelled final token of a three-token query
     * is corrected while the minimum token length to correct is set
     * to 2 and then to 3.
     */
    public void testShortTokenMidQuery()
        throws ClassNotFoundException, IOException {

        CompiledNGramProcessLM compiledModel
            = trainAndCompile(" FINANCE ACT SCHEDULE ", 9999);
        FixedWeightEditDistance distance
            = new FixedWeightEditDistance(0, -1, -1, -1, -1);

        HashSet vocabulary = new HashSet();
        vocabulary.add("FINANCE");
        vocabulary.add("ACT");
        vocabulary.add("SCHEDULE");

        CompiledSpellChecker checker
            = new CompiledSpellChecker(compiledModel, distance, vocabulary);
        checker.setFirstCharEditCost(-1);
        checker.setSecondCharEditCost(-1);
        checker.setNBest(32);
        checker.setKnownTokenEditCost(-1);
        checker.setNumConsecutiveInsertionsAllowed(1);
        checker.setAllowDelete(true);
        checker.setAllowInsert(true);
        checker.setAllowMatch(true);
        checker.setAllowSubstitute(true);
        checker.setAllowTranspose(true);

        // The same query must yield the same correction for both
        // minimum-length settings.
        int[] minimumLengths = { 2, 3 };
        for (int k = 0; k < minimumLengths.length; ++k) {
            checker.setMinimumTokenLengthToCorrect(minimumLengths[k]);
            assertCorrection(checker,
                             "FINANCE ACT SCEDULE",
                             "FINANCE ACT SCHEDULE");
        }
    }

    /**
     * Checks that the two-character query "ac" comes back unchanged
     * when the only known token is "ab" and the minimum token length
     * to correct is 2.
     */
    public void testShortToken()
        throws ClassNotFoundException, IOException {

        CompiledNGramProcessLM compiledModel = trainAndCompile(" ab ", 999);
        FixedWeightEditDistance distance
            = new FixedWeightEditDistance(0, -1, -1, -1, -1);

        HashSet vocabulary = new HashSet();
        vocabulary.add("ab");

        CompiledSpellChecker checker
            = new CompiledSpellChecker(compiledModel, distance, vocabulary);
        checker.setMinimumTokenLengthToCorrect(2);

        assertCorrection(checker, "ac", "ac");
    }

    /**
     * Runs a sequence of correction checks against a checker whose
     * only known token is "Smith", reconfiguring the checker between
     * groups (do-not-edit tokens, minimum token length, first-character
     * edit cost), and finishes with a checker that has an empty token
     * set.
     */
    public void testTwo()
        throws ClassNotFoundException, IOException {

        CompiledNGramProcessLM compiledModel
            = trainAndCompile(" Smith ", 9999);
        FixedWeightEditDistance distance
            = new FixedWeightEditDistance(0, -2, -2, -2, -2);

        HashSet vocabulary = new HashSet();
        vocabulary.add("Smith");

        CompiledSpellChecker checker
            = new CompiledSpellChecker(compiledModel, distance, vocabulary);

        // Default configuration: each row is { query, expected }.
        assertCorrections(checker, new String[][] {
                { "Smythe", "Smith" },
                { "mith", "Smith" },
                { "Tmith", "Smith" },
                { "mSith", "Smith" },
                { "Stith", "Smith" },
                { "Skth", "Smith" },
                { "mith Smith", "Smith Smith" },
                { "Smith mith", "Smith Smith" },
                { "SmithSmith", "Smith Smith" },
                { "Smi th", "Smith" }
            });

        // With "mith" registered as do-not-edit it is expected back verbatim.
        HashSet protectedTokens = new HashSet();
        protectedTokens.add("mith");
        checker.setDoNotEditTokens(protectedTokens);
        assertCorrections(checker, new String[][] {
                { "mith", "mith" },
                { "Smith mith", "Smith mith" }
            });

        // Minimum length 3: the two-character tokens are expected unchanged.
        checker.setMinimumTokenLengthToCorrect(3);
        assertCorrection(checker, "Sm th", "Sm th");

        // Minimum length 1: the same query is expected to merge to "Smith".
        checker.setMinimumTokenLengthToCorrect(1);
        assertCorrection(checker, "Sm th", "Smith");

        // Protect both fragments by growing the same set instance and
        // installing it again.
        protectedTokens.add("Sm");
        protectedTokens.add("th");
        checker.setDoNotEditTokens(protectedTokens);
        assertCorrection(checker, "Sm th", "Sm th");

        // Clear the do-not-edit set and set the first-character edit
        // cost to -1000.
        checker.setDoNotEditTokens(java.util.Collections.EMPTY_SET);
        checker.setFirstCharEditCost(-1000);
        assertCorrections(checker, new String[][] {
                { "mith", "mith" },
                { "Tmith", "Tmith" }
            });
        // Disabled in the original test: query "mSith", expected "mith".

        // A checker built with no tokens at all.
        CompiledSpellChecker tokenlessChecker
            = new CompiledSpellChecker(compiledModel, distance, new HashSet());
        assertCorrection(tokenlessChecker, "Smth", "Smth");
    }

    /**
     * Asserts that the checker's suggestion for a query equals the
     * expected string.
     *
     * @param sc Spell checker under test.
     * @param query Text passed to <code>didYouMean</code>.
     * @param expectedCorrection Suggestion the checker must return.
     */
    void assertCorrection(CompiledSpellChecker sc,
                          String query, String expectedCorrection) {
        // For debugging, displayPs(...) can be called here on the
        // query, the actual correction and the expected correction.
        assertEquals(expectedCorrection, sc.didYouMean(query));
    }

    /**
     * Prints the value the checker's language model returns from
     * <code>log2Estimate</code> for the given text wrapped in single
     * spaces.
     *
     * @param msg Label printed in front of the estimate.
     * @param query Text to score.
     * @param sc Spell checker whose language model is used.
     */
    void displayPs(String msg, String query, CompiledSpellChecker sc) {
        CompiledNGramProcessLM model = sc.languageModel();
        String padded = " " + query + " ";
        System.out.println(msg + " "
                           + " log2 P(" + query + ")="
                           + model.log2Estimate(padded));
    }

    /**
     * Runs {@link #assertCorrection} on every row of a table, in order.
     *
     * @param sc Spell checker under test.
     * @param cases Rows of the form <code>{ query, expected }</code>.
     */
    private void assertCorrections(CompiledSpellChecker sc,
                                   String[][] cases) {
        for (int row = 0; row < cases.length; ++row) {
            assertCorrection(sc, cases[row][0], cases[row][1]);
        }
    }

    /**
     * Trains a fresh 5-gram process language model on the same text a
     * given number of times and returns its compiled form.
     *
     * @param text Training text.
     * @param repetitions Number of times <code>text</code> is trained.
     * @return The compiled language model.
     * @throws ClassNotFoundException If thrown while compiling the model.
     * @throws IOException If thrown while compiling the model.
     */
    private static CompiledNGramProcessLM trainAndCompile(String text,
                                                          int repetitions)
        throws ClassNotFoundException, IOException {

        NGramProcessLM model = new NGramProcessLM(5);
        int remaining = repetitions;
        while (remaining > 0) {
            model.train(text);
            --remaining;
        }
        return (CompiledNGramProcessLM) AbstractExternalizable.compile(model);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -