⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 approxdictionarychunkertest.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
字号:
package com.aliasi.test.unit.dict;import com.aliasi.dict.DictionaryEntry;import com.aliasi.dict.TrieDictionary;import com.aliasi.dict.ApproxDictionaryChunker;import com.aliasi.spell.FixedWeightEditDistance;import com.aliasi.spell.WeightedEditDistance;import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;import com.aliasi.tokenizer.TokenizerFactory;import com.aliasi.chunk.Chunker;import com.aliasi.chunk.ChunkFactory;import com.aliasi.chunk.Chunking;import com.aliasi.chunk.ChunkingImpl;import com.aliasi.test.unit.BaseTestCase;public class ApproxDictionaryChunkerTest extends BaseTestCase {    public void testGeneChunker() {        TrieDictionary trie = new TrieDictionary();        trie.addEntry(new DictionaryEntry("SERPINA3","GENE"));        TokenizerFactory tokenizer = new IndoEuropeanTokenizerFactory();        ApproxDictionaryChunker fuzzyDictMatcher =            new ApproxDictionaryChunker(trie,                                        tokenizer,                                        ApproxDictionaryChunker.TT_DISTANCE,                                        500);        String test1 = "serpin";        Chunking chunking = fuzzyDictMatcher.chunk(test1);        ChunkingImpl chunkingExpected = new ChunkingImpl(test1);        chunkingExpected.add(ChunkFactory.createChunk(0,6,"GENE",260));        assertEquals(chunkingExpected,chunking);        // System.out.println("Found Chunks=" + chunking.chunkSet());    }    public void testDissimilarity() {        TrieDictionary dict = new TrieDictionary();        DictionaryEntry entry1            = new DictionaryEntry("ab","X");        DictionaryEntry entry2            = new DictionaryEntry("ab","Y");        // new DictionaryEntry("ab cde","X");        DictionaryEntry entry3            = new DictionaryEntry("cd","Z");        dict.addEntry(entry1);        dict.addEntry(entry2);        dict.addEntry(entry3);        TokenizerFactory tokenizerFactory            = new IndoEuropeanTokenizerFactory();        double matchWeight = 0;        double deleteWeight = -1;        double insertWeight = -1;        double substituteWeight = -1;        double transposeWeight = Double.NEGATIVE_INFINITY;        WeightedEditDistance editDistance            = new FixedWeightEditDistance(matchWeight,deleteWeight,                                          insertWeight,substituteWeight,                                          transposeWeight);        ApproxDictionaryChunker chunker            = new ApproxDictionaryChunker(dict,tokenizerFactory,                                          editDistance,2);        String test1 = "ab";        ChunkingImpl chunking = new ChunkingImpl(test1);        chunking.add(ChunkFactory.createChunk(0,2,"X",0.0));        chunking.add(ChunkFactory.createChunk(0,2,"Y",0.0));        chunking.add(ChunkFactory.createChunk(0,2,"Z",2.0));        test(test1,chunker,chunking);        String test2 = " ab xyzwf ";        ChunkingImpl chunking2 = new ChunkingImpl(test2);        chunking2.add(ChunkFactory.createChunk(1,3,"X",0.0));        chunking2.add(ChunkFactory.createChunk(1,3,"Y",0.0));        chunking2.add(ChunkFactory.createChunk(1,3,"Z",2.0));        test(test2,chunker,chunking2);        String test3 = " a c ";        ChunkingImpl chunking3 = new ChunkingImpl(test3);        chunking3.add(ChunkFactory.createChunk(1,2,"X",1.0));        chunking3.add(ChunkFactory.createChunk(1,2,"Y",1.0));        chunking3.add(ChunkFactory.createChunk(1,2,"Z",2.0));        chunking3.add(ChunkFactory.createChunk(3,4,"X",2.0));        chunking3.add(ChunkFactory.createChunk(3,4,"Y",2.0));        chunking3.add(ChunkFactory.createChunk(3,4,"Z",1.0));        chunking3.add(ChunkFactory.createChunk(1,4,"X",2.0));        chunking3.add(ChunkFactory.createChunk(1,4,"Y",2.0));        test(test3,chunker,chunking3);    }    public void testSimilarity() {        TrieDictionary dict = new TrieDictionary();        DictionaryEntry entry1            = new DictionaryEntry("ab","X");        DictionaryEntry entry2            = new DictionaryEntry("ab","Y");        // new DictionaryEntry("ab cde","X");        DictionaryEntry entry3            = new DictionaryEntry("cd","Z");        dict.addEntry(entry1);        dict.addEntry(entry2);        dict.addEntry(entry3);        TokenizerFactory tokenizerFactory            = new IndoEuropeanTokenizerFactory();        double matchWeight = 0;        double deleteWeight = -1;        double insertWeight = -1;        double substituteWeight = -1;        double transposeWeight = Double.NEGATIVE_INFINITY;        WeightedEditDistance editDistance            = new FixedWeightEditDistance(matchWeight,deleteWeight,                                          insertWeight,substituteWeight,                                          transposeWeight);        ApproxDictionaryChunker chunker            = new ApproxDictionaryChunker(dict,tokenizerFactory,                                          editDistance,2);        String test1 = "ab";        ChunkingImpl chunking = new ChunkingImpl(test1);        chunking.add(ChunkFactory.createChunk(0,2,"X",0.0));        chunking.add(ChunkFactory.createChunk(0,2,"Y",0.0));        chunking.add(ChunkFactory.createChunk(0,2,"Z",2.0));        test(test1,chunker,chunking);        String test2 = " ab xyzwf ";        ChunkingImpl chunking2 = new ChunkingImpl(test2);        chunking2.add(ChunkFactory.createChunk(1,3,"X",0.0));        chunking2.add(ChunkFactory.createChunk(1,3,"Y",0.0));        chunking2.add(ChunkFactory.createChunk(1,3,"Z",2.0));        test(test2,chunker,chunking2);        String test3 = " a c ";        ChunkingImpl chunking3 = new ChunkingImpl(test3);        chunking3.add(ChunkFactory.createChunk(1,2,"X",1.0));        chunking3.add(ChunkFactory.createChunk(1,2,"Y",1.0));        chunking3.add(ChunkFactory.createChunk(1,2,"Z",2.0));        chunking3.add(ChunkFactory.createChunk(3,4,"X",2.0));        chunking3.add(ChunkFactory.createChunk(3,4,"Y",2.0));        chunking3.add(ChunkFactory.createChunk(3,4,"Z",1.0));        chunking3.add(ChunkFactory.createChunk(1,4,"X",2.0));        chunking3.add(ChunkFactory.createChunk(1,4,"Y",2.0));        test(test3,chunker,chunking3);    }    /* NO TRANSPOSE YET       public void testThree() {       TrieDictionary dict = new TrieDictionary();       DictionaryEntry entry1       = new DictionaryEntry("abcde","X");       DictionaryEntry entry2       = new DictionaryEntry("fgh","Y");       dict.addEntry(entry1);       dict.addEntry(entry2);       TokenizerFactory tokenizerFactory       = new IndoEuropeanTokenizerFactory();       double matchWeight = 0;       double deleteWeight = 1.1;       double insertWeight = 1.01;       double substituteWeight = 1.001;       double transposeWeight = 1.0001;       WeightedEditDistance editDistance       = new FixedWeightEditDistance(matchWeight,deleteWeight,       insertWeight,substituteWeight,       transposeWeight);       ApproxDictionaryChunker chunker       = new ApproxDictionaryChunker(dict,tokenizerFactory,       editDistance,1.5);       String test1 = " abdce ";       ChunkingImpl chunking = new ChunkingImpl(test1);       chunking.add(ChunkFactory.createChunk(1,6,"X",1.0001));       test(test1,chunker,chunking);       }    */    void test(String test, Chunker chunker, Chunking expectedChunking) {        char[] cs = test.toCharArray();        Chunking chunking = chunker.chunk(cs,0,cs.length);        // System.out.println("Found Chunks=" + chunking.chunkSet());        // System.out.println("    Expected=" + expectedChunking.chunkSet());        assertEquals(expectedChunking,chunking);    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -