⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charlmhmmchunkertest.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
字号:
package com.aliasi.test.unit.chunk;import com.aliasi.chunk.Chunk;import com.aliasi.chunk.Chunker;import com.aliasi.chunk.ChunkFactory;import com.aliasi.chunk.Chunking;import com.aliasi.chunk.ChunkingImpl;import com.aliasi.chunk.CharLmHmmChunker;import com.aliasi.hmm.AbstractHmmEstimator;import com.aliasi.hmm.HmmCharLmEstimator;import com.aliasi.test.unit.BaseTestCase;import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;import com.aliasi.tokenizer.TokenizerFactory;import com.aliasi.util.AbstractExternalizable;import java.io.IOException;public class CharLmHmmChunkerTest extends BaseTestCase {    public void testDictionaryTraining() {        TokenizerFactory factory            = new IndoEuropeanTokenizerFactory();        AbstractHmmEstimator estimator            = new HmmCharLmEstimator(5,128,5.0,false); // don't smooth HMM directly        CharLmHmmChunker chunkerEstimator            = new CharLmHmmChunker(factory,estimator,true); // do smooth chunker        chunkerEstimator.trainDictionary("John Smith","PERSON");        chunkerEstimator.trainDictionary("Washington, D.C.","LOCATION");        chunkerEstimator.trainDictionary("Alias-i","ORGANIZATION");        // prints out start/end/transitions        // System.out.println("chunkerEstimator\n" + chunkerEstimator.toString());        Chunking chunking            = chunkerEstimator.chunk("John Smith");        assertEquals(1,chunking.chunkSet().size());        Chunk chunk = chunking.chunkSet().iterator().next();        assertEquals(0,chunk.start());        assertEquals(10,chunk.end());        assertEquals("PERSON",chunk.type());    }    public void testDefectiveTraining() {        TokenizerFactory factory            = new IndoEuropeanTokenizerFactory();        AbstractHmmEstimator estimator            = new HmmCharLmEstimator(5,128,5.0);        CharLmHmmChunker chunkerEstimator            = new CharLmHmmChunker(factory,estimator);        assertEquals(new ChunkingImpl("hello"),                     chunkerEstimator.chunk("hello"));        assertChunking(chunkerEstimator,                       new ChunkingImpl(""));        assertChunking(chunkerEstimator,                       new ChunkingImpl("hello"));        assertChunking(chunkerEstimator,                       new ChunkingImpl("John Smith lives in Washington."));    }    public void testDefectiveTags() {        TokenizerFactory factory            = new IndoEuropeanTokenizerFactory();        AbstractHmmEstimator estimator            = new HmmCharLmEstimator(5,128,5.0);        CharLmHmmChunker chunkerEstimator            = new CharLmHmmChunker(factory,estimator);        String text1 = "John Smith lives in Washington.";        //              0123456789012345678901234567890        //              0         1         2         3        ChunkingImpl chunking1 = new ChunkingImpl(text1);        Chunk chunk11 = ChunkFactory.createChunk(0,10,"PER");        Chunk chunk12 = ChunkFactory.createChunk(20,30,"LOC");        chunking1.add(chunk11);        chunking1.add(chunk12);        // no middle tag or whole tag given for PERSON        // no begin, middle end tag for LOCATION        for (int i = 0; i < 10; ++i)            chunkerEstimator.handle(chunking1);        assertChunking(chunkerEstimator,chunking1);    }    public void testDefectiveTags2() {        TokenizerFactory factory            = new IndoEuropeanTokenizerFactory();        AbstractHmmEstimator estimator            = new HmmCharLmEstimator(5,128,5.0);        CharLmHmmChunker chunkerEstimator            = new CharLmHmmChunker(factory,estimator);        String text1 = "John";        //              012345        //              0        ChunkingImpl chunking1 = new ChunkingImpl(text1);        Chunk chunk11 = ChunkFactory.createChunk(0,4,"PER");        chunking1.add(chunk11);        // no middle tag or whole tag given for PERSON        // no begin, middle end tag for LOCATION        for (int i = 0; i < 10; ++i)            chunkerEstimator.handle(chunking1);        assertChunking(chunkerEstimator,chunking1);    }    public void testChunkHandler() {        TokenizerFactory factory            = new IndoEuropeanTokenizerFactory();        AbstractHmmEstimator estimator            = new HmmCharLmEstimator(5,128,5.0);        CharLmHmmChunker chunkerEstimator            = new CharLmHmmChunker(factory,estimator);        String text1 = "John J. Smith lives in Washington.";        //              0123456789012345678901234567890123        //              0         1         2         3        ChunkingImpl chunking1 = new ChunkingImpl(text1);        Chunk chunk11 = ChunkFactory.createChunk(0,13,"PER");        Chunk chunk12 = ChunkFactory.createChunk(23,33,"LOC");        chunking1.add(chunk11);        chunking1.add(chunk12);        for (int i = 0; i < 10; ++i)            chunkerEstimator.handle(chunking1);        assertChunking(chunkerEstimator,chunking1);        String text2 = "Washington is near John";        //              01234567890123456789012        //              0         1         2        ChunkingImpl chunking2 = new ChunkingImpl(text2);        Chunk chunk21 = ChunkFactory.createChunk(0,10,"LOC");        Chunk chunk22 = ChunkFactory.createChunk(19,23,"PER");        chunking2.add(chunk21);        chunking2.add(chunk22);        for (int i = 0; i < 10; ++i)            chunkerEstimator.handle(chunking2);        assertChunking(chunkerEstimator,chunking2);        String text3 = "Washington D.C. is near Frank Jones.";        //              012345678901234567890123456789012345        //              0         1         2         3        ChunkingImpl chunking3 = new ChunkingImpl(text3);        Chunk chunk31 = ChunkFactory.createChunk(0,15,"LOC");        Chunk chunk32 = ChunkFactory.createChunk(24,36,"PER");        chunking3.add(chunk31);        chunking3.add(chunk32);        for (int i = 0; i < 10; ++i)            chunkerEstimator.handle(chunking3);        assertChunking(chunkerEstimator,chunking3);    }    void assertChunking(CharLmHmmChunker chunkerEstimator,                        Chunking expectedChunking) {        CharSequence text = expectedChunking.charSequence();        Chunking chunking = chunkerEstimator.chunk(text);        assertEqualsChunking(expectedChunking,chunking);        try {            Chunker chunker                = (Chunker) AbstractExternalizable.compile(chunkerEstimator);            Chunking chunking2 = chunker.chunk(text);            assertEqualsChunking(expectedChunking,chunking2);        } catch (IOException e) {            fail(e.toString());        } catch (ClassNotFoundException e) {            fail(e.toString());        }    }    void assertEqualsChunking(Chunking expectedChunking, Chunking chunking) {        assertEquals(expectedChunking.charSequence(),                     chunking.charSequence());        assertEquals(expectedChunking.chunkSet(),                     chunking.chunkSet());    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -