⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 triecharseqcountertest.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ad");        // lose {1}        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(2,counter.count("ac"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ad");        // lose {1,2}        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(0,counter.count("ac"));        assertEquals(2,counter.count("ad"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        // lose {1,3}        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(2,counter.count("ac"));        assertEquals(0,counter.count("ad"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        // lose {2,3}        counter.prune(2);        assertEquals(2,counter.count("ab"));        assertEquals(0,counter.count("ac"));        assertEquals(0,counter.count("ad"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        // lose {1,2,3}        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(0,counter.count("ac"));        assertEquals(0,counter.count("ad"));    }    public void testPruneCount6() {        // array dtr        TrieCharSeqCounter counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ae");        counter.incrementSubstrings("ae");        // lose first in array        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(2,counter.count("ac"));        assertEquals(2,counter.count("ad"));        assertEquals(2,counter.count("ae"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ae");        counter.incrementSubstrings("ae");        // lose second        counter.prune(2);        assertEquals(2,counter.count("ab"));        assertEquals(0,counter.count("ac"));        assertEquals(2,counter.count("ad"));        assertEquals(2,counter.count("ae"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ae");        // lose last        counter.prune(2);        assertEquals(2,counter.count("ab"));        assertEquals(2,counter.count("ac"));        assertEquals(2,counter.count("ad"));        assertEquals(0,counter.count("ae"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ae");        // lose first and last        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(2,counter.count("ac"));        assertEquals(2,counter.count("ad"));        assertEquals(0,counter.count("ae"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ab");        counter.incrementSubstrings("ac");        counter.incrementSubstrings("ad");        counter.incrementSubstrings("ae");        counter.incrementSubstrings("ae");        // lose two middle        counter.prune(2);        assertEquals(2,counter.count("ab"));        assertEquals(0,counter.count("ac"));        assertEquals(0,counter.count("ad"));        assertEquals(2,counter.count("ae"));    }    public void testPruneCount7() {        // PAT Dtr        TrieCharSeqCounter counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("ab");        counter.incrementSubstrings("cd");        counter.incrementSubstrings("cd");        // whole 1pat        counter.prune(2);        assertEquals(0,counter.count("ab"));        assertEquals(0,counter.count("a"));        assertEquals(2,counter.count("cd"));        assertEquals(2,counter.count("c"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("abc");        counter.incrementSubstrings("def");        counter.incrementSubstrings("def");        // whole 2pat        counter.prune(2);        assertEquals(0,counter.count("abc"));        assertEquals(0,counter.count("a"));        assertEquals(2,counter.count("def"));        assertEquals(2,counter.count("de"));        counter = new TrieCharSeqCounter(4);        counter.incrementSubstrings("abcd");        counter.incrementSubstrings("efgh");        counter.incrementSubstrings("efgh");        // whole 3pat        counter.prune(2);        assertEquals(0,counter.count("abcd"));        assertEquals(0,counter.count("a"));        assertEquals(2,counter.count("efgh"));        assertEquals(2,counter.count("efg"));        assertEquals(2,counter.count("ef"));        assertEquals(2,counter.count("e"));        counter = new TrieCharSeqCounter(6);        counter.incrementSubstrings("abcde");        counter.incrementSubstrings("fghij");        counter.incrementSubstrings("fghij");        // whole 4pat        counter.prune(2);        assertEquals(0,counter.count("abcde"));        assertEquals(0,counter.count("a"));        assertEquals(2,counter.count("fghij"));        assertEquals(2,counter.count("fghi"));        assertEquals(2,counter.count("fgh"));        assertEquals(2,counter.count("fg"));        assertEquals(2,counter.count("f"));        counter = new TrieCharSeqCounter(7);        counter.incrementSubstrings("abcdef");        counter.incrementSubstrings("ghijkl");        counter.incrementSubstrings("ghijkl");        // array pat        counter.prune(2);        assertEquals(0,counter.count("abcdef"));        assertEquals(0,counter.count("a"));        assertEquals(2,counter.count("ghijkl"));        assertEquals(2,counter.count("ghijk"));        assertEquals(2,counter.count("ghij"));        assertEquals(2,counter.count("ghi"));        assertEquals(2,counter.count("gh"));        assertEquals(2,counter.count("g"));    }    public void testReadWrite() throws IOException {        TrieCharSeqCounter c1 = new TrieCharSeqCounter(3);    c1.incrementSubstrings("abcd");    assertEqualCounts(c1,new String[] { "a", "b", "c", "x",                        "ab", "bc", "xy", "ax", "xa",                        "abc", "bxa" },              3);    c1.incrementSubstrings("aef");    c1.incrementSubstrings("bef");    c1.incrementSubstrings("cde");    assertEqualCounts(c1,new String[] { "a", "b", "c", "x",                        "ab", "bc", "xy", "ax", "xa",                        "abc", "bxa" },              3);    c1.incrementSubstrings("abracadabra");    assertEqualCounts(c1,new String[] { "abr", "br", "cad", "db" },              3);    }    public void assertEqualCounts(TrieCharSeqCounter c,                  String[] ss,                  int maxNGram) throws IOException {    assertCopy(c);    // top level tests    TrieCharSeqCounter cId = writeRead(c);    assertEquals(c.uniqueSequenceCount(),cId.uniqueSequenceCount());    assertEquals(c.totalSequenceCount(),cId.totalSequenceCount());            TrieCharSeqCounter c2 = writeRead(c);    for (int i = 0; i < ss.length; ++i) {        String s = ss[i];        if (s.length() <= maxNGram) {        assertEquals(s,c.count(s), c2.count(s));        if (s.length() < maxNGram)             assertEquals("ngram=" + maxNGram                 + " extensionCount(" + s + ")",                 c.extensionCount(s), c2.extensionCount(s));        } else {        assertEquals(0,c2.count(s));        assertEquals(0,c2.extensionCount(s));        }    }    }    public TrieCharSeqCounter writeRead(TrieCharSeqCounter counter)    throws IOException {    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();    counter.writeTo(bytesOut);    byte[] bytes = bytesOut.toByteArray();    ByteArrayInputStream bytesIn = new ByteArrayInputStream(bytes);    TrieCharSeqCounter counter2         = TrieCharSeqCounter.readFrom(bytesIn);    return counter2;    }    public void assertCopy(TrieCharSeqCounter counter) throws IOException {    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();    BitOutput bitsOut = new BitOutput(bytesOut);    BitTrieWriter writer = new BitTrieWriter(bitsOut);    TrieCharSeqCounter.writeCounter(counter,writer,128);    bitsOut.flush();    byte[] bytes = bytesOut.toByteArray();    ByteArrayInputStream bytesIn = new ByteArrayInputStream(bytes);    BitInput bitsIn = new BitInput(bytesIn);    BitTrieReader reader = new BitTrieReader(bitsIn);    ByteArrayOutputStream bytesOut2 = new ByteArrayOutputStream();    BitOutput bitsOut2 = new BitOutput(bytesOut2);    BitTrieWriter writer2 = new BitTrieWriter(bitsOut2);    BitTrieWriter.copy(reader,writer2);    bitsOut2.flush();    byte[] bytes2 = bytesOut2.toByteArray();    // System.out.println("\nRound trip it");    ByteArrayInputStream bytesIn3 = new ByteArrayInputStream(bytes);    BitInput bitsIn3 = new BitInput(bytesIn3);    BitTrieReader reader3 = new BitTrieReader(bitsIn3);    ByteArrayOutputStream bytesOut3 = new ByteArrayOutputStream();    BitOutput bitsOut3 = new BitOutput(bytesOut3);    BitTrieWriter writer3 = new BitTrieWriter(bitsOut3);    BitTrieWriter.copy(reader3,writer3);    bitsOut3.flush();    byte[] bytes3 = bytesOut3.toByteArray();    // System.out.println("bytes.length=" + bytes.length);    // System.out.println("bytes2.length=" + bytes2.length);    // System.out.println("bytes3.length=" + bytes3.length);    assertEqualsBytes(bytes,bytes3);    assertEqualsBytes(bytes,bytes2);    }    void assertEqualsBytes(byte[] bytes, byte[] bytes2) {    assertEquals("length",bytes.length,bytes2.length);    for (int i = 0; i < bytes2.length; ++i)        assertEquals(bytes[i],bytes2[i]);    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -