📄 triecharseqcountertest.java
字号:
package com.aliasi.test.unit.lm;import com.aliasi.test.unit.BaseTestCase;import com.aliasi.io.BitInput;import com.aliasi.io.BitOutput;import com.aliasi.lm.BitTrieReader;import com.aliasi.lm.BitTrieWriter;import com.aliasi.lm.TrieCharSeqCounter;import java.io.ByteArrayInputStream;import java.io.ByteArrayOutputStream;import java.io.IOException;public class TrieCharSeqCounterTest extends BaseTestCase { public void testDecrementSubstrings() { TrieCharSeqCounter counter = new TrieCharSeqCounter(5); char[] cs = "abcdef".toCharArray(); counter.incrementSubstrings(cs,0,cs.length); counter.decrementSubstrings(cs,0,3); assertEquals(0,counter.count("abc".toCharArray(),0,3)); assertEquals(1,counter.count("abcd".toCharArray(),0,4)); } public void testDecrementSubstrings2() { TrieCharSeqCounter counter = new TrieCharSeqCounter(5); char[] cs = "abcdef".toCharArray(); counter.incrementSubstrings(cs,0,cs.length); counter.incrementSubstrings(cs,0,cs.length); counter.decrementSubstrings(cs,0,3); assertEquals(1,counter.count("abc".toCharArray(),0,3)); assertEquals(2,counter.count("abcd".toCharArray(),0,4)); } public void testDecrementSubstrings3() { TrieCharSeqCounter counter = new TrieCharSeqCounter(5); char[] cs = "abcdef".toCharArray(); counter.incrementSubstrings(cs,0,cs.length); char[] cs2 = "abxyz".toCharArray(); counter.incrementSubstrings(cs2,0,cs2.length); counter.decrementSubstrings(cs,0,3); assertEquals(1,counter.count("ab".toCharArray(),0,2)); assertEquals(0,counter.count("abc".toCharArray(),0,3)); assertEquals(1,counter.count("abx".toCharArray(),0,3)); assertEquals(1,counter.count("abcd".toCharArray(),0,4)); assertEquals(1,counter.count("abxy".toCharArray(),0,4)); } public void testDecrementSubstrings4() { TrieCharSeqCounter counter = new TrieCharSeqCounter(5); char[] cs = "abcdef".toCharArray(); counter.incrementSubstrings(cs,0,cs.length); char[] cs2 = "abxyz".toCharArray(); counter.incrementSubstrings(cs2,0,cs2.length); char[] cs3 = "abmnl".toCharArray(); counter.incrementSubstrings(cs3,0,cs3.length); counter.decrementSubstrings(cs,0,3); assertEquals(2,counter.count("ab".toCharArray(),0,2)); assertEquals(0,counter.count("abc".toCharArray(),0,3)); assertEquals(1,counter.count("abx".toCharArray(),0,3)); assertEquals(1,counter.count("abm".toCharArray(),0,3)); assertEquals(1,counter.count("abcd".toCharArray(),0,4)); assertEquals(1,counter.count("abxy".toCharArray(),0,4)); assertEquals(1,counter.count("bmnl".toCharArray(),0,4)); } public void testDecrementSubstrings5() { TrieCharSeqCounter counter = new TrieCharSeqCounter(5); char[] cs = "abcdef".toCharArray(); counter.incrementSubstrings(cs,0,cs.length); char[] cs2 = "abxyz".toCharArray(); counter.incrementSubstrings(cs2,0,cs2.length); char[] cs3 = "abmnl".toCharArray(); counter.incrementSubstrings(cs3,0,cs3.length); char[] cs4 = "ab123".toCharArray(); counter.incrementSubstrings(cs4,0,cs4.length); counter.decrementSubstrings(cs,0,3); assertEquals(3,counter.count("ab".toCharArray(),0,2)); assertEquals(0,counter.count("abc".toCharArray(),0,3)); assertEquals(1,counter.count("abx".toCharArray(),0,3)); assertEquals(1,counter.count("abm".toCharArray(),0,3)); assertEquals(1,counter.count("abcd".toCharArray(),0,4)); assertEquals(1,counter.count("abxy".toCharArray(),0,4)); assertEquals(1,counter.count("bmnl".toCharArray(),0,4)); assertEquals(1,counter.count("123".toCharArray(),0,3)); } public void testUniqueTotals() { TrieCharSeqCounter counter = new TrieCharSeqCounter(3); char[] cs = "abcde".toCharArray(); counter.incrementSubstrings(cs,0,cs.length); long[][] uniqueTotals = counter.uniqueTotalNGramCount(); assertEquals(1,counter.uniqueSequenceCount(0)); assertEquals(5,counter.totalSequenceCount(0)); assertEquals(1,uniqueTotals[0][0]); assertEquals(5,uniqueTotals[0][1]); assertEquals(5,counter.uniqueSequenceCount(1)); assertEquals(5,counter.totalSequenceCount(1)); assertEquals(5,uniqueTotals[1][0]); assertEquals(5,uniqueTotals[1][1]); assertEquals(4,counter.uniqueSequenceCount(2)); assertEquals(4,counter.totalSequenceCount(2)); assertEquals(4,uniqueTotals[2][0]); assertEquals(4,uniqueTotals[2][1]); assertEquals(3,counter.uniqueSequenceCount(3)); assertEquals(3,counter.totalSequenceCount(3)); assertEquals(3,uniqueTotals[3][0]); assertEquals(3,uniqueTotals[3][1]); } public void testExs() { try { new TrieCharSeqCounter(-1); fail(); } catch (IllegalArgumentException e) { assertTrue(true); } TrieCharSeqCounter counter = new TrieCharSeqCounter(5); try { counter.count(new char[4], -1, 3); fail(); } catch (IndexOutOfBoundsException e) { assertTrue(true); } try { counter.count(new char[4], 3, 2); fail(); } catch (IndexOutOfBoundsException e) { assertTrue(true); } try { counter.count(new char[4], 2, 5); fail(); } catch (IndexOutOfBoundsException e) { assertTrue(true); } try { counter.extensionCount(new char[4], 2, 5); fail(); } catch (IndexOutOfBoundsException e) { assertTrue(true); } try { counter.charactersFollowing(new char[4], 2, 5); fail(); } catch (IndexOutOfBoundsException e) { assertTrue(true); } try { counter.numCharactersFollowing(new char[4], 2, 5); fail(); } catch (IndexOutOfBoundsException e) { assertTrue(true); } } // COUNTS FOR: abracadabra // 11 // a 5 // bra 2 // cad 1 // dab 1 // b 2 // r 2 // a 2 // c 1 // cada 1 // dabr 1 // r 2 // a 2 // ca 1 // INCLUDES: Root, OneDtr, ThreeDtr, PAT1, PAT3, PAT4 // MISSES: TwoDtr, ArrayDtr, PAT2 public void testAbracadabra() { String abracadabra = "abracadabra"; TrieCharSeqCounter counter = new TrieCharSeqCounter(4); counter.incrementSubstrings(abracadabra); assertEquals(5,counter.uniqueSequenceCount(1)); assertEquals(7,counter.uniqueSequenceCount(2)); assertEquals(7,counter.uniqueSequenceCount(3)); assertEquals(7,counter.uniqueSequenceCount(4)); assertEquals(11,counter.totalSequenceCount(1)); assertEquals(10,counter.totalSequenceCount(2)); assertEquals(9,counter.totalSequenceCount(3)); assertEquals(8,counter.totalSequenceCount(4)); assertEqualsArray(new int[] { 5, 2, 2, 1, 1 }, counter.nGramFrequencies(1)); } public void testPruneCount() { TrieCharSeqCounter counter = new TrieCharSeqCounter(4); assertEquals(0,counter.count("a")); counter.incrementSubstrings("a"); assertEquals(1,counter.count("a")); assertEquals(1,counter.count("")); counter.prune(2); assertEquals(0,counter.count("a")); assertEquals(0,counter.count("")); counter.incrementSubstrings("a"); assertEquals(1,counter.count("a")); assertEquals(1,counter.count("")); counter.incrementSubstrings("ab"); assertEquals(3,counter.extensionCount("")); // a, ab, b assertEquals(2,counter.count("a")); assertEquals(1,counter.count("ab")); counter.incrementSubstrings("ac"); counter.incrementSubstrings("ac"); // assertEquals(7,counter.extensionCount("")); assertEquals(4,counter.count("a")); assertEquals(1,counter.count("ab")); assertEquals(2,counter.count("ac")); counter.prune(2); assertEquals(0,counter.count("ab")); assertEquals(4,counter.count("a")); assertEquals(0,counter.count("ab")); assertEquals(2,counter.count("ac")); } public void testPruneCount2() { TrieCharSeqCounter counter = new TrieCharSeqCounter(4); counter.incrementSubstrings("abc"); counter.incrementSubstrings("ab"); // one dtr node losing extension counter.prune(2); assertEquals(0,counter.count("abc")); assertEquals(2,counter.count("ab")); assertEquals(2,counter.count("a")); // one dtr node losing counter = new NGramProcessLM(4,128,4.0,1000000,100,0,Math.sqrt(2.0));self counter.prune(3); assertEquals(0,counter.count("ab")); } public void testPruneCount3() { // one dtr node prune TrieCharSeqCounter counter = new TrieCharSeqCounter(4); counter.incrementSubstrings("abc"); counter.incrementSubstrings("ab"); counter.prune(2); assertEquals(0,counter.count("abc")); assertEquals(2,counter.count("ab")); assertEquals(2,counter.count("a")); } public void testPruneCount4() { // two dtr node prune TrieCharSeqCounter counter = new TrieCharSeqCounter(4); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ac"); // two dtr node, losing first dtr counter.prune(2); assertEquals(0,counter.count("ac")); assertEquals(2,counter.count("ab")); counter = new TrieCharSeqCounter(4); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ac"); counter.incrementSubstrings("ac"); counter.prune(2); assertEquals(0,counter.count("ab")); assertEquals(2,counter.count("ac")); } public void testPruneCount5() { // three dtr node prune TrieCharSeqCounter counter = new TrieCharSeqCounter(4); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ac"); counter.incrementSubstrings("ac"); counter.incrementSubstrings("ad"); // lose {3} counter.prune(2); assertEquals(2,counter.count("ab")); assertEquals(2,counter.count("ac")); assertEquals(0,counter.count("ad")); counter = new TrieCharSeqCounter(4); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ab"); counter.incrementSubstrings("ac"); counter.incrementSubstrings("ad"); counter.incrementSubstrings("ad"); // lose {2} counter.prune(2); assertEquals(2,counter.count("ab")); assertEquals(0,counter.count("ac")); counter = new TrieCharSeqCounter(4);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -