📄 node.java
字号:
} public long contextCount() { return mDaughter1.count() + mDaughter2.count() + mDaughter3.count(); } public Node getDtr(char c) { return c == mC1 ? mDaughter1 : ( c == mC2 ? mDaughter2 : ( c == mC3 ? mDaughter3 : null ) ); } char[] chars() { return new char[] { mC1, mC2, mC3 }; } Node[] dtrs() { return new Node[] { mDaughter1, mDaughter2, mDaughter3 }; } public int numDtrs() { return 3; }}abstract class ArrayDtrNode extends AbstractDtrNode { char[] mCs; Node[] mDtrs; public ArrayDtrNode(char[] cs, Node[] daughters) { mCs = cs; mDtrs = daughters; } char[] chars() { return mCs; } Node[] dtrs() { return mDtrs; } public int numDtrs() { return mDtrs.length; }}abstract class AbstractPATNode extends AbstractNode { abstract char[] chars(); abstract int length(); public Node prune(long minCount) { return count() < minCount ? null : this; } public long count(char[] cs, int start, int end) { return match(cs,start,end) ? count() : 0; } public long contextCount(char[] cs, int start, int end) { return properSubMatch(cs,start,end) ? count() : 0; } boolean match(char[] cs, int start, int end) { if ((end-start) > length()) return false; return stringMatch(cs,start,end); } boolean properSubMatch(char[] cs, int start, int end) { if ((end-start) >= length()) return false; return stringMatch(cs,start,end); } abstract boolean stringMatch(char[] cs, int start, int end); public void addDtrNGramCounts(long[][] uniqueTotalCounts, int depth) { int patDepth = chars().length; long count = count(); for (int i = 0; i < patDepth; ++i) { uniqueTotalCounts[depth+i][0] += 1; uniqueTotalCounts[depth+i][1] += count; } } public void topNGramsDtrs(NBestCounter counter, char[] csAccum, int level, int dtrLevel) { char[] patCs = chars(); if (dtrLevel > patCs.length) return; for (int i = 0; i < dtrLevel; ++i) csAccum[level+i] = patCs[i]; counter.put(csAccum,level+dtrLevel,count()); } public void addDtrCounts(List accum, int nGramOrder) { char[] patCs = chars(); if (nGramOrder < patCs.length) accum.add(new Long(count())); } public int numOutcomes(char[] cs, int start, int end) { return properSubMatch(cs,start,end) ? 1 : 0; } public Node increment(char[] cs, int start, int end) { return increment(cs,start,end,1); } public Node increment(char[] cs, int start, int end, int incr) { char[] patCs = chars(); long count = count(); if ((patCs.length == (end-start)) && match(cs,start,end)) { return NodeFactory.createNode(patCs,0,patCs.length,count+incr); } Node tailNode = NodeFactory.createNode(patCs,1,patCs.length,count); // can unfold OneDtrNode's increment into here; // eventually becomes loop of matching w. one-dtr nodes // until a split and a two-dtr node is created Node newNode = NodeFactory.createNode(patCs[0],tailNode,count); return newNode.increment(cs,start,end,incr); } public Node decrement(char[] cs, int start, int end) { if (end == start) return decrement(); char[] patCs = chars(); long count = count(); Node tailNode = NodeFactory.createNode(patCs,1,patCs.length,count); // can unfold OneDtrNode's increment into here; // eventually becomes loop of matching w. one-dtr nodes // until a split and a two-dtr node is created Node newNode = NodeFactory.createNode(patCs[0],tailNode,count); return newNode.decrement(cs,start,end); } public Node decrement(char[] cs, int start, int end, int decr) { if (end == start) return decrement(decr); char[] patCs = chars(); long count = count(); Node tailNode = NodeFactory.createNode(patCs,1,patCs.length,count); // can unfold OneDtrNode's increment into here; // eventually becomes loop of matching w. one-dtr nodes // until a split and a two-dtr node is created Node newNode = NodeFactory.createNode(patCs[0],tailNode,count); return newNode.decrement(cs,start,end,decr); } public Node decrement() { long count = count(); if (count == 0L) return this; char[] patCs = chars(); Node tailNode = NodeFactory.createNode(patCs,1,patCs.length,count); return NodeFactory.createNode(patCs[0],tailNode,count-1); } public Node decrement(int decr) { long count = count(); long decrL = Math.min(count,decr); // don't go below 0 char[] patCs = chars(); Node tailNode = NodeFactory.createNode(patCs,1,patCs.length,count-decrL); return NodeFactory.createNode(patCs[0],tailNode,count-decrL); } public long size() { return chars().length + 1; } public char[] outcomes(char[] cs, int start, int end) { char[] patCs = chars(); for (int i = 0; i < patCs.length; ++i) { if (start+i == end) return new char[] { patCs[i] }; if (patCs[i] != cs[start+i]) return com.aliasi.util.Arrays.EMPTY_CHAR_ARRAY; } return com.aliasi.util.Arrays.EMPTY_CHAR_ARRAY; // ran off end of PAT } public long dtrUniqueNGramCount(int dtrLevel) { return dtrLevel < chars().length ? 1 : 0; } public long dtrTotalNGramCount(int dtrLevel) { return dtrLevel < chars().length ? count() : 0; } public void addDaughters(LinkedList queue) { char[] patCs = chars(); Node tailNode = NodeFactory.createNode(patCs,1,patCs.length,count()); queue.add(tailNode); } public void toString(StringBuffer sb, int depth) { sb.append(new String(chars())); sb.append(' '); sb.append(count()); } public void countNodeTypes(ObjectToCounterMap counter) { counter.increment(this.getClass().toString()); }}abstract class PAT1Node extends AbstractPATNode { char mC; PAT1Node(char c) { mC = c; } char[] chars() { return new char[] { mC }; } int length() { return 1; } boolean stringMatch(char[] cs, int start, int end) { switch (end-start) { // cascade without break is intentional; checks all way down case 1: if (cs[start] != mC) return false; default: return true; } }}abstract class PAT2Node extends AbstractPATNode { char mC1; char mC2; PAT2Node(char c1, char c2) { mC1 = c1; mC2 = c2; } char[] chars() { return new char[] { mC1, mC2 }; } int length() { return 2; } boolean stringMatch(char[] cs, int start, int end) { switch (end-start) { // cascade without break is intentional; checks all way down case 2: if (cs[start+1] != mC2) return false; case 1: if (cs[start] != mC1) return false; default: return true; } }}abstract class PAT3Node extends AbstractPATNode { char mC1; char mC2; char mC3; PAT3Node(char c1, char c2, char c3) { mC1 = c1; mC2 = c2; mC3 = c3; } char[] chars() { return new char[] { mC1, mC2, mC3 }; } int length() { return 3; } boolean stringMatch(char[] cs, int start, int end) { switch (end-start) { // cascade without break is intentional; checks all way down case 3: if (cs[start+2] != mC3) return false; case 2: if (cs[start+1] != mC2) return false; case 1: if (cs[start] != mC1) return false; default: return true; } }}abstract class PAT4Node extends AbstractPATNode { char mC1; char mC2; char mC3; char mC4; PAT4Node(char c1, char c2, char c3, char c4) { mC1 = c1; mC2 = c2; mC3 = c3; mC4 = c4; } char[] chars() { return new char[] { mC1, mC2, mC3, mC4 }; } int length() { return 4; } boolean stringMatch(char[] cs, int start, int end) { switch (end-start) { // cascade without break is intentional; checks all way down case 4: if (cs[start+3] != mC4) return false; case 3: if (cs[start+2] != mC3) return false; case 2: if (cs[start+1] != mC2) return false; case 1: if (cs[start] != mC1) return false; default: return true; } }}abstract class PATArrayNode extends AbstractPATNode { char[] mCs; PATArrayNode(char[] cs) { mCs = cs; } char[] chars() { return mCs; } int length() { return mCs.length; } boolean stringMatch(char[] cs, int start, int end) { for (int i = 0; i < (end-start); ++i) if (mCs[i] != cs[start+i]) return false; return true; }}final class PAT1NodeOne extends PAT1Node { public PAT1NodeOne(char c) { super(c); } public long count() { return 1l; }}final class PAT2NodeOne extends PAT2Node { public PAT2NodeOne(char c1, char c2) { super(c1,c2); } public long count() { return 1l; }}final class PAT3NodeOne extends PAT3Node { public PAT3NodeOne(char c1, char c2, char c3) { super(c1,c2,c3); } public long count() { return 1l; }}final class PAT4NodeOne extends PAT4Node { public PAT4NodeOne(char c1, char c2, char c3, char c4) { super(c1,c2,c3,c4); } public long count() { return 1l; }}final class PATArrayNodeOne extends PATArrayNode { int mCount; public PATArrayNodeOne(char[] cs) { super(cs); } public long count() { return 1l; }}final class PAT1NodeTwo extends PAT1Node { public PAT1NodeTwo(char c) { super(c); } public long count() { return 2l; }}final class PAT2NodeTwo extends PAT2Node { public PAT2NodeTwo(char c1, char c2) { super(c1,c2); } public long count() { return 2l; }}final class PAT3NodeTwo extends PAT3Node { public PAT3NodeTwo(char c1, char c2, char c3) { super(c1,c2,c3); } public long count() { return 2l; }}final class PAT4NodeTwo extends PAT4Node { public PAT4NodeTwo(char c1, char c2, char c3, char c4) { super(c1,c2,c3,c4); } public long count() { return 2l; }}final class PATArrayNodeTwo extends PATArrayNode { int mCount; public PATArrayNodeTwo(char[] cs) { super(cs); } public long count() { return 2l; }}final class PAT1NodeThree extends PAT1Node { public PAT1NodeThree(char c) { super(c); } public long count() { return 3l; }}final class PAT2NodeThree extends PAT2Node { public PAT2NodeThree(char c1, char c2) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -