📄 node.java
字号:
/* * LingPipe v. 3.5 * Copyright (C) 2003-2008 Alias-i * * This program is licensed under the Alias-i Royalty Free License * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i * Royalty Free License Version 1 for more details. * * You should have received a copy of the Alias-i Royalty Free License * Version 1 along with this program; if not, visit * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211, * +1 (718) 290-9170. */package com.aliasi.lm;import com.aliasi.util.NBestSet;import com.aliasi.util.ObjectToCounterMap;// import com.aliasi.util.Arrays;// import java.util.Arrays;import java.util.Iterator;import java.util.LinkedList;import java.util.List;/** * * @version 3.0 */interface Node { public long count(char[] cs, int start, int end); public long count(); public long contextCount(char[] cs, int start, int end); public int numOutcomes(char[] cs, int start, int end); public char[] outcomes(char[] cs, int start, int end); public long size(); public Node increment(char[] cs, int start, int end); public Node increment(char[] cs, int start, int end, int incr); public Node decrement(); public Node decrement(int count); // just decrements final char in path public Node decrement(char[] cs, int start, int end); public Node decrement(char[] cs, int start, int end, int count); public Node prune(long minCount); // below here is just for reporting! public void toString(StringBuffer sb, int depth); public void addCounts(List counts, int dtrLevel); public void topNGrams(NBestCounter counter, char[] csAccum, int level, int dtrLevel); public void addNGramCounts(long[][] uniqueTotalCounts, int depth); public long uniqueNGramCount(int dtrLevel); public long totalNGramCount(int dtrLevel); public void countNodeTypes(ObjectToCounterMap counter); public void addDaughters(LinkedList queue);}abstract class AbstractNode implements Node { public abstract void topNGramsDtrs(NBestCounter counter, char[] csAccum, int level, int dtrLevel); public abstract void countNodeTypes(ObjectToCounterMap counter); public abstract long dtrUniqueNGramCount(int dtrLevel); public abstract long dtrTotalNGramCount(int dtrLevel); public abstract void addDtrCounts(List counts, int dtrLevel); public abstract void addDtrNGramCounts(long[][] uniqueNGramCount, int depth); public void addNGramCounts(long[][] uniqueTotalCounts, int depth) { uniqueTotalCounts[depth][0] += 1; uniqueTotalCounts[depth][1] += count(); addDtrNGramCounts(uniqueTotalCounts,depth+1); } public void topNGrams(NBestCounter counter, char[] csAccum, int level, int dtrLevel) { if (dtrLevel == 0) counter.put(csAccum,level,count()); else topNGramsDtrs(counter,csAccum,level,dtrLevel); } public void addCounts(List counts, int dtrLevel) { if (dtrLevel == 0) { counts.add(new Long(count())); return; } addDtrCounts(counts,dtrLevel-1); } public long uniqueNGramCount(int dtrLevel) { if (dtrLevel == 0) return 1; return dtrUniqueNGramCount(dtrLevel-1); } public long totalNGramCount(int dtrLevel) { if (dtrLevel == 0) return count(); return dtrTotalNGramCount(dtrLevel-1); } public String toString() { StringBuffer sb = new StringBuffer(); toString(sb,0); return sb.toString(); } static void indent(StringBuffer sb, int depth) { sb.append('\n'); for (int i = 0; i < depth; ++i) sb.append(" "); } protected static void toString(StringBuffer sb, char c, Node daughter, int depth) { indent(sb,depth); sb.append(c); daughter.toString(sb,depth+1); }}abstract class AbstractDtrNode extends AbstractNode { abstract char[] chars(); abstract Node[] dtrs(); abstract int numDtrs(); Node getDtr(char c) { char[] cs = chars(); int i = java.util.Arrays.binarySearch(cs,c); if (i < 0) return null; return dtrs()[i]; } public int numOutcomes(char[] cs, int start, int end) { if (start == end) return numDtrs(); Node dtr = getDtr(cs[start]); if (dtr == null) return 0; return dtr.numOutcomes(cs,start+1,end); } public long count(char[] cs, int start, int end) { if (start == end) { return count(); } Node dtr = getDtr(cs[start]); if (dtr == null) return 0; return dtr.count(cs,start+1,end); } public long contextCount() { Node[] dtrs = dtrs(); long dtrCount = 0; for (int i = 0; i < dtrs.length; ++i) dtrCount += dtrs[i].count(); return dtrCount; } public long contextCount(char[] cs, int start, int end) { if (start == end) { return contextCount(); } Node dtr = getDtr(cs[start]); if (dtr == null) return 0; return dtr.contextCount(cs,start+1,end); } public Node decrement() { return decrement(1); } public Node decrement(int decr) { return NodeFactory.createNode(chars(),dtrs(),count()-decr); } public Node decrement(char[] cs, int start, int end) { if (start == end) return decrement(); char[] dtrCs = chars(); int k = java.util.Arrays.binarySearch(dtrCs,cs[start]); if (k >= 0) { Node[] dtrs = dtrs(); dtrs[k] = dtrs[k].decrement(cs,start+1,end); return NodeFactory.createNodePrune(dtrCs,dtrs,count()); } String msg = "Could not find string to decrement=" + new String(cs,start,end-start); throw new IllegalArgumentException(msg); } public Node decrement(char[] cs, int start, int end, int decr) { if (start == end) return decrement(decr); char[] dtrCs = chars(); int k = java.util.Arrays.binarySearch(dtrCs,cs[start]); if (k >= 0) { Node[] dtrs = dtrs(); dtrs[k] = dtrs[k].decrement(cs,start+1,end,decr); return NodeFactory.createNodePrune(dtrCs,dtrs,count()); } String msg = "Could not find string to decrement=" + new String(cs,start,end-start); throw new IllegalArgumentException(msg); } public Node increment(char[] cs, int start, int end) { return increment(cs,start,end,1); } public Node increment(char[] cs, int start, int end, int incr) { if (start == end) return NodeFactory.createNode(chars(),dtrs(),count() + incr); char[] dtrCs = chars(); int k = java.util.Arrays.binarySearch(dtrCs,cs[start]); Node[] dtrs = dtrs(); if (k >= 0) { dtrs[k] = dtrs[k].increment(cs,start+1,end,incr); return NodeFactory.createNode(dtrCs,dtrs,count() + incr); } char[] newCs = new char[dtrCs.length+1]; Node[] newDtrs = new Node[dtrs.length+1]; int i = 0; for (; i < dtrCs.length && dtrCs[i] < cs[start]; ++i) { newCs[i] = dtrCs[i]; newDtrs[i] = dtrs[i]; } newCs[i] = cs[start]; newDtrs[i] = NodeFactory.createNode(cs,start+1,end,incr); for (; i < dtrCs.length; ++i) { newCs[i+1] = dtrCs[i]; newDtrs[i+1] = dtrs[i]; } return NodeFactory.createNode(newCs,newDtrs,count()+incr); } // below here for reporting -- don't over-optimize public long size() { Node[] dtrs = dtrs(); long size = 1; for (int i = 0; i < dtrs.length; ++i) size += dtrs[i].size(); return size; } public void topNGramsDtrs(NBestCounter counter, char[] csAccum, int level, int dtrLevel) { Node[] dtrs = dtrs(); char[] cs = chars(); for (int i = 0; i < dtrs.length; ++i) { csAccum[level] = cs[i]; dtrs[i].topNGrams(counter,csAccum,level+1,dtrLevel-1); } } public void addDtrNGramCounts(long[][] uniqueTotalCounts, int depth) { Node[] dtrs = dtrs(); for (int i = 0; i < dtrs.length; ++i) dtrs[i].addNGramCounts(uniqueTotalCounts,depth); } public long dtrUniqueNGramCount(int dtrLevel) { Node[] dtrs = dtrs(); long sum = 0; for (int i = 0; i < dtrs.length; ++i) sum += dtrs[i].uniqueNGramCount(dtrLevel); return sum; } public long dtrTotalNGramCount(int dtrLevel) { Node[] dtrs = dtrs(); long sum = 0; for (int i = 0; i < dtrs.length; ++i) sum += dtrs[i].totalNGramCount(dtrLevel); return sum; } public void addDtrCounts(List accum, int nGramOrder) { Node[] dtrs = dtrs(); for (int i = 0; i < dtrs.length; ++i) dtrs[i].addCounts(accum,nGramOrder); } public void addDaughters(LinkedList queue) { Node[] dtrs = dtrs(); for (int i = 0; i < dtrs.length; ++i) queue.addLast(dtrs[i]); } public char[] outcomes(char[] cs, int start, int end) { if (start == end) return chars(); Node dtr = getDtr(cs[start]); if (dtr == null) return com.aliasi.util.Arrays.EMPTY_CHAR_ARRAY; return dtr.outcomes(cs,start+1,end); } public void countNodeTypes(ObjectToCounterMap counter) { counter.increment(this.getClass().toString()); Node[] dtrs = dtrs(); for (int i = 0; i < dtrs.length; ++i) dtrs[i].countNodeTypes(counter); } public void toString(StringBuffer sb, int depth) { char[] cs = chars(); Node[] dtrs = dtrs(); sb.append(' '); sb.append(count()); for (int i = 0; i < dtrs.length; ++i) toString(sb,cs[i],dtrs[i],depth); } public Node prune(long minCount) { long count = count(); if (count < minCount) return null; Node[] dtrs = dtrs(); for (int i = 0; i < dtrs.length; ++i) dtrs[i] = dtrs[i].prune(minCount); return NodeFactory.createNodePrune(chars(),dtrs,count); }}abstract class TerminalNode extends AbstractDtrNode { char[] chars() { return com.aliasi.util.Arrays.EMPTY_CHAR_ARRAY; } Node[] dtrs() { return NodeFactory.EMPTY_NODES; } public long contextCount(char[] cs, int start, int end) { return 0; } public Node getDtr(char c) { return null; } public int numDtrs() { return 0; }}abstract class OneDtrNode extends AbstractDtrNode { char mC; Node mDaughter; public OneDtrNode(char c, Node daughter) { mC = c; mDaughter = daughter; } public long contextCount() { return mDaughter.count(); } public Node getDtr(char c) { return c == mC ? mDaughter : null; } char[] chars() { return new char[] { mC }; } Node[] dtrs() { return new Node[] { mDaughter }; } public int numDtrs() { return 1; }}abstract class TwoDtrNode extends AbstractDtrNode { char mC1; Node mDaughter1; char mC2; Node mDaughter2; public TwoDtrNode(char c1, Node daughter1, char c2, Node daughter2) { mC1 = c1; mDaughter1 = daughter1; mC2 = c2; mDaughter2 = daughter2; } public long contextCount() { return mDaughter1.count() + mDaughter2.count(); } public Node getDtr(char c) { return c == mC1 ? mDaughter1 : ( c == mC2 ? mDaughter2 : null ); } char[] chars() { return new char[] { mC1, mC2 }; } Node[] dtrs() { return new Node[] { mDaughter1, mDaughter2 }; } public int numDtrs() { return 2; }}abstract class ThreeDtrNode extends AbstractDtrNode { char mC1; Node mDaughter1; char mC2; Node mDaughter2; char mC3; Node mDaughter3; public ThreeDtrNode(char c1, Node daughter1, char c2, Node daughter2, char c3, Node daughter3) { mC1 = c1; mDaughter1 = daughter1; mC2 = c2; mDaughter2 = daughter2; mC3 = c3; mDaughter3 = daughter3;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -