⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 node.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/* * LingPipe v. 3.5 * Copyright (C) 2003-2008 Alias-i * * This program is licensed under the Alias-i Royalty Free License * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i * Royalty Free License Version 1 for more details. * * You should have received a copy of the Alias-i Royalty Free License * Version 1 along with this program; if not, visit * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211, * +1 (718) 290-9170. */package com.aliasi.lm;import com.aliasi.util.NBestSet;import com.aliasi.util.ObjectToCounterMap;// import com.aliasi.util.Arrays;// import java.util.Arrays;import java.util.Iterator;import java.util.LinkedList;import java.util.List;/** * * @version 3.0 */interface Node {    public long count(char[] cs, int start, int end);    public long count();    public long contextCount(char[] cs, int start, int end);    public int numOutcomes(char[] cs, int start, int end);    public char[] outcomes(char[] cs, int start, int end);    public long size();    public Node increment(char[] cs, int start, int end);    public Node increment(char[] cs, int start, int end, int incr);    public Node decrement();     public Node decrement(int count);     // just decrements final char in path    public Node decrement(char[] cs, int start, int end);     public Node decrement(char[] cs, int start, int end, int count);     public Node prune(long minCount);    // below here is just for reporting!    public void toString(StringBuffer sb, int depth);    public void addCounts(List counts, int dtrLevel);    public void topNGrams(NBestCounter counter,                           char[] csAccum, int level, int dtrLevel);    public void addNGramCounts(long[][] uniqueTotalCounts, int depth);    public long uniqueNGramCount(int dtrLevel);    public long totalNGramCount(int dtrLevel);    public void countNodeTypes(ObjectToCounterMap counter);    public void addDaughters(LinkedList queue);}abstract class AbstractNode implements Node {    public abstract void topNGramsDtrs(NBestCounter counter,                                        char[] csAccum,                                        int level, int dtrLevel);    public abstract void countNodeTypes(ObjectToCounterMap counter);    public abstract long dtrUniqueNGramCount(int dtrLevel);    public abstract long dtrTotalNGramCount(int dtrLevel);    public abstract void addDtrCounts(List counts, int dtrLevel);    public abstract void addDtrNGramCounts(long[][] uniqueNGramCount,                                            int depth);    public void addNGramCounts(long[][] uniqueTotalCounts, int depth) {        uniqueTotalCounts[depth][0] += 1;        uniqueTotalCounts[depth][1] += count();        addDtrNGramCounts(uniqueTotalCounts,depth+1);    }    public void topNGrams(NBestCounter counter, char[] csAccum,                           int level, int dtrLevel) {        if (dtrLevel == 0)            counter.put(csAccum,level,count());        else            topNGramsDtrs(counter,csAccum,level,dtrLevel);    }    public void addCounts(List counts, int dtrLevel) {        if (dtrLevel == 0) {            counts.add(new Long(count()));            return;        }        addDtrCounts(counts,dtrLevel-1);    }    public long uniqueNGramCount(int dtrLevel) {        if (dtrLevel == 0) return 1;        return dtrUniqueNGramCount(dtrLevel-1);    }    public long totalNGramCount(int dtrLevel) {        if (dtrLevel == 0) return count();        return dtrTotalNGramCount(dtrLevel-1);    }    public String toString() {        StringBuffer sb = new StringBuffer();        toString(sb,0);        return sb.toString();    }    static void indent(StringBuffer sb, int depth) {        sb.append('\n');        for (int i = 0; i < depth; ++i)            sb.append("  ");    }    protected static void toString(StringBuffer sb,                                   char c, Node daughter, int depth) {        indent(sb,depth);        sb.append(c);        daughter.toString(sb,depth+1);    }}abstract class AbstractDtrNode extends AbstractNode {    abstract char[] chars();    abstract Node[] dtrs();    abstract int numDtrs();    Node getDtr(char c) {        char[] cs = chars();        int i = java.util.Arrays.binarySearch(cs,c);        if (i < 0) return null;        return dtrs()[i];    }    public int numOutcomes(char[] cs, int start, int end) {        if (start == end) return numDtrs();        Node dtr = getDtr(cs[start]);        if (dtr == null) return 0;        return dtr.numOutcomes(cs,start+1,end);    }    public long count(char[] cs, int start, int end) {        if (start == end) {            return count();        }        Node dtr = getDtr(cs[start]);        if (dtr == null) return 0;        return dtr.count(cs,start+1,end);    }    public long contextCount() {        Node[] dtrs = dtrs();        long dtrCount = 0;        for (int i = 0; i < dtrs.length; ++i)            dtrCount += dtrs[i].count();        return dtrCount;    }    public long contextCount(char[] cs, int start, int end) {        if (start == end) {            return contextCount();        }        Node dtr = getDtr(cs[start]);        if (dtr == null) return 0;        return dtr.contextCount(cs,start+1,end);    }    public Node decrement() {        return decrement(1);    }    public Node decrement(int decr) {        return NodeFactory.createNode(chars(),dtrs(),count()-decr);    }    public Node decrement(char[] cs, int start, int end) {        if (start == end)            return decrement();        char[] dtrCs = chars();        int k = java.util.Arrays.binarySearch(dtrCs,cs[start]);        if (k >= 0) {            Node[] dtrs = dtrs();            dtrs[k] = dtrs[k].decrement(cs,start+1,end);            return NodeFactory.createNodePrune(dtrCs,dtrs,count());        }        String msg = "Could not find string to decrement="            + new String(cs,start,end-start);        throw new IllegalArgumentException(msg);    }    public Node decrement(char[] cs, int start, int end, int decr) {        if (start == end)            return decrement(decr);        char[] dtrCs = chars();        int k = java.util.Arrays.binarySearch(dtrCs,cs[start]);        if (k >= 0) {            Node[] dtrs = dtrs();            dtrs[k] = dtrs[k].decrement(cs,start+1,end,decr);            return NodeFactory.createNodePrune(dtrCs,dtrs,count());        }        String msg = "Could not find string to decrement="            + new String(cs,start,end-start);        throw new IllegalArgumentException(msg);    }    public Node increment(char[] cs, int start, int end) {        return increment(cs,start,end,1);    }    public Node increment(char[] cs, int start, int end, int incr) {        if (start == end)            return NodeFactory.createNode(chars(),dtrs(),count() + incr);        char[] dtrCs = chars();        int k = java.util.Arrays.binarySearch(dtrCs,cs[start]);        Node[] dtrs = dtrs();        if (k >= 0) {            dtrs[k] = dtrs[k].increment(cs,start+1,end,incr);            return NodeFactory.createNode(dtrCs,dtrs,count() + incr);        }        char[] newCs = new char[dtrCs.length+1];        Node[] newDtrs = new Node[dtrs.length+1];        int i = 0;        for (; i < dtrCs.length && dtrCs[i] < cs[start]; ++i) {            newCs[i] = dtrCs[i];            newDtrs[i] = dtrs[i];        }        newCs[i] = cs[start];        newDtrs[i] = NodeFactory.createNode(cs,start+1,end,incr);        for (; i < dtrCs.length; ++i) {            newCs[i+1] = dtrCs[i];            newDtrs[i+1] = dtrs[i];        }        return NodeFactory.createNode(newCs,newDtrs,count()+incr);    }    // below here for reporting -- don't over-optimize    public long size() {        Node[] dtrs = dtrs();        long size = 1;        for (int i = 0; i < dtrs.length; ++i)            size += dtrs[i].size();        return size;    }    public void topNGramsDtrs(NBestCounter counter, char[] csAccum,                              int level, int dtrLevel) {        Node[] dtrs = dtrs();        char[] cs = chars();        for (int i = 0; i < dtrs.length; ++i) {            csAccum[level] = cs[i];            dtrs[i].topNGrams(counter,csAccum,level+1,dtrLevel-1);        }    }    public void addDtrNGramCounts(long[][] uniqueTotalCounts, int depth) {        Node[] dtrs = dtrs();        for (int i = 0; i < dtrs.length; ++i)            dtrs[i].addNGramCounts(uniqueTotalCounts,depth);    }    public long dtrUniqueNGramCount(int dtrLevel) {        Node[] dtrs = dtrs();        long sum = 0;        for (int i = 0; i < dtrs.length; ++i)            sum += dtrs[i].uniqueNGramCount(dtrLevel);        return sum;    }    public long dtrTotalNGramCount(int dtrLevel) {        Node[] dtrs = dtrs();        long sum = 0;        for (int i = 0; i < dtrs.length; ++i)            sum += dtrs[i].totalNGramCount(dtrLevel);        return sum;    }    public void addDtrCounts(List accum, int nGramOrder) {        Node[] dtrs = dtrs();        for (int i = 0; i < dtrs.length; ++i)            dtrs[i].addCounts(accum,nGramOrder);    }    public void addDaughters(LinkedList queue) {        Node[] dtrs = dtrs();        for (int i = 0; i < dtrs.length; ++i)            queue.addLast(dtrs[i]);    }    public char[] outcomes(char[] cs, int start, int end) {        if (start == end)            return chars();        Node dtr = getDtr(cs[start]);        if (dtr == null)            return com.aliasi.util.Arrays.EMPTY_CHAR_ARRAY;        return dtr.outcomes(cs,start+1,end);    }    public void countNodeTypes(ObjectToCounterMap counter) {        counter.increment(this.getClass().toString());        Node[] dtrs = dtrs();        for (int i = 0; i < dtrs.length; ++i)            dtrs[i].countNodeTypes(counter);    }    public void toString(StringBuffer sb, int depth) {        char[] cs = chars();        Node[] dtrs = dtrs();        sb.append(' ');        sb.append(count());        for (int i = 0; i < dtrs.length; ++i)            toString(sb,cs[i],dtrs[i],depth);    }    public Node prune(long minCount) {        long count = count();        if (count < minCount) return null;        Node[] dtrs = dtrs();        for (int i = 0; i < dtrs.length; ++i)            dtrs[i] = dtrs[i].prune(minCount);        return NodeFactory.createNodePrune(chars(),dtrs,count);    }}abstract class TerminalNode extends AbstractDtrNode {    char[] chars() {        return com.aliasi.util.Arrays.EMPTY_CHAR_ARRAY;    }    Node[] dtrs() {        return NodeFactory.EMPTY_NODES;    }    public long contextCount(char[] cs, int start, int end) {        return 0;    }    public Node getDtr(char c) { return null; }    public int numDtrs() { return 0; }}abstract class OneDtrNode extends AbstractDtrNode {    char mC;    Node mDaughter;    public OneDtrNode(char c, Node daughter) {        mC = c;        mDaughter = daughter;    }    public long contextCount() {        return mDaughter.count();    }    public Node getDtr(char c) {        return c == mC ? mDaughter : null;    }    char[] chars() {        return new char[] { mC };    }    Node[] dtrs() {        return new Node[] { mDaughter };    }    public int numDtrs() { return 1; }}abstract class TwoDtrNode extends AbstractDtrNode {    char mC1;    Node mDaughter1;    char mC2;    Node mDaughter2;    public TwoDtrNode(char c1, Node daughter1,                      char c2, Node daughter2) {        mC1 = c1;        mDaughter1 = daughter1;        mC2 = c2;        mDaughter2 = daughter2;    }    public long contextCount() {        return mDaughter1.count()             + mDaughter2.count();    }    public Node getDtr(char c) {        return c == mC1            ? mDaughter1            : ( c == mC2                ? mDaughter2                : null );    }    char[] chars() {        return new char[] { mC1, mC2 };    }    Node[] dtrs() {        return new Node[] { mDaughter1, mDaughter2 };    }    public int numDtrs() { return 2; }}abstract class ThreeDtrNode extends AbstractDtrNode {    char mC1;    Node mDaughter1;    char mC2;    Node mDaughter2;    char mC3;    Node mDaughter3;    public ThreeDtrNode(char c1, Node daughter1,                        char c2, Node daughter2,                        char c3, Node daughter3) {        mC1 = c1;        mDaughter1 = daughter1;        mC2 = c2;        mDaughter2 = daughter2;        mC3 = c3;        mDaughter3 = daughter3;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -