📄 ngramboundarylm.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
        BitInput bitIn = new BitInput(in);
        char boundaryChar = (char) (bitIn.readDelta()-1L);
        NGramProcessLM processLM = NGramProcessLM.readFrom(bitIn);
        return new NGramBoundaryLM(processLM,boundaryChar);
    }

    /**
     * Returns the underlying n-gram process language model
     * for this boundary language model.  Changes to the returned
     * model affect this language model.
     *
     * @return The underlying process language model.
     */
    public NGramProcessLM getProcessLM() {
        return mProcessLM;
    }

    /**
     * Returns the characters that have been observed for this
     * language model, including the special boundary character.
     *
     * @return The observed characters for this language model.
     */
    public char[] observedCharacters() {
        return mProcessLM.observedCharacters();
    }

    /**
     * Returns the underlying substring counter for this language
     * model.  This model may be pruned by pruning the counter
     * returned by this method.
     *
     * @return The underlying substring counter for this language model.
     */
    public TrieCharSeqCounter substringCounter() {
        return mProcessLM.substringCounter();
    }

    /**
     * Writes a compiled version of this boundary language model to
     * the specified object output.  The result may be read back in
     * by casting the result of {@link ObjectInput#readObject()} to
     * {@link CompiledNGramBoundaryLM}.
     *
     * @param objOut Object output to which this model is compiled.
     * @throws IOException If there is an I/O exception during the
     * write.
     */
    public void compileTo(ObjectOutput objOut) throws IOException {
        objOut.writeObject(new Externalizer(this));
    }

    /**
     * Trains the underlying process model on the specified character
     * sequence wrapped in the boundary character at both ends, using
     * the specified count.
     *
     * @param cs Character sequence to train on.
     * @param count Count to apply to the training sequence.
     * @throws IllegalArgumentException If the sequence contains the
     * boundary character.
     */
    public void train(CharSequence cs, int count) {
        char[] csBounded = addBoundaries(cs,mBoundaryChar);
        mProcessLM.train(csBounded,0,csBounded.length,count);
        // don't count initial boundary
        mProcessLM.decrementUnigram(mBoundaryChar,count);
    }

    /**
     * Trains on the specified character sequence with a count of one
     * by delegating to {@link #train(CharSequence,int)}.
     *
     * @param cs Character sequence to train on.
     * @throws IllegalArgumentException If the sequence contains the
     * boundary character.
     */
    public void train(CharSequence cs) {
        train(cs,1);
    }

    /**
     * Trains on the specified character slice with a count of one
     * by delegating to {@link #train(char[],int,int,int)}.
     *
     * @param cs Underlying characters.
     * @param start Index of the first character in the slice.
     * @param end Index of one past the last character in the slice.
     * @throws IllegalArgumentException If the slice contains the
     * boundary character or the end index is less than the start index.
     */
    public void train(char[] cs, int start, int end) {
        train(cs,start,end,1);
    }

    /**
     * Trains the underlying process model on the specified character
     * slice wrapped in the boundary character at both ends, using
     * the specified count.
     *
     * @param cs Underlying characters.
     * @param start Index of the first character in the slice.
     * @param end Index of one past the last character in the slice.
     * @param count Count to apply to the training slice.
     * @throws IllegalArgumentException If the slice contains the
     * boundary character or the end index is less than the start index.
     */
    public void train(char[] cs, int start, int end, int count) {
        char[] csBounded = addBoundaries(cs,start,end,mBoundaryChar);
        mProcessLM.train(csBounded,0,csBounded.length,count);
        // don't count initial boundary
        mProcessLM.decrementUnigram(mBoundaryChar,count);
    }

    /**
     * Returns the process model's log (base 2) conditional estimate
     * for the boundary-wrapped sequence with the trailing boundary
     * character left out of the range passed to the process model.
     *
     * @param cs Character sequence to estimate.
     * @return The process model's log (base 2) conditional estimate.
     * @throws IllegalArgumentException If the sequence is empty or
     * contains the boundary character.
     */
    public double log2ConditionalEstimate(CharSequence cs) {
        if (cs.length() < 1) {
            String msg = "Conditional estimate must be at least one character.";
            throw new IllegalArgumentException(msg);
        }
        char[] csBounded = addBoundaries(cs,mBoundaryChar);
        return mProcessLM.log2ConditionalEstimate(csBounded,0,csBounded.length-1);
    }

    /**
     * Returns the process model's log (base 2) conditional estimate
     * for the boundary-wrapped slice with the trailing boundary
     * character left out of the range passed to the process model.
     *
     * @param cs Underlying characters.
     * @param start Index of the first character in the slice.
     * @param end Index of one past the last character in the slice.
     * @return The process model's log (base 2) conditional estimate.
     * @throws IllegalArgumentException If the slice is empty or
     * contains the boundary character.
     */
    public double log2ConditionalEstimate(char[] cs, int start, int end) {
        if (end <= start) {
            String msg = "Conditional estimate must be at least one character.";
            throw new IllegalArgumentException(msg);
        }
        char[] csBounded = addBoundaries(cs,start,end,mBoundaryChar);
        return mProcessLM.log2ConditionalEstimate(csBounded,0,csBounded.length-1);
    }

    /**
     * Returns the process model's log (base 2) estimate of the
     * boundary-wrapped sequence minus its log (base 2) estimate of
     * the first character of <code>mBoundaryArray</code>.
     *
     * <p>NOTE(review): <code>mBoundaryArray</code> is declared outside
     * this excerpt; presumably it holds just the boundary character,
     * so the subtraction discounts the initial boundary -- confirm.
     *
     * @param cs Character sequence to estimate.
     * @return The log (base 2) estimate of the sequence.
     * @throws IllegalArgumentException If the sequence contains the
     * boundary character.
     */
    public double log2Estimate(CharSequence cs) {
        char[] csBounded = addBoundaries(cs,mBoundaryChar);
        return mProcessLM.log2Estimate(csBounded,0,csBounded.length)
            - mProcessLM.log2Estimate(mBoundaryArray,0,1);
    }

    /**
     * Returns the process model's log (base 2) estimate of the
     * boundary-wrapped slice minus its log (base 2) estimate of
     * the first character of <code>mBoundaryArray</code>.
     *
     * @param cs Underlying characters.
     * @param start Index of the first character in the slice.
     * @param end Index of one past the last character in the slice.
     * @return The log (base 2) estimate of the slice.
     * @throws IllegalArgumentException If the slice contains the
     * boundary character or the end index is less than the start index.
     */
    public double log2Estimate(char[] cs, int start, int end) {
        char[] csBounded = addBoundaries(cs,start,end,mBoundaryChar);
        return mProcessLM.log2Estimate(csBounded,0,csBounded.length)
            - mProcessLM.log2Estimate(mBoundaryArray,0,1);
    }

    /**
     * This method is a convenience implementation of the {@link
     * Model} interface which delegates the call to {@link
     * #log2Estimate(CharSequence)}.
     *
     * @param cSeq Character sequence whose probability is returned.
     * @return The log (base 2) probability of the specified character sequence.
     */
    public double log2Prob(CharSequence cSeq) {
        return log2Estimate(cSeq);
    }

    /**
     * This method is a convenience implementation of the {@link Model}
     * interface which returns the result of raising 2.0 to the
     * power of the result of a call to {@link #log2Estimate(CharSequence)}.
     *
     * @param cSeq Character sequence whose probability is returned.
     * @return The probability of the specified character sequence.
     */
    public double prob(CharSequence cSeq) {
        return Math.pow(2.0,log2Estimate(cSeq));
    }

    /**
     * Returns a string-based representation of this language model.
     * It displays the boundary character and the contained
     * process language model.
     *
     * @return A string-based representation of this language model.
     */
    public String toString() {
        // StringBuffer is kept because the process model's
        // toStringBuffer method is handed this buffer.
        StringBuffer sb = new StringBuffer();
        sb.append("Boundary char=").append((int) mBoundaryChar);
        sb.append('\n');
        mProcessLM.toStringBuffer(sb);
        return sb.toString();
    }

    /**
     * Returns a fresh array holding the boundary character, then the
     * characters of the specified sequence, then the boundary
     * character again.
     *
     * @param cs Character sequence to wrap.
     * @param boundaryChar Boundary character.
     * @return Array of length <code>cs.length() + 2</code>.
     * @throws IllegalArgumentException If the sequence contains the
     * boundary character.
     */
    static char[] addBoundaries(CharSequence cs, char boundaryChar) {
        int len = cs.length();
        char[] cs2 = new char[len + 2];
        for (int i = 0; i < len; ++i) {
            char c = cs.charAt(i);
            checkNotBoundary(c,i,boundaryChar);
            cs2[i+1] = c;
        }
        addBoundaryChars(cs2,boundaryChar);
        return cs2;
    }

    /**
     * Returns a fresh array holding the boundary character, then the
     * characters <code>cs[start]</code> through <code>cs[end-1]</code>,
     * then the boundary character again.
     *
     * @param cs Underlying characters.
     * @param start Index of the first character in the slice.
     * @param end Index of one past the last character in the slice.
     * @param boundaryChar Boundary character.
     * @return Array of length <code>(end - start) + 2</code>.
     * @throws IllegalArgumentException If the slice contains the
     * boundary character or the end index is less than the start index.
     */
    static char[] addBoundaries(char[] cs, int start, int end, char boundaryChar) {
        if (end < start) {
            String msg = "End index must not be less than start index."
                + " Found start=" + start
                + " end=" + end;
            throw new IllegalArgumentException(msg);
        }
        int len = end-start;
        // Size by the slice length, not the backing array length: an
        // array of cs.length+1 loses the last character of a full-array
        // slice and pads shorter slices with '\0' before the boundary.
        char[] cs2 = new char[len+2];
        for (int i = 0; i < len; ++i) {
            char c = cs[i+start];
            checkNotBoundary(c,i+start,boundaryChar);
            cs2[i+1] = c;
        }
        addBoundaryChars(cs2,boundaryChar);
        return cs2;
    }

    /**
     * Throws an exception if the specified character is the boundary
     * character; shared by both <code>addBoundaries</code> methods.
     *
     * @param c Character to check.
     * @param index Index reported in the exception message.
     * @param boundaryChar Boundary character.
     * @throws IllegalArgumentException If the character equals the
     * boundary character.
     */
    private static void checkNotBoundary(char c, int index, char boundaryChar) {
        if (c == boundaryChar) {
            String msg = "Estimated string cannot contain boundary char."
                + " Found boundary char=" + c
                + " at index=" + index;
            throw new IllegalArgumentException(msg);
        }
    }

    /**
     * Overwrites the first and last elements of the specified array
     * with the boundary character.
     *
     * @param cs Array to modify in place.
     * @param boundaryChar Boundary character.
     */
    static void addBoundaryChars(char[] cs, char boundaryChar) {
        cs[0] = boundaryChar;
        cs[cs.length-1] = boundaryChar;
    }

    /**
     * Serialization helper created by {@link #compileTo(ObjectOutput)}.
     * Writing emits the boundary character followed by the compiled
     * process model; reading constructs a {@link CompiledNGramBoundaryLM}
     * from the object input.
     */
    static class Externalizer extends AbstractExternalizable {
        private static final long serialVersionUID = -7945082563035787530L;
        final NGramBoundaryLM mLM;
        // No-argument constructor leaves the model null; presumably it
        // exists for deserialization -- confirm against AbstractExternalizable.
        public Externalizer() {
            this(null);
        }
        public Externalizer(NGramBoundaryLM lm) {
            mLM = lm;
        }
        public Object read(ObjectInput objIn) throws IOException {
            return new CompiledNGramBoundaryLM(objIn);
        }
        public void writeExternal(ObjectOutput objOut) throws IOException {
            objOut.writeChar(mLM.mBoundaryChar);
            mLM.mProcessLM.compileTo(objOut);
        }
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -