📄 hyphenationtree.java

📁 处理PDF
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * Copyright 1999-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* $Id: HyphenationTree.java 3117 2008-01-31 05:53:22Z xlv $ */ package com.lowagie.text.pdf.hyphenation;import java.io.InputStream;import java.util.ArrayList;import java.util.HashMap;/** * This tree structure stores the hyphenation patterns in an efficient * way for fast lookup. It provides the provides the method to * hyphenate a word. * * @author Carlos Villegas <cav@uniscope.co.jp> */public class HyphenationTree extends TernaryTree             implements PatternConsumer {    private static final long serialVersionUID = -7763254239309429432L;	/**     * value space: stores the interletter values     */    protected ByteVector vspace;    /**     * This map stores hyphenation exceptions     */    protected HashMap stoplist;    /**     * This map stores the character classes     */    protected TernaryTree classmap;    /**     * Temporary map to store interletter values on pattern loading.     */    private transient TernaryTree ivalues;    public HyphenationTree() {        stoplist = new HashMap(23);    // usually a small table        classmap = new TernaryTree();        vspace = new ByteVector();        vspace.alloc(1);    // this reserves index 0, which we don't use    }    /**     * Packs the values by storing them in 4 bits, two values into a byte     * Values range is from 0 to 9. We use zero as terminator,     * so we'll add 1 to the value.     * @param values a string of digits from '0' to '9' representing the     * interletter values.     * @return the index into the vspace array where the packed values     * are stored.     */    protected int packValues(String values) {        int i, n = values.length();        int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;        int offset = vspace.alloc(m);        byte[] va = vspace.getArray();        for (i = 0; i < n; i++) {            int j = i >> 1;            byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);            if ((i & 1) == 1) {                va[j + offset] = (byte)(va[j + offset] | v);            } else {                va[j + offset] = (byte)(v << 4);    // big endian            }        }        va[m - 1 + offset] = 0;    // terminator        return offset;    }    protected String unpackValues(int k) {        StringBuffer buf = new StringBuffer();        byte v = vspace.get(k++);        while (v != 0) {            char c = (char)((v >>> 4) - 1 + '0');            buf.append(c);            c = (char)(v & 0x0f);            if (c == 0) {                break;            }            c = (char)(c - 1 + '0');            buf.append(c);            v = vspace.get(k++);        }        return buf.toString();    }    public void loadSimplePatterns(InputStream stream) {        SimplePatternParser pp = new SimplePatternParser();        ivalues = new TernaryTree();        pp.parse(stream, this);        // patterns/values should be now in the tree        // let's optimize a bit        trimToSize();        vspace.trimToSize();        classmap.trimToSize();        // get rid of the auxiliary map        ivalues = null;    }    public String findPattern(String pat) {        int k = super.find(pat);        if (k >= 0) {            return unpackValues(k);        }        return "";    }    /**     * String compare, returns 0 if equal or     * t is a substring of s     */    protected int hstrcmp(char[] s, int si, char[] t, int ti) {        for (; s[si] == t[ti]; si++, ti++) {            if (s[si] == 0) {                return 0;            }        }        if (t[ti] == 0) {            return 0;        }        return s[si] - t[ti];    }    protected byte[] getValues(int k) {        StringBuffer buf = new StringBuffer();        byte v = vspace.get(k++);        while (v != 0) {            char c = (char)((v >>> 4) - 1);            buf.append(c);            c = (char)(v & 0x0f);            if (c == 0) {                break;            }            c = (char)(c - 1);            buf.append(c);            v = vspace.get(k++);        }        byte[] res = new byte[buf.length()];        for (int i = 0; i < res.length; i++) {            res[i] = (byte)buf.charAt(i);        }        return res;    }    /**     * <p>Search for all possible partial matches of word starting     * at index an update interletter values. In other words, it     * does something like:</p>     * <code>     * for(i=0; i<patterns.length; i++) {     * if ( word.substring(index).startsWidth(patterns[i]) )     * update_interletter_values(patterns[i]);     * }     * </code>     * <p>But it is done in an efficient way since the patterns are     * stored in a ternary tree. In fact, this is the whole purpose     * of having the tree: doing this search without having to test     * every single pattern. The number of patterns for languages     * such as English range from 4000 to 10000. Thus, doing thousands     * of string comparisons for each word to hyphenate would be     * really slow without the tree. The tradeoff is memory, but     * using a ternary tree instead of a trie, almost halves the     * the memory used by Lout or TeX. It's also faster than using     * a hash table</p>     * @param word null terminated word to match     * @param index start index from word     * @param il interletter values array to update     */    protected void searchPatterns(char[] word, int index, byte[] il) {        byte[] values;        int i = index;        char p, q;        char sp = word[i];        p = root;        while (p > 0 && p < sc.length) {            if (sc[p] == 0xFFFF) {                if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {                    values = getValues(eq[p]);    // data pointer is in eq[]                    int j = index;                    for (int k = 0; k < values.length; k++) {                        if (j < il.length && values[k] > il[j]) {                            il[j] = values[k];                        }                        j++;                    }                }                return;            }            int d = sp - sc[p];            if (d == 0) {                if (sp == 0) {                    break;                }                sp = word[++i];                p = eq[p];                q = p;                // look for a pattern ending at this position by searching for                // the null char ( splitchar == 0 )                while (q > 0 && q < sc.length) {                    if (sc[q] == 0xFFFF) {        // stop at compressed branch                        break;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -