⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hyphenationtree.java

📁 一个java操作pdf文件的开发包,很好用的.
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * $Id: HyphenationTree.java,v 1.7 2002/06/20 12:56:46 blowagie Exp $ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. * For details on use and redistribution please refer to the * LICENSE file included with these sources. */package com.lowagie.text.pdf.hyphenation;import java.io.*;import java.util.Vector;import java.util.Hashtable;/** * This tree structure stores the hyphenation patterns in an efficient * way for fast lookup. It provides the provides the method to * hyphenate a word. * * @author Carlos Villegas <cav@uniscope.co.jp> */public class HyphenationTree extends TernaryTree implements PatternConsumer,        Serializable {    /**     * value space: stores the inteletter values     */    protected ByteVector vspace;    /**     * This map stores hyphenation exceptions     */    protected Hashtable stoplist;    /**     * This map stores the character classes     */    protected TernaryTree classmap;    /**     * Temporary map to store interletter values on pattern loading.     */    private transient TernaryTree ivalues;    public HyphenationTree() {        stoplist = new Hashtable(23);    // usually a small table        classmap = new TernaryTree();        vspace = new ByteVector();        vspace.alloc(1);    // this reserves index 0, which we don't use    }    /**     * Packs the values by storing them in 4 bits, two values into a byte     * Values range is from 0 to 9. We use zero as terminator,     * so we'll add 1 to the value.     * @param values a string of digits from '0' to '9' representing the     * interletter values.     * @return the index into the vspace array where the packed values     * are stored.     */    protected int packValues(String values) {        int i, n = values.length();        int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;        int offset = vspace.alloc(m);        byte[] va = vspace.getArray();        for (i = 0; i < n; i++) {            int j = i >> 1;            byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);            if ((i & 1) == 1)                va[j + offset] = (byte)(va[j + offset] | v);            else                va[j + offset] = (byte)(v << 4);    // big endian        }        va[m - 1 + offset] = 0;    // terminator        return offset;    }    protected String unpackValues(int k) {        StringBuffer buf = new StringBuffer();        byte v = vspace.get(k++);        while (v != 0) {            char c = (char)((v >>> 4) - 1 + '0');            buf.append(c);            c = (char)(v & 0x0f);            if (c == 0)                break;            c = (char)(c - 1 + '0');            buf.append(c);            v = vspace.get(k++);        }        return buf.toString();    }    /**     * Read hyphenation patterns from an XML file.     *//*    public void loadPatterns(String filename) throws HyphenationException {        PatternParser pp = new PatternParser(this);        ivalues = new TernaryTree();        pp.parse(filename);        // patterns/values should be now in the tree        // let's optimize a bit        trimToSize();        vspace.trimToSize();        classmap.trimToSize();        // get rid of the auxiliary map        ivalues = null;    }*/    /**     * Read hyphenation patterns from an internal format file.     */    public void loadInternalPatterns(String filename) throws HyphenationException {        PatternInternalParser pp = new PatternInternalParser(this);        ivalues = new TernaryTree();        pp.parse(filename);        // patterns/values should be now in the tree        // let's optimize a bit        trimToSize();        vspace.trimToSize();        classmap.trimToSize();        // get rid of the auxiliary map        ivalues = null;    }    /**     * Read hyphenation patterns from an internal format file.     */    public void loadInternalPatterns(InputStream is) throws HyphenationException {        PatternInternalParser pp = new PatternInternalParser(this);        ivalues = new TernaryTree();        pp.parse(is);        // patterns/values should be now in the tree        // let's optimize a bit        trimToSize();        vspace.trimToSize();        classmap.trimToSize();        // get rid of the auxiliary map        ivalues = null;    }    public String findPattern(String pat) {        int k = super.find(pat);        if (k >= 0)            return unpackValues(k);        return "";    }    /**     * String compare, returns 0 if equal or     * t is a substring of s     */    protected int hstrcmp(char[] s, int si, char[] t, int ti) {        for (; s[si] == t[ti]; si++, ti++)            if (s[si] == 0)                return 0;        if (t[ti] == 0)            return 0;        return s[si] - t[ti];    }    protected byte[] getValues(int k) {        StringBuffer buf = new StringBuffer();        byte v = vspace.get(k++);        while (v != 0) {            char c = (char)((v >>> 4) - 1);            buf.append(c);            c = (char)(v & 0x0f);            if (c == 0)                break;            c = (char)(c - 1);            buf.append(c);            v = vspace.get(k++);        }        byte[] res = new byte[buf.length()];        for (int i = 0; i < res.length; i++)            res[i] = (byte)buf.charAt(i);        return res;    }    /**     * <p>Search for all possible partial matches of word starting     * at index an update interletter values. In other words, it     * does something like:</p>     * <code>     * for(i=0; i<patterns.length; i++) {     * if ( word.substring(index).startsWidth(patterns[i]) )     * update_interletter_values(patterns[i]);     * }     * </code>     * <p>But it is done in an efficient way since the patterns are     * stored in a ternary tree. In fact, this is the whole purpose     * of having the tree: doing this search without having to test     * every single pattern. The number of patterns for languages     * such as English range from 4000 to 10000. Thus, doing thousands     * of string comparisons for each word to hyphenate would be     * really slow without the tree. The tradeoff is memory, but     * using a ternary tree instead of a trie, almost halves the     * the memory used by Lout or TeX. It's also faster than using     * a hash table</p>     * @param word null terminated word to match     * @param index start index from word     * @param il interletter values array to update     */    protected void searchPatterns(char[] word, int index, byte[] il) {        byte[] values;        int i = index;        char p, q;        char sp = word[i];        p = root;        while (p > 0 && p < sc.length) {            if (sc[p] == 0xFFFF) {                if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {                    values = getValues(eq[p]);    // data pointer is in eq[]                    int j = index;                    for (int k = 0; k < values.length; k++) {                        if (j < il.length && values[k] > il[j])                            il[j] = values[k];                        j++;                    }                }                return;            }            int d = sp - sc[p];            if (d == 0) {                if (sp == 0) {                    break;                }                sp = word[++i];                p = eq[p];                q = p;                // look for a pattern ending at this position by searching for                // the null char ( splitchar == 0 )                while (q > 0 && q < sc.length) {                    if (sc[q] == 0xFFFF) {        // stop at compressed branch                        break;                    }                    if (sc[q] == 0) {                        values = getValues(eq[q]);                        int j = index;                        for (int k = 0; k < values.length; k++) {                            if (j < il.length && values[k] > il[j]) {                                il[j] = values[k];                            }                            j++;                        }                        break;                    } else {                        q = lo[q];                        /**                         * actually the code should be:                         * q = sc[q] < 0 ? hi[q] : lo[q];                         * but java chars are unsigned                         */                    }                }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -