pdfencodings.java

来自「有关对pdf操作的代码」· Java 代码 · 共 798 行 · 第 1/3 页

JAVA
798
字号
            String text = extra.byteToChar(bytes, encoding);
            // A non-null result from the extra converter is returned as-is.
            if (text != null)
                return text;
        }
        // Pick a byte-to-char lookup table for the two encodings handled here.
        char ch[] = null;
        if (encoding.equals(BaseFont.WINANSI))
            ch = winansiByteToChar;
        else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING))
            ch = pdfEncodingByteToChar;
        if (ch != null) {
            int len = bytes.length;
            char c[] = new char[len];
            for (int k = 0; k < len; ++k) {
                // Mask to treat the byte as unsigned (0..255) before indexing the table.
                c[k] = ch[bytes[k] & 0xff];
            }
            return new String(c);
        }
        // No table applies: let the runtime decode using the named charset.
        try {
            return new String(bytes, encoding);
        }
        catch (UnsupportedEncodingException e) {
            throw new ExceptionConverter(e);
        }
    }

    /** Checks if <CODE>text</CODE> only has PdfDocEncoding characters.
     * A <CODE>null</CODE> text is accepted and reported as <CODE>true</CODE>.
     * @param text the <CODE>String</CODE> to test
     * @return <CODE>true</CODE> if only PdfDocEncoding characters are present
     */
    public static boolean isPdfDocEncoding(String text) {
        if (text == null)
            return true;
        int len = text.length();
        for (int k = 0; k < len; ++k) {
            char char1 = text.charAt(k);
            // Characters below 128 and in the range 161..255 are accepted without a lookup.
            if (char1 < 128 || (char1 > 160 && char1 <= 255))
                continue;
            // Anything else must have an entry in the pdfEncoding table.
            if (!pdfEncoding.containsKey(char1))
                return false;
        }
        return true;
    }

    /** Cache of loaded cmaps keyed by cmap name; the values are stored as
     * <CODE>char[][]</CODE> plane tables. Users of this map synchronize on it.
     */
    static final HashMap cmaps = new HashMap();
    /** Assumes that '\\n' and '\\r\\n' are the newline sequences. It may not work for
     * all CJK encodings. To be used with loadCmap().
     */
    public static final byte CRLF_CID_NEWLINE[][] = new byte[][]{{(byte)'\n'}, {(byte)'\r', (byte)'\n'}};
    /** Clears the CJK cmaps from the cache. If <CODE>name</CODE> is the
     * empty string then all the cache is cleared. Calling this method
     * has no consequences other than the need to reload the cmap
     * if needed.
* @param name the name of the cmap to clear or all the cmaps if the empty string     */        public static void clearCmap(String name) {        synchronized (cmaps) {            if (name.length() == 0)                cmaps.clear();            else                cmaps.remove(name);        }    }        /** Loads a CJK cmap to the cache with the option of associating     * sequences to the newline.     * @param name the CJK cmap name     * @param newline the sequences to be replaced by a newline in the resulting CID. See <CODE>CRLF_CID_NEWLINE</CODE>     */        public static void loadCmap(String name, byte newline[][]) {        try {            char planes[][] = null;            synchronized (cmaps) {                planes = (char[][])cmaps.get(name);            }            if (planes == null) {                planes = readCmap(name, newline);                synchronized (cmaps) {                    cmaps.put(name, planes);                }            }        }        catch (IOException e) {            throw new ExceptionConverter(e);        }            }        /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE>     * to a CID string. This is needed to reach some CJK characters     * that don't exist in 16 bit Unicode.</p>     * The font to use this result must use the encoding "Identity-H"     * or "Identity-V".</p>     * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/.     * @param name the CJK encoding name     * @param seq the <CODE>byte</CODE> array to be decoded     * @return the CID string     */        public static String convertCmap(String name, byte seq[]) {        return convertCmap(name, seq, 0, seq.length);    }        /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE>     * to a CID string. 
This is needed to reach some CJK characters     * that don't exist in 16 bit Unicode.</p>     * The font to use this result must use the encoding "Identity-H"     * or "Identity-V".</p>     * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/.     * @param name the CJK encoding name     * @param start the start offset in the data     * @param length the number of bytes to convert     * @param seq the <CODE>byte</CODE> array to be decoded     * @return the CID string     */        public static String convertCmap(String name, byte seq[], int start, int length) {        try {            char planes[][] = null;            synchronized (cmaps) {                planes = (char[][])cmaps.get(name);            }            if (planes == null) {                planes = readCmap(name, (byte[][])null);                synchronized (cmaps) {                    cmaps.put(name, planes);                }            }            return decodeSequence(seq, start, length, planes);        }        catch (IOException e) {            throw new ExceptionConverter(e);        }            }        static String decodeSequence(byte seq[], int start, int length, char planes[][]) {        StringBuffer buf = new StringBuffer();        int end = start + length;        int currentPlane = 0;        for (int k = start; k < end; ++k) {            int one = (int)seq[k] & 0xff;            char plane[] = planes[currentPlane];            int cid = plane[one];            if ((cid & 0x8000) == 0) {                buf.append((char)cid);                currentPlane = 0;            }            else                currentPlane = cid & 0x7fff;        }        return buf.toString();    }    static char[][] readCmap(String name, byte newline[][]) throws IOException {        ArrayList planes = new ArrayList();        planes.add(new char[256]);        readCmap(name, planes);        if (newline != null) {            for (int k = 0; k < newline.length; ++k)                encodeSequence(newline[k].length, 
newline[k], BaseFont.CID_NEWLINE, planes);        }        char ret[][] = new char[planes.size()][];        return (char[][])planes.toArray(ret);    }        static void readCmap(String name, ArrayList planes) throws IOException {        String fullName = BaseFont.RESOURCE_PATH + "cmaps/" + name;        InputStream in = BaseFont.getResourceStream(fullName);        if (in == null)            throw new IOException("The Cmap " + name + " was not found.");        encodeStream(in, planes);        in.close();    }        static void encodeStream(InputStream in, ArrayList planes) throws IOException {        BufferedReader rd = new BufferedReader(new InputStreamReader(in, "iso-8859-1"));        String line = null;        int state = CIDNONE;        byte seqs[] = new byte[7];        while ((line = rd.readLine()) != null) {            if (line.length() < 6)                continue;            switch (state) {                case CIDNONE: {                    if (line.indexOf("begincidrange") >= 0)                        state = CIDRANGE;                    else if (line.indexOf("begincidchar") >= 0)                        state = CIDCHAR;                    else if (line.indexOf("usecmap") >= 0) {                        StringTokenizer tk = new StringTokenizer(line);                        String t = tk.nextToken();                        readCmap(t.substring(1), planes);                    }                    break;                }                case CIDRANGE: {                    if (line.indexOf("endcidrange") >= 0) {                        state = CIDNONE;                        break;                    }                    StringTokenizer tk = new StringTokenizer(line);                    String t = tk.nextToken();                    int size = t.length() / 2 - 1;                    long start = Long.parseLong(t.substring(1, t.length() - 1), 16);                    t = tk.nextToken();                    long end = Long.parseLong(t.substring(1, t.length() - 1), 16);  
                  t = tk.nextToken();                    int cid = Integer.parseInt(t);                    for (long k = start; k <= end; ++k) {                        breakLong(k, size, seqs);                        encodeSequence(size, seqs, (char)cid, planes);                        ++cid;                    }                    break;                }                case CIDCHAR: {                    if (line.indexOf("endcidchar") >= 0) {                        state = CIDNONE;                        break;                    }                    StringTokenizer tk = new StringTokenizer(line);                    String t = tk.nextToken();                    int size = t.length() / 2 - 1;                    long start = Long.parseLong(t.substring(1, t.length() - 1), 16);                    t = tk.nextToken();                    int cid = Integer.parseInt(t);                    breakLong(start, size, seqs);                    encodeSequence(size, seqs, (char)cid, planes);                    break;                }            }        }    }        static void breakLong(long n, int size, byte seqs[]) {        for (int k = 0; k < size; ++k) {            seqs[k] = (byte)(n >> ((size - 1 - k) * 8));        }    }    static void encodeSequence(int size, byte seqs[], char cid, ArrayList planes) {        --size;        int nextPlane = 0;        for (int idx = 0; idx < size; ++idx) {            char plane[] = (char[])planes.get(nextPlane);            int one = (int)seqs[idx] & 0xff;            char c = plane[one];            if (c != 0 && (c & 0x8000) == 0)                throw new RuntimeException("Inconsistent mapping.");            if (c == 0) {                planes.add(new char[256]);                c = (char)((planes.size() - 1) | 0x8000);                plane[one] = c;            }            nextPlane = c & 0x7fff;        }        char plane[] = (char[])planes.get(nextPlane);        int one = (int)seqs[size] & 0xff;        char c = plane[one];        if ((c & 
0x8000) != 0)            throw new RuntimeException("Inconsistent mapping.");        plane[one] = cid;    }    /** Adds an extra encoding.     * @param name the name of the encoding. The encoding recognition is case insensitive     * @param enc the conversion class     */        public static void addExtraEncoding(String name, ExtraEncoding enc) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?