pdfencodings.java
来自「有关对pdf操作的代码」· Java 代码 · 共 798 行 · 第 1/3 页
JAVA
798 行
String text = extra.byteToChar(bytes, encoding); if (text != null) return text; } char ch[] = null; if (encoding.equals(BaseFont.WINANSI)) ch = winansiByteToChar; else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING)) ch = pdfEncodingByteToChar; if (ch != null) { int len = bytes.length; char c[] = new char[len]; for (int k = 0; k < len; ++k) { c[k] = ch[bytes[k] & 0xff]; } return new String(c); } try { return new String(bytes, encoding); } catch (UnsupportedEncodingException e) { throw new ExceptionConverter(e); } } /** Checks is <CODE>text</CODE> only has PdfDocEncoding characters. * @param text the <CODE>String</CODE> to test * @return <CODE>true</CODE> if only PdfDocEncoding characters are present */ public static boolean isPdfDocEncoding(String text) { if (text == null) return true; int len = text.length(); for (int k = 0; k < len; ++k) { char char1 = text.charAt(k); if (char1 < 128 || (char1 > 160 && char1 <= 255)) continue; if (!pdfEncoding.containsKey(char1)) return false; } return true; } static final HashMap cmaps = new HashMap(); /** Assumes that '\\n' and '\\r\\n' are the newline sequences. It may not work for * all CJK encodings. To be used with loadCmap(). */ public static final byte CRLF_CID_NEWLINE[][] = new byte[][]{{(byte)'\n'}, {(byte)'\r', (byte)'\n'}}; /** Clears the CJK cmaps from the cache. If <CODE>name</CODE> is the * empty string then all the cache is cleared. Calling this method * has no consequences other than the need to reload the cmap * if needed. * @param name the name of the cmap to clear or all the cmaps if the empty string */ public static void clearCmap(String name) { synchronized (cmaps) { if (name.length() == 0) cmaps.clear(); else cmaps.remove(name); } } /** Loads a CJK cmap to the cache with the option of associating * sequences to the newline. * @param name the CJK cmap name * @param newline the sequences to be replaced by a newline in the resulting CID. See <CODE>CRLF_CID_NEWLINE</CODE> */ public static void loadCmap(String name, byte newline[][]) { try { char planes[][] = null; synchronized (cmaps) { planes = (char[][])cmaps.get(name); } if (planes == null) { planes = readCmap(name, newline); synchronized (cmaps) { cmaps.put(name, planes); } } } catch (IOException e) { throw new ExceptionConverter(e); } } /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE> * to a CID string. This is needed to reach some CJK characters * that don't exist in 16 bit Unicode.</p> * The font to use this result must use the encoding "Identity-H" * or "Identity-V".</p> * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/. * @param name the CJK encoding name * @param seq the <CODE>byte</CODE> array to be decoded * @return the CID string */ public static String convertCmap(String name, byte seq[]) { return convertCmap(name, seq, 0, seq.length); } /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE> * to a CID string. This is needed to reach some CJK characters * that don't exist in 16 bit Unicode.</p> * The font to use this result must use the encoding "Identity-H" * or "Identity-V".</p> * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/. * @param name the CJK encoding name * @param start the start offset in the data * @param length the number of bytes to convert * @param seq the <CODE>byte</CODE> array to be decoded * @return the CID string */ public static String convertCmap(String name, byte seq[], int start, int length) { try { char planes[][] = null; synchronized (cmaps) { planes = (char[][])cmaps.get(name); } if (planes == null) { planes = readCmap(name, (byte[][])null); synchronized (cmaps) { cmaps.put(name, planes); } } return decodeSequence(seq, start, length, planes); } catch (IOException e) { throw new ExceptionConverter(e); } } static String decodeSequence(byte seq[], int start, int length, char planes[][]) { StringBuffer buf = new StringBuffer(); int end = start + length; int currentPlane = 0; for (int k = start; k < end; ++k) { int one = (int)seq[k] & 0xff; char plane[] = planes[currentPlane]; int cid = plane[one]; if ((cid & 0x8000) == 0) { buf.append((char)cid); currentPlane = 0; } else currentPlane = cid & 0x7fff; } return buf.toString(); } static char[][] readCmap(String name, byte newline[][]) throws IOException { ArrayList planes = new ArrayList(); planes.add(new char[256]); readCmap(name, planes); if (newline != null) { for (int k = 0; k < newline.length; ++k) encodeSequence(newline[k].length, newline[k], BaseFont.CID_NEWLINE, planes); } char ret[][] = new char[planes.size()][]; return (char[][])planes.toArray(ret); } static void readCmap(String name, ArrayList planes) throws IOException { String fullName = BaseFont.RESOURCE_PATH + "cmaps/" + name; InputStream in = BaseFont.getResourceStream(fullName); if (in == null) throw new IOException("The Cmap " + name + " was not found."); encodeStream(in, planes); in.close(); } static void encodeStream(InputStream in, ArrayList planes) throws IOException { BufferedReader rd = new BufferedReader(new InputStreamReader(in, "iso-8859-1")); String line = null; int state = CIDNONE; byte seqs[] = new byte[7]; while ((line = rd.readLine()) != null) { if (line.length() < 6) continue; switch (state) { case CIDNONE: { if (line.indexOf("begincidrange") >= 0) state = CIDRANGE; else if (line.indexOf("begincidchar") >= 0) state = CIDCHAR; else if (line.indexOf("usecmap") >= 0) { StringTokenizer tk = new StringTokenizer(line); String t = tk.nextToken(); readCmap(t.substring(1), planes); } break; } case CIDRANGE: { if (line.indexOf("endcidrange") >= 0) { state = CIDNONE; break; } StringTokenizer tk = new StringTokenizer(line); String t = tk.nextToken(); int size = t.length() / 2 - 1; long start = Long.parseLong(t.substring(1, t.length() - 1), 16); t = tk.nextToken(); long end = Long.parseLong(t.substring(1, t.length() - 1), 16); t = tk.nextToken(); int cid = Integer.parseInt(t); for (long k = start; k <= end; ++k) { breakLong(k, size, seqs); encodeSequence(size, seqs, (char)cid, planes); ++cid; } break; } case CIDCHAR: { if (line.indexOf("endcidchar") >= 0) { state = CIDNONE; break; } StringTokenizer tk = new StringTokenizer(line); String t = tk.nextToken(); int size = t.length() / 2 - 1; long start = Long.parseLong(t.substring(1, t.length() - 1), 16); t = tk.nextToken(); int cid = Integer.parseInt(t); breakLong(start, size, seqs); encodeSequence(size, seqs, (char)cid, planes); break; } } } } static void breakLong(long n, int size, byte seqs[]) { for (int k = 0; k < size; ++k) { seqs[k] = (byte)(n >> ((size - 1 - k) * 8)); } } static void encodeSequence(int size, byte seqs[], char cid, ArrayList planes) { --size; int nextPlane = 0; for (int idx = 0; idx < size; ++idx) { char plane[] = (char[])planes.get(nextPlane); int one = (int)seqs[idx] & 0xff; char c = plane[one]; if (c != 0 && (c & 0x8000) == 0) throw new RuntimeException("Inconsistent mapping."); if (c == 0) { planes.add(new char[256]); c = (char)((planes.size() - 1) | 0x8000); plane[one] = c; } nextPlane = c & 0x7fff; } char plane[] = (char[])planes.get(nextPlane); int one = (int)seqs[size] & 0xff; char c = plane[one]; if ((c & 0x8000) != 0) throw new RuntimeException("Inconsistent mapping."); plane[one] = cid; } /** Adds an extra encoding. * @param name the name of the encoding. The encoding recognition is case insensitive * @param enc the conversion class */ public static void addExtraEncoding(String name, ExtraEncoding enc) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?