📄 pdfreader.java
字号:
/* * $Id: PdfReader.java,v 1.21 2002/11/19 08:33:38 blowagie Exp $ * $Name: $ * * Copyright 2001, 2002 Paulo Soares * * The contents of this file are subject to the Mozilla Public License Version 1.1 * (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the License. * * The Original Code is 'iText, a free JAVA-PDF library'. * * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. * All Rights Reserved. * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. * * Contributor(s): all the names of the contributors are added in the source code * where applicable. * * Alternatively, the contents of this file may be used under the terms of the * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the * provisions of LGPL are applicable instead of those above. If you wish to * allow use of your version of this file only under the terms of the LGPL * License and not to allow others to use your version of this file under * the MPL, indicate your decision by deleting the provisions above and * replace them with the notice and other provisions required by the LGPL. * If you do not delete the provisions above, a recipient may use your version * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. 
*
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */
package com.lowagie.text.pdf;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import com.lowagie.text.Rectangle;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipInputStream;

/** Reads a PDF document and prepares it to import pages to our
 * document. This class is not mutable and is thread safe; this means that
 * a single instance can serve as many output documents as needed and can even be static.
 * @author Paulo Soares (psoares@consiste.pt)
 */
public class PdfReader {

    /** Keys that a page takes from its ancestor page-tree nodes when the
     * page dictionary does not define them itself.
     */
    static final PdfName pageInhCandidates[] = {
        PdfName.MEDIABOX, PdfName.ROTATE, PdfName.RESOURCES, PdfName.CROPBOX};

    /** Tokeniser over the document; it is closed when <CODE>readPdf()</CODE> finishes. */
    protected PRTokeniser tokens;

    /** Position of each object, indexed by object number. Entries that are
     * zero or negative are skipped when the objects are read.
     */
    protected int xref[];

    /** The objects read from the document, indexed like <CODE>xref</CODE>.
     * Indirect references are resolved through this array.
     */
    protected PdfObject xrefObj[];

    /** The trailer dictionary; /Root and /Info are looked up in it.
     * NOTE(review): it is filled in outside the code visible here, presumably by readXref() - confirm.
     */
    protected PdfDictionary trailer;

    /** The page dictionaries, in the order in which the page tree is walked. */
    protected PdfDictionary pages[];

    /** The dictionary referenced by the /Root entry of the trailer. */
    protected PdfDictionary catalog;

    /** References taken from the /Kids arrays while the page tree is walked. */
    protected PRIndirectReference pageRefs[];

    /** The form read from the catalog's /AcroForm entry, or <CODE>null</CODE> if the catalog has none. */
    protected PRAcroForm acroForm = null;

    /** Stack of inherited page attributes; only used while the page tree is
     * walked and reset to <CODE>null</CODE> afterwards.
     */
    protected ArrayList pageInh;

    /** Number of pages stored in <CODE>pages</CODE> so far while the page tree is walked. */
    protected int pagesCount;

    /** When <CODE>true</CODE>, a failure to read the cross-reference table is
     * rethrown instead of triggering a rebuild.
     * NOTE(review): the flag is set outside the code visible here - confirm where.
     */
    protected boolean encrypted = false;

    /** Set to <CODE>true</CODE> once a rebuild of the cross-reference table has been attempted. */
    protected boolean rebuilt = false;

    /** Reads and parses a PDF document.
     * @param filename the file name of the document
     * @throws IOException on error
     */
    public PdfReader(String filename) throws IOException {
        tokens = new PRTokeniser(filename);
        readPdf();
    }

    /** Reads and parses a PDF document.
* @param pdfIn the byte array with the document * @throws IOException on error */ public PdfReader(byte pdfIn[]) throws IOException { tokens = new PRTokeniser(pdfIn); readPdf(); } public RandomAccessFileOrArray getSafeFile() { return tokens.getSafeFile(); } protected PdfReaderInstance getPdfReaderInstance(PdfWriter writer) { return new PdfReaderInstance(this, writer, xrefObj, pages); } /** Gets the number of pages in the document. * @return the number of pages in the document */ public int getNumberOfPages() { return pages.length; } /** * Returns the document's catalog */ public PdfDictionary getCatalog() { return catalog; } /** * Returns the document's acroform, if it has one */ public PRAcroForm getAcroForm() { return acroForm; } /** * Gets the page rotation. This value can be 0, 90, 180 or 270. * @param index the page number. The first page is 1 * @return the page rotation */ public int getPageRotation(int index) { PdfDictionary page = pages[index - 1]; PdfNumber rotate = (PdfNumber)getPdfObject(page.get(PdfName.ROTATE)); if (rotate == null) return 0; else return rotate.intValue(); } /** Gets the page size, taking rotation into account. This * is a <CODE>Rectangle</CODE> with the value of the /MediaBox and the /Rotate key. * @param index the page number. The first page is 1 * @return a <CODE>Rectangle</CODE> */ public Rectangle getPageSizeWithRotation(int index) { Rectangle rect = getPageSize(index); int rotation = getPageRotation(index); while (rotation > 0) { rect = rect.rotate(); rotation -= 90; } return rect; } /** Gets the page size without taking rotation into account. This * is the value of the /MediaBox key. * @param index the page number. The first page is 1 * @return the page size */ public Rectangle getPageSize(int index) { PdfDictionary page = pages[index - 1]; PdfArray mediaBox = (PdfArray)getPdfObject(page.get(PdfName.MEDIABOX)); return getNormalizedRectangle(mediaBox); } /** Gets the crop box without taking rotation into account. 
This * is the value of the /CropBox key. The crop box is the part * of the document to be displayed or printed. It usually is the same * as the media box but may be smaller. * @param index the page number. The first page is 1 * @return the crop box */ public Rectangle getCropBox(int index) { PdfDictionary page = pages[index - 1]; PdfArray cropBox = (PdfArray)getPdfObject(page.get(PdfName.CROPBOX)); if (cropBox == null) return getPageSize(index); return getNormalizedRectangle(cropBox); } /** Returns the content of the document information dictionary as a <CODE>HashMap</CODE> * of <CODE>String</CODE>. * @return content of the document information dictionary */ public HashMap getInfo() { HashMap map = new HashMap(); PdfDictionary info = (PdfDictionary)getPdfObject(trailer.get(PdfName.INFO)); if (info == null) return map; for (Iterator it = info.getKeys().iterator(); it.hasNext();) { PdfName key = (PdfName)it.next(); PdfObject obj = (PdfObject)getPdfObject(info.get(key)); if (obj == null) continue; String value = obj.toString(); switch (obj.type()) { case PdfObject.STRING: { byte b[] = PdfEncodings.convertToBytes(value, null); if (b.length >= 2 && b[0] == (byte)254 && b[1] == (byte)255) value = PdfEncodings.convertToString(b, PdfObject.TEXT_UNICODE); else value = PdfEncodings.convertToString(b, PdfObject.ENCODING); break; } case PdfObject.NAME: { value = PdfName.decodeName(value); break; } } map.put(PdfName.decodeName(key.toString()), value); } return map; } public static Rectangle getNormalizedRectangle(PdfArray box) { ArrayList rect = box.getArrayList(); float llx = ((PdfNumber)rect.get(0)).floatValue(); float lly = ((PdfNumber)rect.get(1)).floatValue(); float urx = ((PdfNumber)rect.get(2)).floatValue(); float ury = ((PdfNumber)rect.get(3)).floatValue(); return new Rectangle(Math.min(llx, urx), Math.min(lly, ury), Math.max(llx, urx), Math.max(lly, ury)); } protected void readPdf() throws IOException { try { tokens.checkPdfHeader(); try { readXref(); } catch 
(Exception e) { if (encrypted) throw (IOException)e; try { rebuilt = true; rebuildXref(); } catch (Exception ne) { throw new IOException("Rebuild failed: " + ne.getMessage() + "; Original message: " + e.getMessage()); } } readDocObj(); readPages(); PdfObject form = catalog.get(PdfName.ACROFORM); if (form != null) { acroForm = new PRAcroForm(this); acroForm.readAcroForm((PdfDictionary)getPdfObject(form)); } } finally { try { tokens.close(); } catch (Exception e) { // empty on purpose } } } public PdfObject getPdfObject(PdfObject obj) { if (obj == null) return null; if (obj.type() != PdfObject.INDIRECT) return obj; int idx = ((PRIndirectReference)obj).getNumber(); obj = xrefObj[idx]; if (obj == null) return PdfNull.PDFNULL; else return obj; } protected void pushPageAttributes(PdfDictionary nodePages) { PdfDictionary dic = new PdfDictionary(); if (pageInh.size() != 0) { dic.putAll((PdfDictionary)pageInh.get(pageInh.size() - 1)); } for (int k = 0; k < pageInhCandidates.length; ++k) { PdfObject obj = nodePages.get(pageInhCandidates[k]); if (obj != null) dic.put(pageInhCandidates[k], obj); } pageInh.add(dic); } protected void popPageAttributes() { pageInh.remove(pageInh.size() - 1); } protected void iteratePages(PdfDictionary page) throws IOException { PdfName type = (PdfName)getPdfObject(page.get(PdfName.TYPE)); if (type.equals(PdfName.PAGE)) { PdfDictionary dic = (PdfDictionary)pageInh.get(pageInh.size() - 1); PdfName key; for (Iterator i = dic.getKeys().iterator(); i.hasNext();) { key = (PdfName)i.next(); if (page.get(key) == null) page.put(key, dic.get(key)); } pages[pagesCount++] = page; } else { pushPageAttributes(page); PdfArray kidsPR = (PdfArray)getPdfObject(page.get(PdfName.KIDS)); ArrayList kids = kidsPR.getArrayList(); for (int k = 0; k < kids.size(); ++k){ pageRefs[pagesCount] = (PRIndirectReference)kids.get(k); PdfDictionary kid = (PdfDictionary)getPdfObject(pageRefs[pagesCount]); iteratePages(kid); } popPageAttributes(); } } protected void readPages() 
throws IOException { pageInh = new ArrayList(); catalog = (PdfDictionary)getPdfObject(trailer.get(PdfName.ROOT)); PdfDictionary rootPages = (PdfDictionary)getPdfObject(catalog.get(PdfName.PAGES)); PdfNumber count = (PdfNumber)getPdfObject(rootPages.get(PdfName.COUNT)); pages = new PdfDictionary[count.intValue()]; pageRefs = new PRIndirectReference[pages.length]; pagesCount = 0; iteratePages(rootPages); pageInh = null; } protected void readDocObj() throws IOException { ArrayList streams = new ArrayList(); xrefObj = new PdfObject[xref.length]; for (int k = 1; k < xrefObj.length; ++k) { int pos = xref[k]; if (pos <= 0) continue; tokens.seek(pos); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Invalid object number."); int objNum = tokens.intValue(); tokens.nextValidToken(); if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) tokens.throwError("Invalid generation number."); int objGen = tokens.intValue(); tokens.nextValidToken(); if (!tokens.getStringValue().equals("obj")) tokens.throwError("Token 'obj' expected."); PdfObject obj = readPRObject(); xrefObj[k] = obj; if (obj.type() == PdfObject.STREAM) streams.add(obj); } for (int k = 0; k < streams.size(); ++k) { PRStream stream = (PRStream)streams.get(k); PdfObject length = getPdfObject(stream.get(PdfName.LENGTH)); stream.setLength(((PdfNumber)length).intValue()); } } protected void readXref() throws IOException {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -