📄 pdfreader.java

📁 一个用 Java 操作 PDF 文件的开发包，很好用。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * $Id: PdfReader.java,v 1.21 2002/11/19 08:33:38 blowagie Exp $ * $Name:  $ * * Copyright 2001, 2002 Paulo Soares * * The contents of this file are subject to the Mozilla Public License Version 1.1 * (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the License. * * The Original Code is 'iText, a free JAVA-PDF library'. * * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. * All Rights Reserved. * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. * * Contributor(s): all the names of the contributors are added in the source code * where applicable. * * Alternatively, the contents of this file may be used under the terms of the * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the * provisions of LGPL are applicable instead of those above.  If you wish to * allow use of your version of this file only under the terms of the LGPL * License and not to allow others to use your version of this file under * the MPL, indicate your decision by deleting the provisions above and * replace them with the notice and other provisions required by the LGPL. * If you do not delete the provisions above, a recipient may use your version * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. 
* * This library is free software; you can redistribute it and/or modify it * under the terms of the MPL as stated above or under the terms of the GNU * Library General Public License as published by the Free Software Foundation; * either version 2 of the License, or any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more * details. * * If you didn't download this code from the following link, you should check if * you aren't using an obsolete version: * http://www.lowagie.com/iText/ */package com.lowagie.text.pdf;import java.io.*;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import com.lowagie.text.Rectangle;import java.util.zip.InflaterInputStream;import java.util.zip.ZipInputStream;/** Reads a PDF document and prepares it to import pages to our * document. This class is not mutable and is thread safe; this means that * a single instance can serve as many output documents as needed and can even be static. * @author Paulo Soares (psoares@consiste.pt) */public class PdfReader {    static final PdfName pageInhCandidates[] = {        PdfName.MEDIABOX, PdfName.ROTATE, PdfName.RESOURCES, PdfName.CROPBOX};    protected PRTokeniser tokens;    protected int xref[];    protected PdfObject xrefObj[];    protected PdfDictionary trailer;    protected PdfDictionary pages[];    protected PdfDictionary catalog;    protected PRIndirectReference pageRefs[];    protected PRAcroForm acroForm = null;    protected ArrayList pageInh;    protected int pagesCount;    protected boolean encrypted = false;    protected boolean rebuilt = false;        /** Reads and parses a PDF document.     
* @param filename the file name of the document     * @throws IOException on error     */        public PdfReader(String filename) throws IOException {        tokens = new PRTokeniser(filename);        readPdf();    }        /** Reads and parses a PDF document.     * @param pdfIn the byte array with the document     * @throws IOException on error     */        public PdfReader(byte pdfIn[]) throws IOException {        tokens = new PRTokeniser(pdfIn);        readPdf();    }        public RandomAccessFileOrArray getSafeFile() {        return tokens.getSafeFile();    }        protected PdfReaderInstance getPdfReaderInstance(PdfWriter writer) {        return new PdfReaderInstance(this, writer, xrefObj, pages);    }        /** Gets the number of pages in the document.     * @return the number of pages in the document     */        public int getNumberOfPages() {        return pages.length;    }        /**     * Returns the document's catalog     */    public PdfDictionary getCatalog() {        return catalog;    }    /**     * Returns the document's acroform, if it has one     */     public PRAcroForm getAcroForm() {	    return acroForm;     }    /**     * Gets the page rotation. This value can be 0, 90, 180 or 270.     * @param index the page number. The first page is 1     * @return the page rotation     */        public int getPageRotation(int index) {        PdfDictionary page = pages[index - 1];        PdfNumber rotate = (PdfNumber)getPdfObject(page.get(PdfName.ROTATE));        if (rotate == null)            return 0;        else            return rotate.intValue();    }        /** Gets the page size, taking rotation into account. This     * is a <CODE>Rectangle</CODE> with the value of the /MediaBox and the /Rotate key.     * @param index the page number. 
The first page is 1     * @return a <CODE>Rectangle</CODE>     */        public Rectangle getPageSizeWithRotation(int index) {        Rectangle rect = getPageSize(index);        int rotation = getPageRotation(index);        while (rotation > 0) {            rect = rect.rotate();            rotation -= 90;        }        return rect;    }        /** Gets the page size without taking rotation into account. This     * is the value of the /MediaBox key.     * @param index the page number. The first page is 1     * @return the page size     */        public Rectangle getPageSize(int index) {        PdfDictionary page = pages[index - 1];        PdfArray mediaBox = (PdfArray)getPdfObject(page.get(PdfName.MEDIABOX));        return getNormalizedRectangle(mediaBox);    }        /** Gets the crop box without taking rotation into account. This     * is the value of the /CropBox key. The crop box is the part     * of the document to be displayed or printed. It usually is the same     * as the media box but may be smaller.     * @param index the page number. The first page is 1     * @return the crop box     */        public Rectangle getCropBox(int index) {        PdfDictionary page = pages[index - 1];        PdfArray cropBox = (PdfArray)getPdfObject(page.get(PdfName.CROPBOX));        if (cropBox == null)            return getPageSize(index);        return getNormalizedRectangle(cropBox);    }        /** Returns the content of the document information dictionary as a <CODE>HashMap</CODE>     * of <CODE>String</CODE>.     
* @return content of the document information dictionary     */        public HashMap getInfo() {        HashMap map = new HashMap();        PdfDictionary info = (PdfDictionary)getPdfObject(trailer.get(PdfName.INFO));        if (info == null)            return map;        for (Iterator it = info.getKeys().iterator(); it.hasNext();) {            PdfName key = (PdfName)it.next();            PdfObject obj = (PdfObject)getPdfObject(info.get(key));            if (obj == null)                continue;            String value = obj.toString();            switch (obj.type()) {                case PdfObject.STRING: {                    byte b[] = PdfEncodings.convertToBytes(value, null);                    if (b.length >= 2 && b[0] == (byte)254 && b[1] == (byte)255)                        value = PdfEncodings.convertToString(b, PdfObject.TEXT_UNICODE);                    else                        value = PdfEncodings.convertToString(b, PdfObject.ENCODING);                    break;                }                case PdfObject.NAME: {                    value = PdfName.decodeName(value);                    break;                }            }            map.put(PdfName.decodeName(key.toString()), value);        }        return map;    }        public static Rectangle getNormalizedRectangle(PdfArray box) {        ArrayList rect = box.getArrayList();        float llx = ((PdfNumber)rect.get(0)).floatValue();        float lly = ((PdfNumber)rect.get(1)).floatValue();        float urx = ((PdfNumber)rect.get(2)).floatValue();        float ury = ((PdfNumber)rect.get(3)).floatValue();        return new Rectangle(Math.min(llx, urx), Math.min(lly, ury),            Math.max(llx, urx), Math.max(lly, ury));    }        protected void readPdf() throws IOException {        try {            tokens.checkPdfHeader();            try {                readXref();            }            catch (Exception e) {                if (encrypted)                    throw (IOException)e;                
try {                    rebuilt = true;                    rebuildXref();                }                catch (Exception ne) {                    throw new IOException("Rebuild failed: " + ne.getMessage() + "; Original message: " + e.getMessage());                }            }            readDocObj();            readPages();            PdfObject form = catalog.get(PdfName.ACROFORM);            if (form != null) {	      acroForm = new PRAcroForm(this);	      acroForm.readAcroForm((PdfDictionary)getPdfObject(form));            }        }        finally {            try {                tokens.close();            }            catch (Exception e) {                // empty on purpose            }        }    }    public PdfObject getPdfObject(PdfObject obj) {        if (obj == null)            return null;        if (obj.type() != PdfObject.INDIRECT)            return obj;        int idx = ((PRIndirectReference)obj).getNumber();        obj = xrefObj[idx];        if (obj == null)            return PdfNull.PDFNULL;        else            return obj;    }        protected void pushPageAttributes(PdfDictionary nodePages) {        PdfDictionary dic = new PdfDictionary();        if (pageInh.size() != 0) {            dic.putAll((PdfDictionary)pageInh.get(pageInh.size() - 1));        }        for (int k = 0; k < pageInhCandidates.length; ++k) {            PdfObject obj = nodePages.get(pageInhCandidates[k]);            if (obj != null)                dic.put(pageInhCandidates[k], obj);        }        pageInh.add(dic);    }    protected void popPageAttributes() {        pageInh.remove(pageInh.size() - 1);    }        protected void iteratePages(PdfDictionary page) throws IOException {        PdfName type = (PdfName)getPdfObject(page.get(PdfName.TYPE));        if (type.equals(PdfName.PAGE)) {            PdfDictionary dic = (PdfDictionary)pageInh.get(pageInh.size() - 1);            PdfName key;            for (Iterator i = dic.getKeys().iterator(); i.hasNext();) {              
  key = (PdfName)i.next();                if (page.get(key) == null)                    page.put(key, dic.get(key));            }            pages[pagesCount++] = page;        }        else {            pushPageAttributes(page);            PdfArray kidsPR = (PdfArray)getPdfObject(page.get(PdfName.KIDS));            ArrayList kids = kidsPR.getArrayList();            for (int k = 0; k < kids.size(); ++k){                pageRefs[pagesCount] = (PRIndirectReference)kids.get(k);                PdfDictionary kid = (PdfDictionary)getPdfObject(pageRefs[pagesCount]);                iteratePages(kid);            }            popPageAttributes();        }    }        protected void readPages() throws IOException {        pageInh = new ArrayList();        catalog = (PdfDictionary)getPdfObject(trailer.get(PdfName.ROOT));        PdfDictionary rootPages = (PdfDictionary)getPdfObject(catalog.get(PdfName.PAGES));        PdfNumber count = (PdfNumber)getPdfObject(rootPages.get(PdfName.COUNT));        pages = new PdfDictionary[count.intValue()];        pageRefs = new PRIndirectReference[pages.length];        pagesCount = 0;        iteratePages(rootPages);        pageInh = null;    }        protected void readDocObj() throws IOException {        ArrayList streams = new ArrayList();        xrefObj = new PdfObject[xref.length];        for (int k = 1; k < xrefObj.length; ++k) {            int pos = xref[k];            if (pos <= 0)                continue;            tokens.seek(pos);            tokens.nextValidToken();            if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)                tokens.throwError("Invalid object number.");            int objNum = tokens.intValue();            tokens.nextValidToken();            if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)                tokens.throwError("Invalid generation number.");            int objGen = tokens.intValue();            tokens.nextValidToken();            if (!tokens.getStringValue().equals("obj"))                
tokens.throwError("Token 'obj' expected.");            PdfObject obj = readPRObject();            xrefObj[k] = obj;            if (obj.type() == PdfObject.STREAM)                streams.add(obj);        }        for (int k = 0; k < streams.size(); ++k) {            PRStream stream = (PRStream)streams.get(k);            PdfObject length = getPdfObject(stream.get(PdfName.LENGTH));            stream.setLength(((PdfNumber)length).intValue());        }    }        protected void readXref() throws IOException {
12 下一页
💿 文件大小 4457 K
👤 上传用户 KMPlayer33
📂 所属分类 Java编程
🏷️ 相关标签

#java #操作 #开发包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -