pdfreader.java

来自「有关对pdf操作的代码」· Java 代码 · 共 1,757 行 · 第 1/5 页

JAVA
1,757
字号
        }    }    /**     * @return the percentage of the cross reference table that has been read     */    public double dumpPerc() {        int total = 0;        for (int k = 0; k < xrefObj.size(); ++k) {            if (xrefObj.get(k) != null)                ++total;        }        return (total * 100.0 / xrefObj.size());    }    protected void readDocObj() throws IOException {        ArrayList streams = new ArrayList();        xrefObj = new ArrayList(xref.length / 2);        xrefObj.addAll(Collections.nCopies(xref.length / 2, null));        for (int k = 2; k < xref.length; k += 2) {            int pos = xref[k];            if (pos <= 0 || xref[k + 1] > 0)                continue;            tokens.seek(pos);            tokens.nextValidToken();            if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)                tokens.throwError("Invalid object number.");            objNum = tokens.intValue();            tokens.nextValidToken();            if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)                tokens.throwError("Invalid generation number.");            objGen = tokens.intValue();            tokens.nextValidToken();            if (!tokens.getStringValue().equals("obj"))                tokens.throwError("Token 'obj' expected.");            PdfObject obj;            try {                obj = readPRObject();                if (obj.isStream()) {                    streams.add(obj);                }            }            catch (Exception e) {                obj = null;            }            xrefObj.set(k / 2, obj);        }        for (int k = 0; k < streams.size(); ++k) {            checkPRStreamLength((PRStream)streams.get(k));        }        readDecryptedDocObj();        if (objStmMark != null) {            for (Iterator i = objStmMark.entrySet().iterator(); i.hasNext();) {                Map.Entry entry = (Map.Entry)i.next();                int n = ((Integer)entry.getKey()).intValue();                IntHashtable h = (IntHashtable)entry.getValue();                readObjStm((PRStream)xrefObj.get(n), h);                xrefObj.set(n, null);            }            objStmMark = null;        }        xref = null;    }    private void checkPRStreamLength(PRStream stream) throws IOException {        int fileLength = tokens.length();        int start = stream.getOffset();        boolean calc = false;        int streamLength = 0;        PdfObject obj = getPdfObjectRelease(stream.get(PdfName.LENGTH));        if (obj != null && obj.type() == PdfObject.NUMBER) {            streamLength = ((PdfNumber)obj).intValue();            if (streamLength + start > fileLength - 20)                calc = true;            else {                tokens.seek(start + streamLength);                String line = tokens.readString(20);                if (!line.startsWith("\nendstream") &&                !line.startsWith("\r\nendstream") &&                !line.startsWith("\rendstream") &&                !line.startsWith("endstream"))                    calc = true;            }        }        else            calc = true;        if (calc) {            byte tline[] = new byte[16];            tokens.seek(start);            while (true) {                int pos = tokens.getFilePointer();                if (!tokens.readLineSegment(tline))                    break;                if (equalsn(tline, endstream)) {                    streamLength = pos - start;                    break;                }                if (equalsn(tline, endobj)) {                    tokens.seek(pos - 16);                    String s = tokens.readString(16);                    int index = s.indexOf("endstream");                    if (index >= 0)                        pos = pos - 16 + index;                    streamLength = pos - start;                    break;                }            }        }        stream.setLength(streamLength);    }    protected void readObjStm(PRStream stream, IntHashtable map) throws IOException {        int first = ((PdfNumber)getPdfObject(stream.get(PdfName.FIRST))).intValue();        int n = ((PdfNumber)getPdfObject(stream.get(PdfName.N))).intValue();        byte b[] = getStreamBytes(stream, tokens.getFile());        PRTokeniser saveTokens = tokens;        tokens = new PRTokeniser(b);        try {            int address[] = new int[n];            int objNumber[] = new int[n];            boolean ok = true;            for (int k = 0; k < n; ++k) {                ok = tokens.nextToken();                if (!ok)                    break;                if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) {                    ok = false;                    break;                }                objNumber[k] = tokens.intValue();                ok = tokens.nextToken();                if (!ok)                    break;                if (tokens.getTokenType() != PRTokeniser.TK_NUMBER) {                    ok = false;                    break;                }                address[k] = tokens.intValue() + first;            }            if (!ok)                throw new IOException("Error reading ObjStm");            for (int k = 0; k < n; ++k) {                if (map.containsKey(k)) {                    tokens.seek(address[k]);                    PdfObject obj = readPRObject();                    xrefObj.set(objNumber[k], obj);                }            }        }        finally {            tokens = saveTokens;        }    }    /**     * Eliminates the reference to the object freeing the memory used by it and clearing     * the xref entry.     * @param obj the object. If it's an indirect reference it will be eliminated     * @return the object or the already erased dereferenced object     */    public static PdfObject killIndirect(PdfObject obj) {        if (obj == null || obj.isNull())            return null;        PdfObject ret = getPdfObjectRelease(obj);        if (obj.isIndirect()) {            PRIndirectReference ref = (PRIndirectReference)obj;            PdfReader reader = ref.getReader();            int n = ref.getNumber();            reader.xrefObj.set(n, null);            if (reader.partial)                reader.xref[n * 2] = -1;        }        return ret;    }    private void ensureXrefSize(int size) {        if (size == 0)            return;        if (xref == null)            xref = new int[size];        else {            if (xref.length < size) {                int xref2[] = new int[size];                System.arraycopy(xref, 0, xref2, 0, xref.length);                xref = xref2;            }        }    }    protected void readXref() throws IOException {        hybridXref = false;        newXrefType = false;        tokens.seek(tokens.getStartxref());        tokens.nextToken();        if (!tokens.getStringValue().equals("startxref"))            throw new IOException("startxref not found.");        tokens.nextToken();        if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)            throw new IOException("startxref is not followed by a number.");        int startxref = tokens.intValue();        lastXref = startxref;        eofPos = tokens.getFilePointer();        try {            if (readXRefStream(startxref)) {                newXrefType = true;                return;            }        }        catch (Exception e) {}        xref = null;        tokens.seek(startxref);        trailer = readXrefSection();        PdfDictionary trailer2 = trailer;        while (true) {            PdfNumber prev = (PdfNumber)trailer2.get(PdfName.PREV);            if (prev == null)                break;            tokens.seek(prev.intValue());            trailer2 = readXrefSection();        }    }    protected PdfDictionary readXrefSection() throws IOException {        tokens.nextValidToken();        if (!tokens.getStringValue().equals("xref"))            tokens.throwError("xref subsection not found");        int start = 0;        int end = 0;        int pos = 0;        int gen = 0;        while (true) {            tokens.nextValidToken();            if (tokens.getStringValue().equals("trailer"))                break;            if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)                tokens.throwError("Object number of the first object in this xref subsection not found");            start = tokens.intValue();            tokens.nextValidToken();            if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)                tokens.throwError("Number of entries in this xref subsection not found");            end = tokens.intValue() + start;            if (start == 1) { // fix incorrect start number                int back = tokens.getFilePointer();                tokens.nextValidToken();                pos = tokens.intValue();                tokens.nextValidToken();                gen = tokens.intValue();                if (pos == 0 && gen == 65535) {                    --start;                    --end;                }                tokens.seek(back);            }            ensureXrefSize(end * 2);            for (int k = start; k < end; ++k) {                tokens.nextValidToken();                pos = tokens.intValue();                tokens.nextValidToken();                gen = tokens.intValue();                tokens.nextValidToken();                int p = k * 2;                if (tokens.getStringValue().equals("n")) {                    if (xref[p] == 0 && xref[p + 1] == 0) {//                        if (pos == 0)//                            tokens.throwError("File position 0 cross-reference entry in this xref subsection");                        xref[p] = pos;                    }                }                else if (tokens.getStringValue().equals("f")) {                    if (xref[p] == 0 && xref[p + 1] == 0)                        xref[p] = -1;                }                else                    tokens.throwError("Invalid cross-reference entry in this xref subsection");            }        }        PdfDictionary trailer = (PdfDictionary)readPRObject();        PdfNumber xrefSize = (PdfNumber)trailer.get(PdfName.SIZE);        ensureXrefSize(xrefSize.intValue() * 2);        PdfObject xrs = trailer.get(PdfName.XREFSTM);        if (xrs != null && xrs.isNumber()) {            int loc = ((PdfNumber)xrs).intValue();            try {                readXRefStream(loc);                newXrefType = true;                hybridXref = true;            }            catch (IOException e) {                xref = null;                throw e;            }        }        return trailer;    }    protected boolean readXRefStream(int ptr) throws IOException {        tokens.seek(ptr);        int thisStream = 0;        if (!tokens.nextToken())            return false;        if (tokens.getTokenType() != PRTokeniser.TK_NUMBER)            return false;        thisStream = tokens.intValue();        if (!tokens.nextToken() || tokens.getTokenType() != PRTokeniser.TK_NUMBER)            return false;        if (!tokens.nextToken() || !tokens.getStringValue().equals("obj"))            return false;        PdfObject object = readPRObject();        PRStream stm = null;        if (object.isStream()) {            stm = (PRStream)object;            if (!PdfName.XREF.equals(stm.get(PdfName.TYPE)))                return false;        }        else            return false;        if (trailer == null) {            trailer = new PdfDictionary();            trailer.putAll(stm);        }        stm.setLength(((PdfNumber)stm.get(PdfName.LENGTH)).intValue());        int size = ((PdfNumber)stm.get(PdfName.SIZE)).intValue();        PdfArray index;        PdfObject obj = stm.get(PdfName.INDEX);        if (obj == null) {            index = new PdfArray();            index.add(new int[]{0, size});        }        else            index = (PdfArray)obj;        PdfArray w = (PdfArray)stm.get(PdfName.W);        int prev = -1;        obj = stm.get(PdfName.PREV);        if (obj != null)            prev = ((PdfNumber)obj).intValue();        // Each xref pair is a position        // type 0 -> -1, 0        // type 1 -> offset, 0        // type 2 -> index, obj num        ensureXrefSize(size * 2);        if (objStmMark == null && !partial)            objStmMark = new HashMap();        if (objStmToOffset == null && partial)            objStmToOffset = new IntHashtable();        byte b[] = getStreamBytes(stm, tokens.getFile());        int bptr = 0;        ArrayList wa = w.getArrayList();        int wc[] = new int[3];        for (int k = 0; k < 3; ++k)            wc[k] = ((PdfNumber)wa.get(k)).intValue();        ArrayList sections = index.getArrayList();        for (int idx = 0; idx < sections.size(); idx += 2) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?