⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdfstreamengine.java

📁 非常有用的操作pdf文件的java源码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/**
 * Copyright (c) 2003-2006, www.pdfbox.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of pdfbox; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://www.pdfbox.org
 *
 */
package org.pdfbox.util;

import java.io.IOException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Stack;

import org.pdfbox.cos.COSObject;
import org.pdfbox.cos.COSStream;
import org.pdfbox.exceptions.WrappedIOException;

import org.pdfbox.pdmodel.PDPage;
import org.pdfbox.pdmodel.PDResources;

import org.pdfbox.pdmodel.font.PDFont;

import org.pdfbox.pdmodel.graphics.PDGraphicsState;

import org.pdfbox.util.operator.OperatorProcessor;

/**
 * This class will run through a PDF content stream and execute certain operations
 * and provide a callback interface for clients that want to do things with the stream.
 * See the PDFTextStripper class for an example of how to use this class.
 *
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.35 $
 */
public class PDFStreamEngine
{
    // A one-element array holding byte value 32 (the ASCII space character).
    private static final byte[] SPACE_BYTES = { (byte)32 };

    // Current graphics state; processStream() replaces it with a fresh PDGraphicsState.
    private PDGraphicsState graphicsState = null;

    // Text matrix and text line matrix; both are reset to null by processStream().
    private Matrix textMatrix = null;
    private Matrix textLineMatrix = null;
    // Cleared by processStream(). Presumably holds saved graphics states; the
    // code that pushes and pops entries is not in this part of the file.
    private Stack graphicsStack = new Stack();
    //private PDResources resources = null;
    
    // Registered operator processors, keyed by the operator string
    // (populated by registerOperatorProcessor()).
    private Map operators = new HashMap();
    
    // Stack of StreamResources entries; processSubStream() pushes one entry for
    // each non-null resources argument and pops it again when it finishes.
    private Stack streamResourcesStack = new Stack();
    
    // The page passed to the most recent processSubStream() call.
    private PDPage page;
    
    // Cache handed to PDResources.getFonts() in processSubStream(); emptied by resetEngine().
    private Map documentFontCache = new HashMap();
    
    /**
     * Simple internal holder used by the stream engine for one entry of the
     * resources stack.  An instance is filled in by processSubStream() from
     * a PDResources object and pushed onto the resources stack for the
     * duration of that (sub) stream.
     */
    private static class StreamResources
    {
        // Result of resources.getFonts( documentFontCache ).
        private Map fonts;
        // Result of resources.getColorSpaces().
        private Map colorSpaces;
        // Result of resources.getXObjects().
        private Map xobjects;
        // Result of resources.getGraphicsStates().
        private Map graphicsStates;
        // The PDResources object the maps above were read from.
        private PDResources resources;
    }

    /**
     * Default constructor.  Creates an engine with no operator processors
     * registered; processors can be added afterwards with
     * registerOperatorProcessor().
     */
    public PDFStreamEngine()
    {
        //nothing to initialize here, all fields have initializers
    }
    
    /**
     * Constructor that configures the engine from a set of properties.
     * Every key is treated as a PDF operator and its value as the name of
     * the class that handles that operator.  Each class is loaded and
     * instantiated through its no-argument constructor, and the resulting
     * instance is registered with registerOperatorProcessor().
     *
     * Only the keys returned by properties.keySet() are visited.
     *
     * @param properties The engine properties (operator to processor class name).
     *
     * @throws IOException If any processor cannot be loaded, created or
     * registered; the underlying exception is wrapped in a WrappedIOException.
     */
    public PDFStreamEngine( Properties properties ) throws IOException
    {
        try
        {
            for( Iterator names = properties.keySet().iterator(); names.hasNext(); )
            {
                String operatorName = (String)names.next();
                String processorClassName = properties.getProperty( operatorName );
                Class processorClass = Class.forName( processorClassName );
                OperatorProcessor processor = (OperatorProcessor)processorClass.newInstance();
                registerOperatorProcessor( operatorName, processor );
            }
        }
        catch( Exception e )
        {
            //any failure at all is reported to the caller as an IOException
            throw new WrappedIOException( e );
        }
    }
    
    /**
     * Register a custom operator processor with the engine.  The processor
     * is first given this engine as its context and is then stored under
     * the operator string, replacing any processor previously stored for
     * the same operator.
     * 
     * @param operator The operator as a string.
     * @param op Processor instance.
     */
    public void registerOperatorProcessor( String operator, OperatorProcessor op )
    {
        //set the context before the processor becomes reachable through the map
        op.setContext( this );
        operators.put( operator, op );
    }
    
    /**
     * This method must be called between processing documents.  The 
     * PDFStreamEngine caches font information for the document between pages
     * and this will release the cached information.  This only needs
     * to be called if processing a new document.
     */
    public void resetEngine()
    {
        //drop every entry in the cache that is passed to PDResources.getFonts()
        documentFontCache.clear();
    }

    /**
     * Process the contents of a stream from a clean state.  The graphics
     * state, both text matrices, the graphics stack and the resources stack
     * are all reset before the stream is handed to processSubStream().
     *
     * @param aPage The page.
     * @param resources The location to retrieve resources.
     * @param cosStream the Stream to execute.
     *
     * @throws IOException if there is an error accessing the stream.
     */
    public void processStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException
    {
        //start from a brand new graphics state
        this.graphicsState = new PDGraphicsState();

        //discard anything left over from a previous stream
        this.streamResourcesStack.clear();
        this.graphicsStack.clear();
        this.textLineMatrix = null;
        this.textMatrix = null;

        this.processSubStream( aPage, resources, cosStream );
    }
    
    /**
     * Process a sub stream of the current stream.  Unlike processStream()
     * this does not reset the engine state.
     *
     * If resources is not null, its fonts, color spaces, xobjects and
     * graphics states are pushed onto the resources stack while the stream
     * is processed and popped again afterwards, even if processing fails.
     *
     * The tokens of the stream are read in order.  Operands are collected
     * until an operator is found, the operator is then processed with the
     * collected operands and a new operand list is started.
     * 
     * @param aPage The page used for drawing.
     * @param resources The resources used when processing the stream, may be null.
     * @param cosStream The stream to process.
     * 
     * @throws IOException If there is an exception while processing the stream.
     */
    public void processSubStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException
    {
        page = aPage;

        final boolean hasResources = resources != null;
        if( hasResources )
        {
            StreamResources entry = new StreamResources();
            entry.resources = resources;
            entry.fonts = resources.getFonts( documentFontCache );
            entry.colorSpaces = resources.getColorSpaces();
            entry.xobjects = resources.getXObjects();
            entry.graphicsStates = resources.getGraphicsStates();
            streamResourcesStack.push( entry );
        }

        try
        {
            List tokens = cosStream.getStreamTokens();
            if( tokens == null )
            {
                //nothing to execute, the finally block still pops the resources
                return;
            }

            List operands = new ArrayList();
            for( Iterator it = tokens.iterator(); it.hasNext(); )
            {
                Object token = it.next();
                if( token instanceof COSObject )
                {
                    //an object token is replaced by the object it wraps
                    operands.add( ((COSObject)token).getObject() );
                }
                else if( token instanceof PDFOperator )
                {
                    processOperator( (PDFOperator)token, operands );
                    //a new list is created instead of clearing the old one, so the
                    //list given to the operator is never modified afterwards
                    operands = new ArrayList();
                }
                else
                {
                    operands.add( token );
                }
            }
        }
        finally
        {
            if( hasResources )
            {
                streamResourcesStack.pop();
            }
        }
    }

    /**
     * A method provided as an event interface to allow a subclass to perform
     * some specific functionality when a character needs to be displayed.
     * The implementation in this class does nothing.
     *
     * @param text The character to be displayed.
     */
    protected void showCharacter( TextPosition text )
    {
        //intentionally empty, subclasses can override to provide specific functionality.
    }

    /**
     * You should override this method if you want to perform an action when a
     * string is being shown.
     *
     * @param string The string to display.
     *
     * @throws IOException If there is an error showing the string
     */
    public void showString( byte[] string ) throws IOException
    {
        float spaceWidth = 0;
        float spacing = 0;
        StringBuffer stringResult = new StringBuffer(string.length);
        
        float characterHorizontalDisplacement = 0;
        float characterVerticalDisplacement = 0;
        float spaceDisplacement = 0;
        float fontSize = graphicsState.getTextState().getFontSize();
        float horizontalScaling = graphicsState.getTextState().getHorizontalScalingPercent()/100f;
        float verticalScaling = horizontalScaling;//not sure if this is right but what else to do???
        float rise = graphicsState.getTextState().getRise();
        final float wordSpacing = graphicsState.getTextState().getWordSpacing();
        final float characterSpacing = graphicsState.getTextState().getCharacterSpacing();
        float wordSpacingDisplacement = 0;
        
        PDFont font = graphicsState.getTextState().getFont();
        
        //This will typically be 1000 but in the case of a type3 font
        //this might be a different number
        float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 );
        float averageWidth = font.getAverageFontWidth();

        Matrix initialMatrix = new Matrix();
        initialMatrix.setValue(0,0,1);
        initialMatrix.setValue(0,1,0);
        initialMatrix.setValue(0,2,0);
        initialMatrix.setValue(1,0,0);
        initialMatrix.setValue(1,1,1);
        initialMatrix.setValue(1,2,0);
        initialMatrix.setValue(2,0,0);
        initialMatrix.setValue(2,1,rise);
        initialMatrix.setValue(2,2,1);


        //this

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -