📄 sstdeserializer.java

📁 Office格式转换代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2003 The Apache Software Foundation.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * 3. The end-user documentation included with the redistribution, *    if any, must include the following acknowledgment: *       "This product includes software developed by the *        Apache Software Foundation (http://www.apache.org/)." *    Alternately, this acknowledgment may appear in the software itself, *    if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and *    "Apache POI" must not be used to endorse or promote products *    derived from this software without prior written permission. For *    written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", *    "Apache POI", nor may "Apache" appear in their name, without *    prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation.  For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */package org.apache.poi.hssf.record;import org.apache.poi.util.BinaryTree;import org.apache.poi.util.LittleEndian;import org.apache.poi.util.LittleEndianConsts;/** * Handles the task of deserializing a SST string.  The two main entry points are * * @author Glen Stampoultzis (glens at apache.org) * @author Jason Height (jheight at apache.org) */class SSTDeserializer{    private BinaryTree strings;    /** this is the number of characters that have been read prior to the continuation */    private int continuationReadChars;    /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */    private String unfinishedString;    /** this is true if the string uses wide characters */    private boolean wideChar;    /** this is true if the string is a rich text string */    private boolean richText;    /** this is true if the string is a far east string or some other wierd string */    private boolean extendedText;    /** Number of formatting runs in this rich text field */    private short runCount;    /** Number of characters in current string */    private int charCount;    private int extensionLength;    private int continueSkipBytes = 0;    public SSTDeserializer( BinaryTree strings )    {        this.strings = strings;        initVars();    }    private void initVars()    {        runCount = 0;        continuationReadChars = 0;        unfinishedString = "";//        bytesInCurrentSegment = 0;//        stringDataOffset = 0;        wideChar = false;        richText = false;        extendedText = false;        continueSkipBytes = 0;    }    /**     * This is the starting point where strings are constructed.  Note that     * strings may span across multiple continuations. Read the SST record     * carefully before beginning to hack.     */    public void manufactureStrings( final byte[] data, final int initialOffset)    {        initVars();        int offset = initialOffset;        final int dataSize = data.length;        while ( offset < dataSize )        {            int remaining = dataSize - offset;            if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )            {                throw new RecordFormatException( "Cannot get length of the last string in SSTRecord" );            }            if ( remaining == LittleEndianConsts.SHORT_SIZE )            {              //JMH Dont know about this                setContinuationCharsRead( 0 );//LittleEndian.getUShort( data, offset ) );                unfinishedString = "";                break;            }            charCount = LittleEndian.getUShort( data, offset );            int charsRead = charCount;            readStringHeader( data, offset );            boolean stringContinuesOverContinuation = remaining < totalStringSize();            if ( stringContinuesOverContinuation )            {                int remainingBytes = dataSize - offset - stringHeaderOverhead();                //Only read the size of the string or whatever is left before the                //continuation                charsRead = Math.min(charsRead, calculateCharCount( remainingBytes ));                setContinuationCharsRead( charsRead );                                if (charsRead == charCount) {                  //Since all of the characters will have been read, but the entire string (including formatting runs etc)                  //hasnt, Compute the number of bytes to skip when the continue record starts                  continueSkipBytes = offsetForContinuedRecord(0) - (remainingBytes - calculateByteCount(charsRead));                }            }            processString( data, offset, charsRead );            offset += totalStringSize();            if ( stringContinuesOverContinuation )            {                break;            }        }    }//    private void dump( final byte[] data, int offset, int length )//    {//        try//        {//            System.out.println( "------------------- SST DUMP -------------------------" );//            HexDump.dump( (byte[]) data, offset, System.out, offset, length );//        }//        catch ( IOException e )//        {//        }//        catch ( ArrayIndexOutOfBoundsException e )//        {//        }//        catch ( IllegalArgumentException e )//        {//        }//    }    /**     * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or     * plain string etc) and calculates the length and offset for the string.     *     */    private void readStringHeader( final byte[] data, final int index )    {        byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];        wideChar = ( optionFlag & 1 ) == 1;        extendedText = ( optionFlag & 4 ) == 4;        richText = ( optionFlag & 8 ) == 8;        runCount = 0;        if ( richText )        {            runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );        }        extensionLength = 0;        if ( extendedText )        {            extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD                    + (richText ? LittleEndianConsts.SHORT_SIZE : 0) );        }    }    /**     * Reads a string or the first part of a string.     *     * @param characters the number of characters to write.     *     * @return the number of bytes written.     */    private int processString( final byte[] data, final int dataIndex, final int characters )    {        // length is the length we store it as.  not the length that is read.        int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters );        byte[] unicodeStringBuffer = new byte[length];        int offset = 0;        // Set the length in characters        LittleEndian.putUShort( unicodeStringBuffer, offset, characters );        offset += LittleEndianConsts.SHORT_SIZE;        // Set the option flags        unicodeStringBuffer[offset] = data[dataIndex + offset];        // Copy in the string data        int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD;        arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead );        // Create the unicode string        UnicodeString string = new UnicodeString( UnicodeString.sid,                (short) unicodeStringBuffer.length,                unicodeStringBuffer );        setContinuationCharsRead( calculateCharCount(bytesRead));        if ( isStringFinished() )        {            Integer integer = new Integer( strings.size() );            addToStringTable( strings, integer, string );        }        else        {            unfinishedString = string.getString();        }        return bytesRead;    }    private boolean isStringFinished()    {        return getContinuationCharsRead() == charCount;    }    /**     * Okay, we are doing some major cheating here. Because we can't handle rich text strings properly     * we end up getting duplicate strings.  To get around this I'm doing two things: 1. Converting rich     * text to normal text and 2. If there's a duplicate I'm adding a space onto the end.  Sneaky perhaps     * but it gets the job done until we can handle this a little better.     */    static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )    {        if ( string.isRichText() )            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );        if ( string.isExtendedText() )            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );        boolean added = false;        while ( added == false )        {            try            {                strings.put( integer, string );                added = true;            }            catch ( Exception ignore )            {                string.setString( string.getString() + " " );            }        }    }    private int calculateCharCount( final int byte_count )    {        return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );    }    /**     * Process a Continue record. A Continue record for an SST record
12 下一页
💿 文件大小 8883 K
👤 上传用户 lanyunhan
📂 所属分类 Java编程
🏷️ 相关标签

#Office #格式转换 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -