📄 sstdeserializer.java
字号:
/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2003 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache POI" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * "Apache POI", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */package org.apache.poi.hssf.record;import org.apache.poi.util.BinaryTree;import org.apache.poi.util.LittleEndian;import org.apache.poi.util.LittleEndianConsts;/** * Handles the task of deserializing a SST string. The two main entry points are * * @author Glen Stampoultzis (glens at apache.org) * @author Jason Height (jheight at apache.org) */class SSTDeserializer{ private BinaryTree strings; /** this is the number of characters that have been read prior to the continuation */ private int continuationReadChars; /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. 
*/ private String unfinishedString; /** this is true if the string uses wide characters */ private boolean wideChar; /** this is true if the string is a rich text string */ private boolean richText; /** this is true if the string is a far east string or some other wierd string */ private boolean extendedText; /** Number of formatting runs in this rich text field */ private short runCount; /** Number of characters in current string */ private int charCount; private int extensionLength; private int continueSkipBytes = 0; public SSTDeserializer( BinaryTree strings ) { this.strings = strings; initVars(); } private void initVars() { runCount = 0; continuationReadChars = 0; unfinishedString = "";// bytesInCurrentSegment = 0;// stringDataOffset = 0; wideChar = false; richText = false; extendedText = false; continueSkipBytes = 0; } /** * This is the starting point where strings are constructed. Note that * strings may span across multiple continuations. Read the SST record * carefully before beginning to hack. 
*/ public void manufactureStrings( final byte[] data, final int initialOffset) { initVars(); int offset = initialOffset; final int dataSize = data.length; while ( offset < dataSize ) { int remaining = dataSize - offset; if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) ) { throw new RecordFormatException( "Cannot get length of the last string in SSTRecord" ); } if ( remaining == LittleEndianConsts.SHORT_SIZE ) { //JMH Dont know about this setContinuationCharsRead( 0 );//LittleEndian.getUShort( data, offset ) ); unfinishedString = ""; break; } charCount = LittleEndian.getUShort( data, offset ); int charsRead = charCount; readStringHeader( data, offset ); boolean stringContinuesOverContinuation = remaining < totalStringSize(); if ( stringContinuesOverContinuation ) { int remainingBytes = dataSize - offset - stringHeaderOverhead(); //Only read the size of the string or whatever is left before the //continuation charsRead = Math.min(charsRead, calculateCharCount( remainingBytes )); setContinuationCharsRead( charsRead ); if (charsRead == charCount) { //Since all of the characters will have been read, but the entire string (including formatting runs etc) //hasnt, Compute the number of bytes to skip when the continue record starts continueSkipBytes = offsetForContinuedRecord(0) - (remainingBytes - calculateByteCount(charsRead)); } } processString( data, offset, charsRead ); offset += totalStringSize(); if ( stringContinuesOverContinuation ) { break; } } }// private void dump( final byte[] data, int offset, int length )// {// try// {// System.out.println( "------------------- SST DUMP -------------------------" );// HexDump.dump( (byte[]) data, offset, System.out, offset, length );// }// catch ( IOException e )// {// }// catch ( ArrayIndexOutOfBoundsException e )// {// }// catch ( IllegalArgumentException e )// {// }// } /** * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or * plain string etc) and 
calculates the length and offset for the string. * */ private void readStringHeader( final byte[] data, final int index ) { byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE]; wideChar = ( optionFlag & 1 ) == 1; extendedText = ( optionFlag & 4 ) == 4; richText = ( optionFlag & 8 ) == 8; runCount = 0; if ( richText ) { runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD ); } extensionLength = 0; if ( extendedText ) { extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD + (richText ? LittleEndianConsts.SHORT_SIZE : 0) ); } } /** * Reads a string or the first part of a string. * * @param characters the number of characters to write. * * @return the number of bytes written. */ private int processString( final byte[] data, final int dataIndex, final int characters ) { // length is the length we store it as. not the length that is read. int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters ); byte[] unicodeStringBuffer = new byte[length]; int offset = 0; // Set the length in characters LittleEndian.putUShort( unicodeStringBuffer, offset, characters ); offset += LittleEndianConsts.SHORT_SIZE; // Set the option flags unicodeStringBuffer[offset] = data[dataIndex + offset]; // Copy in the string data int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD; arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead ); // Create the unicode string UnicodeString string = new UnicodeString( UnicodeString.sid, (short) unicodeStringBuffer.length, unicodeStringBuffer ); setContinuationCharsRead( calculateCharCount(bytesRead)); if ( isStringFinished() ) { Integer integer = new Integer( strings.size() ); addToStringTable( strings, integer, string ); } else { unfinishedString = string.getString(); } return bytesRead; } private boolean isStringFinished() { return getContinuationCharsRead() == charCount; } 
/**
     * Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
     * we end up getting duplicate strings. To get around this I'm doing two things: 1. Converting rich
     * text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
     * but it gets the job done until we can handle this a little better.
     * <p>
     * Only the duplicate rejection raised by the table is retried. Any other failure (for example a
     * null table) is propagated to the caller instead of being swallowed and retried forever.
     * <p>
     * NOTE(review): if the key itself is already present, appending spaces to the value cannot make
     * the insertion succeed; confirm that callers always pass a key that is not yet in the table.
     *
     * @param strings the table to add the string to
     * @param integer the key to store the string under
     * @param string  the string to add; its option flags and text may be modified
     */
    public static void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
    {
        // Clear the rich text flag (mask 8).
        if ( string.isRichText() )
        {
            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
        }
        // Clear the extended text flag (mask 4).
        if ( string.isExtendedText() )
        {
            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );
        }

        boolean added = false;
        while ( !added )
        {
            try
            {
                strings.put( integer, string );
                added = true;
            }
            catch ( IllegalArgumentException duplicate )
            {
                // The table rejected a duplicate: make the value differ and try again.
                string.setString( string.getString() + " " );
            }
        }
    }

    /**
     * Converts a number of bytes into the number of characters it holds.
     *
     * @param byte_count the number of bytes of character data
     * @return byte_count divided by the size of one character: LittleEndianConsts.SHORT_SIZE
     *         for wide characters, LittleEndianConsts.BYTE_SIZE otherwise
     */
    private int calculateCharCount( final int byte_count )
    {
        final int bytesPerChar = wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE;
        return byte_count / bytesPerChar;
    }

    /**
     * Process a Continue record. A Continue record for an SST record
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -