📄 hdfobjectfactory.java
字号:
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2003 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache POI" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache POI", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/*
 * HDFObjectFactory.java
 *
 * Created on February 24, 2002, 2:17 PM
 */

package org.apache.poi.hdf.model;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

import org.apache.poi.hdf.event.HDFLowLevelParsingListener;
import org.apache.poi.hdf.model.hdftypes.*;
import org.apache.poi.hdf.model.util.BTreeSet;
import org.apache.poi.hdf.model.util.ParsingState;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;

/**
 * The Object Factory takes in a stream and creates the low level objects
 * that represent the data.
 *
 * @author andy
 */
public class HDFObjectFactory {

    /** OLE stuff */
    private POIFSFileSystem _filesystem;
    /** The FIB */
    private FileInformationBlock _fib;

    /** Used to set up the object model */
    private HDFLowLevelParsingListener _listener;
    /** parsing state for characters */
    private ParsingState _charParsingState;
    /** parsing state for paragraphs */
    private ParsingState _parParsingState;

    /** main document stream buffer */
    byte[] _mainDocument;
    /** table stream buffer */
    byte[] _tableBuffer;

    /**
     * Ad-hoc driver: builds a factory from a Word file and prints any failure.
     *
     * @param args optional; {@code args[0]} is the path of the document to load.
     *             When absent, the historical default {@code c:\test.doc} is used.
     */
    public static void main(String args[]) {
        String path = (args != null && args.length > 0) ? args[0] : "c:\\test.doc";
        try {
            InputStream in = new FileInputStream(path);
            try {
                new HDFObjectFactory(in);
            } finally {
                // The file handle used to be leaked; release it on every path.
                in.close();
            }
        } catch (Throwable t) {
            // Debug entry point only: report whatever went wrong and exit normally.
            t.printStackTrace();
        }
    }

    /**
     * Creates a new instance of HDFObjectFactory.
     *
     * Loads the "WordDocument" stream, builds the FileInformationBlock from it,
     * then loads the table stream, the text pieces and the formatting properties.
     *
     * @param istream The InputStream that is the Word document
     * @param l       The listener that receives the parsed objects; when
     *                {@code null} a new {@link HDFObjectModel} is used instead
     * @throws IOException if the document cannot be read or the text piece
     *                     table is corrupted
     */
    protected HDFObjectFactory(InputStream istream, HDFLowLevelParsingListener l) throws IOException {
        if (l == null) {
            _listener = new HDFObjectModel();
        } else {
            _listener = l;
        }

        //do Ole stuff
        _filesystem = new POIFSFileSystem(istream);
        _mainDocument = readDocumentStream(_filesystem, "WordDocument");

        _fib = new FileInformationBlock(_mainDocument);

        initTableStream();
        initTextPieces();
        initFormattingProperties();
    }

    /**
     * Creates a new instance of HDFObjectFactory that feeds a default
     * {@link HDFObjectModel} listener.
     *
     * @param istream The InputStream that is the Word document
     * @throws IOException if the document cannot be read or the text piece
     *                     table is corrupted
     */
    public HDFObjectFactory(InputStream istream) throws IOException {
        this(istream, null);
    }

    /**
     * Reads the "WordDocument" stream of the given document and returns its
     * FileInformationBlock.
     *
     * @param istream The InputStream that is the Word document
     * @return a list holding exactly one element, the document's
     *         {@link FileInformationBlock}
     * @throws IOException if the document cannot be read
     */
    public static List getTypes(InputStream istream) throws IOException {
        List results = new ArrayList(1);

        //do Ole stuff
        POIFSFileSystem filesystem = new POIFSFileSystem(istream);
        byte[] mainDocument = readDocumentStream(filesystem, "WordDocument");

        results.add(new FileInformationBlock(mainDocument));
        return results;
    }

    /**
     * Reads the complete contents of a named entry under the root of the OLE
     * file system.
     *
     * A single {@code read(byte[])} call is not required to fill the buffer, so
     * this loops until the size reported by the entry has been read, and closes
     * the stream afterwards.
     *
     * @param filesystem the OLE file system to read from
     * @param name       the name of the root-level document entry
     * @return a buffer of exactly the entry's size holding its contents
     * @throws IOException if the entry cannot be opened or ends before the
     *                     reported size has been read
     */
    private static byte[] readDocumentStream(POIFSFileSystem filesystem, String name) throws IOException {
        DocumentEntry entry = (DocumentEntry) filesystem.getRoot().getEntry(name);
        byte[] buffer = new byte[entry.getSize()];

        InputStream in = filesystem.createDocumentInputStream(name);
        try {
            int offset = 0;
            while (offset < buffer.length) {
                int count = in.read(buffer, offset, buffer.length - offset);
                if (count < 0) {
                    throw new IOException("Unexpected end of stream \"" + name + "\": expected "
                            + buffer.length + " bytes but only read " + offset);
                }
                offset += count;
            }
        } finally {
            in.close();
        }
        return buffer;
    }

    /**
     * Initializes the table stream.
     *
     * The FIB flag selects which of the two table entries ("1Table" or
     * "0Table") is loaded into {@code _tableBuffer}.
     *
     * @throws IOException if the table stream cannot be read
     */
    private void initTableStream() throws IOException {
        String tablename = _fib.isFWhichTblStm() ? "1Table" : "0Table";

        //load the table stream into a buffer
        _tableBuffer = readDocumentStream(_filesystem, tablename);
    }

    /**
     * Initializes the text pieces. Text is divided into pieces because some
     * "pieces" may only contain unicode characters.
     *
     * Starting at the offset given by the FIB, entries tagged with the byte 1
     * are skipped using their 2-byte length; the next entry must be tagged with
     * the byte 2 and holds the piece table. Every piece found is handed to the
     * listener through {@code text(TextPiece)}.
     *
     * @throws IOException if the entry following the skipped ones is not
     *                     tagged as a piece table
     */
    private void initTextPieces() throws IOException {
        int pos = _fib.getFcClx();

        //skips through the prms before we reach the piece table. These contain data
        //for actual fast saved files
        while (_tableBuffer[pos] == 1) {
            pos++;
            int skip = LittleEndian.getShort(_tableBuffer, pos);
            pos += 2 + skip;
        }
        if (_tableBuffer[pos] != 2) {
            throw new IOException("The text piece table is corrupted");
        }

        //parse out the text pieces
        int pieceTableSize = LittleEndian.getInt(_tableBuffer, ++pos);
        pos += 4;
        int pieces = (pieceTableSize - 4) / 12;

        // The table starts with (pieces + 1) 4-byte positions; the 8-byte
        // piece descriptors follow immediately after them.
        int descriptorBase = pos + ((pieces + 1) * 4);
        for (int x = 0; x < pieces; x++) {
            int filePos = LittleEndian.getInt(_tableBuffer, descriptorBase + (x * 8) + 2);

            // Bit 0x40000000 clear: the piece is flagged as unicode and the
            // position is used as-is. Bit set: strip it and halve the value.
            boolean unicode = (filePos & 0x40000000) == 0;
            if (!unicode) {
                filePos &= ~(0x40000000);//gives me FC in doc stream
                filePos /= 2;
            }

            // Piece length is the distance between two consecutive positions.
            int totLength = LittleEndian.getInt(_tableBuffer, pos + (x + 1) * 4)
                    - LittleEndian.getInt(_tableBuffer, pos + (x * 4));

            TextPiece piece = new TextPiece(filePos, totLength, unicode);
            _listener.text(piece);
        }
    }

    /**
     * initializes all of the formatting properties for a Word Document
     */
    private void initFormattingProperties() {
        createStyleSheet();
        createListTables();
        createFontTable();

        initDocumentProperties();
        initSectionProperties();
        // Character and paragraph properties are deliberately not initialized here.
        //initCharacterProperties();
        //initParagraphProperties();
    }

    /**
     * Reports the character runs that cover the range {@code start}..{@code end}
     * to the listener, resuming from the position stored in
     * {@code _charParsingState}.
     *
     * Runs are taken from the current formatted disk page; when a page is
     * exhausted the next one is located through {@code charPlcf} and copied out
     * of the main document as a 512-byte block. The position is stored back
     * into {@code _charParsingState} once a run reaching {@code end} was emitted.
     *
     * @param charOffset offset in the table stream that the plex entries are
     *                   relative to
     * @param charPlcf   plex that locates the character pages
     * @param start      lower bound; run starts are clamped to it
     * @param end        upper bound; run ends are clamped to it
     */
    private void initCharacterProperties(int charOffset, PlexOfCps charPlcf, int start, int end) {
        //Initialize paragraph property stuff
        //int currentCharPage = _charParsingState.getCurrentPage();
        int charPlcfLen = charPlcf.length();
        int currentPageIndex = _charParsingState.getCurrentPageIndex();
        FormattedDiskPage fkp = _charParsingState.getFkp();
        int currentChpxIndex = _charParsingState.getCurrentPropIndex();
        int currentArraySize = fkp.size();

        //add the character runs
        do {
            if (currentChpxIndex < currentArraySize) {
                //get the character runs for this paragraph
                int charStart = fkp.getStart(currentChpxIndex);
                int charEnd = fkp.getEnd(currentChpxIndex);
                byte[] chpx = fkp.getGrpprl(currentChpxIndex);
                _listener.characterRun(new ChpxNode(Math.max(charStart, start), Math.min(charEnd, end), chpx));

                if (charEnd < end) {
                    currentChpxIndex++;
                } else {
                    _charParsingState.setState(currentPageIndex, fkp, currentChpxIndex);
                    break;
                }
            } else {
                // Current page exhausted: advance, but stop before reading a
                // page entry that lies beyond the end of the plex.
                currentPageIndex++;
                if (currentPageIndex >= charPlcfLen) {
                    break;
                }

                int currentCharPage = LittleEndian.getInt(_tableBuffer,
                        charOffset + charPlcf.getStructOffset(currentPageIndex));
                byte[] byteFkp = new byte[512];
                System.arraycopy(_mainDocument, (currentCharPage * 512), byteFkp, 0, 512);
                fkp = new CHPFormattedDiskPage(byteFkp);
                currentChpxIndex = 0;
                currentArraySize = fkp.size();
            }
        } while (currentPageIndex < charPlcfLen);
    }

    private void initParagraphProperties(int parOffset, PlexOfCps parPlcf, int charOffset, PlexOfCps charPlcf, int start, int end) {
        //Initialize paragraph property stuff
        //int currentParPage = _parParsingState.getCurrentPage();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -