⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rtfparser.java

📁 源码包含生成 PDF 和 HTML 的类库
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/*
 * $Id: RtfParser.java 3580 2008-08-06 15:52:00Z howard_s $
 *
 * Copyright 2007 by Howard Shank (hgshank@yahoo.com)
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999-2006 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000-2006 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the ?GNU LIBRARY GENERAL PUBLIC LICENSE?), in which case the
 * provisions of LGPL are applicable instead of those above.  If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */
package com.lowagie.text.rtf.parser;

import java.awt.Color;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.EventListener;
import java.util.Iterator;
import java.util.Stack;

import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Element;
import com.lowagie.text.List;
import com.lowagie.text.rtf.direct.RtfDirectContent;
import com.lowagie.text.rtf.document.RtfDocument;
import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordData;
import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordListener;
import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordMgr;
import com.lowagie.text.rtf.parser.destinations.RtfDestination;
import com.lowagie.text.rtf.parser.destinations.RtfDestinationMgr;

/**
 * The RtfParser allows the importing of RTF documents or
 * RTF document fragments. The RTF document or fragment is tokenised,
 * font and color definitions corrected and then added to
 * the document being written.
 * 
 * @author Mark Hall (Mark.Hall@mail.room3b.eu)
 * @author Howard Shank (hgshank@yahoo.com)
 * @since 2.0.8
 */

public class RtfParser {
	/**
	 * Debugging flag.
	 */
	private static final boolean debugParser = false;	// DEBUG Files are unlikely to be read by any reader!
	private String logFile = null;
	private boolean logging = false;
	private boolean logAppend = false;
	
	/**
	 * The iText element to add the RTF document to.
	 * @since 2.1.3
	 */
	private Element elem = null;
	/**
	 * The iText document to add the RTF document to.
	 */
	private Document document = null;
	/**
	 * The RtfDocument to add the RTF document or fragment to.
	 */
	private RtfDocument rtfDoc = null;
	/**
	 * The RtfKeywords that creates and handles keywords that are implemented.
	 */
	private RtfCtrlWordMgr rtfKeywordMgr = null;
	/**
	 * The RtfImportHeader to store imported font and color mappings in.
	 */
	private RtfImportMgr importMgr = null;
	/**
	 * The RtfDestinationMgr object to manage destinations.
	 */
	private RtfDestinationMgr destinationMgr = null;
	/**
	 * Stack for saving states for groups
	 */
	private Stack stackState = null;
	/**
	 * The current parser state.
	 */	
	private RtfParserState currentState = null;
	/**
	 * The pushback reader to read the input stream.
	 */
	private PushbackInputStream pbReader = null;
	/**
	 * Conversion type. Identifies if we are doing in import or a convert.
	 */
	private int conversionType = TYPE_IMPORT_FULL;
	

	/*
	 * Bitmapping:
	 * 
	 * 0111 1111 1111 1111 = Unkown state
	 * 0xxx xxxx xxxx xxxx = In Header
	 * 1xxx xxxx xxxx xxxx = In Document
	 * 2xxx xxxx xxxx xxxx = Reserved
	 * 4xxx xxxx xxxx xxxx = Other
	 * 8xxx xxxx xxxx xxxx = Errors
	 */
	
	/*
	 * Header state values
	 */

	/**
	 * Currently the RTF document header is being parsed.
	 */
	public static final int PARSER_IN_HEADER = (0x0 << 28) | 0x000000;
	/**
	 * Currently the RTF charset is being parsed.
	 */
	public static final int PARSER_IN_CHARSET = PARSER_IN_HEADER | 0x000001;
	/**
	 * Currently the RTF deffont is being parsed.
	 */
	public static final int PARSER_IN_DEFFONT = PARSER_IN_HEADER | 0x000002;
	/**
	 * Currently the RTF font table is being parsed.
	 */
	public static final int PARSER_IN_FONT_TABLE = PARSER_IN_HEADER | 0x000003;
	/**
	 * Currently a RTF font table info element is being parsed.
	 */
	public static final int PARSER_IN_FONT_TABLE_INFO = PARSER_IN_HEADER | 0x000004;
	/**
	 * Currently the RTF filetbl is being parsed.
	 */
	public static final int PARSER_IN_FILE_TABLE = PARSER_IN_HEADER | 0x000005;
	/**
	 * Currently the RTF color table is being parsed.
	 */
	public static final int PARSER_IN_COLOR_TABLE = PARSER_IN_HEADER | 0x000006;
	/**
	 * Currently the RTF  stylesheet is being parsed.
	 */
	public static final int PARSER_IN_STYLESHEET = PARSER_IN_HEADER | 0x000007;
	/**
	 * Currently the RTF listtables is being parsed.
	 */
	public static final int PARSER_IN_LIST_TABLE = PARSER_IN_HEADER | 0x000008;
	/**
	 * Currently the RTF listtable override is being parsed.
	 */
	public static final int PARSER_IN_LISTOVERRIDE_TABLE = PARSER_IN_HEADER | 0x000009;
	/**
	 * Currently the RTF revtbl is being parsed.
	 */
	public static final int PARSER_IN_REV_TABLE = PARSER_IN_HEADER | 0x00000A;
	/**
	 * Currently the RTF rsidtable is being parsed.
	 */
	public static final int PARSER_IN_RSID_TABLE = PARSER_IN_HEADER | 0x0000B;
	/**
	 * Currently the RTF generator is being parsed.
	 */
	public static final int PARSER_IN_GENERATOR = PARSER_IN_HEADER | 0x00000C;
	/**
	 * Currently the RTF Paragraph group properties Table (word 2002)
	 */
	public static final int PARSER_IN_PARAGRAPH_TABLE = PARSER_IN_HEADER | 0x00000E;
	/**
	 * Currently the RTF Old Properties.
	 */
	public static final int PARSER_IN_OLDCPROPS = PARSER_IN_HEADER | 0x00000F;
	/**
	 * Currently the RTF Old Properties.
	 */
	public static final int PARSER_IN_OLDPPROPS = PARSER_IN_HEADER | 0x000010;
	/**
	 * Currently the RTF Old Properties.
	 */
	public static final int PARSER_IN_OLDTPROPS = PARSER_IN_HEADER | 0x000012;
	/**
	 * Currently the RTF Old Properties.
	 */
	public static final int PARSER_IN_OLDSPROPS = PARSER_IN_HEADER | 0x000013;
	/**
	 * Currently the RTF User Protection Information.
	 */
	public static final int PARSER_IN_PROT_USER_TABLE = PARSER_IN_HEADER | 0x000014;
	/**
	 * Currently the Latent Style and Formatting usage restrictions
	 */
	public static final int PARSER_IN_LATENTSTYLES = PARSER_IN_HEADER | 0x000015;
	
	public static final int PARSER_IN_PARAGRAPH_GROUP_PROPERTIES =PARSER_IN_HEADER | 0x000016;
	
	/*
	 * Document state values
	 */
	
	/**
	 * Currently the RTF document content is being parsed.
	 */
	public static final int PARSER_IN_DOCUMENT = (0x2 << 28 ) | 0x000000;

	/**
	 * Currently the RTF info group is being parsed.
	 */
	public static final int PARSER_IN_INFO_GROUP = PARSER_IN_DOCUMENT | 0x000001;

	
	public static final int PARSER_IN_UPR = PARSER_IN_DOCUMENT | 0x000002;
	/**
	 * Currently a shppict control word is being parsed.
	 */
	public static final int PARSER_IN_SHPPICT = PARSER_IN_DOCUMENT | 0x000010; //16
	/**
	 * Currently a pict control word is being parsed.
	 */
	public static final int PARSER_IN_PICT = PARSER_IN_DOCUMENT | 0x000011; //17
	/**
	 * Currently a picprop control word is being parsed.
	 */
	public static final int PARSER_IN_PICPROP = PARSER_IN_DOCUMENT | 0x000012; //18
	/**
	 * Currently a blipuid control word is being parsed.
	 */
	public static final int PARSER_IN_BLIPUID = PARSER_IN_DOCUMENT | 0x000013; //19

	/* other states */
	/**
	 * The parser is at the beginning or the end of the file.
	 */
	public static final int PARSER_STARTSTOP = (0x4 << 28)| 0x0001;
	/* ERRORS */
	/**
	 * Currently the parser is in an error state.
	 */
	public static final int PARSER_ERROR = (0x8 << 28) | 0x0000;
	/**
	 * The parser reached the end of the file.
	 */
	public static final int PARSER_ERROR_EOF = PARSER_ERROR | 0x0001;
	/**
	 * Currently the parser is in an unknown state.
	 */
	public static final int PARSER_IN_UNKNOWN = PARSER_ERROR | 0x0FFFFFFF;
	
	
	/**
	 * Conversion type is unknown
	 */
	public static final int TYPE_UNIDENTIFIED = -1;
	/**
	 * Conversion type is an import. Uses direct content to add everything.
	 * This is what the original import does.
	 */
	public static final int TYPE_IMPORT_FULL = 0;
	/**
	 * Conversion type is an import of a partial file/fragment. Uses direct content to add everything.
	 */
	public static final int TYPE_IMPORT_FRAGMENT = 1;
	/**
	 * Conversion type is a conversion. This uses the document (not rtfDoc) to add
	 * all the elements making it a different supported documents depending on the writer used.
	 */
	public static final int TYPE_CONVERT = 2;
	/**
	 * Conversion type to import a document into an element. i.e. Chapter, Section, Table Cell, etc.
	 * @since 2.1.4
	 */
	public static final int TYPE_IMPORT_INTO_ELEMENT = 3;

	
	/**
	 * Destination is normal. Text is processed.
	 */
	public static final int DESTINATION_NORMAL = 0;
	/**
	 * Destination is skipping. Text is ignored.
	 */
	public static final int DESTINATION_SKIP = 1;
	
	//////////////////////////////////// TOKENISE VARIABLES ///////////////////
	/*
	 * State flags use 4/28 bitmask.
	 * First 4 bits (nibble) indicates major state. Used for unknown and error
	 * Last 28 bits indicates the value;
	 */
	
	/**
	 * The RtfTokeniser is in its ground state. Any token may follow.
	 */
	public static final int TOKENISER_NORMAL = 0x00000000;
	/**
	 * The last token parsed was a slash.
	 */
	public static final int TOKENISER_SKIP_BYTES = 0x00000001;
	/**
	 * The RtfTokeniser is currently tokenising a control word.
	 */
	public static final int TOKENISER_SKIP_GROUP = 0x00000002;
	/**
	 * The RtfTokeniser is currently reading binary stream.
	 */
	public static final int TOKENISER_BINARY= 0x00000003;
	/**
	 * The RtfTokeniser is currently reading hex data.
	 */
	public static final int TOKENISER_HEX= 0x00000004;
	/**
	 * The RtfTokeniser ignore result
	 */
	public static final int TOKENISER_IGNORE_RESULT= 0x00000005;
	/**
	 * The RtfTokeniser is currently in error state
	 */
	public static final int TOKENISER_STATE_IN_ERROR =  0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000
	/**
	 * The RtfTokeniser is currently in an unkown state
	 */
	public static final int TOKENISER_STATE_IN_UNKOWN = 0xFF000000; // 1111 0000 0000 0000 0000 0000 0000 0000
	
	/**
	 * The current group nesting level.
	 */
	private int groupLevel = 0;
	/**
	 * The current document group nesting level. Used for fragments.
	 */
	private int docGroupLevel = 0;
	/**
	 * When the tokeniser is Binary.
	 */
	private long binByteCount = 0;
	/**
	 * When the tokeniser is set to skip bytes, binSkipByteCount is the number of bytes to skip.
	 */
	private long binSkipByteCount = 0;
	/**
	 * When the tokeniser is set to skip to next group, this is the group indentifier to return to.
	 */
	private int skipGroupLevel = 0;

	//RTF parser error codes
	public static final int  errOK =0;                        // Everything's fine!
	public static final int  errStackUnderflow   =  -1;       // Unmatched '}'
	public static final int  errStackOverflow    =  -2;       // Too many '{' -- memory exhausted
	public static final int  errUnmatchedBrace   =  -3;       // RTF ended during an open group.
	public static final int  errInvalidHex       =  -4;       // invalid hex character found in data
	public static final int  errBadTable         =  -5;       // RTF table (sym or prop) invalid
	public static final int  errAssertion        =  -6;       // Assertion failure
	public static final int  errEndOfFile        =  -7;       // End of file reached while reading RTF
	public static final int  errCtrlWordNotFound =  -8;		  // control word was not found
	//////////////////////////////////// TOKENISE VARIABLES ///////////////////
	
	
	//////////////////////////////////// STATS VARIABLES ///////////////////
	/**
	 * Total bytes read.
	 */
	private long byteCount = 0;
	/**

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -