📄 rtfparser.java
字号:
* Total control words processed.
*
* Contains both known and unknown.
*
* <code>ctrlWordCount</code> should equal
* <code>ctrlWordHandledCount</code> + <code>ctrlWordNotHandledCount</code> + <code>ctrlWordSkippedCount</code>
*/
private long ctrlWordCount = 0;
/**
 * Total number of <code>{</code> characters encountered as an open group token.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long openGroupCount = 0;
/**
 * Total number of <code>}</code> characters encountered as a close group token.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long closeGroupCount = 0;
/**
 * Total clear text characters processed.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long characterCount = 0;
/**
 * Total control words recognized.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long ctrlWordHandledCount = 0;
/**
 * Total control words not handled.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long ctrlWordNotHandledCount = 0;
/**
 * Total control words skipped.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long ctrlWordSkippedCount = 0;
/**
 * Total groups skipped. Includes { and } as a group.
 * Reset to 0 by <code>init_stats()</code>.
 */
private long groupSkippedCount = 0;
/**
 * Start time in milliseconds, taken from <code>System.currentTimeMillis()</code>
 * by the import/convert methods just before tokenising begins.
 * 0 until then.
 */
private long startTime = 0;
/**
 * Stop time in milliseconds, taken from <code>System.currentTimeMillis()</code>
 * by the import/convert methods once tokenising has finished.
 * 0 until then.
 */
private long endTime = 0;
/**
 * Start of the run as a <code>Date</code>; the counterpart of <code>startTime</code>.
 * <code>null</code> until an import/convert method sets it.
 */
private Date startDate = null;
/**
 * End of the run as a <code>Date</code>; the counterpart of <code>endTime</code>.
 * <code>null</code> until an import/convert method sets it.
 */
private Date endDate = null;
//////////////////////////////////// STATS VARIABLES ///////////////////
/**
 * Last control word and parameter processed.
 */
private RtfCtrlWordData lastCtrlWordParam = null;
/**
 * Listeners registered through <code>addListener</code>.
 * Any <code>EventListener</code> may be stored here; <code>init</code> forwards only the
 * entries that are <code>RtfCtrlWordListener</code> instances to the control word manager.
 */
private ArrayList listeners = new ArrayList();
/**
 * Creates a parser that remembers the given iText document.
 *
 * @param doc the <code>Document</code> to keep as this parser's document;
 *            the import methods hand it on to <code>init</code>
 * @since 2.1.3
 */
public RtfParser(Document doc) {
    document = doc;
}
/* *********
* READER *
***********/
/**
 * Imports a complete RTF document.
 * <p>
 * Does nothing when either argument is <code>null</code>. Otherwise the parser is
 * re-initialised for a full import, the start stamps are recorded, the input is
 * tokenised and the end stamps are recorded.
 * <p>
 * Unchecked exceptions raised while tokenising are reported on the standard error
 * stream and the import ends at that point (best effort). Checked exceptions are
 * no longer swallowed: they propagate to the caller as declared, which matches
 * <code>convertRtfDocument</code> and <code>importRtfFragment</code>.
 *
 * @param readerIn
 *            The InputStream to read the RTF document from.
 * @param rtfDoc
 *            The RtfDocument to add the imported document to.
 * @throws IOException On I/O errors.
 * @since 2.1.3
 */
public void importRtfDocument(InputStream readerIn, RtfDocument rtfDoc) throws IOException {
    if (readerIn == null || rtfDoc == null) {
        return;
    }
    this.init(TYPE_IMPORT_FULL, rtfDoc, readerIn, this.document, null);
    this.setCurrentDestination(RtfDestinationMgr.DESTINATION_NULL);
    startDate = new Date();
    startTime = System.currentTimeMillis();
    this.groupLevel = 0;
    try {
        this.tokenise();
    } catch (RuntimeException e) {
        // Kept from the original: a parser failure must not abort the caller,
        // so the problem is only reported.
        e.printStackTrace();
    } finally {
        // Record the end stamps on every exit path, including a propagating IOException.
        endTime = System.currentTimeMillis();
        endDate = new Date();
    }
}
/**
 * Imports a complete RTF document into an Element, i.e. Chapter, Section, Table Cell, etc.
 * <p>
 * Does nothing when any argument is <code>null</code>. Otherwise the parser is
 * re-initialised for an import into <code>elem</code>, the start stamps are recorded,
 * the input is tokenised and the end stamps are recorded.
 * <p>
 * Unchecked exceptions raised while tokenising are reported on the standard error
 * stream and the import ends at that point (best effort). Checked exceptions are
 * no longer swallowed: they propagate to the caller as declared, which matches
 * <code>convertRtfDocument</code> and <code>importRtfFragment</code>.
 *
 * @param elem
 *            The Element the document is to be imported into.
 * @param readerIn
 *            The InputStream to read the RTF document from.
 * @param rtfDoc
 *            The RtfDocument to add the imported document to.
 * @throws IOException On I/O errors.
 * @since 2.1.4
 */
public void importRtfDocumentIntoElement(Element elem, InputStream readerIn, RtfDocument rtfDoc) throws IOException {
    if (readerIn == null || rtfDoc == null || elem == null) {
        return;
    }
    this.init(TYPE_IMPORT_INTO_ELEMENT, rtfDoc, readerIn, this.document, elem);
    this.setCurrentDestination(RtfDestinationMgr.DESTINATION_NULL);
    startDate = new Date();
    startTime = System.currentTimeMillis();
    this.groupLevel = 0;
    try {
        this.tokenise();
    } catch (RuntimeException e) {
        // Kept from the original: a parser failure must not abort the caller,
        // so the problem is only reported.
        e.printStackTrace();
    } finally {
        // Record the end stamps on every exit path, including a propagating IOException.
        endTime = System.currentTimeMillis();
        endDate = new Date();
    }
}
/**
 * Converts an RTF document to an iText document.
 * <p>
 * Usage: create a parser object and call this method with the input stream
 * and the iText Document object. The call is a no-op when either argument
 * is <code>null</code>.
 *
 * @param readerIn
 *            The InputStream to read the RTF file from.
 * @param doc
 *            The iText document that the RTF file is to be added to.
 * @throws IOException
 *             On I/O errors.
 * @since 2.1.3
 */
public void convertRtfDocument(InputStream readerIn, Document doc) throws IOException {
    final boolean nothingToConvert = (readerIn == null) || (doc == null);
    if (nothingToConvert) {
        return;
    }

    // Reset the parser for a conversion run; no RtfDocument and no target element.
    init(TYPE_CONVERT, null, readerIn, doc, null);
    setCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT);
    groupLevel = 0;

    // Start stamps must be taken after init(), which clears them.
    startDate = new Date();
    startTime = System.currentTimeMillis();

    tokenise();

    endTime = System.currentTimeMillis();
    endDate = new Date();
}
/**
 * Imports an RTF fragment.
 * <p>
 * The call is a no-op when any argument is <code>null</code>. The parser starts
 * at group level 1 and in the <code>PARSER_IN_DOCUMENT</code> state.
 *
 * @param readerIn
 *            The InputStream to read the RTF fragment from.
 * @param rtfDoc
 *            The RTF document to add the RTF fragment to.
 * @param importMappings
 *            The RtfImportMappings defining font and color mappings for the fragment.
 * @throws IOException
 *             On I/O errors.
 * @since 2.1.3
 */
public void importRtfFragment(InputStream readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings) throws IOException {
    if (readerIn != null && rtfDoc != null && importMappings != null) {
        // Reset the parser for a fragment import; no iText Document and no target element.
        init(TYPE_IMPORT_FRAGMENT, rtfDoc, readerIn, null, null);
        handleImportMappings(importMappings);
        setCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT);
        groupLevel = 1;
        setParserState(PARSER_IN_DOCUMENT);

        // Start stamps must be taken after init(), which clears them.
        startDate = new Date();
        startTime = System.currentTimeMillis();

        tokenise();

        endTime = System.currentTimeMillis();
        endDate = new Date();
    }
}
// listener methods
/**
 * Registers an <CODE>EventListener</CODE> with this parser.
 * <p>
 * The listener is only stored here. <code>init</code> later hands every stored
 * <code>RtfCtrlWordListener</code> to the <CODE>RtfCtrlWordMgr</CODE> it creates.
 *
 * @param listener
 *            the new EventListener.
 * @since 2.1.3
 */
public void addListener(EventListener listener) {
    this.listeners.add(listener);
}
/**
 * Unregisters an <CODE>EventListener</CODE> from this parser's listener list.
 *
 * @param listener
 *            the EventListener that has to be removed.
 * @since 2.1.3
 */
public void removeListener(EventListener listener) {
    this.listeners.remove(listener);
}
/**
 * Initializes the parser object values for a new import or conversion run.
 * <p>
 * Resets the statistics, prepares the input stream, stores the arguments,
 * creates fresh state objects and managers, and finally passes every registered
 * <code>RtfCtrlWordListener</code> to the new control word manager.
 *
 * @param type Type of conversion or import
 * @param rtfDoc The <code>RtfDocument</code>; <code>null</code> when converting
 * @param readerIn The input stream
 * @param doc The iText <code>Document</code>; <code>null</code> when importing a fragment
 * @param elem The <code>Element</code> to import into; <code>null</code> unless importing into an element
 * @since 2.1.3
 */
private void init(int type, RtfDocument rtfDoc, InputStream readerIn, Document doc, Element elem) {
    init_stats();

    // init_Reader hands the input back as a PushbackInputStream.
    pbReader = init_Reader(readerIn);

    conversionType = type;
    this.rtfDoc = rtfDoc;
    document = doc;
    this.elem = elem;

    currentState = new RtfParserState();
    stackState = new Stack();
    setParserState(PARSER_STARTSTOP);
    importMgr = new RtfImportMgr(this.rtfDoc, document);

    // Obtain the destination manager and point it at this parser.
    destinationMgr = RtfDestinationMgr.getInstance(this);
    RtfDestinationMgr.setParser(this);

    // The control word manager is mandatory: this line must never be disabled.
    rtfKeywordMgr = new RtfCtrlWordMgr(this, pbReader);

    // Only control word listeners are forwarded; other EventListeners stay in the list.
    final Iterator registered = listeners.iterator();
    while (registered.hasNext()) {
        final Object candidate = registered.next();
        if (candidate instanceof RtfCtrlWordListener) {
            rtfKeywordMgr.addRtfCtrlWordListener((RtfCtrlWordListener) candidate);
        }
    }
}
/**
 * Resets every statistics value to its initial state:
 * all counters and both time stamps to 0, both dates to <code>null</code>.
 *
 * @since 2.1.3
 */
protected void init_stats() {
    byteCount = 0;

    // Token and character counters.
    ctrlWordCount = openGroupCount = closeGroupCount = characterCount = 0;
    ctrlWordHandledCount = ctrlWordNotHandledCount = ctrlWordSkippedCount = 0;
    groupSkippedCount = 0;

    // Timing information.
    startTime = endTime = 0;
    startDate = endDate = null;
}
/**
* Prepares the input stream for the parser.
* The stream is wrapped in a <code>BufferedInputStream</code> if it is not one already,
* and the result is then wrapped in a <code>PushbackInputStream</code> if it is not one already.
*
* @param readerIn
* The InputStream for the input file.
* @return
* PushbackInputStream object
* @since 2.1.3
*/
private PushbackInputStream init_Reader(InputStream readerIn) {
// Reader newReader = readerIn;
// // Initializing the reader as a BufferedReader
// // cut test processing time by approximately 50%
// // default uses 8192 character buffer
// if(!(newReader instanceof BufferedReader)) {
// newReader = new BufferedReader(newReader); // Since JDK1.1
// }
// // Initializing the reader as a PushbackReader is
// // a requirement of the parser to be able to put back
// // read ahead characters.
// if(!(newReader instanceof PushbackReader)) {
// newReader = new PushbackReader(newReader); // Since JDK1.1
// }
if(!(readerIn instanceof BufferedInputStream)) {
readerIn = new BufferedInputStream(readerIn);
}
if(!(readerIn instanceof PushbackInputStream)) {
readerIn = new PushbackInputStream(readerIn);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -