📄 rtfparser.java
字号:
// switch(nextChar[0]) {
switch(nextChar) {
case '{': // scope delimiter - Open
this.handleOpenGroup();
break;
case '}': // scope delimiter - Close
this.handleCloseGroup();
break;
case 0x0a: // noise character
case 0x0d: // noise character
// if(this.isImport()) {
// this.rtfDoc.add(new RtfDirectContent(new String(nextChar)));
// }
break;
case '\\': // Control word start delimiter
if(parseCtrlWord(pbReader) != errOK) {
// TODO: Indicate some type of error
return;
}
break;
default:
if(groupLevel == 0) { // BOMs
break;
}
if(this.getTokeniserState() == TOKENISER_HEX) {
StringBuffer hexChars = new StringBuffer();
hexChars.append(nextChar);
// if(pbReader.read(nextChar) == -1) {
if((nextChar = pbReader.read()) == -1) {
return;
}
this.byteCount++;
hexChars.append(nextChar);
try {
// nextChar[0]=(char)Integer.parseInt(hexChars.toString(), 16);
nextChar=Integer.parseInt(hexChars.toString(), 16);
} catch (NumberFormatException e) {
return;
}
this.setTokeniserState(TOKENISER_NORMAL);
}
if ((errorCode = parseChar(nextChar)) != errOK) {
return; // some error occurred. we should send a
// real error
}
break;
} // switch(nextChar[0])
} // end if (this.getTokeniserState() == TOKENISER_BINARY)
// if(groupLevel < 1 && this.isImportFragment()) return; //return errOK;
// if(groupLevel < 0 && this.isImportFull()) return; //return errStackUnderflow;
// if(groupLevel < 0 && this.isConvert()) return; //return errStackUnderflow;
}// end while(reader.read(nextChar) != -1)
RtfDestination dest = this.getCurrentDestination();
if(dest != null) {
dest.closeDestination();
}
}
/**
 * Process the character and send it to the current destination.
 * <p>
 * Before the character is forwarded to {@code handleCharacter}, the
 * countdown belonging to the active tokeniser state is advanced: in
 * TOKENISER_BINARY the remaining binary byte count is decremented, in
 * TOKENISER_SKIP_BYTES the remaining skip count is decremented. When a
 * countdown reaches zero (or below) the tokeniser is put back into its
 * normal state.
 *
 * @param nextChar
 * 		The character to process
 * @return
 * 		Returns an error code or errOK if no error.
 * @since 2.1.3
 */
private int parseChar(int nextChar) {
    /*
     * Group levels matter when deciding where a character belongs. Examples:
     *
     * {\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}
     * {\f7\fswiss\fcharset0\fprq2{\*\panose 020b0604020202030204}Helv{\*\falt Arial};} <- special case!!!!
     * {\f5\froman\fcharset0 Tahoma;}
     * {\f6\froman\fcharset0 Arial Black;}
     * {\info{\author name}{\company company name}}
     * ... document text ...
     */

    // Binary run: this character counts against the remaining binary bytes.
    if (this.getTokeniserState() == TOKENISER_BINARY) {
        binByteCount--;
        if (binByteCount <= 0) {
            this.setTokeniserStateNormal();
        }
    }

    // Skip run: the state is re-read because the branch above may have changed it.
    if (this.getTokeniserState() == TOKENISER_SKIP_BYTES) {
        binSkipByteCount--;
        if (binSkipByteCount <= 0) {
            this.setTokeniserStateNormal();
        }
    }

    return this.handleCharacter(nextChar);
}
/**
 * Parses a control word and its parameter, if one exists, and passes the
 * result to {@code handleCtrlWord}.
 * <p>
 * The first byte read decides the form. A byte that is neither a letter nor
 * a digit is treated as a one-character control symbol and handled at once.
 * Otherwise bytes are accumulated into the control word while they are
 * letters, an optional '-' sets the negative flag, and a following run of
 * digits becomes the parameter. A single space terminating the control word
 * is consumed; any other terminating byte is pushed back onto the stream.
 * <p>
 * If the stream ends while the word or the parameter is being scanned, the
 * scan stops, the control word read so far is still handled, and nothing is
 * pushed back, so the next read on the stream reports the end of the stream.
 *
 * @param reader
 * 		This is a pushback reader for file input.
 * @return
 * 		errEndOfFile if the stream ends before the first byte of the control
 * 		word or directly after a '-'; otherwise the value returned by
 * 		{@code handleCtrlWord}.
 * @throws IOException
 * 		Catch any file read problem.
 * @since 2.1.3
 */
private int parseCtrlWord(PushbackInputStream reader) throws IOException {
    int result = errOK;

    int nextChar = reader.read();
    if(nextChar == -1) {
        return errEndOfFile;
    }
    this.byteCount++;

    StringBuffer parsedCtrlWord = new StringBuffer();
    StringBuffer parsedParam = new StringBuffer();
    RtfCtrlWordData ctrlWordParam = new RtfCtrlWordData();

    // Control symbol: a single non-alphanumeric character, no parameter.
    if(!Character.isLetterOrDigit((char)nextChar)) {
        parsedCtrlWord.append((char)nextChar);
        ctrlWordParam.ctrlWord = parsedCtrlWord.toString();
        result = this.handleCtrlWord(ctrlWordParam);
        lastCtrlWordParam = ctrlWordParam;
        return result;
    }

    // Accumulate the control word. End of stream (-1) ends the word and is
    // not counted as a byte read.
    do {
        parsedCtrlWord.append((char)nextChar);
        nextChar = reader.read();
        if(nextChar != -1) {
            this.byteCount++;
        }
    } while (nextChar != -1 && Character.isLetter((char)nextChar));
    ctrlWordParam.ctrlWord = parsedCtrlWord.toString();

    // Optional sign of the parameter.
    if(nextChar == '-') {
        ctrlWordParam.isNeg = true;
        if((nextChar = reader.read()) == -1) {
            return errEndOfFile;
        }
        this.byteCount++;
    }

    // Optional numeric parameter.
    if(nextChar != -1 && Character.isDigit((char)nextChar)) {
        ctrlWordParam.hasParam = true;
        do {
            parsedParam.append((char)nextChar);
            nextChar = reader.read();
            if(nextChar != -1) {
                this.byteCount++;
            }
        } while (nextChar != -1 && Character.isDigit((char)nextChar));
        ctrlWordParam.param = parsedParam.toString();
    }

    // Push the terminating byte back unless it is the delimiting space.
    // The end-of-stream marker must never be pushed back: unread(-1) would
    // store the byte 0xFF and the next read would return 255 instead of -1.
    if(nextChar != -1 && nextChar != ' ') {
        reader.unread(nextChar);
    }

    result = this.handleCtrlWord(ctrlWordParam);
    lastCtrlWordParam = ctrlWordParam;
    return result;
}
/**
 * Stores a new tokeniser state in the current parser state.
 *
 * @param value The new state of the tokeniser.
 * @return The state of the tokeniser after the assignment.
 * @since 2.1.3
 */
public int setTokeniserState(int value) {
    return this.currentState.tokeniserState = value;
}
/**
 * Reads the tokeniser state held by the current parser state.
 *
 * @return The current state of the tokeniser.
 * @since 2.1.3
 */
public int getTokeniserState() {
    final int state = currentState.tokeniserState;
    return state;
}
/**
 * Returns the group level the parser is currently at.
 *
 * @return
 * 		The current group level value.
 * @since 2.1.3
 */
public int getLevel() {
    return groupLevel;
}
/**
 * Set the tokeniser state back to normal.
 * Sets the state to TOKENISER_NORMAL.
 * @since 2.1.3
 */
public void setTokeniserStateNormal() {
    setTokeniserState(TOKENISER_NORMAL);
}
/**
 * Set the tokeniser state to skip to the end of the group.
 * Sets the state to TOKENISER_SKIP_GROUP and records the current group
 * level in skipGroupLevel.
 * @since 2.1.3
 */
public void setTokeniserStateSkipGroup() {
    setTokeniserState(TOKENISER_SKIP_GROUP);
    skipGroupLevel = groupLevel;
}
/**
 * Puts the tokeniser into TOKENISER_SKIP_BYTES and stores how many bytes
 * are to be skipped.
 *
 * @param numberOfBytesToSkip
 * 		The number of bytes to skip in the file.
 * @since 2.1.3
 */
public void setTokeniserSkipBytes(long numberOfBytesToSkip) {
    setTokeniserState(TOKENISER_SKIP_BYTES);
    binSkipByteCount = numberOfBytesToSkip;
}
/**
 * Puts the tokeniser into TOKENISER_BINARY and stores the number of binary
 * bytes in binByteCount.
 *
 * @param binaryCount
 * 		The number of binary bytes.
 * @since 2.1.3
 */
public void setTokeniserStateBinary(int binaryCount) {
    setTokeniserState(TOKENISER_BINARY);
    binByteCount = binaryCount;
}
/**
 * Puts the tokeniser into TOKENISER_BINARY and stores the number of binary
 * bytes in binByteCount. Overload taking the count as a {@code long}.
 *
 * @param binaryCount
 * 		The number of binary bytes.
 * @since 2.1.3
 */
public void setTokeniserStateBinary(long binaryCount) {
    setTokeniserState(TOKENISER_BINARY);
    binByteCount = binaryCount;
}
/**
 * Helper method to determine if the conversion type is TYPE_CONVERT.
 * @return true if TYPE_CONVERT, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_CONVERT
 * @since 2.1.3
 */
public boolean isConvert() {
    return RtfParser.TYPE_CONVERT == getConversionType();
}
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FULL or
 * TYPE_IMPORT_FRAGMENT.
 * @return true if {@code isImportFull()} or {@code isImportFragment()} is true, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT
 * @since 2.1.3
 */
public boolean isImport() {
    // The fragment check is only evaluated when the full-import check fails.
    if (isImportFull()) {
        return true;
    }
    return this.isImportFragment();
}
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FULL.
 * @return true if TYPE_IMPORT_FULL, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL
 * @since 2.1.3
 */
public boolean isImportFull() {
    return RtfParser.TYPE_IMPORT_FULL == getConversionType();
}
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FRAGMENT.
 * @return true if TYPE_IMPORT_FRAGMENT, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT
 * @since 2.1.3
 */
public boolean isImportFragment() {
    return RtfParser.TYPE_IMPORT_FRAGMENT == getConversionType();
}
/**
 * Helper method to indicate if this control word was a \* control word.
 * Reads the isExtendedDestination flag of the current parser state.
 * @return true if it was a \* control word, otherwise false
 * @since 2.1.3
 */
public boolean getExtendedDestination() {
    final boolean extended = currentState.isExtendedDestination;
    return extended;
}
/**
 * Helper method to set the extended control word flag on the current
 * parser state.
 * @param value Boolean to set the value to.
 * @return isExtendedDestination after the assignment.
 * @since 2.1.3
 */
public boolean setExtendedDestination(boolean value) {
    return this.currentState.isExtendedDestination = value;
}
/**
 * Returns the log file name held by this parser.
 *
 * @return the logFile
 * @since 2.1.3
 */
public String getLogFile() {
    return this.logFile;
}
/**
 * Sets the log file name. Only the name is stored; the append flag is
 * left unchanged.
 *
 * @param logFile the logFile to set
 * @since 2.1.3
 */
public void setLogFile(String logFile) {
this.logFile = logFile;
}
/**
 * Sets the log file name together with the append flag.
 *
 * @param logFile the logFile to set
 * @param logAppend the logAppend value, forwarded to {@code setLogAppend}
 * @since 2.1.3
 */
public void setLogFile(String logFile, boolean logAppend) {
    this.logFile = logFile;
    setLogAppend(logAppend);
}
/**
 * Returns the flag indicating if logging is on or off.
 *
 * @return the logging
 * @since 2.1.3
 */
public boolean isLogging() {
    return this.logging;
}
/**
 * Sets the flag indicating if logging is on or off.
 *
 * @param logging <code>true</code> to turn on logging, <code>false</code> to turn off logging.
 * @since 2.1.3
 */
public void setLogging(boolean logging) {
this.logging = logging;
}
/**
 * Returns the stored logAppend flag.
 *
 * @return the logAppend
 * @since 2.1.3
 */
public boolean isLogAppend() {
    return this.logAppend;
}
/**
 * Stores the logAppend flag.
 *
 * @param logAppend the logAppend to set
 * @since 2.1.3
 */
public void setLogAppend(boolean logAppend) {
this.logAppend = logAppend;
}
/*
* Statistics
*
public void printStats(PrintStream out) {
if(out == null) return;
out.println("");
out.println("Parser statistics:");
out.println("Process start date: " + startDate.toLocaleString());
out.println("Process end date : " + endDate.toLocaleString());
out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds.");
out.println("Total bytes read : " + Long.toString(byteCount));
out.println("Open group count : " + Long.toString(openGroupCount));
out.print("Close group count : " + Long.toString(closeGroupCount));
out.println(" (Groups Skipped): " + Long.toString(groupSkippedCount));
out.print("Control word count: " + Long.toString(ctrlWordCount));
out.print(" - Handled: " + Long.toString(ctrlWordHandledCount));
out.print(" Not Handled: " + Long.toString(ctrlWordNotHandledCount));
out.println(" Skipped: " + Long.toString(ctrlWordSkippedCount));
out.println("Plain text char count: " + Long.toString(characterCount));
}*/
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -