rtfparser.java
来自「有关对pdf操作的代码」· Java 代码 · 共 1,527 行 · 第 1/4 页
JAVA
1,527 行
if(parseCtrlWord(pbReader) != errOK) {
// TODO: Indicate some type of error
return;
}
break;
default:
if(groupLevel == 0) { // BOMs
break;
}
if(this.getTokeniserState() == TOKENISER_HEX) {
StringBuffer hexChars = new StringBuffer();
hexChars.append(nextChar);
// if(pbReader.read(nextChar) == -1) {
if((nextChar = pbReader.read()) == -1) {
return;
}
this.byteCount++;
hexChars.append(nextChar);
try {
// nextChar[0]=(char)Integer.parseInt(hexChars.toString(), 16);
nextChar=Integer.parseInt(hexChars.toString(), 16);
} catch (NumberFormatException e) {
return;
}
this.setTokeniserState(TOKENISER_NORMAL);
}
if ((errorCode = parseChar(nextChar)) != errOK) {
return; // some error occurred. we should send a
// real error
}
break;
} // switch(nextChar[0])
} // end if (this.getTokeniserState() == TOKENISER_BINARY)
// if(groupLevel < 1 && this.isImportFragment()) return; //return errOK;
// if(groupLevel < 0 && this.isImportFull()) return; //return errStackUnderflow;
// if(groupLevel < 0 && this.isConvert()) return; //return errStackUnderflow;
}// end while(reader.read(nextChar) != -1)
RtfDestination dest = (RtfDestination)this.getCurrentDestination();
if(dest != null) {
dest.closeDestination();
}
}
/**
 * Forwards a single character to {@code handleCharacter}, first updating the
 * binary / skip-bytes countdown that applies to the current tokeniser state.
 *
 * @param nextChar
 * 		The character to process
 * @return
 * 		The value returned by {@code handleCharacter(nextChar)}: an error code,
 * 		or errOK if no error.
 */
private int parseChar(int nextChar) {
    // The destination of the character depends on the group nesting, e.g.:
    /*
     * {\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}
     * {\f7\fswiss\fcharset0\fprq2{\*\panose 020b0604020202030204}Helv{\*\falt Arial};} <- special case!!!!
     * {\f5\froman\fcharset0 Tahoma;}
     * {\f6\froman\fcharset0 Arial Black;}
     * {\info{\author name}{\company company name}}
     * ... document text ...
     */

    // Binary data: consume one byte of the budget; back to normal once it is used up.
    if (getTokeniserState() == TOKENISER_BINARY) {
        --binByteCount;
        if (binByteCount <= 0) {
            setTokeniserStateNormal();
        }
    }

    // Skipped bytes: same countdown, on its own counter. The state is re-read
    // here because the branch above may just have changed it.
    if (getTokeniserState() == TOKENISER_SKIP_BYTES) {
        --binSkipByteCount;
        if (binSkipByteCount <= 0) {
            setTokeniserStateNormal();
        }
    }

    return handleCharacter(nextChar);
}
/**
 * Parses a control word (or control symbol) and its numeric parameter, if one exists,
 * then passes the result to {@code handleCtrlWord}.
 * <p>
 * The byte that terminates the control word is pushed back onto the stream unless it
 * is a single space, which is consumed as the delimiter. If the stream ends
 * while the word or its parameter is being read, whatever was collected so far is
 * still handled; nothing is pushed back and the end-of-stream marker is not counted
 * in {@code byteCount}.
 *
 * @param reader
 * 		This is a pushback stream for file input.
 * @return
 * 		errEndOfFile if the stream ends before the first character of the control
 * 		word, or directly after a '-' sign; otherwise the value returned by
 * 		{@code handleCtrlWord}.
 * @throws IOException
 * 		Catch any file read problem.
 */
private int parseCtrlWord(PushbackInputStream reader) throws IOException {
    int nextChar = reader.read();
    if (nextChar == -1) {
        return errEndOfFile;
    }
    this.byteCount++;

    int result = errOK;
    StringBuffer parsedCtrlWord = new StringBuffer();
    StringBuffer parsedParam = new StringBuffer();
    RtfCtrlWordData ctrlWordParam = new RtfCtrlWordData();

    // A non-alphanumeric character right after the backslash is a one-character
    // control symbol; it takes no parameter and has no delimiter.
    if (!Character.isLetterOrDigit((char) nextChar)) {
        parsedCtrlWord.append((char) nextChar);
        ctrlWordParam.ctrlWord = parsedCtrlWord.toString();
        result = this.handleCtrlWord(ctrlWordParam);
        lastCtrlWordParam = ctrlWordParam;
        return result;
    }

    // Collect the control word itself. The first character is appended
    // unconditionally (it may be a digit); subsequent ones must be letters.
    do {
        parsedCtrlWord.append((char) nextChar);
        nextChar = reader.read();
        if (nextChar != -1) {
            // Only real bytes are counted; EOF is not a byte.
            this.byteCount++;
        }
    } while (nextChar != -1 && Character.isLetter((char) nextChar));
    ctrlWordParam.ctrlWord = parsedCtrlWord.toString();

    // Optional sign of the numeric parameter.
    if (nextChar == '-') {
        ctrlWordParam.isNeg = true;
        nextChar = reader.read();
        if (nextChar == -1) {
            return errEndOfFile;
        }
        this.byteCount++;
    }

    // Optional numeric parameter.
    if (nextChar != -1 && Character.isDigit((char) nextChar)) {
        ctrlWordParam.hasParam = true;
        do {
            parsedParam.append((char) nextChar);
            nextChar = reader.read();
            if (nextChar != -1) {
                this.byteCount++;
            }
        } while (nextChar != -1 && Character.isDigit((char) nextChar));
        ctrlWordParam.param = parsedParam.toString();
    }

    // Push the terminating byte back so the caller sees it again. A space is the
    // delimiter and is swallowed. At EOF there is nothing to push back:
    // unread(-1) would insert a bogus 0xFF byte into the stream.
    // NOTE(review): the pushed-back byte has already been added to byteCount here;
    // if the caller counts it again on re-read the total is off by one - confirm.
    if (nextChar != -1 && nextChar != ' ') {
        reader.unread(nextChar);
    }

    result = this.handleCtrlWord(ctrlWordParam);
    lastCtrlWordParam = ctrlWordParam;
    return result;
}
/**
 * Stores a new tokeniser state in the current parser state.
 *
 * @param value The new state of the tokeniser.
 * @return The tokeniser state as read back after the assignment.
 */
public int setTokeniserState(int value) {
    currentState.tokeniserState = value;
    return currentState.tokeniserState;
}
/**
 * Reads the tokeniser state held in the current parser state.
 *
 * @return The current state of the tokeniser.
 */
public int getTokeniserState() {
    return currentState.tokeniserState;
}
/**
 * Returns the parser's current group level.
 *
 * @return
 * 		The value of {@code groupLevel}.
 */
public int getLevel() {
    return groupLevel;
}
/**
 * Set the tokeniser state back to normal.
 * Equivalent to calling {@code setTokeniserState(TOKENISER_NORMAL)}.
 */
public void setTokeniserStateNormal() {
    setTokeniserState(TOKENISER_NORMAL);
}
/**
 * Put the tokeniser into skip-group mode.
 * The state becomes TOKENISER_SKIP_GROUP and the current group level is
 * remembered in skipGroupLevel.
 */
public void setTokeniserStateSkipGroup() {
    setTokeniserState(TOKENISER_SKIP_GROUP);
    skipGroupLevel = groupLevel;
}
/**
 * Put the tokeniser into skip-bytes mode and record how many bytes to skip.
 * The state becomes TOKENISER_SKIP_BYTES.
 *
 * @param numberOfBytesToSkip
 * 		The number of bytes to skip in the file.
 */
public void setTokeniserSkipBytes(long numberOfBytesToSkip) {
    setTokeniserState(TOKENISER_SKIP_BYTES);
    binSkipByteCount = numberOfBytesToSkip;
}
/**
 * Put the tokeniser into binary mode and record the number of binary bytes.
 * The state becomes TOKENISER_BINARY.
 *
 * @param binaryCount
 * 		The number of binary bytes, as an int.
 */
public void setTokeniserStateBinary(int binaryCount) {
    setTokeniserState(TOKENISER_BINARY);
    binByteCount = binaryCount;
}
/**
 * Put the tokeniser into binary mode and record the number of binary bytes.
 * The state becomes TOKENISER_BINARY.
 *
 * @param binaryCount
 * 		The number of binary bytes, as a long.
 */
public void setTokeniserStateBinary(long binaryCount) {
    setTokeniserState(TOKENISER_BINARY);
    binByteCount = binaryCount;
}
/**
 * Helper method to determine if the conversion type is TYPE_CONVERT.
 *
 * @return true if the conversion type is TYPE_CONVERT, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_CONVERT
 */
public boolean isConvert() {
    return RtfParser.TYPE_CONVERT == getConversionType();
}
/**
 * Helper method to determine if the conversion type is one of the import types,
 * TYPE_IMPORT_FULL or TYPE_IMPORT_FRAGMENT.
 *
 * @return true if {@code isImportFull()} or {@code isImportFragment()} is true, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT
 */
public boolean isImport() {
    if (isImportFull()) {
        return true;
    }
    return isImportFragment();
}
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FULL.
 *
 * @return true if the conversion type is TYPE_IMPORT_FULL, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL
 */
public boolean isImportFull() {
    return RtfParser.TYPE_IMPORT_FULL == getConversionType();
}
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FRAGMENT.
 *
 * @return true if the conversion type is TYPE_IMPORT_FRAGMENT, otherwise false
 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT
 */
public boolean isImportFragment() {
    return RtfParser.TYPE_IMPORT_FRAGMENT == getConversionType();
}
/**
 * Helper method to read the extended-destination flag of the current parser
 * state, i.e. whether this control word was a \* control word.
 *
 * @return true if it was a \* control word, otherwise false
 */
public boolean getExtendedDestination() {
    return currentState.isExtendedDestination;
}
/**
 * Helper method to store the extended control word flag in the current parser state.
 *
 * @param value Boolean to set the flag to.
 * @return The flag as read back after the assignment.
 */
public boolean setExtendedDestination(boolean value) {
    currentState.isExtendedDestination = value;
    return currentState.isExtendedDestination;
}
/**
 * Returns the configured log file name.
 *
 * @return the value of {@code logFile}
 */
public String getLogFile() {
    return this.logFile;
}
/**
 * Stores the log file name.
 *
 * @param logFile the log file name to store
 */
public void setLogFile(String logFile) {
    // The parameter shadows the field, so the qualifier is required here.
    this.logFile = logFile;
}
/**
 * Stores the log file name together with the append flag.
 *
 * @param logFile the log file name to store
 * @param logAppend the value passed on to {@code setLogAppend}
 */
public void setLogFile(String logFile, boolean logAppend) {
    // The parameter shadows the field, so the qualifier is required here.
    this.logFile = logFile;
    setLogAppend(logAppend);
}
/**
 * Returns the flag indicating whether logging is on or off.
 *
 * @return the value of {@code logging}
 */
public boolean isLogging() {
    return this.logging;
}
/**
 * Stores the flag indicating whether logging is on or off.
 *
 * @param logging <code>true</code> to turn on logging, <code>false</code> to turn off logging.
 */
public void setLogging(boolean logging) {
    // The parameter shadows the field, so the qualifier is required here.
    this.logging = logging;
}
/**
 * Returns the log-append flag.
 *
 * @return the value of {@code logAppend}
 */
public boolean isLogAppend() {
    return this.logAppend;
}
/**
 * Stores the log-append flag.
 *
 * @param logAppend the value to store in {@code logAppend}
 */
public void setLogAppend(boolean logAppend) {
    // The parameter shadows the field, so the qualifier is required here.
    this.logAppend = logAppend;
}
/*
* Statistics
*
public void printStats(PrintStream out) {
if(out == null) return;
out.println("");
out.println("Parser statistics:");
out.println("Process start date: " + startDate.toLocaleString());
out.println("Process end date : " + endDate.toLocaleString());
out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds.");
out.println("Total bytes read : " + Long.toString(byteCount));
out.println("Open group count : " + Long.toString(openGroupCount));
out.print("Close group count : " + Long.toString(closeGroupCount));
out.println(" (Groups Skipped): " + Long.toString(groupSkippedCount));
out.print("Control word count: " + Long.toString(ctrlWordCount));
out.print(" - Handled: " + Long.toString(ctrlWordHandledCount));
out.print(" Not Handled: " + Long.toString(ctrlWordNotHandledCount));
out.println(" Skipped: " + Long.toString(ctrlWordSkippedCount));
out.println("Plain text char count: " + Long.toString(characterCount));
}*/
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?