📄 baseparser.java
字号:
}
currentIndex++;
}
*/
pdfSource.unread( ENDSTREAM );
}
/**
 * Checks whether the compareTo.length bytes that end just before
 * currentIndex in the buffer are equal to the compareTo byte array.
 * Buffer positions are taken modulo the buffer length, so the compared
 * window may wrap around the end of the buffer.
 *
 * @param buffer The buffer that is treated as circular.
 * @param currentIndex The index one past the last byte of the window to compare.
 * @param compareTo The bytes the window is expected to contain.
 * @return <code>true</code> if every byte of the window matches, <code>false</code>
 * if any byte differs or if currentIndex is smaller than compareTo.length.
 */
private boolean cmpCircularBuffer( byte[] buffer, int currentIndex, byte[] compareTo )
{
    final int wanted = compareTo.length;
    final int capacity = buffer.length;
    final int start = currentIndex - wanted;
    if( start < 0 )
    {
        // not enough bytes in front of currentIndex to hold the whole pattern
        return false;
    }
    for( int k = 0; k < wanted; k++ )
    {
        if( buffer[(start + k) % capacity] != compareTo[k] )
        {
            return false;
        }
    }
    return true;
}
/**
 * This will parse a PDF string.
 *
 * The first byte read from pdfSource must be '(' or '&lt;'; it selects the
 * matching closing delimiter ')' or '&gt;'. Bytes are then consumed until the
 * delimiters balance or the stream ends. Backslash escapes are decoded along
 * the way. For a string opened with '&lt;' only hex digits are collected and
 * the collected text is converted with COSString.createFromHexString. The
 * byte that follows the string, if there is one, is pushed back.
 *
 * @return The parsed PDF string.
 *
 * @throws IOException If there is an error reading from the stream or if the
 * first byte is neither '(' nor '&lt;'.
 */
protected COSString parseCOSString() throws IOException
{
    char nextChar = (char)pdfSource.read();
    COSString retval = new COSString();
    char openBrace;
    char closeBrace;
    if( nextChar == '(' )
    {
        openBrace = '(';
        closeBrace = ')';
    }
    else if( nextChar == '<' )
    {
        openBrace = '<';
        closeBrace = '>';
    }
    else
    {
        throw new IOException( "parseCOSString string should start with '(' or '<' and not '" +
                               nextChar + "' " + pdfSource );
    }

    // Number of opening delimiters that are still waiting for their closing one.
    int braces = 1;
    int c = pdfSource.read();
    while( braces > 0 && c != -1)
    {
        char ch = (char)c;
        // -2 means "no look-ahead byte was read in this iteration"; any other
        // value is a byte (or -1 for end of stream) that a branch below already
        // read and that must become the next value of c.
        int nextc = -2;

        if(ch == closeBrace)
        {
            braces--;
            byte[] nextThreeBytes = new byte[3];
            int amountRead = pdfSource.read(nextThreeBytes);

            // Special case seen in "Bull River Rules and Regulations.pdf".
            // The dictionary looks like this
            //    2 0 obj
            //    <<
            //        /Type /Info
            //        /Creator (PaperPort http://www.scansoft.com)
            //        /Producer (sspdflib 1.0 http://www.scansoft.com)
            //        /Title ( (5)
            //        /Author ()
            //        /Subject ()
            //
            // The braces of /Title are not balanced although they should be.
            // So when a closing brace is directly followed by CR, LF and a
            // slash, assume the PDF is broken and treat this brace as the end
            // of the string.
            if( amountRead == 3 )
            {
                if( nextThreeBytes[0] == 0x0d &&
                    nextThreeBytes[1] == 0x0a &&
                    nextThreeBytes[2] == 0x2f )
                {
                    braces = 0;
                }
            }
            // Only push back what was really read. At the end of the stream a
            // read conventionally reports a negative count, which must not be
            // handed to unread as a length.
            if( amountRead > 0 )
            {
                pdfSource.unread( nextThreeBytes, 0, amountRead );
            }
            if( braces != 0 )
            {
                // a nested closing delimiter is part of the string content
                retval.append( ch );
            }
        }
        else if( ch == openBrace )
        {
            braces++;
            retval.append( ch );
        }
        else if( ch == '\\' )
        {
            int escaped = pdfSource.read();
            if( escaped == -1 )
            {
                // The stream ended right after the backslash: keep the
                // backslash literally and stop, instead of appending the
                // end-of-stream marker as if it were a character.
                retval.append( '\\' );
                nextc = -1;
            }
            else
            {
                char next = (char)escaped;
                switch(next)
                {
                    case 'n':
                        retval.append( '\n' );
                        break;
                    case 'r':
                        retval.append( '\r' );
                        break;
                    case 't':
                        retval.append( '\t' );
                        break;
                    case 'b':
                        retval.append( '\b' );
                        break;
                    case 'f':
                        retval.append( '\f' );
                        break;
                    case '(':
                    case ')':
                    case '\\':
                        retval.append( next );
                        break;
                    case 10:
                    case 13:
                        // A backslash at the end of a line continues the string on
                        // the next line: drop the backslash and the end-of-line
                        // bytes that follow it.
                        c = pdfSource.read();
                        while( isEOL(c) && c != -1)
                        {
                            c = pdfSource.read();
                        }
                        nextc = c;
                        break;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    {
                        // Octal escape made of one to three octal digits. The first
                        // byte that is not an octal digit is handed back to the main
                        // loop through nextc.
                        StringBuffer octal = new StringBuffer();
                        octal.append( next );
                        c = pdfSource.read();
                        char digit = (char)c;
                        if( digit >= '0' && digit <= '7' )
                        {
                            octal.append( digit );
                            c = pdfSource.read();
                            digit = (char)c;
                            if( digit >= '0' && digit <= '7' )
                            {
                                octal.append( digit );
                            }
                            else
                            {
                                nextc = c;
                            }
                        }
                        else
                        {
                            nextc = c;
                        }

                        int character = 0;
                        try
                        {
                            character = Integer.parseInt( octal.toString(), 8 );
                        }
                        catch( NumberFormatException e )
                        {
                            throw new IOException( "Error: Expected octal character, actual='" + octal + "'" );
                        }
                        retval.append( character );
                        break;
                    }
                    default:
                    {
                        // Some PDFs use a backslash that is not meant as an escape,
                        // so an unknown escape sequence is kept literally (backslash
                        // included) instead of being rejected.
                        retval.append( '\\' );
                        retval.append( next );
                    }
                }
            }
        }
        else
        {
            if( openBrace == '<' )
            {
                // in a hex string everything that is not a hex digit is dropped
                if( isHexDigit(ch) )
                {
                    retval.append( ch );
                }
            }
            else
            {
                retval.append( ch );
            }
        }

        if (nextc != -2)
        {
            c = nextc;
        }
        else
        {
            c = pdfSource.read();
        }
    }
    if (c != -1)
    {
        // c is the first byte after the string, give it back to the stream
        pdfSource.unread(c);
    }
    if( openBrace == '<' )
    {
        retval = COSString.createFromHexString( retval.getString() );
    }
    return retval;
}
/**
 * This will parse a PDF array object.
 *
 * The stream must be positioned at '['. Elements are read with
 * parseDirObject until the next byte is ']' or until peeking yields a value
 * that is not greater than zero. One more byte (expected to be the ']') is
 * then consumed, followed by any spaces.
 *
 * When parseDirObject returns a COSObject, the two entries collected just
 * before it are taken as the object number and the generation number of an
 * indirect reference and are replaced by the object that the document pool
 * holds for that key. If those two entries are missing or are not integers
 * the reference is skipped and the array is left as it was.
 *
 * @return The parsed PDF array.
 *
 * @throws IOException If there is an error parsing the stream.
 */
protected COSArray parseCOSArray() throws IOException
{
    char ch = (char)pdfSource.read();
    if( ch != '[')
    {
        throw new IOException( "expected='[' actual='" + ch + "'" );
    }
    COSArray po = new COSArray();
    COSBase pbo = null;
    skipSpaces();
    int i = 0;
    while( ((i = pdfSource.peek()) > 0) && ((char)i != ']') )
    {
        pbo = parseDirObject();
        if( pbo instanceof COSObject )
        {
            COSBase resolved = null;
            if( po.size() >= 2 )
            {
                Object genEntry = po.remove( po.size() - 1 );
                Object numEntry = po.remove( po.size() - 1 );
                if( genEntry instanceof COSInteger && numEntry instanceof COSInteger )
                {
                    COSObjectKey key = new COSObjectKey(
                        ((COSInteger)numEntry).intValue(), ((COSInteger)genEntry).intValue() );
                    resolved = document.getObjectFromPool( key );
                }
                else
                {
                    // Not an "<number> <generation>" pair: put both entries
                    // back in their original order and skip the reference.
                    // Everything stored in po was added as a COSBase below,
                    // so the casts cannot fail.
                    po.add( (COSBase)numEntry );
                    po.add( (COSBase)genEntry );
                }
            }
            pbo = resolved;
        }
        if( pbo != null )
        {
            po.add( pbo );
        }
        // else: a bad object in the array, which is just skipped
        skipSpaces();
    }
    pdfSource.read(); //read ']'
    skipSpaces();
    return po;
}
/**
 * Determine if a character terminates a PDF name.
 *
 * A name ends at a space, carriage return (13), line feed (10), tab (9), at
 * one of the characters <code>&gt; &lt; [ ] / ( )</code>, or at the end of the
 * stream. Because the parameter is a <code>char</code>, the end of the stream
 * is recognised as the value that the read result -1 has after being
 * narrowed to <code>char</code>.
 *
 * @param ch The character
 * @return <code>true</code> if the character terminates a PDF name, otherwise <code>false</code>.
 */
protected boolean isEndOfName(char ch)
{
    return (ch == ' ' || ch == 13 || ch == 10 || ch == 9 || ch == '>' || ch == '<'
        || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='('
        // EOF: a char is never equal to the int -1, so compare with the
        // narrowed value instead
        || ch == (char) -1
    );
}
/**
 * This will parse a PDF name from the stream.
 *
 * The stream must be positioned at the leading '/'. Characters are collected
 * until isEndOfName reports a terminator or the stream ends; the terminator
 * is pushed back. A '#' that is followed by two hex digits is replaced by the
 * character with that hex code, any other '#' is kept literally.
 *
 * @return The parsed PDF name.
 *
 * @throws IOException If there is an error reading from the stream or if the
 * first character is not '/'.
 */
protected COSName parseCOSName() throws IOException
{
    int c = pdfSource.read();
    if( (char)c != '/')
    {
        throw new IOException("expected='/' actual='" + (char)c + "'-" + c + " " + pdfSource );
    }
    // build the name
    StringBuffer buffer = new StringBuffer();
    c = pdfSource.read();
    while( c != -1 )
    {
        char ch = (char)c;
        if(ch == '#')
        {
            // Keep the look-ahead as ints so that the end of the stream (-1)
            // stays distinguishable from a real byte.
            int c1 = pdfSource.read();
            int c2 = pdfSource.read();
            // Prior to PDF v1.2, the # was not a special character.  Also,
            // it has been observed that various PDF tools do not follow the
            // spec with respect to the # escape, even though they report
            // PDF versions of 1.2 or later.  The solution here is that we
            // interpret the # as an escape only when it is followed by two
            // valid hex digits.
            if( c1 != -1 && c2 != -1 && isHexDigit((char)c1) && isHexDigit((char)c2) )
            {
                String hex = "" + (char)c1 + (char)c2;
                try
                {
                    buffer.append( (char) Integer.parseInt(hex, 16));
                }
                catch (NumberFormatException e)
                {
                    throw new IOException("Error: expected hex number, actual='" + hex + "'");
                }
                c = pdfSource.read();
            }
            else
            {
                // Literal '#': give back the second look-ahead byte (if one
                // was really read) and continue with the first one. If the
                // stream already ended, c becomes -1 and the loop stops.
                if( c2 != -1 )
                {
                    pdfSource.unread(c2);
                }
                c = c1;
                buffer.append( ch );
            }
        }
        else if (isEndOfName(ch))
        {
            break;
        }
        else
        {
            buffer.append( ch );
            c = pdfSource.read();
        }
    }
    if (c != -1)
    {
        // the terminator belongs to whatever follows the name
        pdfSource.unread(c);
    }
    return COSName.getPDFName( buffer.toString() );
}
/**
 * This will parse a boolean object from the stream.
 *
 * The next byte is peeked: for 't' four bytes are read and must spell
 * "true", for 'f' five bytes are read and must spell "false". Any other
 * byte is an error.
 *
 * @return The parsed boolean object.
 *
 * @throws IOException If an IO error occurs during parsing or if the stream
 * does not contain "true" or "false" at the current position.
 */
protected COSBoolean parseBoolean() throws IOException
{
    COSBoolean retval = null;
    char c = (char)pdfSource.peek();
    if( c == 't' )
    {
        byte[] trueArray = new byte[ 4 ];
        int amountRead = pdfSource.read( trueArray, 0, 4 );
        // decode with a fixed single-byte charset so the comparison does not
        // depend on the platform default encoding
        String trueString = new String( trueArray, 0, amountRead, "ISO-8859-1" );
        if( !trueString.equals( "true" ) )
        {
            throw new IOException( "Error parsing boolean: expected='true' actual='" + trueString + "'" );
        }
        retval = COSBoolean.TRUE;
    }
    else if( c == 'f' )
    {
        byte[] falseArray = new byte[ 5 ];
        int amountRead = pdfSource.read( falseArray, 0, 5 );
        String falseString = new String( falseArray, 0, amountRead, "ISO-8859-1" );
        if( !falseString.equals( "false" ) )
        {
            throw new IOException( "Error parsing boolean: expected='false' actual='" + falseString + "'" );
        }
        retval = COSBoolean.FALSE;
    }
    else
    {
        throw new IOException( "Error parsing boolean expected='t or f' actual='" + c + "'" );
    }
    return retval;
}
/**
* This will parse a directory object from the stream.
*
* @return The parsed object.
*
* @throws IOException If there is an error during parsing.
*/
protected COSBase parseDirObject() throws IOException
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -