📄 utf8reader.java
字号:
if ((b0 & 0xF0) == 0xE0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 3);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 3, b1);
}
int b2 = -1;
if (++in < total) {
b2 = fBuffer[in] & 0x00FF;
}
else {
b2 = fInputStream.read();
if (b2 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
expectedByte(3, 3);
}
count++;
}
if ((b2 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
invalidByte(3, 3, b2);
}
int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
(b2 & 0x003F);
ch[out++] = (char)c;
count -= 2;
continue;
}
// UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
// Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
// [1101 11yy] [yyxx xxxx] (low surrogate)
// * uuuuu = wwww + 1
if ((b0 & 0xF8) == 0xF0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 4);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 4, b1);
}
int b2 = -1;
if (++in < total) {
b2 = fBuffer[in] & 0x00FF;
}
else {
b2 = fInputStream.read();
if (b2 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
expectedByte(3, 4);
}
count++;
}
if ((b2 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
invalidByte(3, 4, b2);
}
int b3 = -1;
if (++in < total) {
b3 = fBuffer[in] & 0x00FF;
}
else {
b3 = fInputStream.read();
if (b3 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
expectedByte(4, 4);
}
count++;
}
if ((b3 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fBuffer[3] = (byte)b3;
fOffset = 4;
return out - offset;
}
invalidByte(4, 4, b2);
}
// decode bytes into surrogate characters
int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
if (uuuuu > 0x10) {
invalidSurrogate(uuuuu);
}
int wwww = uuuuu - 1;
int zzzz = b1 & 0x000F;
int yyyyyy = b2 & 0x003F;
int xxxxxx = b3 & 0x003F;
int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
// set characters
ch[out++] = (char)hs;
ch[out++] = (char)ls;
count -= 2;
continue;
}
// error
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
invalidByte(1, 1, b0);
}
// return number of characters converted
if (DEBUG_READ) {
if (log.isDebugEnabled())
log.debug("read(char[],"+offset+','+length+"): count="+count);
}
return count;
} // read(char[],int,int)
/**
 * Skip characters. This method will block until some characters are
 * available, an I/O error occurs, or the end of the stream is reached.
 * <p>
 * Skipping is done by decoding into a scratch array through
 * read(char[],int,int) and discarding what was decoded. A request to
 * skip zero or fewer characters returns 0 immediately and reads nothing.
 *
 * @param n The number of characters to skip
 *
 * @return The number of characters actually skipped
 *
 * @exception IOException If an I/O error occurs
 */
public long skip(long n) throws IOException {
    // Nothing to skip: do not allocate the scratch array and do not
    // call read() with a zero or negative length.
    if (n <= 0) {
        return 0;
    }
    long remaining = n;
    // Scratch array for the discarded characters; kept the same size as
    // fBuffer, exactly as before.
    final char[] ch = new char[fBuffer.length];
    while (remaining > 0) {
        // Never ask for more than the scratch array can hold.
        int length = ch.length < remaining ? ch.length : (int)remaining;
        int count = read(ch, 0, length);
        if (count <= 0) {
            // read() produced nothing (e.g. it reported end of stream):
            // stop and report what was skipped so far.
            break;
        }
        remaining -= count;
    }
    return n - remaining;
} // skip(long):long
/**
 * Report whether the next read() is guaranteed not to block for input.
 * <p>
 * This implementation never gives that guarantee: it always answers
 * false. A false answer does not mean that the next read will block.
 *
 * @return Always false
 *
 * @exception IOException Declared by the signature; nothing in this
 *                        method throws it
 */
public boolean ready() throws IOException {
    // No non-blocking guarantee is ever made by this reader.
    final boolean guaranteedNotToBlock = false;
    return guaranteedNotToBlock;
} // ready()
/**
 * Report whether this stream supports the mark() operation.
 *
 * @return Always false; mark() on this reader throws an IOException
 */
public boolean markSupported() {
    // Marking is not available on this reader.
    final boolean supported = false;
    return supported;
} // markSupported()
/**
 * Mark the present position in the stream. This reader does not support
 * marking, so every call fails with an IOException.
 *
 * @param readAheadLimit Limit on the number of characters that may be
 *                       read while still preserving the mark. It is not
 *                       used, because the call always fails.
 *
 * @exception IOException Always thrown; the message is the localized
 *                        "jsp.error.xml.operationNotSupported" text for
 *                        "mark()" and "UTF-8"
 */
public void mark(int readAheadLimit) throws IOException {
    // Build the localized "operation not supported" text first, then fail.
    final String message =
        Localizer.getMessage("jsp.error.xml.operationNotSupported",
                             "mark()", "UTF-8");
    throw new IOException(message);
}
/**
 * Reset this reader's own decoding state.
 * <p>
 * Only two fields are changed: fSurrogate is set to -1 and fOffset is
 * set to 0. The underlying input stream is not touched by this method,
 * so it is neither repositioned nor re-read.
 * <p>
 * NOTE(review): read() stores left-over bytes at the start of fBuffer and
 * records how many in fOffset, so clearing fOffset presumably discards
 * those bytes; -1 presumably means "no pending surrogate". Confirm both
 * against the start of read(char[],int,int).
 *
 * @exception IOException Declared by the signature; nothing in this
 *                        method throws it
 */
public void reset() throws IOException {
    // Forget any pending surrogate and any carried-over byte count.
    fSurrogate = -1;
    fOffset = 0;
} // reset()
/**
 * Close the stream by closing the underlying input stream.
 * <p>
 * This method keeps no closed flag of its own. How later calls behave,
 * and whether closing twice is harmless, depends on the underlying
 * input stream.
 *
 * @exception IOException If closing the underlying input stream fails
 */
public void close() throws IOException {
    // Delegate entirely to the wrapped byte stream.
    this.fInputStream.close();
} // close()
//
// Private methods
//
/**
 * Throws an exception reporting that a byte of a multi-byte sequence was
 * expected but could not be read. read() calls this when the input ends
 * in the middle of a sequence.
 *
 * @param position Position of the missing byte within the sequence, as
 *                 passed by the caller (for example 2 for the second byte)
 * @param count    Total number of bytes in the sequence
 *
 * @exception UTFDataFormatException Always thrown, with the localized
 *                                   "jsp.error.xml.expectedByte" message
 */
private void expectedByte(int position, int count)
    throws UTFDataFormatException {
    // Message arguments are the decimal forms of position and count.
    final String positionText = Integer.toString(position);
    final String countText = Integer.toString(count);
    final String message =
        Localizer.getMessage("jsp.error.xml.expectedByte",
                             positionText,
                             countText);
    throw new UTFDataFormatException(message);
} // expectedByte(int,int)
/**
 * Throws an exception reporting that a byte of a multi-byte sequence has
 * an invalid value.
 *
 * @param position Position of the offending byte within the sequence, as
 *                 passed by the caller (for example 2 for the second byte)
 * @param count    Total number of bytes in the sequence
 * @param c        The offending byte value. It is accepted but not used:
 *                 the message is built from position and count only.
 *
 * @exception UTFDataFormatException Always thrown, with the localized
 *                                   "jsp.error.xml.invalidByte" message
 */
private void invalidByte(int position, int count, int c)
    throws UTFDataFormatException {
    // Message arguments are the decimal forms of position and count.
    final String positionText = Integer.toString(position);
    final String countText = Integer.toString(count);
    final String message =
        Localizer.getMessage("jsp.error.xml.invalidByte",
                             positionText,
                             countText);
    throw new UTFDataFormatException(message);
} // invalidByte(int,int,int)
/**
 * Throws an exception reporting invalid surrogate bits. read() calls this
 * when the "uuuuu" bits decoded from a 4-byte sequence exceed 0x10.
 *
 * @param uuuuu The offending bit value; it appears in the message in
 *              hexadecimal form
 *
 * @exception UTFDataFormatException Always thrown, with the localized
 *                                   "jsp.error.xml.invalidHighSurrogate"
 *                                   message
 */
private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
    // The message argument is the hexadecimal form of the bits.
    final String hexBits = Integer.toHexString(uuuuu);
    final String message =
        Localizer.getMessage("jsp.error.xml.invalidHighSurrogate",
                             hexBits);
    throw new UTFDataFormatException(message);
} // invalidSurrogate(int)
} // class UTF8Reader
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -