utf8reader.java

来自「RESIN 3.2 最新源码」· Java 代码 · 共 165 行

JAVA
165
字号
/*
 * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *   Free SoftwareFoundation, Inc.
 *   59 Temple Place, Suite 330
 *   Boston, MA 02111-1307  USA
 *
 * @author Scott Ferguson
 */

package com.caucho.xml2.readers;

import com.caucho.util.CharBuffer;
import com.caucho.vfs.ReadStream;
import com.caucho.xml2.XmlParser;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;

/**
 * A fast reader to convert bytes to characters for parsing XML.
 *
 * <p>Decodes one-, two- and three-byte UTF-8 sequences. Any other lead
 * byte (a stray continuation byte, or the lead byte of a four-byte
 * sequence) is rejected with a {@link CharConversionException}.
 */
public class Utf8Reader extends XmlReader {
  /**
   * Create a new reader.
   */
  public Utf8Reader()
  {
  }

  /**
   * Create a new reader with the given read stream.
   *
   * @param parser the parser passed through to the superclass constructor
   * @param is the byte stream passed through to the superclass constructor
   */
  public Utf8Reader(XmlParser parser, ReadStream is)
  {
    super(parser, is);
  }

  /**
   * Read the next character, returning -1 on end of file.
   *
   * <p>Both a lone {@code '\r'} and a {@code "\r\n"} pair are returned as a
   * single {@code '\n'}; every returned newline bumps the line counter and
   * reports it to the parser.
   *
   * @return the decoded character, or a negative value at end of stream
   * @throws IOException if the stream fails or holds malformed UTF-8
   */
  public int read()
    throws IOException
  {
    int ch1 = _is.read();

    if (ch1 == '\n') {
      _parser.setLine(++_line);

      return ch1;
    }

    if (ch1 == '\r') {
      _parser.setLine(++_line);

      // Look one character ahead so that "\r\n" collapses to one newline.
      int ch2 = _is.read();

      if (ch2 == '\n') {
        return '\n';
      }

      // Anything else (except end of stream) is not part of the line
      // terminator: decode it if necessary and give it back to the parser.
      if (ch2 >= 0) {
        _parser.unread(ch2 < 0x80 ? ch2 : readSecond(ch2));
      }

      return '\n';
    }

    // Covers plain ASCII and the negative end-of-stream value.
    if (ch1 < 0x80) {
      return ch1;
    }

    return readSecond(ch1);
  }

  /**
   * Decodes the remainder of a multi-byte UTF-8 sequence.
   *
   * @param ch1 the lead byte already read from the stream (0x80 or above)
   * @return the decoded character
   * @throws EOFException if the stream ends inside the sequence
   * @throws IOException if a lead or continuation byte is malformed
   */
  private int readSecond(int ch1)
    throws IOException
  {
    if ((ch1 & 0xe0) == 0xc0) {
      // Two-byte sequence: 110xxxxx 10xxxxxx
      int ch2 = _is.read();

      if (ch2 < 0) {
        throw new EOFException("unexpected end of file in utf8 character");
      }
      else if ((ch2 & 0xc0) != 0x80) {
        throw error(L.l("illegal utf8 encoding {0}", hex(ch1)));
      }

      return ((ch1 & 0x1f) << 6) + (ch2 & 0x3f);
    }
    else if ((ch1 & 0xf0) == 0xe0) {
      // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
      int ch2 = _is.read();
      int ch3 = _is.read();

      if (ch2 < 0) {
        throw new EOFException("unexpected end of file in utf8 character");
      }
      else if ((ch2 & 0xc0) != 0x80) {
        throw error(L.l("illegal utf8 encoding at {0} {1} {2}",
                        hex(ch1), hex(ch2), hex(ch3)));
      }

      if (ch3 < 0) {
        throw new EOFException("unexpected end of file in utf8 character");
      }
      else if ((ch3 & 0xc0) != 0x80) {
        throw error(L.l("illegal utf8 encoding {0} {1} {2}",
                        hex(ch1), hex(ch2), hex(ch3)));
      }

      // The lead byte carries 4 payload bits. Bit 4 is always clear in this
      // branch, so 0x0f yields the same value as a wider mask would.
      int ch = ((ch1 & 0x0f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);

      // handle some writers, e.g. microsoft, that emit U+FEFF:
      // drop it and return the following character instead.
      if (ch == 0xfeff) {
        return read();
      }

      return ch;
    }
    else {
      throw error(L.l("illegal utf8 encoding at {0}", hex(ch1)));
    }
  }

  /**
   * Formats the low eight bits of {@code n} as {@code 0x} followed by two
   * lower-case hexadecimal digits.
   *
   * @param n the value to format; only the low byte is used
   * @return the formatted byte, e.g. {@code "0xe2"}
   */
  private String hex(int n)
  {
    n = n & 0xff;

    CharBuffer cb = CharBuffer.allocate();

    cb.append("0x");
    cb.append(Character.forDigit(n >> 4, 16));
    cb.append(Character.forDigit(n & 0x0f, 16));

    return cb.close();
  }

  /**
   * Builds the exception used to report malformed input. The message is
   * prefixed with {@code filename:line: } when the parser has a filename.
   *
   * @param msg the description of the problem
   * @return the exception for the caller to throw
   */
  private CharConversionException error(String msg)
  {
    String filename = _parser.getFilename();
    int line = _parser.getLine();

    if (filename != null) {
      return new CharConversionException(filename + ":" + line + ": " + msg);
    }

    return new CharConversionException(msg);
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?