utf8codec.scala

来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 120 行

SCALA
120
字号
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2008, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

// $Id: UTF8Codec.scala 14483 2008-04-02 14:43:16Z odersky $

package scala.io

/** Minimal UTF-8 encoder/decoder working on code points, `Char` arrays
 *  and strings.
 *
 *  The codec is deliberately lenient: continuation bytes are not validated
 *  while decoding, and unpaired surrogates are encoded as three-byte
 *  sequences instead of being rejected.
 *
 *  @author  Martin Odersky
 *  @version 1.0, 04/10/2004
 */
object UTF8Codec {

  /** Code point (U+FFFD) substituted for input that cannot be represented. */
  final val UNI_REPLACEMENT_CHAR: Int = 0x0000FFFD

  /** Converts a single code point to its UTF-8 byte sequence.
   *
   *  Code points outside `0..0x10FFFF` (including negative values) are
   *  encoded as [[UNI_REPLACEMENT_CHAR]]. Surrogate code points are encoded
   *  as plain three-byte sequences.
   *
   *  @author buraq
   *  @param ch1 the code point to encode
   *  @return    a freshly allocated array of 1 to 4 bytes
   */
  def encode(ch1: Int): Array[Byte] = {
    // Anything outside the Unicode range collapses to U+FFFD.
    val ch = if (ch1 < 0 || ch1 > 0x0010FFFF) UNI_REPLACEMENT_CHAR else ch1
    if (ch < 0x80)
      Array(ch.toByte)
    else if (ch < 0x800)
      Array((0xC0 | (ch >> 6)).toByte,
            (0x80 | (ch & 0x3F)).toByte)
    else if (ch < 0x10000)
      Array((0xE0 | (ch >> 12)).toByte,
            (0x80 | ((ch >> 6) & 0x3F)).toByte,
            (0x80 | (ch & 0x3F)).toByte)
    else
      Array((0xF0 | (ch >> 18)).toByte,
            (0x80 | ((ch >> 12) & 0x3F)).toByte,
            (0x80 | ((ch >> 6) & 0x3F)).toByte,
            (0x80 | (ch & 0x3F)).toByte)
  }

  /** Encodes `len` chars of `src`, starting at `from`, into `dst` starting
   *  at index `to`.
   *
   *  A high surrogate immediately followed (within the encoded range) by a
   *  low surrogate is written as one four-byte sequence; an unpaired
   *  surrogate is written as a three-byte sequence. `dst` must have room
   *  for up to three bytes per input char.
   *
   *  @param src  the chars to encode
   *  @param from index of the first char to encode
   *  @param dst  the array receiving the encoded bytes
   *  @param to   index in `dst` of the first byte written
   *  @param len  number of chars to encode
   *  @return     the index in `dst` just past the last byte written
   */
  def encode(src: Array[Char], from: Int, dst: Array[Byte], to: Int, len: Int): Int = {
    var i = from
    var j = to
    val end = from + len
    while (i < end) {
      val ch: Int = src(i)
      i += 1
      if (ch < 0x80) {
        dst(j) = ch.toByte
        j += 1
      } else if (ch <= 0x7FF) {
        // Two-byte sequences carry 11 payload bits, i.e. up to U+07FF.
        dst(j)     = (0xC0 | (ch >> 6)).toByte
        dst(j + 1) = (0x80 | (ch & 0x3F)).toByte
        j += 2
      } else if (ch >= 0xD800 && ch <= 0xDBFF &&
                 i < end && src(i) >= 0xDC00 && src(i) <= 0xDFFF) {
        // Well-formed surrogate pair: combine into one supplementary code point.
        val cp = 0x10000 + ((ch - 0xD800) << 10) + (src(i) - 0xDC00)
        i += 1
        dst(j)     = (0xF0 | (cp >> 18)).toByte
        dst(j + 1) = (0x80 | ((cp >> 12) & 0x3F)).toByte
        dst(j + 2) = (0x80 | ((cp >> 6) & 0x3F)).toByte
        dst(j + 3) = (0x80 | (cp & 0x3F)).toByte
        j += 4
      } else {
        dst(j)     = (0xE0 | (ch >> 12)).toByte
        dst(j + 1) = (0x80 | ((ch >> 6) & 0x3F)).toByte
        dst(j + 2) = (0x80 | (ch & 0x3F)).toByte
        j += 3
      }
    }
    j
  }

  /** Encodes the whole string `s` into `dst` starting at index `to`.
   *
   *  @param s   the string to encode
   *  @param dst the array receiving the encoded bytes
   *  @param to  index in `dst` of the first byte written
   *  @return    the index in `dst` just past the last byte written
   */
  def encode(s: String, dst: Array[Byte], to: Int): Int =
    encode(s.toCharArray(), 0, dst, to, s.length())

  /** Encodes the string `s` into a freshly allocated, exactly sized array.
   *
   *  @param s the string to encode
   *  @return  the UTF-8 bytes of `s`
   */
  def encode(s: String): Array[Byte] = {
    // Worst case is three bytes per char; trim to the used prefix afterwards.
    val scratch = new Array[Byte](s.length() * 3)
    val used = encode(s, scratch, 0)
    val result = new Array[Byte](used)
    System.arraycopy(scratch, 0, result, 0, used)
    result
  }

  /** Decodes `len` bytes of `src`, starting at `from`, into `dst` starting
   *  at index `to`.
   *
   *  Four-byte sequences are written as a surrogate pair. A multi-byte
   *  sequence cut off by the end of the range, or a four-byte sequence that
   *  does not denote a code point in `0x10000..0x10FFFF`, yields one
   *  [[UNI_REPLACEMENT_CHAR]]. Continuation bytes are not validated, and a
   *  byte in `0x80..0xBF` found in lead position is copied through as the
   *  char with the same value. `dst` needs room for at most `len` chars.
   *
   *  @param src  the bytes to decode
   *  @param from index of the first byte to decode
   *  @param dst  the array receiving the decoded chars
   *  @param to   index in `dst` of the first char written
   *  @param len  number of bytes to decode
   *  @return     the index in `dst` just past the last char written
   */
  def decode(src: Array[Byte], from: Int,
             dst: Array[Char], to: Int, len: Int): Int = {
    var i = from
    var j = to
    val end = from + len
    while (i < end) {
      val lead = src(i) & 0xFF
      i += 1
      // Number of continuation bytes announced by the lead byte.
      val extra =
        if (lead >= 0xF0) 3
        else if (lead >= 0xE0) 2
        else if (lead >= 0xC0) 1
        else 0
      if (i + extra > end) {
        // Sequence is cut off by the end of the range: never read past `end`.
        dst(j) = UNI_REPLACEMENT_CHAR.toChar
        j += 1
        i = end
      } else if (extra == 3) {
        val cp = ((lead & 0x07) << 18) |
                 ((src(i) & 0x3F) << 12) |
                 ((src(i + 1) & 0x3F) << 6) |
                 (src(i + 2) & 0x3F)
        i += 3
        if (cp >= 0x10000 && cp <= 0x0010FFFF) {
          // Supplementary code point: split into a surrogate pair.
          val v = cp - 0x10000
          dst(j)     = (0xD800 | (v >> 10)).toChar
          dst(j + 1) = (0xDC00 | (v & 0x3FF)).toChar
          j += 2
        } else {
          dst(j) = UNI_REPLACEMENT_CHAR.toChar
          j += 1
        }
      } else if (extra == 2) {
        dst(j) = (((lead & 0x0F) << 12) |
                  ((src(i) & 0x3F) << 6) |
                  (src(i + 1) & 0x3F)).toChar
        i += 2
        j += 1
      } else if (extra == 1) {
        dst(j) = (((lead & 0x1F) << 6) | (src(i) & 0x3F)).toChar
        i += 1
        j += 1
      } else {
        dst(j) = lead.toChar
        j += 1
      }
    }
    j
  }

  /** Decodes `len` bytes of `src`, starting at `from`, into a string.
   *
   *  @param src  the bytes to decode
   *  @param from index of the first byte to decode
   *  @param len  number of bytes to decode
   *  @return     the decoded string
   */
  def decode(src: Array[Byte], from: Int, len: Int): String = {
    // A UTF-8 sequence never decodes to more chars than it has bytes.
    val cs = new Array[Char](len)
    new String(cs, 0, decode(src, from, cs, 0, len))
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?