utf8.java

来自「Hadoop是一个用于运行应用程序在大型集群的廉价硬件设备上的框架。Hadoop」· Java 代码 · 共 285 行

JAVA

285 行

/** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.hadoop.io;import java.io.IOException;import java.io.DataInput;import java.io.DataOutput;import java.util.logging.Logger;import org.apache.hadoop.util.LogFormatter;/** A WritableComparable for strings that uses the UTF8 encoding. *  * <p>Also includes utilities for efficiently reading and writing UTF-8. * * @author Doug Cutting */public class UTF8 implements WritableComparable {  private static final Logger LOG= LogFormatter.getLogger("org.apache.hadoop.io.UTF8");  private static final DataOutputBuffer OBUF = new DataOutputBuffer();  private static final DataInputBuffer IBUF = new DataInputBuffer();  private static final byte[] EMPTY_BYTES = new byte[0];  private byte[] bytes = EMPTY_BYTES;  private int length;  public UTF8() {    //set("");  }  /** Construct from a given string. */  public UTF8(String string) {    set(string);  }  /** Construct from a given string. */  public UTF8(UTF8 utf8) {    set(utf8);  }  /** The raw bytes. */  public byte[] getBytes() {    return bytes;  }  /** The number of bytes in the encoded string. */  public int getLength() {    return length;  }  /** Set to contain the contents of a string. 
*/  public void set(String string) {    if (string.length() > 0xffff/3) {             // maybe too long      LOG.warning("truncating long string: " + string.length()                  + " chars, starting with " + string.substring(0, 20));      string = string.substring(0, 0xffff/3);    }    length = utf8Length(string);                  // compute length    if (length > 0xffff)                          // double-check length      throw new RuntimeException("string too long!");    if (bytes == null || length > bytes.length)   // grow buffer      bytes = new byte[length];    try {                                         // avoid sync'd allocations      synchronized (OBUF) {        OBUF.reset();        writeChars(OBUF, string, 0, string.length());        System.arraycopy(OBUF.getData(), 0, bytes, 0, length);      }    } catch (IOException e) {      throw new RuntimeException(e);    }  }  /** Set to contain the contents of a string. */  public void set(UTF8 other) {    length = other.length;    if (bytes == null || length > bytes.length)   // grow buffer      bytes = new byte[length];    System.arraycopy(other.bytes, 0, bytes, 0, length);  }  public void readFields(DataInput in) throws IOException {    length = in.readUnsignedShort();    if (bytes == null || bytes.length < length)      bytes = new byte[length];    in.readFully(bytes, 0, length);  }  /** Skips over one UTF8 in the input. */  public static void skip(DataInput in) throws IOException {    int length = in.readUnsignedShort();    in.skipBytes(length);  }  public void write(DataOutput out) throws IOException {    out.writeShort(length);    out.write(bytes, 0, length);  }  /** Compare two UTF8s. */  public int compareTo(Object o) {    UTF8 that = (UTF8)o;    return WritableComparator.compareBytes(bytes, 0, length,                                           that.bytes, 0, that.length);  }  /** Convert to a String. 
*/  public String toString() {    StringBuffer buffer = new StringBuffer(length);    try {      synchronized (IBUF) {        IBUF.reset(bytes, length);        readChars(IBUF, buffer, length);      }    } catch (IOException e) {      throw new RuntimeException(e);    }    return buffer.toString();  }  /** Returns true iff <code>o</code> is a UTF8 with the same contents.  */  public boolean equals(Object o) {    if (!(o instanceof UTF8))      return false;    UTF8 that = (UTF8)o;    if (this.length != that.length)      return false;    else      return WritableComparator.compareBytes(bytes, 0, length,                                             that.bytes, 0, that.length) == 0;  }  public int hashCode() {    return WritableComparator.hashBytes(bytes, length);  }  /** A WritableComparator optimized for UTF8 keys. */  public static class Comparator extends WritableComparator {    public Comparator() {      super(UTF8.class);    }    public int compare(byte[] b1, int s1, int l1,                       byte[] b2, int s2, int l2) {      int n1 = readUnsignedShort(b1, s1);      int n2 = readUnsignedShort(b2, s2);      return compareBytes(b1, s1+2, n1, b2, s2+2, n2);    }  }  static {                                        // register this comparator    WritableComparator.define(UTF8.class, new Comparator());  }  /// STATIC UTILITIES FROM HERE DOWN  /// These are probably not used much anymore, and might be removed...  /** Convert a string to a UTF-8 encoded byte array.   
* @see String#getBytes(String)   */  public static byte[] getBytes(String string) {    byte[] result = new byte[utf8Length(string)];    try {                                         // avoid sync'd allocations      synchronized (OBUF) {        OBUF.reset();        writeChars(OBUF, string, 0, string.length());        System.arraycopy(OBUF.getData(), 0, result, 0, OBUF.getLength());      }    } catch (IOException e) {      throw new RuntimeException(e);    }    return result;  }  /** Read a UTF-8 encoded string.   *   * @see DataInput#readUTF()   */  public static String readString(DataInput in) throws IOException {    int bytes = in.readUnsignedShort();    StringBuffer buffer = new StringBuffer(bytes);    readChars(in, buffer, bytes);    return buffer.toString();  }  private static void readChars(DataInput in, StringBuffer buffer, int nBytes)    throws IOException {    synchronized (OBUF) {      OBUF.reset();      OBUF.write(in, nBytes);      byte[] bytes = OBUF.getData();      int i = 0;      while (i < nBytes) {        byte b = bytes[i++];        if ((b & 0x80) == 0) {          buffer.append((char)(b & 0x7F));        } else if ((b & 0xE0) != 0xE0) {          buffer.append((char)(((b & 0x1F) << 6)                               | (bytes[i++] & 0x3F)));        } else {          buffer.append((char)(((b & 0x0F) << 12)                               | ((bytes[i++] & 0x3F) << 6)                               |  (bytes[i++] & 0x3F)));        }      }    }  }  /** Write a UTF-8 encoded string.   
*   * @see DataOutput#writeUTF(String)   */  public static int writeString(DataOutput out, String s) throws IOException {    if (s.length() > 0xffff/3) {         // maybe too long      LOG.warning("truncating long string: " + s.length()                  + " chars, starting with " + s.substring(0, 20));      s = s.substring(0, 0xffff/3);    }    int len = utf8Length(s);    if (len > 0xffff)                             // double-check length      throw new IOException("string too long!");          out.writeShort(len);    writeChars(out, s, 0, s.length());    return len;  }  /** Returns the number of bytes required to write this. */  private static int utf8Length(String string) {    int stringLength = string.length();    int utf8Length = 0;    for (int i = 0; i < stringLength; i++) {      int c = string.charAt(i);      if ((c >= 0x0001) && (c <= 0x007F)) {        utf8Length++;      } else if (c > 0x07FF) {        utf8Length += 3;      } else {        utf8Length += 2;      }    }    return utf8Length;  }  private static void writeChars(DataOutput out,                                 String s, int start, int length)    throws IOException {    final int end = start + length;    for (int i = start; i < end; i++) {      int code = s.charAt(i);      if (code >= 0x01 && code <= 0x7F) {        out.writeByte((byte)code);      } else if (code <= 0x07FF) {        out.writeByte((byte)(0xC0 | ((code >> 6) & 0x1F)));        out.writeByte((byte)(0x80 |   code       & 0x3F));      } else {        out.writeByte((byte)(0xE0 | ((code >> 12) & 0X0F)));        out.writeByte((byte)(0x80 | ((code >>  6) & 0x3F)));        out.writeByte((byte)(0x80 |  (code        & 0x3F)));      }    }  }}

utf8.java - 源码说明

本页面展示了「Hadoop是一个用于在由廉价硬件设备组成的大型集群上运行应用程序的框架。Hadoop为应用程序透明地提供了一组稳定、可靠的接口和数据移动能力。在Hadoop中实现了Google的MapReduce算法」中的 utf8.java 源码文件，采用 Java 编程语言编写，共 285 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包，用于本地学习和开发。

虫虫下载站收录了大量与Hadoop相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?