📄 text.java
字号:
/** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.hadoop.io;import java.io.IOException;import java.io.DataInput;import java.io.DataOutput;import java.nio.ByteBuffer;import java.nio.CharBuffer;import java.nio.charset.CharacterCodingException;import java.nio.charset.Charset;import java.nio.charset.CharsetDecoder;import java.nio.charset.CharsetEncoder;import java.nio.charset.CodingErrorAction;import java.nio.charset.MalformedInputException;import java.text.CharacterIterator;import java.text.StringCharacterIterator;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;/** This class stores text using standard UTF8 encoding. It provides methods * to serialize, deserialize, and compare texts at byte level. The type of * length is integer and is serialized using zero-compressed format. <p>In * addition, it provides methods for string traversal without converting the * byte array to a string. <p>Also includes utilities for * serializing/deserialing a string, coding/decoding a string, checking if a * byte array contains valid UTF8 code, calculating the length of an encoded * string. */public class Text implements WritableComparable { private static final Log LOG= LogFactory.getLog("org.apache.hadoop.io.Text"); private static final CharsetDecoder DECODER = Charset.forName("UTF-8").newDecoder(). onMalformedInput(CodingErrorAction.REPORT). onUnmappableCharacter(CodingErrorAction.REPORT); private static final CharsetEncoder ENCODER = Charset.forName("UTF-8").newEncoder(). onMalformedInput(CodingErrorAction.REPORT). onUnmappableCharacter(CodingErrorAction.REPORT); private static final byte [] EMPTY_BYTES = new byte[0]; private byte[] bytes; private int length; public Text() { bytes = EMPTY_BYTES; } /** Construct from a string. */ public Text(String string) { set(string); } /** Construct from another text. */ public Text(Text utf8) { set(utf8); } /** Construct from a byte array. */ public Text(byte[] utf8) { set(utf8); } /** Retuns the raw bytes. */ public byte[] getBytes() { return bytes; } /** Returns the number of bytes in the byte array */ public int getLength() { return length; } /** * Returns the Unicode Scalar Value (32-bit integer value) * for the character at <code>position</code>. Note that this * method avoids using the converter or doing String instatiation * @returns the Unicode scalar value at position or -1 * if the position is invalid or points to a * trailing byte */ public int charAt(int position) { if (position > this.length) return -1; // too long if (position < 0) return -1; // duh. ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position); return bytesToCodePoint(bb.slice()); } public int find(String what) { return find(what, 0); } /** * Finds any occurence of <code>what</code> in the backing * buffer, starting as position <code>start</code>. The starting * position is measured in bytes and the return value is in * terms of byte position in the buffer. The backing buffer is * not converted to a string for this operation. * @return byte position of the first occurence of the search * string in the UTF-8 buffer or -1 if not found */ public int find(String what, int start) { try { ByteBuffer src = ByteBuffer.wrap(this.bytes); ByteBuffer tgt = encode(what); byte b = tgt.get(); src.position(start); while (src.hasRemaining()) { if (b == src.get()) { // matching first byte src.mark(); // save position in loop tgt.mark(); // save position in target boolean found = true; int pos = src.position()-1; while (tgt.hasRemaining()) { if (!src.hasRemaining()) { // src expired first tgt.reset(); src.reset(); found = false; break; } if (!(tgt.get() == src.get())) { tgt.reset(); src.reset(); found = false; break; // no match } } if (found) return pos; } } return -1; // not found } catch (CharacterCodingException e) { // can't get here e.printStackTrace(); return -1; } } /** Set to contain the contents of a string. */ public void set(String string) { try { ByteBuffer bb = encode(string, true); bytes = bb.array(); length = bb.limit(); }catch(CharacterCodingException e) { throw new RuntimeException("Should not have happened " + e.toString()); } } /** Set to a utf8 byte array */ public void set(byte[] utf8) { set(utf8, 0, utf8.length); } /** copy a text. */ public void set(Text other) { set(other.bytes, 0, other.length); } /** * Set the Text to range of bytes * @param utf8 the data to copy from * @param start the first position of the new string * @param len the number of bytes of the new string */ public void set(byte[] utf8, int start, int len) { setCapacity(len); System.arraycopy(utf8, start, bytes, 0, len); this.length = len; } /* * Sets the capacity of this Text object to <em>at least</em> * <code>len</code> bytes. If the current buffer is longer, * then the capacity and existing content of the buffer are * unchanged. If <code>len</code> is larger * than the current capacity, the Text object's capacity is * increased to match. The existing contents of the buffer * (if any) are deleted. */ private void setCapacity( int len ) { if (bytes == null || bytes.length < len) bytes = new byte[len]; } /** * Convert text back to string * @see java.lang.Object#toString() */ public String toString() { try { return decode(bytes, 0, length); } catch (CharacterCodingException e) { return null; } } /** deserialize */ public void readFields(DataInput in) throws IOException { length = WritableUtils.readVInt(in); setCapacity(length); in.readFully(bytes, 0, length); } /** Skips over one Text in the input. */ public static void skip(DataInput in) throws IOException { int length = WritableUtils.readVInt(in); in.skipBytes(length); } /** serialize * write this object to out * length uses zero-compressed encoding * @see Writable#write(DataOutput) */ public void write(DataOutput out) throws IOException { WritableUtils.writeVInt(out, length); out.write(bytes, 0, length); } /** Compare two Texts bytewise using standard UTF8 ordering. */ public int compareTo(Object o) { Text that = (Text)o; if(this == that) return 0; else return WritableComparator.compareBytes(bytes, 0, length, that.bytes, 0, that.length); } /** Returns true iff <code>o</code> is a Text with the same contents. */ public boolean equals(Object o) { if (!(o instanceof Text)) return false; Text that = (Text)o; if (this == that) return true; else if (this.length != that.length) return false; else return WritableComparator.compareBytes(bytes, 0, length, that.bytes, 0, that.length) == 0; } /** hash function */ public int hashCode() { return WritableComparator.hashBytes(bytes, length); } /** A WritableComparator optimized for Text keys. */ public static class Comparator extends WritableComparator { public Comparator() { super(Text.class); } public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { try { int n1 = readVInt(b1, s1); int n2 = readVInt(b2, s2); return compareBytes(b1, s1+WritableUtils.getVIntSize(n1), n1, b2, s2+WritableUtils.getVIntSize(n2), n2); }catch(IOException e) { LOG.warn(e); throw new RuntimeException(e); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -