📄 cbstring.java
字号:
/*
* Copyright (c) 2000-2004, Rickard Cöster, Martin Svensson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of SICS nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
*/
package com.mellowtech.disc;
import java.util.*;
import java.text.*;
import java.nio.*;
/**
* String that can be represented as bytes in conformance with the
* ByteStorable definition. It represents itself as an UTF-8 encoded
* string. The CBString also allows for comparison on a byte level, i.e.
* directly comparing the UTF-8 encoded strings.
* <p>
* The CBString can be used with a Collator to get correct comparison
* of Strings at a language specific level. In the current implementation
* this does not work with the compare method directly on the UTF-8
* encoded Strings.
* </p>
*
* @author Martin Svensson
* @version 1.0
*/
public class CBString extends ByteStorable implements ByteComparable{

    /**
     * Collator shared by all CBString instances. When <code>null</code>,
     * {@link #equals(Object)} and {@link #compareTo(Object)} fall back to the
     * plain <code>String</code> methods.
     */
    private static Collator collator = null;

    /** The wrapped string; never <code>null</code>. */
    private String str;

    /** Number of bytes the string body occupies once encoded (size prefix excluded). */
    private int utfLength;

    /**
     * Returns the current collator that is used when comparing Strings.
     * @return a <code>Collator</code> value, or <code>null</code> if none is set
     */
    public static Collator getCollator(){
        return collator;
    }

    /**
     * Set the collator to be used when comparing Strings.
     *
     * @param collator a <code>Collator</code> value, or <code>null</code>
     * to go back to plain String comparison
     */
    public static void setCollator(Collator collator){
        CBString.collator = collator;
    }

    /**
     * Creates a new <code>CBString</code> instance with an
     * empty string
     *
     */
    public CBString(){
        this.str = "";
        utfLength = 0;
    }

    /**
     * Creates a new <code>CBString</code> instance. If
     * str is null a new empty string will be created.
     *
     * @param str the String that it represents.
     */
    public CBString(String str){
        this.str = (str == null) ? "" : str;
        utfLength = getUTFLength();
    }

    /**
     * Sets the string for this CBString. If the
     * str is null an empty string will be created.
     * @param str new string
     */
    public void setString(String str){
        this.str = (str == null) ? "" : str;
        utfLength = getUTFLength();
    }

    /**
     * Get the String that this CBString contains.
     * @return the value
     */
    public String getString(){
        return str;
    }

    /**
     * Returns the wrapped string.
     * @return the value
     */
    public String toString(){
        return str;
    }

    /**
     * Uses the hashcode of the current string. Note that this is computed
     * from the raw string even when a collator is set, so two instances that
     * are only collator-equal (see {@link #equals(Object)}) may produce
     * different hash codes.
     * @return hashcode
     * @see String#hashCode()
     */
    public int hashCode(){
        return str.hashCode();
    }

    /**
     * If the Collator has been set use that, otherwise use the
     * String's equals method. Anything that is not a CBString
     * (including <code>null</code>) is never equal to this object.
     *
     * @param o an <code>Object</code> to compare with
     * @return true if the Strings are equal
     * @see String#equals(Object)
     * @see Collator#equals(String, String)
     */
    public boolean equals(Object o){
        if(this == o)
            return true;
        // Guard the cast: equals must answer false, not throw, for null
        // or for objects of another type.
        if(!(o instanceof CBString))
            return false;
        String other = ((CBString) o).str;
        return (collator == null) ? str.equals(other) : collator.equals(str, other);
    }

    //IMPLEMENTED ByteComparable
    /**
     * Compares two serialized strings stored in the same byte array without
     * decoding them. Each string starts with a variable-length size prefix
     * holding 7 bits per byte, least significant group first; the byte with
     * its high bit set is the last byte of the prefix. The string bodies are
     * then compared byte by byte as unsigned values. The collator is not
     * consulted.
     *
     * @param offset1 index in <code>b</code> where the first string's size prefix starts
     * @param offset2 index in <code>b</code> where the second string's size prefix starts
     * @param b array holding both serialized strings
     * @return the difference of the first pair of differing bytes, or if one
     * body is a prefix of the other, the difference of the body lengths
     */
    public final int byteCompare(int offset1, int offset2, byte b[]){
        int tmp, len1 = 0, len2 = 0, i = 0;

        //get Size of first string:
        tmp = b[offset1++] & 0xFF;
        while((tmp & 0x80) == 0){
            len1 |= (tmp << (7*i++));
            tmp = b[offset1++] & 0xFF;
        }
        // terminating byte: strip the marker bit before merging it in
        len1 |= ((tmp & ~(0x80)) << (7*i));

        //get Size of second string:
        i = 0;
        tmp = b[offset2++] & 0xFF;
        while((tmp & 0x80) == 0){
            len2 |= (tmp << (7*i++));
            tmp = b[offset2++] & 0xFF;
        }
        len2 |= ((tmp & ~(0x80)) << (7*i));

        //now loop over the common part of the two bodies:
        int n = Math.min(len1, len2);
        while(n-- != 0){
            if(b[offset1] != b[offset2])
                return ((int)b[offset1] & 0x0FF) - ((int) b[offset2] & 0x0FF);
            offset1++; offset2++;
        }
        // equal up to the shorter length: the shorter string sorts first
        return len1 - len2;
    }

    /**
     * Same comparison as {@link #byteCompare(int, int, byte[])} but reading
     * from a <code>ByteBuffer</code> using absolute gets, so the buffer's
     * position is left untouched.
     *
     * @param offset1 index in <code>bb</code> where the first string's size prefix starts
     * @param offset2 index in <code>bb</code> where the second string's size prefix starts
     * @param bb buffer holding both serialized strings
     * @return the difference of the first pair of differing bytes, or if one
     * body is a prefix of the other, the difference of the body lengths
     */
    public final int byteCompare(int offset1, int offset2, ByteBuffer bb){
        int tmp, tmp2, len1 = 0, len2 = 0, i = 0;

        //get Size of first string:
        tmp = bb.get(offset1++) & 0xFF;
        while((tmp & 0x80) == 0){
            len1 |= (tmp << (7*i++));
            tmp = bb.get(offset1++) & 0xFF;
        }
        len1 |= ((tmp & ~(0x80)) << (7*i));

        //get Size of second string:
        i = 0;
        tmp = bb.get(offset2++) & 0xFF;
        while((tmp & 0x80) == 0){
            len2 |= (tmp << (7*i++));
            tmp = bb.get(offset2++) & 0xFF;
        }
        len2 |= ((tmp & ~(0x80)) << (7*i));

        //now loop over the common part of the two bodies:
        int n = Math.min(len1, len2);
        while(n-- != 0){
            tmp = (int) bb.get(offset1);
            tmp2 = (int) bb.get(offset2);
            if(tmp != tmp2)
                return (tmp & 0xFF) - (tmp2 & 0xFF);
            offset1++; offset2++;
        }
        return len1 - len2;
    }

    //Overwritten ByteStorable
    /**
     * Compares this string with another CBString, using the collator if one
     * has been set and <code>String.compareTo</code> otherwise.
     *
     * @param o the CBString to compare with
     * @return negative, zero or positive as this string sorts before,
     * equal to or after <code>o</code>
     * @throws ClassCastException if <code>o</code> is not a CBString
     */
    public int compareTo(Object o){
        String other = ((CBString) o).str;
        return (collator == null) ? str.compareTo(other) : collator.compare(str, other);
    }

    /**
     * Number of bytes this string occupies when serialized:
     * the encoded body plus its size prefix.
     * @return size in bytes
     */
    public int byteSize(){
        return utfLength + sizeBytesNeeded(utfLength);
    }

    /**
     * Number of bytes occupied by the serialized string that starts at the
     * buffer's current position. The position is restored before returning.
     *
     * @param bb buffer positioned at the start of a serialized string
     * @return size in bytes, size prefix included
     */
    public int byteSize(ByteBuffer bb){
        int pos = bb.position();
        int length = getSize(bb);
        bb.position(pos);
        return length + sizeBytesNeeded(length);
    }

    /**
     * Writes the size prefix followed by the encoded string at the
     * buffer's current position.
     * @param bb destination buffer
     */
    public void toBytes(ByteBuffer bb){
        encodeUTF(bb);
    }

    /**
     * Reads a serialized string from the buffer's current position.
     * @param bb source buffer
     * @return a new CBString holding the decoded value
     */
    public ByteStorable fromBytes(ByteBuffer bb){
        return new CBString(decodeUTF(bb));
    }

    /**
     * Finds the shortest string that separates two strings:<br>
     * example 1: martin and rickard would be "r"<br>
     * example 2: martin and mary would be "mary"
     * <p>
     * The result is the shortest prefix of the larger string (as ordered by
     * {@link #compareTo(Object)}) that includes the first character at which
     * the two strings differ. If no such character exists within the larger
     * string, the whole larger string is returned.
     * </p>
     *
     * @param str1 a <code>ByteStorable</code> value
     * @param str2 a <code>ByteStorable</code> value
     * @return the smallest separator
     */
    public ByteStorable separate(ByteStorable str1, ByteStorable str2){
        String small, large;
        if(str1.compareTo(str2) < 0){
            small = ((CBString)str1).str;
            large = ((CBString)str2).str;
        }
        else{
            small = ((CBString)str2).str;
            large = ((CBString)str1).str;
        }

        // With a collator the "large" string may be the shorter one (e.g. the
        // strings differ only in characters the collator ignores), so the
        // scan has to be bounded by both lengths.
        int common = Math.min(small.length(), large.length());
        int i;
        for(i = 0; i < common; i++){
            if(small.charAt(i) != large.charAt(i))
                break;
        }

        // Keep the first differing character, but never more than large has.
        int end = Math.min(i + 1, large.length());
        CBString newStr = new CBString();
        newStr.setString(large.substring(0, end));
        return newStr;
    }

    /*************PRIVATE METHODS GOES HERE*****************************/

    /**
     * Reads the size prefix and then decodes that many bytes of 1-, 2- and
     * 3-byte sequences into chars. A byte that cannot start such a sequence
     * is skipped, but still counted against the declared length so that
     * decoding never runs past the end of the record.
     *
     * @param bb buffer positioned at the size prefix
     * @return the decoded string, empty if the declared length is zero
     */
    private String decodeUTF(ByteBuffer bb){
        int remaining = getSize(bb);
        if(remaining == 0)
            return "";
        StringBuffer out = new StringBuffer(remaining);
        int c, char2, char3;
        while(remaining > 0){
            c = (int) bb.get() & 0xff;
            switch (c >> 4){
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                /* 0xxxxxxx*/
                remaining--;
                out.append((char)c);
                break;
            case 12: case 13:
                /* 110x xxxx 10xx xxxx*/
                remaining -= 2;
                char2 = (int) bb.get();
                out.append((char)(((c & 0x1F) << 6) | (char2 & 0x3F)));
                break;
            case 14:
                /* 1110 xxxx 10xx xxxx 10xx xxxx */
                remaining -= 3;
                char2 = (int) bb.get();
                char3 = (int) bb.get();
                out.append((char)(((c & 0x0F) << 12) |
                                  ((char2 & 0x3F) << 6) |
                                  ((char3 & 0x3F) << 0)));
                break;
            default:
                /* 10xx xxxx or 1111 xxxx: not a valid lead byte here.
                   The byte has been consumed, so it must be counted. */
                remaining--;
                break;
            }
        }
        return out.toString();
    }

    /**
     * Writes the size prefix (the encoded byte length) followed by the
     * string, one char at a time: chars 0x0001-0x007F take one byte,
     * chars above 0x07FF take three bytes, everything else (including
     * char 0) takes two bytes.
     *
     * @param bb destination buffer
     */
    private void encodeUTF(ByteBuffer bb){
        int strlen = (utfLength == 0) ? 0 : str.length();
        int c;
        //code length;
        putSize(utfLength, bb);
        for (int i = 0; i < strlen; i++){
            c = str.charAt(i);
            if((c >= 0x0001) && (c <= 0x007F))
                bb.put((byte) c);
            else if(c > 0x07FF){
                bb.put((byte) (0xE0 | ((c >> 12) & 0x0F)));
                bb.put((byte) (0x80 | ((c >> 6) & 0x3F)));
                bb.put((byte) (0x80 | ((c >> 0) & 0x3F)));
            }
            else{
                bb.put((byte) (0xC0 | ((c >> 6) & 0x1F)));
                bb.put((byte) (0x80 | ((c >> 0) & 0x3F)));
            }
        }
    }

    /**
     * Computes how many bytes {@link #encodeUTF(ByteBuffer)} will write for
     * the string body, using the same per-char size rules.
     *
     * @return encoded length in bytes, 0 for a null or empty string
     */
    private int getUTFLength(){
        if(str == null)
            return 0;
        int strlen = str.length();
        int c;
        int utflength = 0;
        for(int i = 0; i < strlen; i++){
            c = str.charAt(i);
            if((c >= 0x0001) && (c <=0x007F))
                utflength++;
            else if (c > 0x07FF)
                utflength +=3;
            else
                utflength +=2;
        }
        return utflength;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -