📄 bitvector.java
字号:
/*
* Copyright (c) 2000-2004, Rickard C鰏ter, Martin Svensson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of SICS nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
*/
package com.mellowtech.disc;
import java.nio.ByteBuffer;
import se.sics.util.DataTypeUtils;
import java.util.Arrays;
/**
* Vector of bits. Methods for compressing integers using Elias
* Gamma/Delta and Variable Byte coding. <p> Note: Do not mix variable
* byte coding with Elias gamma/delta coding in this version, since
* this is not yet taken care of in this implementatation. <p>
*
* The algorithms for Elias delta/gamma coding was adapted from a C
* implementation accompanying the paper <br>
* <pre>H.E. Williams, and J. Zobel, Compressing Integers for Fast File
* Access, Computer Journal, 42(3), 193-201, 1999. </pre>
*
* @author Rickard C鰏ter
* @version 1.0
*/
public class BitVector extends ByteStorable {
/**
* Default size of the vector
*
*/
public static int DEFAULT_BYTE_SIZE = 256;
/**
* define log10 for speed
*
*/
public static double log10 = 0.43429448190325176;
private static int masks[]
= { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
private byte []vector;// Sequence of bytes to hold bitvector
private int pos; // Current byte number
private int bit; // Current bit in byte
private int cur; // Temporary space for putting, getting bits
private int len; // Number of bits used
/**
* Creates a new <code>BitVector</code> instance, whose vector is 1024
* bytes large
*/
public BitVector(){
this(DEFAULT_BYTE_SIZE);
}
/**
* Creates a new <code>BitVector</code> instance, whose vector
* is 'bytesize' bytes large
*
* @param bytesize the byte size
*/
public BitVector(int bytesize){
init(bytesize);
}
static double log10(double x){
return log10*Math.log(x);
}
static double log2(double f){
return log10((f)) / 0.301029995; // log10(2.0)
}
/**
* Reset markers, i.e set pos() and bit() to zero, and set
* current byte to the first byte in vector
*/
public void reset(){
pos = bit = 0;
cur = vector[0];
}
/**
* Initialize (and remove any previous data) current vector
* to 'bytesize' number of zero-bytes, and reset markers
*
* @param bytesize number of bytes
*/
public void init(int bytesize){
vector = new byte[bytesize];
reset();
}
/**
* Current byte position in vector
*
* @return the byte position
*/
public int pos() {
return pos;
}
/**
* Current bit position in current byte
*
* @return the bit position
*/
public int bit() {
return bit;
}
/**
* Length in bits of current vector, i.e 8 * pos() + bit()
*
* @return length in bits of current vector
*/
public int len() {
return (8 * pos()) + bit();
}
/**
* Size in bytes of allocated vector
*
* @return size in bytes of allocated vector
*/
public int size() {
return (vector == null) ? 0 : vector.length;
}
public int byteSize() {
return 4 + (len() / 8) + 1;
}
public int byteSize(ByteBuffer bb) {
int position = bb.position();
int lenbits = bb.getInt();
bb.position(position);
return 4 + (lenbits / 8) + 1;
}
public ByteStorable fromBytes(ByteBuffer bb) {
len = bb.getInt();
if(vector == null || vector.length < ((len / 8) + 1))
vector = new byte[(len / 8) + 1];
bb.get(vector, 0, (len / 8) + 1);
pos = len / 8;
bit = len % 8;
cur = vector[pos];
return this;
}
public void toBytes(ByteBuffer bb) {
padVector();
bb.putInt(len());
bb.put(vector, 0, (len() / 8) + 1);
}
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append("\nsize of byte vector = " + vector.length);
sb.append("\nposition in byte vector = " + pos);
sb.append("\nposition in current byte = " + bit);
sb.append("\nbyte value of current byte = " + (int) (cur & 0xff));
sb.append("\nlength of vector in bits = " + len());
sb.append("\nbytes for length indicator = " + sizeBytesNeeded(len()));
sb.append("\nunused bits = " +
((vector.length*8) - len() -(sizeBytesNeeded(len())*8)));
return sb.toString();
}
/**
* Byte align the vector, always call this method when
* compression is completed
*/
public void padVector() {
if(pos >= vector.length)
expand();
vector[pos] = (byte) (( cur << ( 8-bit ) ) & 0xff);
len = 8*pos + bit;
}
/**
* Clear (set to zero) the last 'n' bits in current vector,
* not including the current bit, since it is not set. Does NOT
* work with the variableByte methods.
*
* @param n number of bits to clear
*/
public void clearLastBits(int n) {
if (n < bit) {
// case 1. n < bit, only clear n bits from cur
cur = (cur >> n);
bit -= n;
}
else {
// case 2. n >= bit, clear several bytes
n -= bit; // discard cur, contains bit bits
pos--;
int bytes = n / 8; // number of full bytes to clear
while (bytes > 0) {
vector[pos--] = (byte) 0;
bytes--;
}
n = n % 8; // number of bits left
if (n == 0) {
bit = 0; // first bit in next byte
cur = 0; // empty
pos++; // next byte
}
else {
cur = vector[pos]; // current byte
cur = (cur >> n); // remove last n bits
bit = 8 - n; // n bits removed
vector[pos] = (byte) 0;
}
}
}
/**
* Get next Gamma coded value.
*
* @return returns next value or -1 when end of vector is reached
*/
public int getGamma(){
int b, mag, val;
for(mag = 0; (b = getBit()) == 0; mag++);
if(b < 0)
return -1;
val = getBits(mag);
if(val < 0)
return -1;
return (1 << mag) | val;
}
/**
* Standard Gamma (Elias)
*
* @param val the number to compress
* @return number of bits put
*/
public int putGamma(int val){
int mag;
int ret;
mag = (int) Math.floor(log2((double) val));
ret = putBits(0, mag);
ret += putBits(val, mag+1);
return ret;
}
/**
* Calculate number of bits for val using Elias Gamma coding
*
* @param val the number to compress
* @return number of bits
*/
public int gammaBits(int val){
int mag;
int ret;
mag = (int) Math.floor(log2((double) val));
ret = mag;
ret += mag + 1;
return ret;
}
/**
* Get next Delta coded value.
* @return returns next value or -1 when end of vector is reached,
*/
public int getDelta() {
int mag, val;
mag = getGamma() - 1;
if(mag < 0)
return -1;
val = getBits(mag);
if(val < 0)
return -1;
return (1 << mag) | val;
}
/**
* Standard Delta (Elias)
*
* @param val the number to compress
* @return number of bits put
*/
public int putDelta(int val) {
int mag;
int ret;
mag = (int) Math.floor(log2((double) val));
ret = putGamma(mag+1);
ret += putBits(val, mag);
return ret;
}
/**
* Calculate number of bits for val using Elias Delta coding
*
* @param val the number to compress
* @return number of bits
*/
public int deltaBits(int val) {
int mag;
int ret;
mag = (int) Math.floor(log2((double) val));
// ret = putGamma(mag+1);
ret = gammaBits(mag + 1);
//ret += putBits(val, mag);
ret += mag;
return ret;
}
/**
* Put num bits into the vector from n
*
* @param val number to put
* @param num number of bits to put
* @return return number of bits written (i.e num)
*/
public int putBits(int val, int num){
int i, ret;
for(ret = 0, i = num-1; i >= 0; i--)
ret += putBit((val >> i) & 0x1);
return ret;
}
/**
* Get num bits from vector.
*
* @param num number of bits
* @return -1 if end of vector
*/
public int getBits(int num) {
int mask, shift, val, b;
b = val = 0;
for(shift = 8-bit; num >= shift; num -= shift, shift = 8-bit){
if(8*pos + bit >= len)
return -1;
mask = masks[shift];
val = ( val << shift ) | ( cur & mask );
bit = 0;
pos++;
cur = vector[pos];
}
for(; num > 0 && (b = getBit()) >= 0; num--)
val = (val << 1) | (b & 0x1);
return (b<0) ? -1 : val;
}
/**
* Get next byte from vector. NOTE that current bit must be
* at position 0 in the current byte.
*
* @return -1 if end of vector, otherwise the unsigned
* byte as an integer value from 0..255
*/
public int getByte() {
if(8*pos + bit >= len)
return -1;
cur = vector[pos++];
return (cur & 0xFF);
}
/**
* Put byte (first 8 bits from num) into vector. NOTE that
* current bit must be at position 0 in the current byte.
*
* @return number of bits put, i.e 8
*/
public int putByte(int num) {
if(pos >= vector.length) //expand the vector
expand();
vector[pos++] = (byte) (num & 0xFF);
return 8;
}
/**
* Code num by variable byte coding. NOTE that
* current bit must be at position 0 in the current byte.
*
* @param num the number
* @return number of bits put
*/
public int putVariableByte(int num) {
int c, bits = 0;
c = (num & 0x7F);
num = (num >> 7);
while (num > 0) {
bits += putByte(c);
c = (num & 0x7F);
num = num >> 7;
}
bits += putByte(c | 0x80);
return bits;
}
/**
* Calculate number of bits for value using variable byte coding. NOTE that
* current bit must be at position 0 in the current byte.
*
* @param num the number
* @return number of bits put
*/
public int variableByteBits(int num) {
int c, bits = 0;
c = (num & 0x7F);
num = (num >> 7);
while (num > 0) {
// bits += putByte(c);
bits += 8;
c = (num & 0x7F);
num = num >> 7;
}
//bits += putByte(c | 0x80);
bits += 8;
return bits;
}
/**
* Read next variable byte coded integer from vector. NOTE that
* current bit must be at position 0 in the current byte.
*
* @return next integer
*/
public int getVariableByte() {
int c, num = 0, i = 0;
c = getByte();
while ((c & 0x80) == 0) {
num |= (c << (7*i));
c = getByte();
i++;
}
num |= ((c & ~(0x80))<< (7*i));
return num;
}
/**
* Put a bit (zero or one) at current bit position
*
* @param b zero or one
* @return number of bits put, i.e 1
*/
public int putBit(int b){
cur = (cur << 1) | (b & 0x1);
bit++;
if(bit == 8){ //go to next byte
if(pos >= vector.length) //expand the vector
expand();
vector[pos] = (byte) (cur & 0xff);
cur = 0;
pos++;
bit = 0;
}
return 1;
}
/**
* Get bit (zero or one) from current position
*
* @return 0 or 1
*/
public int getBit(){
int b;
if(8*pos + bit >= len)
return -1;
b = (cur >> (7 - bit)) & 0x1;
bit++;
if(bit == 8){
pos++;
bit = 0;
cur = vector[pos];
}
return b;
}
/**
* Expand (1.75 times the size of) current vector while keeping
* the old data intact
*/
public void expand() {
byte [] newVector = null;
int i;
int newsize = (int) (((double) vector.length) * 1.75);
if ((newVector = new byte[newsize]) == null) {
System.err.println("Error in expand() memory allocation."
+ "Tried to allocate: "
+ (newsize) + " bytes\n");
throw new OutOfMemoryError(""+(newsize));
}
else {
System.arraycopy(vector, 0, newVector, 0, vector.length);
vector = newVector;
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -