📄 bitvector.java

📁 实现数据库的storage manager 功能
💻 JAVA
字号:
/*
 * Copyright (c) 2000-2004, Rickard C鰏ter, Martin Svensson
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met:
 * 
 * Redistributions of source code must retain the above copyright notice, 
 * this list of conditions and the following disclaimer. 
 * Redistributions in binary form must reproduce the above copyright notice, 
 * this list of conditions and the following disclaimer in the documentation 
 * and/or other materials provided with the distribution. 
 * Neither the name of SICS nor the names of its contributors 
 * may be used to endorse or promote products derived from this software 
 * without specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *
 */

package com.mellowtech.disc;

import java.nio.ByteBuffer;
import se.sics.util.DataTypeUtils;
import java.util.Arrays;

/**
 * Vector of bits. Methods for compressing integers using Elias
 * Gamma/Delta and Variable Byte coding. <p> Note: Do not mix variable
 * byte coding with Elias gamma/delta coding in this version, since
 * this is not yet taken care of in this implementatation. <p>
 *
 * The algorithms for Elias delta/gamma coding was adapted from a C
 * implementation accompanying the paper <br>
 * <pre>H.E. Williams, and J. Zobel, Compressing Integers for Fast File 
 * Access, Computer Journal, 42(3), 193-201, 1999. </pre>
 * 
 * @author Rickard C鰏ter
 * @version 1.0 
 */
public class BitVector extends ByteStorable {
  
  /**
   * Default size of the vector
   *
   */
  public static int DEFAULT_BYTE_SIZE = 256;

  /**
   * define log10 for speed
   *
   */
  public static double log10 = 0.43429448190325176;


  private static int masks[] 
    = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
  
  private byte []vector;// Sequence of bytes to hold bitvector 
  private int	pos;	// Current byte number 
  private int	bit;	// Current bit in byte 
  private int	cur;	// Temporary space for putting, getting bits 
  private int	len;	// Number of bits used
    
  /**
   * Creates a new <code>BitVector</code> instance, whose vector is 1024 
   * bytes large 
   */
  public BitVector(){
    this(DEFAULT_BYTE_SIZE);
  }

  /**
   * Creates a new <code>BitVector</code> instance, whose vector 
   * is 'bytesize' bytes large
   *
   * @param bytesize the byte size
   */
  public BitVector(int bytesize){
    init(bytesize);
  }

  static double log10(double x){
    return log10*Math.log(x);
  }

  static double log2(double f){ 
    return log10((f)) / 0.301029995; // log10(2.0)
  }

  /**
   * Reset markers, i.e set pos() and bit() to zero, and set 
   * current byte to the first byte in vector
   */
  public void reset(){
    pos = bit = 0;
    cur = vector[0];
  }

  /**
   * Initialize (and remove any previous data) current vector 
   * to 'bytesize' number of zero-bytes, and reset markers
   *
   * @param bytesize number of bytes
   */
  public void init(int bytesize){
    vector = new byte[bytesize];
    reset();
  }
  
  /**
   * Current byte position in vector
   *
   * @return the byte position
   */
  public int pos() {
    return pos;
  }
  
  /**
   * Current bit position in current byte
   *
   * @return the bit position
   */
  public int bit() {
    return bit;
  }
  
  /**
   * Length in bits of current vector, i.e 8 * pos() + bit()
   *
   * @return length in bits of current vector
   */
  public int len() {
    return (8 * pos()) + bit();
  }
  
  /**
   * Size in bytes of allocated vector
   *
   * @return size in bytes of allocated vector
   */
  public int size() {
    return (vector == null) ? 0 : vector.length;
  }
  
  public int byteSize() {
    return 4 + (len() / 8) + 1;
  }
  
  public int byteSize(ByteBuffer bb) {
    int position = bb.position();
    int lenbits = bb.getInt();
    bb.position(position);
    return 4 + (lenbits / 8) + 1;
  }
  
  public ByteStorable fromBytes(ByteBuffer bb) {
    len = bb.getInt();
    
    if(vector == null || vector.length < ((len / 8) + 1))
      vector = new byte[(len / 8) + 1];
    
    bb.get(vector, 0, (len / 8) + 1);
    
    pos = len / 8;
    bit = len % 8;
    cur = vector[pos];
    return this;
  }
  
  public void toBytes(ByteBuffer bb) {
    padVector();
    bb.putInt(len());
    bb.put(vector, 0, (len() / 8) + 1);
  }
  
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append("\nsize of byte vector        = " + vector.length);
    sb.append("\nposition in byte vector    = " + pos);
    sb.append("\nposition in current byte   = " + bit);
    sb.append("\nbyte value of current byte = " + (int) (cur & 0xff));
    sb.append("\nlength of vector in bits   = " + len());
    sb.append("\nbytes for length indicator = " + sizeBytesNeeded(len()));
    sb.append("\nunused bits                = " + 
	      ((vector.length*8) - len() -(sizeBytesNeeded(len())*8)));
    return sb.toString();
  }
 
  
  /**
   * Byte align the vector, always call this method when 
   * compression is completed
   */
  public void padVector() {
    if(pos >= vector.length)
      expand();
    
    vector[pos] = (byte) (( cur << ( 8-bit ) ) & 0xff);
    len = 8*pos + bit;
  }

  
  /**
   * Clear (set to zero) the last 'n' bits in current vector, 
   * not including the current bit, since it is not set. Does NOT 
   * work with the variableByte methods. 
   *
   * @param n number of bits to clear
   */
  public void clearLastBits(int n) {
    if (n < bit) {
      // case 1. n < bit, only clear n bits from cur
      cur = (cur >> n);
      bit -= n;
    }
    else {
      // case 2. n >= bit, clear several bytes
      n -= bit;            // discard cur, contains bit bits
      pos--;
      int bytes = n / 8;   // number of full bytes to clear
      while (bytes > 0) {
	vector[pos--] = (byte) 0;
	bytes--;
      }
      n = n % 8;           // number of bits left
      
      if (n == 0) {
	bit = 0;           // first bit in next byte
	cur = 0;           // empty
	pos++;             // next byte
      }
      else {
	cur = vector[pos]; // current byte
	cur = (cur >> n);  // remove last n bits
	bit = 8 - n;       // n bits removed 
	vector[pos] = (byte) 0;
      }
    }
  }
  
  /**
   * Get next Gamma coded value.
   *
   * @return returns next value or -1 when end of vector is reached
   */
  public int getGamma(){
    int b, mag, val;

    for(mag = 0; (b = getBit()) == 0; mag++);
    
    if(b < 0)
      return -1;
    
    val = getBits(mag);
    
    if(val < 0)
      return -1;
    
    return (1 << mag) | val;
  }

  /**
   * Standard Gamma (Elias)
   *
   * @param val the number to compress
   * @return number of bits put
   */
  public int putGamma(int val){
    int mag;
    int ret;
    
    mag = (int) Math.floor(log2((double) val));
    ret = putBits(0, mag);
    ret += putBits(val, mag+1);
    
    return ret;
  }
  
  /**
   * Calculate number of bits for val using Elias Gamma coding 
   *
   * @param val the number to compress
   * @return number of bits 
   */
  public int gammaBits(int val){
    int mag;
    int ret;
    
    mag = (int) Math.floor(log2((double) val));
    ret = mag;
    ret += mag + 1;
    
    return ret;
  }
  
  /**
   * Get next Delta coded value.
   * @return returns next value or -1 when end of vector is reached,
   */
  public int getDelta() {
    int	mag, val;
    
    mag = getGamma() - 1;

    if(mag < 0)
      return -1;
    
    val = getBits(mag);
    
    if(val < 0)
      return -1;

    return (1 << mag) | val;
  }
  
  /**
   * Standard Delta (Elias)
   *
   * @param val the number to compress
   * @return number of bits put
   */
  public int putDelta(int val) {
    int mag;
    int	ret;

    mag = (int) Math.floor(log2((double) val));
    
    ret = putGamma(mag+1);
    ret += putBits(val, mag);
    return ret;
  }
  
  /**
   * Calculate number of bits for val using Elias Delta coding 
   *
   * @param val the number to compress
   * @return number of bits 
   */
  public int deltaBits(int val) {
    int mag;
    int	ret;

    mag = (int) Math.floor(log2((double) val));
    
    // ret = putGamma(mag+1);
    ret = gammaBits(mag + 1);
    //ret += putBits(val, mag);
    ret += mag;
    return ret;
  }
  
  /**
   * Put num bits into the vector from n
   *
   * @param val number to put
   * @param num number of bits to put
   * @return return number of bits written (i.e num)
   */
  public int putBits(int val, int num){
    int i, ret;
    for(ret = 0, i = num-1; i >= 0; i--)
      ret += putBit((val >> i) & 0x1);
    return ret;
  }


  /**
   * Get num bits from vector.
   *
   * @param num number of bits
   * @return -1 if end of vector
   */
  public int getBits(int num) {
    int	mask, shift, val, b;
    b = val = 0;
    
    for(shift = 8-bit; num >= shift; num -= shift, shift = 8-bit){
      if(8*pos + bit >= len)
	return -1;
      mask = masks[shift];
      val = ( val << shift ) | ( cur & mask );
      bit = 0;
      pos++;
      cur = vector[pos];
    }
  
    for(; num > 0 && (b = getBit()) >= 0; num--)
      val = (val << 1) | (b & 0x1);
    
    return (b<0) ? -1 : val;
  }

  /**
   * Get next byte from vector. NOTE that current bit must be 
   * at position 0 in the current byte.
   *
   * @return -1 if end of vector, otherwise the unsigned 
   * byte as an integer value from 0..255
   */
  public int getByte() {
    if(8*pos + bit >= len)
      return -1;
    
    cur = vector[pos++];
    return (cur & 0xFF);
  }
  
  /**
   * Put byte (first 8 bits from num) into vector. NOTE that 
   * current bit must be at position 0 in the current byte.
   *
   * @return number of bits put, i.e 8
   */
  public int putByte(int num) {
    if(pos >= vector.length) //expand the vector
      expand();
    vector[pos++] = (byte) (num & 0xFF);
    return 8;
  }
  
  /**
   * Code num by variable byte coding. NOTE that 
   * current bit must be at position 0 in the current byte.
   *
   * @param num the number
   * @return number of bits put
   */
  public int putVariableByte(int num) {
    int c, bits = 0;
    c = (num & 0x7F);
    num = (num >> 7);
    while (num > 0) {
      bits += putByte(c);
      c = (num & 0x7F);
      num = num >> 7;
    }
    bits += putByte(c | 0x80);
    return bits;
  }
  
  /**
   * Calculate number of bits for value using variable byte coding. NOTE that 
   * current bit must be at position 0 in the current byte.
   *
   * @param num the number
   * @return number of bits put
   */
  public int variableByteBits(int num) {
    int c, bits = 0;
    c = (num & 0x7F);
    num = (num >> 7);
    while (num > 0) {
      // bits += putByte(c);
      bits += 8;
      c = (num & 0x7F);
      num = num >> 7;
    }
    //bits += putByte(c | 0x80);
    bits += 8;
    return bits;
  }
  
  
  /**
   * Read next variable byte coded integer from vector. NOTE that 
   * current bit must be at position 0 in the current byte.
   *
   * @return next integer
   */
  public int getVariableByte() {
    int c, num = 0, i = 0;
    c = getByte();
    while ((c & 0x80) == 0) {
      num |= (c << (7*i));
      c = getByte();
      i++;
    }
    num |= ((c & ~(0x80))<< (7*i));
    return num;
  }
  
  
  
  
  /**
   * Put a bit (zero or one) at current bit position 
   *
   * @param b zero or one
   * @return number of bits put, i.e 1
   */
  public int putBit(int b){
    cur = (cur << 1) | (b & 0x1);
    bit++;
    
    if(bit == 8){ //go to next byte
      if(pos >= vector.length) //expand the vector
	expand();
      
      vector[pos] = (byte) (cur & 0xff);
      cur = 0;
      pos++;
      bit = 0;
    }
    return 1;
  }

  /**
   * Get bit (zero or one) from current position
   *
   * @return 0 or 1
   */
  public int getBit(){
    int	b;
        
    if(8*pos + bit >= len)
      return -1;
    
    b = (cur >> (7 - bit)) & 0x1;
    bit++;
    
    if(bit == 8){
      pos++;
      bit = 0;
      cur = vector[pos];
    }
    return b;
  }

  /**
   * Expand (1.75 times the size of) current vector while keeping 
   * the old data intact
   */
  public void expand() {
    byte [] newVector = null;
    int	 i;
    int  newsize = (int) (((double) vector.length) * 1.75);
    if ((newVector = new byte[newsize]) == null) {
      System.err.println("Error in expand() memory allocation." 
			 + "Tried to allocate: " 
			 + (newsize) + " bytes\n");
      throw new OutOfMemoryError(""+(newsize));
    }	
    else {
      System.arraycopy(vector, 0, newVector, 0, vector.length);
      vector = newVector;
    }
  }
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -