📄 bitvector.java.old
字号:
package com.mellowtech.disc;import java.nio.ByteBuffer;import se.sics.util.DataTypeUtils;/** * Vector of bits. Methods for inserting Elias Gamma/Delta, Golomb * and Variable Byte coding of integers. <p> * The variableByte methods should NOT be mixed with the * other methods such as putGamma/Delta, since the variable * bytes are not inserted as a sequence of 8 bits, instead * as whole bytes in the vector. * * @author Rickard C鰏ter * @version 1.0 */public class BitVector extends ByteStorable { /** * Default size of the vector * */ public static int DEFAULT_BYTE_SIZE = 1024; /** * define log10 for speed * */ public static double log10 = 0.43429448190325176; private static int masks[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff }; private static GolHashTable sGHTable = null; private byte []vector;// Sequence of bytes to hold bitvector private int pos; // Current byte number private int bit; // Current bit in byte private int cur; // Temporary space for putting, getting bits private int len; // Number of bits used static { sGHTable = new GolHashTable(); } /** * Creates a new <code>BitVector</code> instance, whose vector is 1024 * bytes large */ public BitVector(){ this(DEFAULT_BYTE_SIZE); } /** * Creates a new <code>BitVector</code> instance, whose vector * is 'bytesize' bytes large * * @param bytesize the byte size */ public BitVector(int bytesize){ init(bytesize); } static double log10(double x){ return log10*Math.log(x); } static double L2(double f){ return log10((f)) / 0.301029995; // log10(2.0) } static double L4(double f){ return log10((f)) / 0.602059999; //log10(4.0) } static double L6(double f){ return log10((f)) / 0.778151250; //log10(6.0) } static double L8(double f){ return log10((f)) / 0.903089990;//log10(8.0) } static double LN(double f){ return log10((f)) / 0.434294480; //log10(e) } /** * Reset markers, i.e set pos() and bit() both to zero, and set * current byte to the first byte in vector */ public void reset(){ pos = 0; bit = 0; cur = vector[0]; } /** * Initialize (and remove any previous data) current vector * to 'bytesize' number of zero-bytes, and reset markers * * @param bytesize number of bytes */ public void init(int bytesize){ vector = new byte[bytesize]; reset(); } /** * Current byte position in vector * * @return the byte position */ public int pos() { return pos; } /** * Current bit position in current byte * * @return the bit position */ public int bit() { return bit; } /** * Length in bits of current vector, i.e 8 * pos() + bit() * * @return length in bits of current vector */ public int len() { return 8 * pos() + bit(); } /** * Size in bytes of allocated vector * * @return size in bytes of allocated vector */ public int size() { return (vector == null) ? 0 : vector.length; } public int byteSize() { return sizeBytesNeeded(len()) + len() / 8 + 1; } public int byteSize(ByteBuffer bb) { int position = bb.position(); int len_bits = getSize(bb); bb.position(position); return sizeBytesNeeded(len_bits) + len_bits / 8 + 1; } public ByteStorable fromBytes(ByteBuffer bb) { len = getSize(bb); if(vector == null || vector.length < ((len / 8) + 1)) vector = new byte[len / 8 + 1]; bb.get(vector, 0, (len / 8) + 1); pos = bit = 0; cur = vector[0]; return this; } public void toBytes(ByteBuffer bb) { putSize(len, bb); bb.put(vector, 0, (len / 8) + 1); } public String toString() { StringBuffer sb = new StringBuffer(); sb.append("\nsize of byte vector = " + vector.length); sb.append("\nposition in byte vector = " + pos); sb.append("\nposition in current byte = " + bit); sb.append("\nbyte value of current byte = " + (int) (cur & 0xff)); sb.append("\nlength of vector in bits = " + len); sb.append("\nbytes for length indicator = " + sizeBytesNeeded(len)); sb.append("\nunused bits = " + ((vector.length*8) - len() -(sizeBytesNeeded(len)*8))); return sb.toString(); } /** * Byte align the vector, always call this method when * compression is completed */ public void padVector() { if(pos >= vector.length) expand(); vector[pos] = (byte) (( cur << ( 8-bit ) ) & 0xff); len = 8*pos + bit; } /** * Clear (set to zero) the last 'n' bits in current vector, * not including the current bit, since it is not set. Does NOT * work with the variableByte methods. * * @param n number of bits to clear */ public void clearLastBits(int n) { if (n < bit) { // case 1. n < bit, only clear n bits from cur cur = (cur >> n); bit -= n; } else { // case 2. n >= bit, clear several bytes n -= bit; // discard cur, contains bit bits pos--; int bytes = n / 8; // number of full bytes to clear while (bytes > 0) { vector[pos--] = (byte) 0; bytes--; } n = n % 8; // number of bits left if (n == 0) { bit = 0; // first bit in next byte cur = 0; // empty pos++; // next byte } else { cur = vector[pos]; // current byte cur = (cur >> n); // remove last n bits bit = 8 - n; // n bits removed vector[pos] = (byte) 0; } } } /** * Get next Golomb number using parameter b * * @param b coding parameter * @return next value */ public int getGolomb(int b) { int mag; int q, d; int rem; int ret; int slot; GolHash golHash; for(mag = 0; getBit() == 0; mag++); slot = b % sGHTable.size(); golHash = sGHTable.get(slot); if(golHash != null && golHash.b == b) q = golHash.q; else{ golHash = new GolHash(); q = (int) Math.floor(L2 ((double) b)); golHash.b = b; golHash.q = q; sGHTable.set(slot, golHash); } rem = getBits(q); d = (1 << (q+1)) - b; if(rem >= d) rem = ((rem << 1) | getBit()) - d; ret = mag * b + rem + 1; ret--; return ret; } /** * Code value using Golomb coding with parameter b * * @param val the value * @param b coding parameter * @return next value */ public int putGolomb(int val, int b){ int mag; int ret; int rem; int q; int d; val++; //ensure non-zero values /* work out floor of (x - 1)/b */ mag = (int) Math.floor(((double) (val - 1)) / ((double) b)); ret = putBits(0, mag); ret += putBit(1); rem = val - (mag * b) - 1; q = (int) Math.floor(L2((double) b)); d = (1 << (q+1)) - b; if (rem < d) ret += putBits(rem, q); else ret += putBits(d+rem, q+1); return ret; } /** * Calculate number of bits for Golomb code for val with parameter b * * @param val the value * @param b coding parameter * @return number of bits for the Golomb code */ public int golombBits(int val, int b){ int mag; int ret; int rem; int q; int d; val++; //ensure non-zero values /* work out floor of (x - 1)/b */ mag = (int) Math.floor(((double) (val - 1)) / ((double) b)); //ret = putBits(0, mag); //ret += putBit(1); ret = mag + 1; rem = val - (mag * b) - 1; q = (int) Math.floor(L2((double) b)); d = (1 << (q+1)) - b; if (rem < d) // ret += putBits(rem, q); ret += q; else // ret += putBits(d+rem, q+1); ret += q + 1; return ret; } /** * Get next Gamma coded value. * * @return returns next value or -1 when end of vector is reached */ public int getGamma(){ int b, mag, val; for(mag = 0; (b = getBit()) == 0; mag++); if(b < 0) return -1; val = getBits(mag); if(val < 0) return -1; return (1 << mag) | val; } /** * Standard Gamma (Elias) * * @param val the number to compress * @return number of bits put */ public int putGamma(int val){ int mag; int ret; mag = (int) Math.floor(L2((double) val)); ret = putBits(0, mag); ret += putBits(val, mag+1); return ret; } /** * Calculate number of bits for val using Elias Gamma coding * * @param val the number to compress * @return number of bits */ public int gammaBits(int val){ int mag; int ret; mag = (int) Math.floor(L2((double) val)); ret = mag; ret += mag + 1; return ret; } /** * Get next Delta coded value. * @return returns next value or -1 when end of vector is reached, */ public int getDelta() { int mag, val; mag = getGamma() - 1; if(mag < 0) return -1; val = getBits(mag); if(val < 0) return -1; return (1 << mag) | val; } /** * Standard Delta (Elias) * * @param val the number to compress * @return number of bits put */ public int putDelta(int val) { int mag; int ret; mag = (int) Math.floor(L2((double) val)); ret = putGamma(mag+1); ret += putBits(val, mag); return ret; } /** * Calculate number of bits for val using Elias Delta coding * * @param val the number to compress * @return number of bits */ public int deltaBits(int val) { int mag; int ret; mag = (int) Math.floor(L2((double) val)); // ret = putGamma(mag+1); ret = gammaBits(mag + 1); //ret += putBits(val, mag); ret += mag; return ret; } /** * Put num bits into the vector from n * * @param val number to put * @param num number of bits to put * @return return number of bits written (i.e num) */ public int putBits(int val, int num){ int i, ret; for(ret = 0, i = num-1; i >= 0; i--) ret += putBit((val >> i) & 0x1); return ret; } /** * Get num bits from vector. * * @param num number of bits * @return -1 if end of vector */ public int getBits(int num) { int mask, shift, val, b; b = val = 0; for(shift = 8-bit; num >= shift; num -= shift, shift = 8-bit){ if(8*pos + bit >= len) return -1; mask = masks[shift]; val = ( val << shift ) | ( cur & mask ); bit = 0; pos++; cur = vector[pos]; } for(; num > 0 && (b = getBit()) >= 0; num--) val = (val << 1) | (b & 0x1); return (b<0) ? -1 : val; } /** * Get next byte from vector. NOTE that current bit must be * at position 0 in the current byte. * * @return -1 if end of vector, otherwise the unsigned * byte as an integer value from 0..255 */ public int getByte() { if(8*pos + bit >= len) return -1; cur = vector[pos++]; return (cur & 0xFF); } /** * Put byte (first 8 bits from num) into vector. NOTE that * current bit must be at position 0 in the current byte. * * @return number of bits put, i.e 8 */ public int putByte(int num) { if(pos >= vector.length) //expand the vector expand(); vector[pos++] = (byte) (num & 0xFF); return 8; } /** * Code num by variable byte coding. NOTE that * current bit must be at position 0 in the current byte. * * @param num the number * @return number of bits put */ public int putVariableByte(int num) { int c, bits = 0; c = (num & 0x7F); num = (num >> 7); while (num > 0) { bits += putByte(c); c = (num & 0x7F); num = num >> 7; } bits += putByte(c | 0x80); return bits; } /** * Calculate number of bits for value using variable byte coding. NOTE that * current bit must be at position 0 in the current byte. * * @param num the number * @return number of bits put */ public int variableByteBits(int num) { int c, bits = 0; c = (num & 0x7F); num = (num >> 7); while (num > 0) { // bits += putByte(c); bits += 8; c = (num & 0x7F); num = num >> 7; } //bits += putByte(c | 0x80); bits += 8; return bits; } /** * Read next variable byte coded integer from vector. NOTE that * current bit must be at position 0 in the current byte. * * @return next integer */ public int getVariableByte() { int c, num = 0, i = 0; c = getByte(); while ((c & 0x80) == 0) { num |= (c << (7*i)); c = getByte(); i++; } num |= ((c & ~(0x80))<< (7*i)); return num; } /** * Put a bit (zero or one) at current bit position * * @param b zero or one * @return number of bits put, i.e 1 */ public int putBit(int b){ cur = (cur << 1) | (b & 0x1); bit++; if(bit == 8){ //go to next byte if(pos >= vector.length) //expand the vector expand(); vector[pos] = (byte) (cur & 0xff); cur = 0; pos++; bit = 0; } return 1; } /** * Get bit (zero or one) from current position * * @return 0 or 1 */ public int getBit(){ int b; if(8*pos + bit >= len) return -1; b = (cur >> (7 - bit)) & 0x1; bit++; if(bit == 8){ pos++; bit = 0; cur = vector[pos]; } return b; } /** * Expand (double the size of) current vector while keeping * the old data intact */ public void expand() { byte [] newVector = null; int i; if ((newVector = new byte[vector.length*2]) == null) { System.err.println("Error in expandvec malloc. " + "Tried to allocate: " + (vector.length*2) + " bytes\n"); throw new OutOfMemoryError(""+(vector.length*2)); } else { System.arraycopy(vector, 0, newVector, 0, vector.length); vector = newVector; } } class Posting implements Comparable { int docno; int freq; public Posting(int docno, int freq) { this.docno = docno; this.freq = freq; } public int compareTo(Object o) { Posting p = (Posting) o; if (this.freq > p.freq) return -1; else if (this.freq < p.freq) return 1; else if (this.docno > p.docno) return 1; else return -1; } }}class GolHashTable{ public static int GOLHASHTABLESIZE = 102397; GolHash[] table; public GolHashTable(){ table = new GolHash[GOLHASHTABLESIZE]; } public int size(){ return table.length; } public GolHash get(int i){ return table[i]; } public void set(int i, GolHash gh){ table[i] = gh; }}class GolHash{ int b; /* b value used for lookup */ int q; /* q value that is floor(log_2(b)) */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -