// hashfile.java
// (The "字号:" line was code-viewer UI chrome, not part of the source.)
/** * Syncs any unstored data to the hash file. **/ public void sync() throws IOException { // Update the file header _writeFileHeader(); } public void close() throws IOException { _raf.close(); _raf = null; _fileChannel = null; }/*----------------+| Utility methods |+----------------*/ /** * Writes the bucket count, bucket size and item count to the file header. **/ private void _writeFileHeader() throws IOException { ByteBuffer buf = ByteBuffer.allocate(12); buf.putInt(0, _bucketCount); buf.putInt(4, _bucketSize); buf.putInt(8, _itemCount); _fileChannel.write(buf, 0L); } /** * Reads the bucket count, bucket size and item count from the file header. **/ private void _readFileHeader() throws IOException { ByteBuffer buf = ByteBuffer.allocate(12); _fileChannel.read(buf, 0L); _bucketCount = buf.getInt(0); _bucketSize = buf.getInt(4); _itemCount = buf.getInt(8); } /** * Returns the offset of the bucket for the specified hash code. **/ private long _getBucketOffset(int hash) { int bucketNo = hash % _bucketCount; if (bucketNo < 0) { bucketNo += _bucketCount; } return HEADER_LENGTH + (long)bucketNo * _recordSize; } /** * Returns the offset of the overflow bucket with the specified ID. **/ private long _getOverflowBucketOffset(int bucketID) { return HEADER_LENGTH + ((long)_bucketCount + (long)bucketID - 1L) * _recordSize; } /** * Creates a new overflow bucket and returns its ID. 
**/ private int _createOverflowBucket() throws IOException { long offset = _fileChannel.size(); _writeEmptyBuckets(offset, 1); return (int) ((offset - HEADER_LENGTH) / _recordSize) - _bucketCount + 1; } private void _writeEmptyBuckets(long fileOffset, int bucketCount) throws IOException { ByteBuffer emptyBucket = ByteBuffer.allocate(_recordSize); for (int i = 0; i < bucketCount; i++) { _fileChannel.write(emptyBucket, fileOffset + i*(long)_recordSize); emptyBucket.rewind(); } } private int _findEmptySlotInBucket(ByteBuffer bucket) { for (int slotNo = 0; slotNo < _bucketSize; slotNo++) { // Check for offsets that are equal to 0 if (bucket.getLong(ITEM_SIZE*slotNo + 4) == 0L) { return slotNo; } } return -1; } /** * Double the number of buckets in the hash file and rehashes the * stored items. **/ private void _increaseHashTable() throws IOException { //System.out.println("Increasing hash table to " + (2*_bucketCount) + " buckets..."); //long startTime = System.currentTimeMillis(); long oldTableSize = HEADER_LENGTH + (long)_bucketCount * _recordSize; long newTableSize = HEADER_LENGTH + (long)_bucketCount * _recordSize * 2; long oldFileSize = _fileChannel.size(); // includes overflow buckets // Move any overflow buckets out of the way to a temporary file File tmpFile = new File(_file.getParentFile(), "rehash_" + _file.getName()); RandomAccessFile tmpRaf = _createEmptyFile(tmpFile); FileChannel tmpChannel = tmpRaf.getChannel(); // Transfer the overflow buckets to the temp file TransferUtil.transferTo(_fileChannel, oldTableSize, oldFileSize, tmpChannel); // Increase hash table by factor 2 _writeEmptyBuckets(oldTableSize, _bucketCount); _bucketCount *= 2; // Discard any remaining overflow buffers _fileChannel.truncate(newTableSize); ByteBuffer bucket = ByteBuffer.allocate(_recordSize); ByteBuffer newBucket = ByteBuffer.allocate(_recordSize); // Rehash items in 'normal' buckets, half of these will move to a new location, // but none of them will trigger the creation of new 
overflow buckets. Any (now // deprecated) references to overflow buckets are removed too. // All items that are moved to a new location end up in one and the same new and // empty bucket. All items are divided between the old and the new bucket and the // changes to the buckets are written to disk only once. for (long bucketOffset = HEADER_LENGTH; bucketOffset < oldTableSize; bucketOffset += _recordSize) { _fileChannel.read(bucket, bucketOffset); boolean bucketChanged = false; long newBucketOffset = 0L; for (int slotNo = 0; slotNo < _bucketSize; slotNo++) { long dataOffset = bucket.getLong(ITEM_SIZE*slotNo + 4); if (dataOffset != 0L) { // Slot is not empty int hash = bucket.getInt(ITEM_SIZE*slotNo); long newOffset = _getBucketOffset(hash); if (newOffset != bucketOffset) { // Move this item to new bucket... newBucket.putInt(hash); newBucket.putLong(dataOffset); // ...and remove it from the current bucket bucket.putInt(ITEM_SIZE*slotNo, 0); bucket.putLong(ITEM_SIZE*slotNo + 4, 0L); bucketChanged = true; newBucketOffset = newOffset; } } } if (bucketChanged) { // Some of the items were moved to the new bucket, write it to the file newBucket.flip(); _fileChannel.write(newBucket, newBucketOffset); newBucket.clear(); } // Reset overflow ID in the old bucket to 0 if necessary if (bucket.getInt(ITEM_SIZE*_bucketSize) != 0) { bucket.putInt(ITEM_SIZE*_bucketSize, 0); bucketChanged = true; } if (bucketChanged) { // Some of the items were moved to the new bucket or the overflow // ID has been reset; write the bucket back to the file bucket.rewind(); _fileChannel.write(bucket, bucketOffset); } bucket.clear(); } // Rehash items in overflow buckets. This might trigger the creation of // new overflow buckets so we can't optimize this in the same way as we // rehash the normal buckets. 
long tmpFileSize = tmpChannel.size(); for (long bucketOffset = 0L; bucketOffset < tmpFileSize; bucketOffset += _recordSize) { tmpChannel.read(bucket, bucketOffset); for (int slotNo = 0; slotNo < _bucketSize; slotNo++) { long dataOffset = bucket.getLong(ITEM_SIZE*slotNo + 4); if (dataOffset != 0L) { // Slot is not empty int hash = bucket.getInt(ITEM_SIZE*slotNo); long newBucketOffset = _getBucketOffset(hash); // Move this item to new location... _storeOffset(newBucketOffset, hash, dataOffset); // ...and remove it from the current bucket bucket.putInt(ITEM_SIZE*slotNo, 0); bucket.putLong(ITEM_SIZE*slotNo + 4, 0L); } } bucket.clear(); } // Discard the temp file tmpRaf.close(); tmpFile.delete(); //long endTime = System.currentTimeMillis(); //System.out.println("Hash table rehashed in " + (endTime-startTime) + " ms"); } public void dumpContents(PrintStream out) throws IOException { out.println(); out.println("*** hash file contents ***"); out.println("_bucketCount="+_bucketCount); out.println("_bucketSize="+_bucketSize); out.println("_itemCount="+_itemCount); ByteBuffer buf = ByteBuffer.allocate(_recordSize); _fileChannel.position(HEADER_LENGTH); out.println("---Buckets---"); for (int bucketNo = 1; bucketNo <= _bucketCount; bucketNo++) { buf.clear(); _fileChannel.read(buf); out.print("Bucket " + bucketNo + ": "); for (int slotNo = 0; slotNo < _bucketSize; slotNo++) { int hash = buf.getInt(ITEM_SIZE*slotNo); long offset = buf.getLong(ITEM_SIZE*slotNo + 4); if (slotNo > 0) { out.print(" "); } out.print("["+toHexString(hash)+","+offset+"]"); } int overflowID = buf.getInt(ITEM_SIZE*_bucketSize); out.println("---> "+overflowID); } out.println("---Overflow Buckets---"); int bucketNo = 0; while (_fileChannel.position() < _fileChannel.size()) { buf.clear(); _fileChannel.read(buf); bucketNo++; out.print("Bucket " + bucketNo + ": "); for (int slotNo = 0; slotNo < _bucketSize; slotNo++) { int hash = buf.getInt(ITEM_SIZE*slotNo); long offset = buf.getLong(ITEM_SIZE*slotNo + 4); if 
(slotNo > 0) { out.print(" "); } out.print("["+toHexString(hash)+","+offset+"]"); } int overflowID = buf.getInt(ITEM_SIZE*_bucketSize); out.println("---> "+overflowID); } out.println("*** end of hash file contents ***"); out.println(); } private String toHexString(int decimal) { String hex = Integer.toHexString(decimal); StringBuffer result = new StringBuffer(8); for (int i = hex.length(); i < 8; i++) { result.append("0"); } result.append(hex); return result.toString(); }} // End inner class HashFile0/*---------------------------+| Inner class OffsetIterator |+---------------------------*/public static class OffsetIterator { private HashFile0 _hashFile; private int _queryHash; private ByteBuffer _bucketBuffer; private long _bucketOffset; private int _slotNo; private OffsetIterator(HashFile0 hashFile, int hash) throws IOException { _hashFile = hashFile; _queryHash = hash; _bucketBuffer = ByteBuffer.allocate(_hashFile.getRecordSize()); // Calculate offset for initial bucket _bucketOffset = _hashFile._getBucketOffset(hash); // Read initial bucket _hashFile.getFileChannel().read(_bucketBuffer, _bucketOffset); _slotNo = -1; } /** * Returns the next offset that has been mapped to the specified hash * code, or <tt>-1</tt> if no more offset were found. 
**/ public long next() throws IOException { while (_bucketBuffer != null) { // Search through current bucket _slotNo++; while (_slotNo < _hashFile.getBucketSize()) { if (_bucketBuffer.getInt(ITEM_SIZE*_slotNo) == _queryHash) { return _bucketBuffer.getLong(ITEM_SIZE*_slotNo + 4); } _slotNo++; } // No matching hash code in current bucket, check overflow bucket int overflowID = _bucketBuffer.getInt(ITEM_SIZE*_hashFile.getBucketSize()); if (overflowID == 0) { // No overflow bucket, end the search _bucketBuffer = null; _bucketOffset = 0L; } else { // Continue with overflow bucket _bucketOffset = _hashFile._getOverflowBucketOffset(overflowID); _bucketBuffer.clear(); _hashFile.getFileChannel().read(_bucketBuffer, _bucketOffset); _slotNo = -1; } } return -1; }} // End inner class OffsetIterator public static void main(String[] args) throws Exception { HashFile hashFile = new HashFile(new File(args[0])); hashFile._hashFile.dumpContents(System.out); hashFile.close(); }} // End class HashFile
// (End of source. The remainder of the scraped page was code-viewer UI text:
// keyboard-shortcut help — copy, search, full-screen, theme, font size.)