📄 hash.cs
字号:
//******************************
// Written by Peter Golde
// Copyright (c) 2004-2005, Wintellect
//
// Use and restribution of this code is subject to the license agreement
// contained in the file "License.txt" accompanying this file.
//******************************
using System;
using System.Diagnostics;
using System.Collections.Generic;
using System.Runtime.Serialization;
namespace Wintellect.PowerCollections
{
/// <summary>
/// The base implementation for various collections classes that use hash tables
/// as part of their implementation. This class should not (and can not) be
/// used directly by end users; it's only for internal use by the collections package. The Hash
/// does not handle duplicate values.
/// </summary>
/// <remarks>
/// The Hash manages items of type T, and uses a IComparer<ItemTYpe> that
/// hashes compares items to hash items into the table.
///</remarks>
[Serializable]
internal class Hash<T> : IEnumerable<T>, ISerializable, IDeserializationCallback
{
// NOTE: If you add new member variables, you very well may need to change the serialization
// code to serialize that member.
private IEqualityComparer<T> equalityComparer; // interface for comparing elements
private int count; // The count of elements in the table.
private int usedSlots; // Includes real elements and deleted elements with the collision bit on. Used to determine
// when we need to resize.
private int totalSlots; // Size of the table. Always a power of two.
private float loadFactor; // maximal load factor for the table.
private int thresholdGrow; // floor(totalSlots * loadFactor);
private int thresholdShrink; // thresholdGrow / 3.
private int hashMask; // Mask to convert hash values to the size of the table.
private int secondaryShift; // Shift to get the secondary skip value.
private Slot[] table; // The hash table.
private int changeStamp; // An integer that is changed every time the table structurally changes.
// Used so that enumerations throw an exception if the tree is changed
// during enumeration.
private const int MINSIZE = 16; // minimum number of slots.
private SerializationInfo serializationInfo; // Info used during deserialization.
/// <summary>
/// The structure that has each slot in the hash table. Each slot has three parts:
/// 1. The collision bit. Indicates whether some item visited this slot but had to
/// keep looking because the slot was full.
/// 2. 31-bit full hash value of the item. If zero, the slot is empty.
/// 3. The item itself.
/// </summary>
struct Slot
{
private uint hash_collision; // Lower 31 bits: the hash value. Top bit: the collision bit.
public T item; // The item.
/// <summary>
/// The full hash value associated with the value in this slot, or zero
/// if the slot is empty.
/// </summary>
public int HashValue
{
get {
return (int) (hash_collision & 0x7FFFFFFF);
}
set {
Debug.Assert((value & 0x80000000) == 0); // make sure sign bit isn't set.
hash_collision = (uint)value | (hash_collision & 0x80000000);
}
}
/// <summary>
/// Is this slot empty?
/// </summary>
public bool Empty {
get {
return HashValue == 0;
}
}
/// <summary>
/// Clear this slot, leaving the collision bit alone.
/// </summary>
public void Clear() {
HashValue = 0;
item = default(T); // Done to avoid keeping things alive that shouldn't be.
}
/// <summary>
/// The "Collision" bit indicates that some value hit this slot and
/// collided, so had to try another slot.
/// </summary>
public bool Collision
{
get
{
return (hash_collision & 0x80000000) != 0;
}
set
{
if (value)
hash_collision |= 0x80000000;
else
hash_collision &= 0x7FFFFFFF;
}
}
}
/// <summary>
/// Constructor. Create a new hash table.
/// </summary>
/// <param name="equalityComparer">The comparer to use to compare items. </param>
public Hash(IEqualityComparer<T> equalityComparer)
{
this.equalityComparer = equalityComparer;
this.loadFactor = 0.70F; // default load factor.
}
/// <summary>
/// Gets the current enumeration stamp. Call CheckEnumerationStamp later
/// with this value to throw an exception if the hash table is changed.
/// </summary>
/// <returns>The current enumeration stamp.</returns>
internal int GetEnumerationStamp()
{
return changeStamp;
}
/// <summary>
/// Must be called whenever there is a structural change in the tree. Causes
/// changeStamp to be changed, which causes any in-progress enumerations
/// to throw exceptions.
/// </summary>
internal void StopEnumerations()
{
++changeStamp;
}
/// <summary>
/// Checks the given stamp against the current change stamp. If different, the
/// collection has changed during enumeration and an InvalidOperationException
/// must be thrown
/// </summary>
/// <param name="startStamp">changeStamp at the start of the enumeration.</param>
internal void CheckEnumerationStamp(int startStamp)
{
if (startStamp != changeStamp) {
throw new InvalidOperationException(Strings.ChangeDuringEnumeration);
}
}
/// <summary>
/// Gets the full hash code for an item.
/// </summary>
/// <param name="item">Item to get hash code for.</param>
/// <returns>The full hash code. It is never zero.</returns>
private int GetFullHash(T item)
{
uint hash;
hash = (uint)Util.GetHashCode(item, equalityComparer);
// The .NET framework tends to produce pretty bad hash codes.
// Scramble them up to be much more random!
hash += ~(hash << 15);
hash ^= (hash >> 10);
hash += (hash << 3);
hash ^= (hash >> 6);
hash += ~(hash << 11);
hash ^= (hash >> 16);
hash &= 0x7FFFFFFF;
if (hash == 0)
hash = 0x7FFFFFFF; // Make sure it isn't zero.
return (int)hash;
}
/// <summary>
/// Get the initial bucket number and skip amount from the full hash value.
/// </summary>
/// <param name="hash">The full hash value.</param>
/// <param name="initialBucket">Returns the initial bucket. Always in the range 0..(totalSlots - 1).</param>
/// <param name="skip">Returns the skip values. Always odd in the range 0..(totalSlots - 1).</param>
private void GetHashValuesFromFullHash(int hash, out int initialBucket, out int skip)
{
initialBucket = hash & hashMask;
// The skip value must be relatively prime to the table size. Since the table size is a
// power of two, any odd number is relatively prime, so oring in 1 will do it.
skip = ((hash >> secondaryShift) & hashMask) | 1;
}
/// <summary>
/// Gets the full hash value, initial bucket number, and skip amount for an item.
/// </summary>
/// <param name="item">Item to get hash value of.</param>
/// <param name="initialBucket">Returns the initial bucket. Always in the range 0..(totalSlots - 1).</param>
/// <param name="skip">Returns the skip values. Always odd in the range 0..(totalSlots - 1).</param>
/// <returns>The full hash value. This is never zero.</returns>
private int GetHashValues(T item, out int initialBucket, out int skip)
{
int hash = GetFullHash(item);
GetHashValuesFromFullHash(hash, out initialBucket, out skip);
return hash;
}
/// <summary>
/// Make sure there are enough slots in the hash table that <paramref name="additionalItems"/>
/// items can be inserted into the table.
/// </summary>
/// <param name="additionalItems">Number of additional items we are inserting.</param>
private void EnsureEnoughSlots(int additionalItems)
{
StopEnumerations();
if (usedSlots + additionalItems > thresholdGrow) {
// We need to expand the table. Figure out to what size.
int newSize;
newSize = Math.Max(totalSlots, MINSIZE);
while ((int)(newSize * loadFactor) < usedSlots + additionalItems) {
newSize *= 2;
if (newSize <= 0) {
// Must have overflowed the size of an int. Hard to believe we didn't run out of memory first.
throw new InvalidOperationException(Strings.CollectionTooLarge);
}
}
ResizeTable(newSize);
}
}
/// <summary>
/// Check if the number of items in the table is small enough that
/// we should shrink the table again.
/// </summary>
private void ShrinkIfNeeded()
{
if (count < thresholdShrink) {
int newSize;
if (count > 0) {
newSize = MINSIZE;
while ((int)(newSize * loadFactor) < count)
newSize *= 2;
}
else {
// We've removed all the elements. Shrink to zero.
newSize = 0;
}
ResizeTable(newSize);
}
}
/// <summary>
/// Given the size of a hash table, compute the "secondary shift" value -- the shift
/// that is used to determine the skip amount for collision resolution.
/// </summary>
/// <param name="newSize">The new size of the table.</param>
/// <returns>The secondary skip amount.</returns>
private int GetSecondaryShift(int newSize)
{
int x = newSize - 2; // x is of the form 0000111110 -- a single string of 1's followed by a single zero.
int secondaryShift = 0;
// Keep shifting x until it is the set of bits we want to extract: it be the highest bits possible,
// but can't overflow into the sign bit.
while ((x & 0x40000000) == 0) {
x <<= 1;
++secondaryShift;
}
return secondaryShift;
}
/// <summary>
/// Resize the hash table to the given new size, moving all items into the
/// new hash table.
/// </summary>
/// <param name="newSize">The new size of the hash table. Must be a power
/// of two.</param>
private void ResizeTable(int newSize)
{
Slot[] oldTable = table; // Move all the items from this table to the new table.
Debug.Assert((newSize & (newSize - 1)) == 0); // Check newSize is a power of two.
totalSlots = newSize;
thresholdGrow = (int)(totalSlots * loadFactor);
thresholdShrink = thresholdGrow / 3;
if (thresholdShrink <= MINSIZE)
thresholdShrink = 1;
hashMask = newSize - 1;
secondaryShift = GetSecondaryShift(newSize);
if (totalSlots > 0)
table = new Slot[totalSlots];
else
table = null;
if (oldTable != null && table != null) {
foreach (Slot oldSlot in oldTable) {
int hash, bucket, skip;
hash = oldSlot.HashValue;
GetHashValuesFromFullHash(hash, out bucket, out skip);
// Find an empty bucket.
while (! table[bucket].Empty) {
// The slot is used, but isn't our item. Set the collision bit and keep looking.
table[bucket].Collision = true;
bucket = (bucket + skip) & hashMask;
}
// We found an empty bucket.
table[bucket].HashValue = hash;
table[bucket].item = oldSlot.item;
}
}
usedSlots = count; // no deleted elements have the collision bit on now.
}
/// <summary>
/// Get the number of items in the hash table.
/// </summary>
/// <value>The number of items stored in the hash table.</value>
public int ElementCount
{
get
{
return count;
}
}
/// <summary>
/// Get the number of slots in the hash table. Exposed internally
/// for testing purposes.
/// </summary>
/// <value>The number of slots in the hash table.</value>
internal int SlotCount
{
get
{
return totalSlots;
}
}
/// <summary>
/// Get or change the load factor. Changing the load factor may cause
/// the size of the table to grow or shrink accordingly.
/// </summary>
/// <value></value>
public float LoadFactor
{
get
{
return loadFactor;
}
set
{
// Don't allow hopelessly inefficient load factors.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -