📄 sparsehashtable.h
字号:
// Copyright (c) 2005, Google Inc.// All rights reserved.//// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above// copyright notice, this list of conditions and the following disclaimer// in the documentation and/or other materials provided with the// distribution.// * Neither the name of Google Inc. nor the names of its// contributors may be used to endorse or promote products derived from// this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.// ---// Author: Craig Silverstein//// A sparse hashtable is a particular implementation of// a hashtable: one that is meant to minimize memory use.// It does this by using a *sparse table* (cf sparsetable.h),// which uses between 1 and 2 bits to store empty buckets// (we may need another bit for hashtables that support deletion).//// When empty buckets are so cheap, an appealing hashtable// implementation is internal probing, in which the hashtable// is a single table, and collisions are resolved by trying// to insert again in another bucket. The most cache-efficient// internal probing schemes are linear probing (which suffers,// alas, from clumping) and quadratic probing, which is what// we implement by default.//// Deleted buckets are a bit of a pain. We have to somehow mark// deleted buckets (the probing must distinguish them from empty// buckets). The most principled way is to have another bitmap,// but that's annoying and takes up space. Instead we let the// user specify an "impossible" key. We set deleted buckets// to have the impossible key.//// Note it is possible to change the value of the delete key// on the fly; you can even remove it, though after that point// the hashtable is insert_only until you set it again.//// You probably shouldn't use this code directly. Use// <google/sparse_hash_table> or <google/sparse_hash_set> instead.//// You can following below:// HT_OCCUPANCY_FLT -- how full before we double size// HT_EMPTY_FLT -- how empty before we halve size// HT_MIN_BUCKETS -- smallest bucket size//// You can also change enlarge_resize_percent (which defaults to// HT_OCCUPANCY_FLT), and shrink_resize_percent (which defaults to// HT_EMPTY_FLT) with set_resizing_parameters().//// How to decide what values to use?// shrink_resize_percent's default of .4 * OCCUPANCY_FLT, is probably good.// HT_MIN_BUCKETS is probably unnecessary since you can specify// (indirectly) the starting number of buckets at construct-time.// For enlarge_resize_percent, you can use this chart to try to trade-off// expected lookup time to the space taken up. By default, this// code uses quadratic probing, though you can change it to linear// via _JUMP below if you really want to.//// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html// NUMBER OF PROBES / LOOKUP Successful Unsuccessful// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L)// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2//// -- enlarge_resize_percent -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99// QUADRATIC COLLISION RES.// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6// LINEAR COLLISION RES.// probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5// probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0//// The value type is required to be copy constructible and default// constructible, but it need not be (and commonly isn't) assignable.#ifndef _SPARSEHASHTABLE_H_#define _SPARSEHASHTABLE_H_#ifndef SPARSEHASH_STAT_UPDATE#define SPARSEHASH_STAT_UPDATE(x) ((void) 0)#endif// The probing method// Linear probing// #define JUMP_(key, num_probes) ( 1 )// Quadratic-ish probing#define JUMP_(key, num_probes) ( num_probes )// Hashtable class, used to implement the hashed associative containers// hash_set and hash_map.#include <google/sparsehash/sparseconfig.h>#include <assert.h>#include <algorithm> // For swap(), eg#include <iterator> // for facts about iterator tags#include <utility> // for pair<>#include <google/sparsetable> // Since that's basically what we are_START_GOOGLE_NAMESPACE_using STL_NAMESPACE::pair;// Alloc is completely ignored. It is present as a template parameter only// for the sake of being compatible with the old SGI hashtable interface.// TODO(csilvers): is that the right thing to do?template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey, class Alloc>class sparse_hashtable;template <class V, class K, class HF, class ExK, class EqK, class A>struct sparse_hashtable_iterator;template <class V, class K, class HF, class ExK, class EqK, class A>struct sparse_hashtable_const_iterator;// As far as iterating, we're basically just a sparsetable// that skips over deleted elements.template <class V, class K, class HF, class ExK, class EqK, class A>struct sparse_hashtable_iterator { public: typedef sparse_hashtable_iterator<V,K,HF,ExK,EqK,A> iterator; typedef sparse_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator; typedef typename sparsetable<V>::nonempty_iterator st_iterator; typedef STL_NAMESPACE::forward_iterator_tag iterator_category; typedef V value_type; typedef ptrdiff_t difference_type; typedef size_t size_type; typedef V& reference; // Value typedef V* pointer; // "Real" constructor and default constructor sparse_hashtable_iterator(const sparse_hashtable<V,K,HF,ExK,EqK,A> *h, st_iterator it, st_iterator it_end) : ht(h), pos(it), end(it_end) { advance_past_deleted(); } sparse_hashtable_iterator() { } // not ever used internally // The default destructor is fine; we don't define one // The default operator= is fine; we don't define one // Happy dereferencer reference operator*() const { return *pos; } pointer operator->() const { return &(operator*()); } // Arithmetic. The only hard part is making sure that // we're not on a marked-deleted array element void advance_past_deleted() { while ( pos != end && ht->test_deleted(*this) ) ++pos; } iterator& operator++() { assert(pos != end); ++pos; advance_past_deleted(); return *this; } iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } // Comparison. bool operator==(const iterator& it) const { return pos == it.pos; } bool operator!=(const iterator& it) const { return pos != it.pos; } // The actual data const sparse_hashtable<V,K,HF,ExK,EqK,A> *ht; st_iterator pos, end;};// Now do it all again, but with const-ness!template <class V, class K, class HF, class ExK, class EqK, class A>struct sparse_hashtable_const_iterator { public: typedef sparse_hashtable_iterator<V,K,HF,ExK,EqK,A> iterator; typedef sparse_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator; typedef typename sparsetable<V>::const_nonempty_iterator st_iterator; typedef STL_NAMESPACE::forward_iterator_tag iterator_category; typedef V value_type; typedef ptrdiff_t difference_type; typedef size_t size_type; typedef const V& reference; // Value typedef const V* pointer; // "Real" constructor and default constructor sparse_hashtable_const_iterator(const sparse_hashtable<V,K,HF,ExK,EqK,A> *h, st_iterator it, st_iterator it_end) : ht(h), pos(it), end(it_end) { advance_past_deleted(); } // This lets us convert regular iterators to const iterators sparse_hashtable_const_iterator() { } // never used internally sparse_hashtable_const_iterator(const iterator &it) : ht(it.ht), pos(it.pos), end(it.end) { } // The default destructor is fine; we don't define one // The default operator= is fine; we don't define one // Happy dereferencer reference operator*() const { return *pos; } pointer operator->() const { return &(operator*()); } // Arithmetic. The only hard part is making sure that // we're not on a marked-deleted array element void advance_past_deleted() { while ( pos != end && ht->test_deleted(*this) ) ++pos; } const_iterator& operator++() { assert(pos != end); ++pos; advance_past_deleted(); return *this; } const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; } // Comparison. bool operator==(const const_iterator& it) const { return pos == it.pos; } bool operator!=(const const_iterator& it) const { return pos != it.pos; } // The actual data const sparse_hashtable<V,K,HF,ExK,EqK,A> *ht; st_iterator pos, end;};// And once again, but this time freeing up memory as we iteratetemplate <class V, class K, class HF, class ExK, class EqK, class A>struct sparse_hashtable_destructive_iterator { public: typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,EqK,A> iterator; typedef typename sparsetable<V>::destructive_iterator st_iterator; typedef STL_NAMESPACE::forward_iterator_tag iterator_category; typedef V value_type; typedef ptrdiff_t difference_type; typedef size_t size_type; typedef V& reference; // Value typedef V* pointer; // "Real" constructor and default constructor sparse_hashtable_destructive_iterator(const sparse_hashtable<V,K,HF,ExK,EqK,A> *h, st_iterator it, st_iterator it_end) : ht(h), pos(it), end(it_end) { advance_past_deleted(); } sparse_hashtable_destructive_iterator() { } // never used internally // The default destructor is fine; we don't define one // The default operator= is fine; we don't define one // Happy dereferencer reference operator*() const { return *pos; } pointer operator->() const { return &(operator*()); } // Arithmetic. The only hard part is making sure that // we're not on a marked-deleted array element void advance_past_deleted() { while ( pos != end && ht->test_deleted(*this) ) ++pos; } iterator& operator++() { assert(pos != end); ++pos; advance_past_deleted(); return *this; } iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } // Comparison. bool operator==(const iterator& it) const { return pos == it.pos; } bool operator!=(const iterator& it) const { return pos != it.pos; } // The actual data const sparse_hashtable<V,K,HF,ExK,EqK,A> *ht; st_iterator pos, end;};template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey, class Alloc>class sparse_hashtable { public: typedef Key key_type; typedef Value value_type; typedef HashFcn hasher; typedef EqualKey key_equal; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef value_type* pointer; typedef const value_type* const_pointer; typedef value_type& reference; typedef const value_type& const_reference; typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc> iterator; typedef sparse_hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc> const_iterator; typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -