sparsehashtable.h
  shrink_resize_percent(ht.shrink_resize_percent),
  table() {
    reset_thresholds();
    copy_from(ht, min_buckets_wanted);    // copy_from() ignores deleted entries
  }

  sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                   size_type min_buckets_wanted = 0)
      : hash(ht.hash), equals(ht.equals), get_key(ht.get_key),
        num_deleted(0), use_deleted(ht.use_deleted), delval(ht.delval),
        enlarge_resize_percent(ht.enlarge_resize_percent),
        shrink_resize_percent(ht.shrink_resize_percent),
        table() {
    reset_thresholds();
    move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
  }

  sparse_hashtable& operator=(const sparse_hashtable& ht) {
    if (&ht == this) return *this;        // don't copy onto ourselves
    clear();
    hash = ht.hash;
    equals = ht.equals;
    get_key = ht.get_key;
    use_deleted = ht.use_deleted;
    set_value(&delval, ht.delval);
    copy_from(ht);                        // sets num_deleted to 0 too
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparse_hashtable& ht) {
    STL_NAMESPACE::swap(hash, ht.hash);
    STL_NAMESPACE::swap(equals, ht.equals);
    STL_NAMESPACE::swap(get_key, ht.get_key);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
    STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent);
    STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent);
    {
      value_type tmp;                     // for annoying reasons, swap() doesn't work
      set_value(&tmp, delval);
      set_value(&delval, ht.delval);
      set_value(&ht.delval, tmp);
    }
    table.swap(ht.table);
    reset_thresholds();
    ht.reset_thresholds();
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    table.clear();
    reset_thresholds();
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type& key) const {
    size_type num_probes = 0;             // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    SPARSEHASH_STAT_UPDATE(total_lookups += 1);
    while ( 1 ) {                         // probe until something happens
      if ( !table.test(bucknum) ) {       // bucket is empty
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        if ( insert_pos == ILLEGAL_BUCKET )  // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
      } else if ( test_deleted(bucknum) ) {  // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;
      } else if ( equals(key, get_key(table.get(bucknum))) ) {
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                       // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()); // don't probe too many times!
    }
  }
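  // Illustrative note (not in the original source): the probe sequence above
  // depends on the JUMP_ macro, which the full header defines for either
  // linear or quadratic probing.  Assuming the quadratic default, where
  // JUMP_(key, num_probes) == num_probes, a key hashing to bucket h in a
  // power-of-two table visits
  //   h, h+1, h+3, h+6, h+10, ...     (offsets are the triangular numbers)
  // each masked by bucket_count()-1.  The assert() above stops us (in debug
  // builds) from probing more times than there are buckets.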
 public:
  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )    // alas, not there
      return end();
    else
      return iterator(this, table.get_iter(pos.first), table.nonempty_end());
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )    // alas, not there
      return end();
    else
      return const_iterator(this, table.get_iter(pos.first),
                            table.nonempty_end());
  }

  // Counts how many elements have key 'key'.  For maps, it's either 0 or 1.
  size_type count(const key_type& key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    const iterator pos = find(key);       // either an iterator or end
    return pair<iterator,iterator>(pos, pos);
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const const_iterator pos = find(key); // either an iterator or end
    return pair<const_iterator,const_iterator>(pos, pos);
  }

  // INSERTION ROUTINES
 private:
  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const value_type& obj) {
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET ) {  // object was already there
      return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
                                          table.nonempty_end()),
                                 false);  // false: we didn't insert
    } else {                              // pos.second says where to put it
      if ( test_deleted(pos.second) ) {   // just replace if it's been deleted
        // The set() below will undelete this object.  We just worry about stats
        assert(num_deleted > 0);
        --num_deleted;                    // used to be, now it isn't
      }
      table.set(pos.second, obj);
      return pair<iterator,bool>(iterator(this, table.get_iter(pos.second),
                                          table.nonempty_end()),
                                 true);   // true: we did insert
    }
  }

 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const value_type& obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {  // specializes on iterator type
    insert(f, l,
           typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_type n = STL_NAMESPACE::distance(f, l);   // TODO(csilvers): standard?
    resize_delta(n);
    for ( ; n > 0; --n, ++f)
      insert_noresize(*f);
  }

  // Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);
  }

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    const_iterator pos = find(key);       // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));         // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      consider_shrink = true;             // will think about shrink after next insert
      return 1;                           // because we deleted one thing
    } else {
      return 0;                           // because we deleted nothing
    }
  }
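  // Illustrative note (not in the original source): erase() never truly
  // empties a bucket; set_deleted() turns it into a "tombstone" that
  // test_deleted() recognizes.  That keeps probe chains intact, so a lookup
  // for another key that had collided past the erased slot still finds its
  // way.  find_position() remembers the first tombstone it passes in
  // insert_pos, and insert_noresize() reuses that slot (decrementing
  // num_deleted) when the key turns out not to be present.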
  // This is really evil: really it should be iterator, not const_iterator.
  // But...the only reason keys are const is to allow lookup.
  // Since that's a moot issue for deleted keys, we allow const_iterators
  void erase(const_iterator pos) {
    if ( pos == end() ) return;           // sanity check
    if ( set_deleted(pos) ) {             // true if object has been newly deleted
      ++num_deleted;
      consider_shrink = true;             // will think about shrink after next insert
    }
  }

  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )               // should always be true
        ++num_deleted;
    }
    consider_shrink = true;               // will think about shrink after next insert
  }

  // COMPARISON
  bool operator==(const sparse_hashtable& ht) const {
    // We really want to check that the hash functions are the same
    // but alas there's no way to do this.  We just hope.
    return ( num_deleted == ht.num_deleted && table == ht.table );
  }
  bool operator!=(const sparse_hashtable& ht) const {
    return !(*this == ht);
  }

  // I/O
  // We support reading and writing hashtables to disk.  NOTE that
  // this only stores the hashtable metadata, not the stuff you've
  // actually put in the hashtable!  Alas, since I don't know how to
  // write a hasher or key_equal, you have to make sure everything
  // but the table is the same.  We compact before writing.
  bool write_metadata(FILE *fp) {
    squash_deleted();                     // so we don't have to worry about delkey
    return table.write_metadata(fp);
  }

  bool read_metadata(FILE *fp) {
    num_deleted = 0;                      // since we got rid of them before writing
    bool result = table.read_metadata(fp);
    reset_thresholds();
    return result;
  }

  // Only meaningful if value_type is a POD.
  bool write_nopointer_data(FILE *fp) {
    return table.write_nopointer_data(fp);
  }

  // Only meaningful if value_type is a POD.
  bool read_nopointer_data(FILE *fp) {
    return table.read_nopointer_data(fp);
  }

 private:
  // The actual data
  hasher hash;                            // required by hashed_associative_container
  key_equal equals;
  ExtractKey get_key;
  size_type num_deleted;                  // how many occupied buckets are marked deleted
  bool use_deleted;                       // false until delval has been set
  value_type delval;                      // which key marks deleted entries
  float enlarge_resize_percent;           // how full before resize
  float shrink_resize_percent;            // how empty before resize
  size_type shrink_threshold;             // table.size() * shrink_resize_percent
  size_type enlarge_threshold;            // table.size() * enlarge_resize_percent
  sparsetable<value_type> table;          // holds num_buckets and num_elements too
  bool consider_shrink;                   // true if we should try to shrink before next insert

  void reset_thresholds() {
    enlarge_threshold = static_cast<size_type>(table.size()
                                               * enlarge_resize_percent);
    shrink_threshold = static_cast<size_type>(table.size()
                                              * shrink_resize_percent);
    consider_shrink = false;  // whatever caused us to reset has already been considered
  }
};

// We need a global swap as well
template <class V, class K, class HF, class ExK, class EqK, class A>
inline void swap(sparse_hashtable<V,K,HF,ExK,EqK,A>& x,
                 sparse_hashtable<V,K,HF,ExK,EqK,A>& y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class EqK, class A>
const typename sparse_hashtable<V,K,HF,ExK,EqK,A>::size_type
    sparse_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
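// Worked example (illustrative, not part of the original source): if the
// resize percentages are left at the defaults defined just below
// (HT_OCCUPANCY_FLT = 0.8, HT_EMPTY_FLT = 0.32), a 32-bucket table gets
//   enlarge_threshold = static_cast<size_type>(32 * 0.8)  = 25
//   shrink_threshold  = static_cast<size_type>(32 * 0.32) = 10
// since the casts in reset_thresholds() truncate 25.6 and 10.24 toward zero.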
// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory
template <class V, class K, class HF, class ExK, class EqK, class A>
const float sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.8f;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class EqK, class A>
const float sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT
    = 0.4f * sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;

_END_GOOGLE_NAMESPACE_

#endif /* _SPARSEHASHTABLE_H_ */
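For context, here is a minimal usage sketch of the public wrapper built on top
of this table. It is an illustration, not part of the header: it assumes the
classic sparsehash package, where sparse_hash_map wraps sparse_hashtable and
is included as <google/sparse_hash_map> (newer releases use
<sparsehash/sparse_hash_map>). Note that set_deleted_key() must be called
before any erase() -- that call is what sets use_deleted and delval above.

#include <cstdio>
#include <string>
#include <google/sparse_hash_map>  // assumed include path, see note above

int main() {
  google::sparse_hash_map<std::string, int> counts;
  // Required before erase(): designates the sentinel that marks deleted
  // buckets.  The chosen key must never be inserted as a real key.
  counts.set_deleted_key("");
  counts["apple"] = 2;           // goes through insert() -> insert_noresize()
  counts["pear"]  = 3;
  counts.erase("apple");         // leaves a tombstone and bumps num_deleted
  std::printf("pear -> %d, size = %lu\n", counts["pear"],
              static_cast<unsigned long>(counts.size()));
  return 0;
}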