📄 sparsehashtable.h
Font size:
ExtractKey, EqualKey, Alloc> destructive_iterator; // How full we let the table get before we resize. Knuth says .8 is // good -- higher causes us to probe too much, though saves memory static const float HT_OCCUPANCY_FLT; // = 0.8f; // How empty we let the table get before we resize lower. // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT; // Minimum size we're willing to let hashtables be. // Must be a power of two, and at least 4. // Note, however, that for a given hashtable, the minimum size is // determined by the first constructor arg, and may be >HT_MIN_BUCKETS. static const size_t HT_MIN_BUCKETS = 32; // ITERATOR FUNCTIONS iterator begin() { return iterator(this, table.nonempty_begin(), table.nonempty_end()); } iterator end() { return iterator(this, table.nonempty_end(), table.nonempty_end()); } const_iterator begin() const { return const_iterator(this, table.nonempty_begin(), table.nonempty_end()); } const_iterator end() const { return const_iterator(this, table.nonempty_end(), table.nonempty_end()); } // This is used when resizing destructive_iterator destructive_begin() { return destructive_iterator(this, table.destructive_begin(), table.destructive_end()); } destructive_iterator destructive_end() { return destructive_iterator(this, table.destructive_end(), table.destructive_end()); } // ACCESSOR FUNCTIONS for the things we templatize on, basically hasher hash_funct() const { return hash; } key_equal key_eq() const { return equals; } // We need to copy values when we set the special marker for deleted // elements, but, annoyingly, we can't just use the copy assignment // operator because value_type might not be assignable (it's often // pair<const X, Y>). We use explicit destructor invocation and // placement new to get around this. Arg. 
 private:
  // Overwrite *dst with src.  Copy-assignment won't do here (value_type is
  // often pair<const X, Y>, which is not assignable), so we destroy the old
  // object and placement-new the replacement in its slot.
  void set_value(value_type* dst, const value_type src) {
    dst->~value_type();                    // delete the old value, if any
    new(dst) value_type(src);
  }

  // This is used as a tag for the copy constructor, saying to destroy its
  // arg We have two ways of destructively copying: with potentially growing
  // the hashtable as we copy, and without.  To make sure the outside world
  // can't do a destructive copy, we make the typename private.
  enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  // Purge all deleted-marker entries by destructively rebuilding the table
  // at its current size (MoveDontGrow).
  void squash_deleted() {                  // gets rid of any deleted entries we have
    if ( num_deleted ) {                   // get rid of deleted before writing
      sparse_hashtable tmp(MoveDontGrow, *this);
      swap(tmp);                           // now we are tmp
    }
    assert(num_deleted == 0);
  }

 public:
  void set_deleted_key(const value_type &val) {
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    use_deleted = true;
    set_value(&delval, val);               // save the key (and rest of val too)
  }
  void clear_deleted_key() {
    squash_deleted();
    use_deleted = false;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is "deleted" marker
  bool test_deleted(size_type bucknum) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    return (use_deleted && num_deleted > 0 && table.test(bucknum) &&
            equals(get_key(delval), get_key(table.get(bucknum))));
  }
  bool test_deleted(const iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }
  bool test_deleted(const const_iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }
  bool test_deleted(const destructive_iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }

  // Set it so test_deleted is true.  true if object didn't used to be deleted
  // See below (at erase()) to explain why we allow const_iterators
  bool set_deleted(const_iterator &it) {
    assert(use_deleted);                   // bad if set_deleted_key() wasn't called
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type
    set_value(const_cast<value_type*>(&(*it)), delval);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted
  bool clear_deleted(const_iterator &it) {
    assert(use_deleted);                   // bad if set_deleted_key() wasn't called
    // happens automatically when we assign something else in its place
    return test_deleted(it);
  }

  // FUNCTIONS CONCERNING SIZE
  // Logical size excludes buckets occupied by the deleted-marker.
  size_type size() const { return table.num_nonempty() - num_deleted; }
  // Buckets are always a power of 2
  size_type max_size() const { return (size_type(-1) >> 1U) + 1; }
  bool empty() const { return size() == 0; }
  size_type bucket_count() const { return table.size(); }
  size_type max_bucket_count() const { return max_size(); }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

 private:
  // This is the smallest size a hashtable can be without being too crowded
  // If you like, you can give a min #buckets as well as a min #elts
  // Result is always a power of two >= HT_MIN_BUCKETS.
  size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
    size_type sz = HT_MIN_BUCKETS;
    while ( sz < min_buckets_wanted ||
            num_elts >= sz * enlarge_resize_percent )
      sz *= 2;
    return sz;
  }

  // Used after a string of deletes
  void maybe_shrink() {
    assert(table.num_nonempty() >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    if (shrink_threshold > 0 &&
        (table.num_nonempty()-num_deleted) < shrink_threshold &&
        bucket_count() > HT_MIN_BUCKETS ) {
      size_type sz = bucket_count() / 2;   // find how much we should shrink
      while ( sz > HT_MIN_BUCKETS &&
              (table.num_nonempty() - num_deleted) <= sz *
              shrink_resize_percent )
        sz /= 2;                           // stay a power of 2
      sparse_hashtable tmp(MoveDontCopy, *this, sz);
      swap(tmp);                           // now we are tmp
    }
    consider_shrink = false;               // because we just considered it
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
    if ( consider_shrink )                 // see if lots of deletes happened
      maybe_shrink();
    if ( bucket_count() > min_buckets_wanted &&
         (table.num_nonempty() + delta) <= enlarge_threshold )
      return;                              // we're ok as we are
    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size = min_size(table.num_nonempty() + delta,
                                           min_buckets_wanted);
    if ( needed_size > bucket_count() ) {  // we don't have enough buckets
      const size_type resize_to = min_size(table.num_nonempty() - num_deleted
                                           + delta, min_buckets_wanted);
      sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
      swap(tmp);                           // now we are tmp
    }
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted = 0) {
    clear();            // clear table, set num_deleted to 0
    // If we need to change the size of our table, do it now
    const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      table.resize(resize_to);             // sets the number of buckets
      reset_thresholds();
    }
    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert( (bucket_count() & (bucket_count()-1)) == 0);  // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;            // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe sequence: start at hash & (2^k - 1), advance by JUMP_ until an
      // empty bucket is found.  JUMP_ is a macro defined outside this excerpt.
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           table.test(bucknum);            // not empty
           bucknum = (bucknum + JUMP_(key, num_probes))
                     & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      table.set(bucknum, *it);             // copies the value to here
    }
  }

  // Implementation is like copy_from, but it destroys the table of the
  // "from" guy by freeing sparsetable memory as we iterate.  This is
  // useful in resizing, since we're throwing away the "from" guy anyway.
  // Destructively rehash ht's contents into *this, freeing ht's buckets as
  // we go.  MoveDontGrow keeps ht's bucket count; MoveDontCopy picks a new
  // size from min_size().
  void move_from(MoveDontCopyT mover, sparse_hashtable &ht,
                 size_type min_buckets_wanted = 0) {
    clear();            // clear table, set num_deleted to 0
    // If we need to change the size of our table, do it now
    size_t resize_to;
    if ( mover == MoveDontGrow )
      resize_to = ht.bucket_count();       // keep same size as old ht
    else                                   // MoveDontCopy
      resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      table.resize(resize_to);             // sets the number of buckets
      reset_thresholds();
    }
    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert( (bucket_count() & (bucket_count()-1)) == 0);  // a power of two
    // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
    for ( destructive_iterator it = ht.destructive_begin();
          it != ht.destructive_end(); ++it ) {
      size_type num_probes = 0;            // how many times we've probed
      size_type bucknum;
      for ( bucknum = hash(get_key(*it)) & (bucket_count()-1); // h % buck_cnt
            table.test(bucknum);           // not empty
            bucknum = (bucknum + JUMP_(key, num_probes))
                      & (bucket_count()-1) ) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      table.set(bucknum, *it);             // copies the value to here
    }
  }

  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {    // resize to this or larger
    if ( consider_shrink || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > table.num_nonempty() )  // we only grow
      resize_delta(req_elements - table.num_nonempty(), 0);
  }

  // Change the value of shrink_resize_percent and
  // enlarge_resize_percent.  The description at the beginning of this
  // file explains how to choose the values.
Setting the shrink // parameter to 0.0 ensures that the table never shrinks. void set_resizing_parameters(float shrink, float grow) { assert(shrink >= 0.0); assert(grow <= 1.0); assert(shrink <= grow/2.0); shrink_resize_percent = shrink; enlarge_resize_percent = grow; reset_thresholds(); } // CONSTRUCTORS -- as required by the specs, we take a size, // but also let you specify a hashfunction, key comparator, // and key extractor. We also define a copy constructor and =. // DESTRUCTOR -- the default is fine, surprisingly. explicit sparse_hashtable(size_type n = 0, const HashFcn& hf = HashFcn(), const EqualKey& eql = EqualKey(), const ExtractKey& ext = ExtractKey()) : hash(hf), equals(eql), get_key(ext), num_deleted(0), use_deleted(false), delval(), enlarge_resize_percent(HT_OCCUPANCY_FLT), shrink_resize_percent(HT_EMPTY_FLT), table(min_size(0, n)) { // start small reset_thresholds(); } // As a convenience for resize(), we allow an optional second argument // which lets you make this new hashtable a different size than ht. // We also provide a mechanism of saying you want to "move" the ht argument // into us instead of copying. sparse_hashtable(const sparse_hashtable& ht, size_type min_buckets_wanted = 0) : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0), use_deleted(ht.use_deleted), delval(ht.delval), enlarge_resize_percent(ht.enlarge_resize_percent),
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -