📄 sparsehashtable.h
Font size:
ExtractKey, EqualKey, Alloc> destructive_iterator; // How full we let the table get before we resize. Knuth says .8 is // good -- higher causes us to probe too much, though saves memory static const float HT_OCCUPANCY_FLT; // = 0.8f; // How empty we let the table get before we resize lower. // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT; // Minimum size we're willing to let hashtables be. // Must be a power of two, and at least 4. // Note, however, that for a given hashtable, the minimum size is // determined by the first constructor arg, and may be >HT_MIN_BUCKETS. static const size_t HT_MIN_BUCKETS = 32; // ITERATOR FUNCTIONS iterator begin() { return iterator(this, table.nonempty_begin(), table.nonempty_end()); } iterator end() { return iterator(this, table.nonempty_end(), table.nonempty_end()); } const_iterator begin() const { return const_iterator(this, table.nonempty_begin(), table.nonempty_end()); } const_iterator end() const { return const_iterator(this, table.nonempty_end(), table.nonempty_end()); } // This is used when resizing destructive_iterator destructive_begin() { return destructive_iterator(this, table.destructive_begin(), table.destructive_end()); } destructive_iterator destructive_end() { return destructive_iterator(this, table.destructive_end(), table.destructive_end()); } // ACCESSOR FUNCTIONS for the things we templatize on, basically hasher hash_funct() const { return hash; } key_equal key_eq() const { return equals; } // We need to copy values when we set the special marker for deleted // elements, but, annoyingly, we can't just use the copy assignment // operator because value_type might not be assignable (it's often // pair<const X, Y>). We use explicit destructor invocation and // placement new to get around this. Arg. 
 private:
  // Overwrite *dst with src.  Copy-assignment won't do here (value_type is
  // often pair<const X, Y>, which is not assignable), so we destroy the old
  // object and placement-new the replacement in its slot.
  void set_value(value_type* dst, const value_type src) {
    dst->~value_type();                    // delete the old value, if any
    new(dst) value_type(src);
  }

  // This is used as a tag for the copy constructor, saying to destroy its
  // arg We have two ways of destructively copying: with potentially growing
  // the hashtable as we copy, and without.  To make sure the outside world
  // can't do a destructive copy, we make the typename private.
  enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  // Purge all deleted-marker entries by destructively rebuilding the table
  // at its current size (MoveDontGrow).
  void squash_deleted() {                  // gets rid of any deleted entries we have
    if ( num_deleted ) {                   // get rid of deleted before writing
      sparse_hashtable tmp(MoveDontGrow, *this);
      swap(tmp);                           // now we are tmp
    }
    assert(num_deleted == 0);
  }

 public:
  void set_deleted_key(const value_type &val) {
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    use_deleted = true;
    set_value(&delval, val);               // save the key (and rest of val too)
  }
  void clear_deleted_key() {
    squash_deleted();
    use_deleted = false;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is "deleted" marker
  bool test_deleted(size_type bucknum) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    return (use_deleted && num_deleted > 0 && table.test(bucknum) &&
            equals(get_key(delval), get_key(table.get(bucknum))));
  }
  bool test_deleted(const iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }
  bool test_deleted(const const_iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }
  bool test_deleted(const destructive_iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }

  // Set it so test_deleted is true.  true if object didn't used to be deleted
  // See below (at erase()) to explain why we allow const_iterators
  bool set_deleted(const_iterator &it) {
    assert(use_deleted);                   // bad if set_deleted_key() wasn't called
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type
    set_value(const_cast<value_type*>(&(*it)), delval);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted
  bool clear_deleted(const_iterator &it) {
    assert(use_deleted);                   // bad if set_deleted_key() wasn't called
    // happens automatically when we assign something else in its place
    return test_deleted(it);
  }

  // FUNCTIONS CONCERNING SIZE
  // Logical size excludes buckets occupied by the deleted-marker.
  size_type size() const { return table.num_nonempty() - num_deleted; }
  // Buckets are always a power of 2
  size_type max_size() const { return (size_type(-1) >> 1U) + 1; }
  bool empty() const { return size() == 0; }
  size_type bucket_count() const { return table.size(); }
  size_type max_bucket_count() const { return max_size(); }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

 private:
  // This is the smallest size a hashtable can be without being too crowded
  // If you like, you can give a min #buckets as well as a min #elts
  // Result is always a power of two >= HT_MIN_BUCKETS.
  size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
    size_type sz = HT_MIN_BUCKETS;
    while ( sz < min_buckets_wanted ||
            num_elts >= sz * enlarge_resize_percent )
      sz *= 2;
    return sz;
  }

  // Used after a string of deletes
  void maybe_shrink() {
    assert(table.num_nonempty() >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    if (shrink_threshold > 0 &&
        (table.num_nonempty()-num_deleted) < shrink_threshold &&
        bucket_count() > HT_MIN_BUCKETS ) {
      size_type sz = bucket_count() / 2;   // find how much we should shrink
      while ( sz > HT_MIN_BUCKETS &&
              (table.num_nonempty() - num_deleted) <= sz *
              shrink_resize_percent )
        sz /= 2;                           // stay a power of 2
      sparse_hashtable tmp(MoveDontCopy, *this, sz);
      swap(tmp);                           // now we are tmp
    }
    consider_shrink = false;               // because we just considered it
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
    if ( consider_shrink )                 // see if lots of deletes happened
      maybe_shrink();
    if ( bucket_count() > min_buckets_wanted &&
         (table.num_nonempty() + delta) <= enlarge_threshold )
      return;                              // we're ok as we are
    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size = min_size(table.num_nonempty() + delta,
                                           min_buckets_wanted);
    if ( needed_size > bucket_count() ) {  // we don't have enough buckets
      const size_type resize_to = min_size(table.num_nonempty() - num_deleted
                                           + delta, min_buckets_wanted);
      sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
      swap(tmp);                           // now we are tmp
    }
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted = 0) {
    clear();            // clear table, set num_deleted to 0
    // If we need to change the size of our table, do it now
    const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      table.resize(resize_to);             // sets the number of buckets
      reset_thresholds();
    }
    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert( (bucket_count() & (bucket_count()-1)) == 0);  // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;            // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe sequence: start at hash & (2^k - 1), advance by JUMP_ until an
      // empty bucket is found.  JUMP_ is a macro defined outside this excerpt.
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           table.test(bucknum);            // not empty
           bucknum = (bucknum + JUMP_(key, num_probes))
                     & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      table.set(bucknum, *it);             // copies the value to here
    }
  }

  // Implementation is like copy_from, but it destroys the table of the
  // "from" guy by freeing sparsetable memory as we iterate.  This is
  // useful in resizing, since we're throwing away the "from" guy anyway.
  // Destructively rehash ht's contents into *this, freeing ht's buckets as
  // we go.  MoveDontGrow keeps ht's bucket count; MoveDontCopy picks a new
  // size from min_size().
  void move_from(MoveDontCopyT mover, sparse_hashtable &ht,
                 size_type min_buckets_wanted = 0) {
    clear();            // clear table, set num_deleted to 0
    // If we need to change the size of our table, do it now
    size_t resize_to;
    if ( mover == MoveDontGrow )
      resize_to = ht.bucket_count();       // keep same size as old ht
    else                                   // MoveDontCopy
      resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      table.resize(resize_to);             // sets the number of buckets
      reset_thresholds();
    }
    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert( (bucket_count() & (bucket_count()-1)) == 0);  // a power of two
    // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
    for ( destructive_iterator it = ht.destructive_begin();
          it != ht.destructive_end(); ++it ) {
      size_type num_probes = 0;            // how many times we've probed
      size_type bucknum;
      for ( bucknum = hash(get_key(*it)) & (bucket_count()-1); // h % buck_cnt
            table.test(bucknum);           // not empty
            bucknum = (bucknum + JUMP_(key, num_probes))
                      & (bucket_count()-1) ) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      table.set(bucknum, *it);             // copies the value to here
    }
  }

  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {    // resize to this or larger
    if ( consider_shrink || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > table.num_nonempty() )  // we only grow
      resize_delta(req_elements - table.num_nonempty(), 0);
  }

  // Change the value of shrink_resize_percent and
  // enlarge_resize_percent.  The description at the beginning of this
  // file explains how to choose the values.
Setting the shrink // parameter to 0.0 ensures that the table never shrinks. void set_resizing_parameters(float shrink, float grow) { assert(shrink >= 0.0); assert(grow <= 1.0); assert(shrink <= grow/2.0); shrink_resize_percent = shrink; enlarge_resize_percent = grow; reset_thresholds(); } // CONSTRUCTORS -- as required by the specs, we take a size, // but also let you specify a hashfunction, key comparator, // and key extractor. We also define a copy constructor and =. // DESTRUCTOR -- the default is fine, surprisingly. explicit sparse_hashtable(size_type n = 0, const HashFcn& hf = HashFcn(), const EqualKey& eql = EqualKey(), const ExtractKey& ext = ExtractKey()) : hash(hf), equals(eql), get_key(ext), num_deleted(0), use_deleted(false), delval(), enlarge_resize_percent(HT_OCCUPANCY_FLT), shrink_resize_percent(HT_EMPTY_FLT), table(min_size(0, n)) { // start small reset_thresholds(); } // As a convenience for resize(), we allow an optional second argument // which lets you make this new hashtable a different size than ht. // We also provide a mechanism of saying you want to "move" the ht argument // into us instead of copying. sparse_hashtable(const sparse_hashtable& ht, size_type min_buckets_wanted = 0) : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0), use_deleted(ht.use_deleted), delval(ht.delval), enlarge_resize_percent(ht.enlarge_resize_percent),
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -