sparsehashtable.h
  shrink_resize_percent(ht.shrink_resize_percent),
  table() {
    reset_thresholds();
    copy_from(ht, min_buckets_wanted);    // copy_from() ignores deleted entries
  }

  sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                   size_type min_buckets_wanted = 0)
      : hash(ht.hash), equals(ht.equals), get_key(ht.get_key),
        num_deleted(0), use_deleted(ht.use_deleted), delval(ht.delval),
        enlarge_resize_percent(ht.enlarge_resize_percent),
        shrink_resize_percent(ht.shrink_resize_percent),
        table() {
    reset_thresholds();
    move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
  }

  sparse_hashtable& operator=(const sparse_hashtable& ht) {
    if (&ht == this) return *this;        // don't copy onto ourselves
    clear();
    hash = ht.hash;
    equals = ht.equals;
    get_key = ht.get_key;
    use_deleted = ht.use_deleted;
    set_value(&delval, ht.delval);
    copy_from(ht);                        // sets num_deleted to 0 too
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparse_hashtable& ht) {
    STL_NAMESPACE::swap(hash, ht.hash);
    STL_NAMESPACE::swap(equals, ht.equals);
    STL_NAMESPACE::swap(get_key, ht.get_key);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
    STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent);
    STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent);
    {
      value_type tmp;                     // for annoying reasons, swap() doesn't work
      set_value(&tmp, delval);
      set_value(&delval, ht.delval);
      set_value(&ht.delval, tmp);
    }
    table.swap(ht.table);
    reset_thresholds();
    ht.reset_thresholds();
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    table.clear();
    reset_thresholds();
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type& key) const {
    size_type num_probes = 0;             // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    SPARSEHASH_STAT_UPDATE(total_lookups += 1);
    while ( 1 ) {                         // probe until something happens
      if ( !table.test(bucknum) ) {       // bucket is empty
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        if ( insert_pos == ILLEGAL_BUCKET )  // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
      } else if ( test_deleted(bucknum) ) {  // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;
      } else if ( equals(key, get_key(table.get(bucknum))) ) {
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                       // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()); // don't probe too many times!
    }
  }
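  // Illustrative note (not in the original source): the probe sequence above
  // depends on the JUMP_ macro, which the full header defines for either
  // linear or quadratic probing.  Assuming the quadratic default, where
  // JUMP_(key, num_probes) == num_probes, a key hashing to bucket h in a
  // power-of-two table visits
  //   h, h+1, h+3, h+6, h+10, ...     (offsets are the triangular numbers)
  // each masked by bucket_count()-1.  The assert() above stops us (in debug
  // builds) from probing more times than there are buckets.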
 public:
  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )    // alas, not there
      return end();
    else
      return iterator(this, table.get_iter(pos.first), table.nonempty_end());
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )    // alas, not there
      return end();
    else
      return const_iterator(this, table.get_iter(pos.first),
                            table.nonempty_end());
  }

  // Counts how many elements have key 'key'.  For maps, it's either 0 or 1.
  size_type count(const key_type& key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    const iterator pos = find(key);       // either an iterator or end
    return pair<iterator,iterator>(pos, pos);
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const const_iterator pos = find(key); // either an iterator or end
    return pair<const_iterator,const_iterator>(pos, pos);
  }

  // INSERTION ROUTINES
 private:
  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const value_type& obj) {
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET ) {  // object was already there
      return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
                                          table.nonempty_end()),
                                 false);  // false: we didn't insert
    } else {                              // pos.second says where to put it
      if ( test_deleted(pos.second) ) {   // just replace if it's been deleted
        // The set() below will undelete this object.  We just worry about stats
        assert(num_deleted > 0);
        --num_deleted;                    // used to be, now it isn't
      }
      table.set(pos.second, obj);
      return pair<iterator,bool>(iterator(this, table.get_iter(pos.second),
                                          table.nonempty_end()),
                                 true);   // true: we did insert
    }
  }

 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const value_type& obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {  // specializes on iterator type
    insert(f, l,
           typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_type n = STL_NAMESPACE::distance(f, l);   // TODO(csilvers): standard?
    resize_delta(n);
    for ( ; n > 0; --n, ++f)
      insert_noresize(*f);
  }

  // Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);
  }

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    const_iterator pos = find(key);       // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));         // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      consider_shrink = true;             // will think about shrink after next insert
      return 1;                           // because we deleted one thing
    } else {
      return 0;                           // because we deleted nothing
    }
  }
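  // Illustrative note (not in the original source): erase() never truly
  // empties a bucket; set_deleted() turns it into a "tombstone" that
  // test_deleted() recognizes.  That keeps probe chains intact, so a lookup
  // for another key that had collided past the erased slot still finds its
  // way.  find_position() remembers the first tombstone it passes in
  // insert_pos, and insert_noresize() reuses that slot (decrementing
  // num_deleted) when the key turns out not to be present.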
  // This is really evil: really it should be iterator, not const_iterator.
  // But...the only reason keys are const is to allow lookup.
  // Since that's a moot issue for deleted keys, we allow const_iterators
  void erase(const_iterator pos) {
    if ( pos == end() ) return;           // sanity check
    if ( set_deleted(pos) ) {             // true if object has been newly deleted
      ++num_deleted;
      consider_shrink = true;             // will think about shrink after next insert
    }
  }

  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )               // should always be true
        ++num_deleted;
    }
    consider_shrink = true;               // will think about shrink after next insert
  }

  // COMPARISON
  bool operator==(const sparse_hashtable& ht) const {
    // We really want to check that the hash functions are the same
    // but alas there's no way to do this.  We just hope.
    return ( num_deleted == ht.num_deleted && table == ht.table );
  }
  bool operator!=(const sparse_hashtable& ht) const {
    return !(*this == ht);
  }

  // I/O
  // We support reading and writing hashtables to disk.  NOTE that
  // this only stores the hashtable metadata, not the stuff you've
  // actually put in the hashtable!  Alas, since I don't know how to
  // write a hasher or key_equal, you have to make sure everything
  // but the table is the same.  We compact before writing.
  bool write_metadata(FILE *fp) {
    squash_deleted();                     // so we don't have to worry about delkey
    return table.write_metadata(fp);
  }

  bool read_metadata(FILE *fp) {
    num_deleted = 0;                      // since we got rid of them before writing
    bool result = table.read_metadata(fp);
    reset_thresholds();
    return result;
  }

  // Only meaningful if value_type is a POD.
  bool write_nopointer_data(FILE *fp) {
    return table.write_nopointer_data(fp);
  }

  // Only meaningful if value_type is a POD.
  bool read_nopointer_data(FILE *fp) {
    return table.read_nopointer_data(fp);
  }

 private:
  // The actual data
  hasher hash;                            // required by hashed_associative_container
  key_equal equals;
  ExtractKey get_key;
  size_type num_deleted;                  // how many occupied buckets are marked deleted
  bool use_deleted;                       // false until delval has been set
  value_type delval;                      // which key marks deleted entries
  float enlarge_resize_percent;           // how full before resize
  float shrink_resize_percent;            // how empty before resize
  size_type shrink_threshold;             // table.size() * shrink_resize_percent
  size_type enlarge_threshold;            // table.size() * enlarge_resize_percent
  sparsetable<value_type> table;          // holds num_buckets and num_elements too
  bool consider_shrink;                   // true if we should try to shrink before next insert

  void reset_thresholds() {
    enlarge_threshold = static_cast<size_type>(table.size()
                                               * enlarge_resize_percent);
    shrink_threshold = static_cast<size_type>(table.size()
                                              * shrink_resize_percent);
    consider_shrink = false;  // whatever caused us to reset has already been considered
  }
};

// We need a global swap as well
template <class V, class K, class HF, class ExK, class EqK, class A>
inline void swap(sparse_hashtable<V,K,HF,ExK,EqK,A>& x,
                 sparse_hashtable<V,K,HF,ExK,EqK,A>& y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class EqK, class A>
const typename sparse_hashtable<V,K,HF,ExK,EqK,A>::size_type
    sparse_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
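// Worked example (illustrative, not part of the original source): if the
// resize percentages are left at the defaults defined just below
// (HT_OCCUPANCY_FLT = 0.8, HT_EMPTY_FLT = 0.32), a 32-bucket table gets
//   enlarge_threshold = static_cast<size_type>(32 * 0.8)  = 25
//   shrink_threshold  = static_cast<size_type>(32 * 0.32) = 10
// since the casts in reset_thresholds() truncate 25.6 and 10.24 toward zero.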
// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory
template <class V, class K, class HF, class ExK, class EqK, class A>
const float sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.8f;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class EqK, class A>
const float sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT
    = 0.4f * sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;

_END_GOOGLE_NAMESPACE_

#endif /* _SPARSEHASHTABLE_H_ */
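For context, here is a minimal usage sketch of the public wrapper built on top
of this table. It is an illustration, not part of the header: it assumes the
classic sparsehash package, where sparse_hash_map wraps sparse_hashtable and
is included as <google/sparse_hash_map> (newer releases use
<sparsehash/sparse_hash_map>). Note that set_deleted_key() must be called
before any erase() -- that call is what sets use_deleted and delval above.

#include <cstdio>
#include <string>
#include <google/sparse_hash_map>  // assumed include path, see note above

int main() {
  google::sparse_hash_map<std::string, int> counts;
  // Required before erase(): designates the sentinel that marks deleted
  // buckets.  The chosen key must never be inserted as a real key.
  counts.set_deleted_key("");
  counts["apple"] = 2;           // goes through insert() -> insert_noresize()
  counts["pear"]  = 3;
  counts.erase("apple");         // leaves a tombstone and bumps num_deleted
  std::printf("pear -> %d, size = %lu\n", counts["pear"],
              static_cast<unsigned long>(counts.size()));
  return 0;
}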