sparsehashtable.h
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Craig Silverstein
//
// A sparse hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory use.
// It does this by using a *sparse table* (cf sparsetable.h),
// which uses between 1 and 2 bits to store empty buckets
// (we may need another bit for hashtables that support deletion).
//
// When empty buckets are so cheap, an appealing hashtable
// implementation is internal probing, in which the hashtable
// is a single table, and collisions are resolved by trying
// to insert again in another bucket.  The most cache-efficient
// internal probing schemes are linear probing (which suffers,
// alas, from clumping) and quadratic probing, which is what
// we implement by default.
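//
// (A rough cost sketch, added for concreteness rather than taken from
// the original text: at ~2 bits per bucket of emptiness metadata, a
// sparse table with 2^20 buckets spends about 2^21 bits = 256KB marking
// which buckets are empty, while a dense array of the same size would
// spend 2^20 * sizeof(value) bytes -- several megabytes for even a
// small value type.)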
//
// Deleted buckets are a bit of a pain.  We have to somehow mark
// deleted buckets (the probing must distinguish them from empty
// buckets).  The most principled way is to have another bitmap,
// but that's annoying and takes up space.  Instead we let the
// user specify an "impossible" key.  We set deleted buckets
// to have the impossible key.
//
// Note it is possible to change the value of the deleted key
// on the fly; you can even remove it, though after that point
// the hashtable is insert_only until you set it again.
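//
// A minimal usage sketch of the deleted-key machinery, written against
// the public sparse_hash_map wrapper rather than this internal class;
// the -1 sentinel is just an example of an "impossible" key:
//
//   google::sparse_hash_map<int, std::string> m;
//   m.set_deleted_key(-1);    // -1 must never be inserted for real
//   m[42] = "hello";
//   m.erase(42);              // that bucket is now marked with key -1
//   m.clear_deleted_key();    // fine, but m is insert-only until reset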
//
// You probably shouldn't use this code directly.  Use
// <google/sparse_hash_map> or <google/sparse_hash_set> instead.
//
// You can modify the following, below:
// HT_OCCUPANCY_PCT            -- how full before we double size
// HT_EMPTY_PCT                -- how empty before we halve size
// HT_MIN_BUCKETS              -- smallest bucket size
// HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time
//
// You can also change enlarge_factor (which defaults to
// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
// HT_EMPTY_PCT) with set_resizing_parameters().
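//
// For example (illustrative numbers, not a recommendation):
//
//   ht.set_resizing_parameters(0.0, 0.8);  // never shrink; grow at 80% full
//
// where the first argument is shrink_factor and the second is
// enlarge_factor, matching the defaults described above.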
//
// How to decide what values to use?
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For enlarge_factor, you can use this chart to try to trade off
// expected lookup time against the space taken up.  By default, this
// code uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP       Successful             Unsuccessful
// Quadratic collision resolution  1 - ln(1-L) - L/2      1/(1-L) - L - ln(1-L)
// Linear collision resolution     [1+1/(1-L)]/2          [1+1/(1-L)^2]/2
//
// -- enlarge_factor --           0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
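//
// (A worked check of the chart, added for clarity: at load factor
// L = 0.80 with quadratic probing, the successful-lookup formula gives
// 1 - ln(1-L) - L/2 = 1 + 1.609 - 0.400 = 2.21 probes, matching the
// 0.80 column above.)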
//
// The value type is required to be copy constructible and default
// constructible, but it need not be (and commonly isn't) assignable.

#ifndef _SPARSEHASHTABLE_H_
#define _SPARSEHASHTABLE_H_

#ifndef SPARSEHASH_STAT_UPDATE
#define SPARSEHASH_STAT_UPDATE(x) ((void) 0)
#endif

// The probing method
// Linear probing
// #define JUMP_(key, num_probes)    ( 1 )
// Quadratic probing
#define JUMP_(key, num_probes)    ( num_probes )
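
// A sketch of the probe sequence this macro produces (not part of the
// original source): the lookup loop below computes
//   ++num_probes;
//   bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1);
// so with quadratic JUMP_ the increments are 1, 2, 3, ..., and the
// cumulative displacement after i probes is the triangular number
// i*(i+1)/2.  With a power-of-two bucket count this sequence visits
// every bucket before repeating, so the probe loop cannot cycle early.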

#include <stk_util/util/sparseconfig.h>
#include <assert.h>
#include <algorithm>                 // For swap(), eg
#include <stdexcept>                 // For length_error
#include <iterator>                  // for facts about iterator tags
#include <limits>                    // for numeric_limits<>
#include <utility>                   // for pair<>
#include <stk_util/util/hashtable-common.h>
#include <stk_util/util/sparsetable> // Since that's basically what we are

_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups.  Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
static const u_int16_t DEFAULT_GROUP_SIZE = 48;   // fits in 1.5 words

// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.
//
// Value: what is stored in the table (each bucket is a Value).
// Key: something in a 1-to-1 correspondence to a Value, that can be used
//      to search for a Value in the table (find() takes a Key).
// HashFcn: Takes a Key and returns an integer, the more unique the better.
// ExtractKey: given a Value, returns the unique Key associated with it.
//             Must inherit from unary_function, or at least have a
//             result_type enum indicating the return type of operator().
// SetKey: given a Value* and a Key, modifies the value such that
//         ExtractKey(value) == key.  We guarantee this is only called
//         with key == deleted_key.
// EqualKey: Given two Keys, says whether they are the same (that is,
//           if they are both associated with the same Value).
// Alloc: STL allocator to use to allocate memory.

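// As an illustration (hypothetical code, not part of this header), the
// functors for a map-like instantiation might look like this; the
// public sparse_hash_map wrapper supplies equivalents, so you rarely
// write them yourself:
//
//   typedef std::pair<const int, std::string> MapValue;
//   struct SelectKey {   // ExtractKey
//     typedef const int& result_type;
//     const int& operator()(const MapValue& v) const { return v.first; }
//   };
//   struct SetKey {      // only ever called with the deleted key
//     void operator()(MapValue* v, const int& k) const {
//       *const_cast<int*>(&v->first) = k;
//     }
//   };
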
template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class sparse_hashtable;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator;

// As far as iterating, we're basically just a sparsetable
// that skips over deleted elements.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::nonempty_iterator
      st_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                            st_iterator it, st_iterator it_end)
      : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
  sparse_hashtable_iterator() { }      // not ever used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const  { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while ( pos != end && ht->test_deleted(*this) )
      ++pos;
  }
  iterator& operator++()   {
    assert(pos != end); ++pos; advance_past_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }


  // The actual data
  const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  st_iterator pos, end;
};

// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::const_nonempty_iterator
      st_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::const_reference reference;
  typedef typename value_alloc_type::const_pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_const_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                                  st_iterator it, st_iterator it_end)
      : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
  // This lets us convert regular iterators to const iterators
  sparse_hashtable_const_iterator() { }      // never used internally
  sparse_hashtable_const_iterator(const iterator &it)
      : ht(it.ht), pos(it.pos), end(it.end) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const  { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while ( pos != end && ht->test_deleted(*this) )
      ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end); ++pos; advance_past_deleted(); return *this;
  }
  const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }


  // The actual data
  const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  st_iterator pos, end;
};

// And once again, but this time freeing up memory as we iterate
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_destructive_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::destructive_iterator
      st_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_destructive_iterator(const
                                        sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                                        st_iterator it, st_iterator it_end)
      : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
  sparse_hashtable_destructive_iterator() { }      // never used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const  { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while ( pos != end && ht->test_deleted(*this) )
      ++pos;
  }
  iterator& operator++()   {
    assert(pos != end); ++pos; advance_past_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }


  // The actual data
  const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  st_iterator pos, end;
};


template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class sparse_hashtable {
 private:
  typedef typename Alloc::template rebind<Value>::other value_alloc_type;

 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey,
                                    SetKey, EqualKey, Alloc>
      iterator;

  typedef sparse_hashtable_const_iterator<Value, Key, HashFcn, ExtractKey,
                                          SetKey, EqualKey, Alloc>
      const_iterator;

  typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey,
                                                SetKey, EqualKey, Alloc>
      destructive_iterator;

  // These come from tr1.  For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT;  // = 80 (out of 100);

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT;  // = 0.4 * HT_OCCUPANCY_PCT;

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size.  Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;

  // ITERATOR FUNCTIONS
  iterator begin()             { return iterator(this, table.nonempty_begin(),
                                                 table.nonempty_end()); }
  iterator end()               { return iterator(this, table.nonempty_end(),
                                                 table.nonempty_end()); }
  const_iterator begin() const { return const_iterator(this,
                                                       table.nonempty_begin(),
                                                       table.nonempty_end()); }
  const_iterator end() const   { return const_iterator(this,
                                                       table.nonempty_end(),
                                                       table.nonempty_end()); }

  // These come from tr1 unordered_map.  They iterate over 'bucket' n.
  // For sparsehashtable, we could consider each 'group' to be a bucket,
  // I guess, but I don't really see the point.  We'll just consider
  // bucket n to be the n-th element of the sparsetable, if it's occupied,
  // or some empty element, otherwise.
  local_iterator begin(size_type i) {
    if (table.test(i))
      return local_iterator(this, table.get_iter(i), table.nonempty_end());
    else
      return local_iterator(this, table.nonempty_end(), table.nonempty_end());
  }
  local_iterator end(size_type i) {
    local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i))
      ++it;
    return it;
  }
  const_local_iterator begin(size_type i) const {
    if (table.test(i))
      return const_local_iterator(this, table.get_iter(i),
                                  table.nonempty_end());
    else
      return const_local_iterator(this, table.nonempty_end(),
                                  table.nonempty_end());
  }
  const_local_iterator end(size_type i) const {
    const_local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i))
      ++it;
    return it;
  }

  // This is used when resizing
  destructive_iterator destructive_begin() {
    return destructive_iterator(this, table.destructive_begin(),
                                table.destructive_end());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(this, table.destructive_end(),
                                table.destructive_end());
  }


  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const            { return settings; }
  key_equal key_eq() const             { return key_info; }
  allocator_type get_allocator() const { return table.get_allocator(); }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }

 private:
  // We need to copy values when we set the special marker for deleted
  // elements, but, annoyingly, we can't just use the copy assignment
  // operator because value_type might not be assignable (it's often
  // pair<const X, Y>).  We use explicit destructor invocation and
  // placement new to get around this.  Arg.
  void set_value(pointer dst, const_reference src) {
    dst->~value_type();   // delete the old value, if any
    new(dst) value_type(src);
  }

  // This is used as a tag for the copy constructor, saying to destroy its
  // arg.  We have two ways of destructively copying: with potentially
  // growing the hashtable as we copy, and without.  To make sure the
  // outside world can't do a destructive copy, we make the typename private.
  enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {           // gets rid of any deleted entries we have
    if ( num_deleted ) {            // get rid of deleted before writing
      sparse_hashtable tmp(MoveDontGrow, *this);
      swap(tmp);                    // now we are tmp
    }
    assert(num_deleted == 0);
  }

  bool test_deleted_key(const key_type& key) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    // Invariant: !use_deleted implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && equals(key_info.delkey, key);
  }

 public:
  void set_deleted_key(const key_type &key) {
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  key_type deleted_key() const {
    assert(settings.use_deleted()
           && "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is the "deleted" marker
  bool test_deleted(size_type bucknum) const {
    if (num_deleted == 0 || !table.test(bucknum)) return false;
    return test_deleted_key(get_key(table.unsafe_get(bucknum)));
  }
  bool test_deleted(const iterator &it) const {
    if (!settings.use_deleted()) return false;
    return test_deleted_key(get_key(*it));
  }
  bool test_deleted(const const_iterator &it) const {
    if (!settings.use_deleted()) return false;
    return test_deleted_key(get_key(*it));
  }
  bool test_deleted(const destructive_iterator &it) const {
    if (!settings.use_deleted()) return false;
    return test_deleted_key(get_key(*it));
  }

 private:
  // Set it so test_deleted is true.  Returns true if the object wasn't
  // already marked deleted.
  // TODO(csilvers): make these private (also in densehashtable.h)
  bool set_deleted(iterator &it) {
    assert(settings.use_deleted());
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted.
  bool clear_deleted(iterator &it) {
    assert(settings.use_deleted());
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow to set/clear the deleted bit on a const iterator.
  // We allow a const_iterator for the same reason you can delete a
  // const pointer: it's convenient, and semantically you can't use
  // 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator &it) {
    assert(settings.use_deleted());   // bad if set_deleted_key() wasn't called
    bool retval = !test_deleted(it);
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted.
  bool clear_deleted(const_iterator &it) {
    assert(settings.use_deleted());   // bad if set_deleted_key() wasn't called
    return test_deleted(it);
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const             { return table.num_nonempty() - num_deleted; }
  size_type max_size() const         { return table.max_size(); }
  bool empty() const                 { return size() == 0; }
  size_type bucket_count() const     { return table.size(); }
  size_type max_bucket_count() const { return max_size(); }
  // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
  // what we do.  We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes.  Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking.  Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(table.num_nonempty() >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0);   // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = table.num_nonempty() - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < static_cast<size_type>(sz * shrink_factor)) {
        sz /= 2;                            // stay a power of 2
      }
      sparse_hashtable tmp(MoveDontCopy, *this, sz);
      swap(tmp);                            // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);    // because we just considered it
    return retval;
  }
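
  // A numeric walkthrough of the shrink loop above (illustrative values
  // only): with bucket_count() == 1024, shrink_factor == 0.32 (i.e.
  // 0.4 * 0.8) and 100 live elements, sz starts at 512; since
  // 100 < 512*0.32 == 163.8, sz halves to 256; 100 < 256*0.32 == 81.9
  // is false, so we stop and move into a 256-bucket table.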

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if ( settings.consider_shrink() ) {  // see if lots of deletes happened
      if ( maybe_shrink() )
        did_resize = true;
    }
    if (table.num_nonempty() >=
        (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
      throw std::length_error("resize overflow");
    if ( bucket_count() >= HT_MIN_BUCKETS &&
         (table.num_nonempty() + delta) <= settings.enlarge_threshold() )
      return did_resize;                 // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size =
        settings.min_buckets(table.num_nonempty() + delta, 0);
    if ( needed_size <= bucket_count() )  // we have enough buckets
      return did_resize;

    size_type resize_to =
        settings.min_buckets(table.num_nonempty() - num_deleted + delta,
                             bucket_count());
    if (resize_to < needed_size &&        // may double resize_to
        resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow.  But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert.  Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to*2));
      if (table.num_nonempty() - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }

    sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
    swap(tmp);                            // now we are tmp
    return true;
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to =
        settings.min_buckets(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {  // we don't have enough buckets
      table.resize(resize_to);           // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht.
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;              // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           table.test(bucknum);                             // not empty
           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()
               && "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);               // copies the value to here
    }
    settings.inc_num_ht_copies();
  }

  // Implementation is like copy_from, but it destroys the table of the
  // "from" guy by freeing sparsetable memory as we iterate.  This is
  // useful in resizing, since we're throwing away the "from" guy anyway.
  void move_from(MoveDontCopyT mover, sparse_hashtable &ht,
                 size_type min_buckets_wanted) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    size_type resize_to;
    if ( mover == MoveDontGrow )
      resize_to = ht.bucket_count();       // keep same size as old ht
    else                                   // MoveDontCopy
      resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {  // we don't have enough buckets
      table.resize(resize_to);           // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht.
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert( (bucket_count() & (bucket_count()-1)) == 0);     // a power of two
    // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
    for ( destructive_iterator it = ht.destructive_begin();
          it != ht.destructive_end(); ++it ) {
      size_type num_probes = 0;              // how many times we've probed
      size_type bucknum;
      for ( bucknum = hash(get_key(*it)) & (bucket_count()-1);  // h % buck_cnt
            table.test(bucknum);                               // not empty
            bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1) ) {
        ++num_probes;
        assert(num_probes < bucket_count()
               && "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);               // copies the value to here
    }
    settings.inc_num_ht_copies();
  }


  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {       // resize to this or larger
    if ( settings.consider_shrink() || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > table.num_nonempty() )    // we only grow
      resize_delta(req_elements - table.num_nonempty());
  }

  // Get and change the value of shrink_factor and enlarge_factor.  The
  // description at the beginning of this file explains how to choose
  // the values.  Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- the default is fine, surprisingly.
  explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
                            const HashFcn& hf = HashFcn(),
                            const EqualKey& eql = EqualKey(),
                            const ExtractKey& ext = ExtractKey(),
                            const SetKey& set = SetKey(),
                            const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        table((expected_max_items_in_table == 0
               ? HT_DEFAULT_STARTING_BUCKETS
               : settings.min_buckets(expected_max_items_in_table, 0)),
              alloc) {
    settings.reset_thresholds(bucket_count());
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // We also provide a mechanism of saying you want to "move" the ht argument
  // into us instead of copying.
  sparse_hashtable(const sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
  }
  sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
  }

  sparse_hashtable& operator= (const sparse_hashtable& ht) {
    if (&ht == this) return *this;        // don't copy onto ourselves
    settings = ht.settings;
    key_info = ht.key_info;
    num_deleted = ht.num_deleted;
    // copy_from() calls clear and sets num_deleted to 0 too
    copy_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparse_hashtable& ht) {
    STL_NAMESPACE::swap(settings, ht.settings);
    STL_NAMESPACE::swap(key_info, ht.key_info);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    table.swap(ht.table);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    if (!empty() || (num_deleted != 0)) {
      table.clear();
    }
    settings.reset_thresholds(bucket_count());
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;              // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    SPARSEHASH_STAT_UPDATE(total_lookups += 1);
    while ( 1 ) {                          // probe until something happens
      if ( !table.test(bucknum) ) {        // bucket is empty
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        if ( insert_pos == ILLEGAL_BUCKET )  // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) {  // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table.unsafe_get(bucknum))) ) {
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                        // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()
             && "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }
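
  // A sketch of how the returned pair is consumed (mirroring
  // insert_noresize() below):
  //   pair<size_type, size_type> pos = find_position(key);
  //   if (pos.first != ILLEGAL_BUCKET)   // key lives in bucket pos.first
  //     ...
  //   else                               // absent; insert at pos.second
  //     ...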

 public:
  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
      return end();
    else
      return iterator(this, table.get_iter(pos.first), table.nonempty_end());
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
      return end();
    else
      return const_iterator(this,
                            table.get_iter(pos.first), table.nonempty_end());
  }

  // This is a tr1 method: the bucket a given key is in, or what bucket
  // it would be put in, if it were to be inserted.  Shrug.
  size_type bucket(const key_type& key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
  }

  // Counts how many elements have key key.  For maps, it's either 0 or 1.
  size_type count(const key_type &key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    iterator pos = find(key);      // either an iterator or end
    if (pos == end()) {
      return pair<iterator,iterator>(pos, pos);
    } else {
      const iterator startpos = pos++;
      return pair<iterator,iterator>(startpos, pos);
    }
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const_iterator pos = find(key);      // either an iterator or end
    if (pos == end()) {
      return pair<const_iterator,const_iterator>(pos, pos);
    } else {
      const const_iterator startpos = pos++;
      return pair<const_iterator,const_iterator>(startpos, pos);
    }
  }


  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size())
      throw std::length_error("insert overflow");
    if ( test_deleted(pos) ) {      // just replace if it's been deleted
      // The set() below will undelete this object.  We just worry about stats
      assert(num_deleted > 0);
      --num_deleted;                // used to be, now it isn't
    }
    table.set(pos, obj);
    return iterator(this, table.get_iter(pos), table.nonempty_end());
  }

  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey
    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
      return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
                                          table.nonempty_end()),
                                 false);     // false: we didn't insert
    } else {                                 // pos.second says where to put it
      return pair<iterator,bool>(insert_at(obj, pos.second), true);
    }
  }

  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_t dist = STL_NAMESPACE::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)())
      throw std::length_error("insert-range overflow");
    resize_delta(static_cast<size_type>(dist));
    for ( ; dist > 0; --dist, ++f) {
      insert_noresize(*f);
    }
  }

  // (2) Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);
  }

 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const_reference obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l,
           typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting delkey
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(key);
    DefaultValue default_value;
    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
      return *table.get_iter(pos.first);
    } else if (resize_delta(1)) {        // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(default_value(key)).first;
    } else {                             // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }
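
  // A sketch of a DefaultValue functor for a map-like table (hypothetical;
  // the public wrappers provide their own equivalent):
  //   struct DefaultValue {
  //     std::pair<const int, std::string> operator()(const int& k) const {
  //       // pair the key with a default-constructed mapped value
  //       return std::make_pair(k, std::string());
  //     }
  //   };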

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    // First, double-check we're not erasing delkey.
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Erasing the deleted key");
    const_iterator pos = find(key);   // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));     // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
      return 1;                       // because we deleted one thing
    } else {
      return 0;                       // because we deleted nothing
    }
  }

  // Erase the element pointed to by pos.  (Note: despite the spec's
  // convention, this returns void, not the iterator past the deleted item.)
  void erase(iterator pos) {
    if ( pos == end() ) return;    // sanity check
    if ( set_deleted(pos) ) {      // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }

  void erase(iterator f, iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )        // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }

  // We allow you to erase a const_iterator just like we allow you to
  // erase an iterator.  This is in parallel to 'delete': you can delete
  // a const pointer just like a non-const pointer.  The logic is that
  // you can't use the object after it's erased anyway, so it doesn't matter
  // if it's const or not.
  void erase(const_iterator pos) {
    if ( pos == end() ) return;    // sanity check
    if ( set_deleted(pos) ) {      // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }
  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )        // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }


  // COMPARISON
  bool operator==(const sparse_hashtable& ht) const {
    if (size() != ht.size()) {
      return false;
    } else if (this == &ht) {
      return true;
    } else {
      // Iterate through the elements in "this" and see if the
      // corresponding element is in ht
      for ( const_iterator it = begin(); it != end(); ++it ) {
        const_iterator it2 = ht.find(get_key(*it));
        if ((it2 == ht.end()) || (*it != *it2)) {
          return false;
        }
      }
      return true;
    }
  }
  bool operator!=(const sparse_hashtable& ht) const {
    return !(*this == ht);
  }


  // I/O
  // We support reading and writing hashtables to disk.  NOTE that
  // this only stores the hashtable metadata, not the stuff you've
  // actually put in the hashtable!  Alas, since I don't know how to
  // write a hasher or key_equal, you have to make sure everything
  // but the table is the same.  We compact before writing.
  bool write_metadata(FILE *fp) {
    squash_deleted();           // so we don't have to worry about delkey
    return table.write_metadata(fp);
  }

  bool read_metadata(FILE *fp) {
    num_deleted = 0;            // since we got rid before writing
    bool result = table.read_metadata(fp);
    settings.reset_thresholds(bucket_count());
    return result;
  }

  // Only meaningful if value_type is a POD.
  bool write_nopointer_data(FILE *fp) {
    return table.write_nopointer_data(fp);
  }

  // Only meaningful if value_type is a POD.
  bool read_nopointer_data(FILE *fp) {
    return table.read_nopointer_data(fp);
  }
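
  // A round-trip sketch (assuming a POD value_type and already-open
  // FILE* handles; error handling elided):
  //   ht.write_metadata(fp);          // compacts, then writes table layout
  //   ht.write_nopointer_data(fp);    // raw bytes of the stored values
  //   ...
  //   ht2.read_metadata(fp2);         // ht2 must use the same functors
  //   ht2.read_nopointer_data(fp2);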

 private:
  // Table is the main storage class.
  typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;

  // Package templated functors with the other types to eliminate memory
  // needed for storing these zero-size operators.  Since ExtractKey and
  // hasher's operator() might have the same function signature, they
  // must be packaged in different classes.
  struct Settings :
      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
            hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };

  // KeyInfo stores delete key and packages zero-size functors:
  // ExtractKey and SetKey.
  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
        : ExtractKey(ek),
          SetKey(sk),
          key_equal(eq) {
    }
    // We want to return the exact same type as ExtractKey: Key or const Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    bool equals(const key_type& a, const key_type& b) const {
      return key_equal::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename remove_const<key_type>::type delkey;
  };

  // Utility functions to access the templated operators
  size_type hash(const key_type& v) const {
    return settings.hash(v);
  }
  bool equals(const key_type& a, const key_type& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const {
    key_info.set_key(v, k);
  }

 private:
  // Actual data
  Settings settings;
  KeyInfo key_info;
  size_type num_deleted;   // how many occupied buckets are marked deleted
  Table table;             // holds num_buckets and num_elements too
};


// We need a global swap as well
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
                 sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
    sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 80;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
    = static_cast<int>(0.4 *
                       sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);

_END_GOOGLE_NAMESPACE_

#endif /* _SPARSEHASHTABLE_H_ */