TPIE

2362a60
hash_map.h
Go to the documentation of this file.
1 // -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*-
2 // vi:set ts=4 sts=4 sw=4 noet :
3 // Copyright 2010, 2011, 2012 The TPIE development team
4 //
5 // This file is part of TPIE.
6 //
7 // TPIE is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation, either version 3 of the License, or (at your
10 // option) any later version.
11 //
12 // TPIE is distributed in the hope that it will be useful, but WITHOUT ANY
13 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 // License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with TPIE. If not, see <http://www.gnu.org/licenses/>
19 #ifndef __TPIE_HASHMAP_H__
20 #define __TPIE_HASHMAP_H__
21 
25 
26 #include <tpie/array.h>
27 #include <tpie/unused.h>
28 #include <cmath>
29 #include <algorithm>
30 #include <iostream>
31 #include <tpie/prime.h>
32 #include <tpie/hash.h>
33 
34 namespace tpie {
35 
43 template <typename value_t, typename hash_t, typename equal_t, typename index_t>
45 private:
46  static const float sc;
47 
48 #pragma pack(push, 1)
49  struct bucket_t {
53  value_t value;
54  index_t next;
55  };
56 #pragma pack(pop)
57 
58  size_t first_free;
59  array<index_t> list;
60  array<bucket_t> buckets;
61 
62  hash_t h;
63  equal_t e;
64 public:
66  size_t size;
67 
69  value_t unused;
70 
75  static double memory_coefficient() {
77  }
78 
83  static double memory_overhead() {
84  return array<index_t>::memory_coefficient() * 100.0
87  - sizeof(array<index_t>)
88  - sizeof(array<bucket_t>);
89  }
90 
95  inline value_t & get(size_t idx) {return buckets[idx].value;}
96 
101  inline const value_t & get(size_t idx) const {return buckets[idx].value;}
102 
106  void clear() {
107  first_free = 0;
108  for (size_t i=0; i < buckets.size(); ++i) {
109  buckets[i].value = unused;
110  buckets[i].next = static_cast<index_t>(i+1);
111  }
112  for (typename array<index_t>::iterator i=list.begin(); i != list.end(); ++i)
113  *i = std::numeric_limits<index_t>::max();
114  }
115 
120  void resize(size_t z) {
121  buckets.resize(z);
122  size_t x=static_cast<size_t>(99+static_cast<double>(z)*sc)|1;
123  while (!is_prime(x)) x -= 2;
124  list.resize(x);
125  clear();
126  }
127 
136  value_t u,
137  const hash_t & hash,
138  const equal_t & equal): h(hash), e(equal), size(0), unused(u) {resize(ee);};
139 
143  inline size_t begin() {
144  if (size == 0) return buckets.size();
145  for(size_t i=0; true; ++i)
146  if (buckets[i].value != unused) return i;
147  }
148 
152  inline size_t end() const {return buckets.size();}
153 
158  inline size_t find(const value_t & value) const {
159  size_t v = list[h(value) % list.size()];
160  while (v != std::numeric_limits<index_t>::max()) {
161  if (e(buckets[v].value, value)) return v;
162  v = buckets[v].next;
163  }
164  return buckets.size();
165  }
166 
173  inline std::pair<size_t, bool> insert(const value_t & val) {
174  // First, look for the value in table.
175  size_t hv = h(val) % list.size();
176  size_t v = list[hv];
177  while (v != std::numeric_limits<index_t>::max()) {
178  if (e(buckets[v].value, val)) return std::make_pair(v, false);
179  v = buckets[v].next;
180  }
181  // It wasn't found. Insert into free bucket.
182  v = first_free;
183  first_free = buckets[v].next;
184  buckets[v].value = val;
185  buckets[v].next = list[hv];
186  list[hv] = v;
187  ++size;
188  return std::make_pair(v, true);
189  }
190 
195  inline void erase(const value_t & val) {
196  size_t hv = h(val) % list.size();
197  size_t cur = list[hv];
198  size_t prev = std::numeric_limits<size_t>::max();
199 
200  while (!e(buckets[cur].value, val)) {
201  prev = cur;
202  cur = buckets[cur].next;
203  }
204 
205  if (prev == std::numeric_limits<size_t>::max())
206  list[hv] = buckets[cur].next;
207  else
208  buckets[prev].next = buckets[cur].next;
209 
210  buckets[cur].next = first_free;
211  buckets[cur].value = unused;
212  first_free = cur;
213  --size;
214  }
215 };
216 
224 template <typename value_t, typename hash_t, typename equal_t, typename index_t>
226 private:
227  static const float sc;
228  array<value_t> elements;
229  hash_t h;
230  equal_t e;
231 public:
233  size_t size;
234 
236  value_t unused;
237 
242  static double memory_coefficient() {
244  }
245 
250  static double memory_overhead() {
251  return array<value_t>::memory_coefficient() * 100.0
253  }
254 
259  void clear() {
260  for (typename array<value_t>::iterator i=elements.begin(); i != elements.end(); ++i)
261  *i = unused;
262  }
263 
268  void resize(size_t element_count) {
269  size_t x=(99+static_cast<size_t>(static_cast<float>(element_count)*sc))|1;
270  while (!is_prime(x)) x -= 2;
271  elements.resize(x, unused);
272  }
273 
278  linear_probing_hash_table(size_t ee, value_t u,
279  const hash_t & hash, const equal_t & equal):
280  h(hash), e(equal), size(0), unused(u) {resize(ee);}
281 
286  inline size_t find(const value_t & value) const {
287  size_t v = h(value) % elements.size();
288  while (elements[v] != unused) {
289  if (e(elements[v], value)) return v;
290  v = (v + 1) % elements.size();
291  }
292  return elements.size();
293  }
294 
295 
300  inline size_t end() const {return elements.size();}
301 
306  inline size_t begin() const {
307  if (size == 0) return elements.size();
308  for(size_t i=0; true; ++i)
309  if (elements[i] != unused) return i;
310  }
311 
316  value_t & get(size_t idx) {return elements[idx];}
317 
322  const value_t & get(size_t idx) const {return elements[idx];}
323 
328  inline std::pair<size_t, bool> insert(const value_t & val) {
329  size_t v = h(val) % elements.size();
330  while (elements[v] != unused) {
331  if (e(elements[v],val)) {return std::make_pair(v, false);}
332  v = (v + 1) % elements.size();
333  }
334  ++size;
335  elements[v] = val;
336  return std::make_pair(v, true);
337  }
338 
343  inline void erase(const value_t & val) {
344  size_t slot = find(val);
345  size_t cur = (slot+1) % elements.size();
346  assert(size < elements.size());
347  while (elements[cur] != unused) {
348  size_t x = h(elements[cur]) % elements.size();
349  if (x <= slot && (cur > slot || x > cur)) {
350  elements[slot] = elements[cur];
351  slot = cur;
352  }
353  cur = (cur+1) % elements.size();
354  }
355  elements[slot] = unused;
356  --size;
357  }
358 };
359 
370 template <typename key_t,
371  typename data_t,
372  typename hash_t=hash<key_t>,
373  typename equal_t=std::equal_to<key_t>,
374  typename index_t=size_t,
375  template <typename, typename, typename, typename> class table_t = chaining_hash_table
376  >
377 class hash_map: public linear_memory_base< hash_map<key_t, data_t, hash_t, equal_t, index_t, table_t> > {
378 public:
379  typedef std::pair<key_t, data_t> value_t;
380 private:
384  struct key_equal_t {
385  equal_t e;
386  key_equal_t(const equal_t & equal): e(equal) {}
387  inline bool operator()(const value_t & a, const value_t & b) const {
388  return e(a.first, b.first);
389  }
390  };
391 
395  struct key_hash_t {
396  hash_t h;
397  key_hash_t(const hash_t & hash): h(hash) {}
398  inline size_t operator()(const value_t & a) const {
399  return h(a.first);
400  }
401  };
402 
403  typedef table_t<value_t, key_hash_t, key_equal_t, index_t> tbl_t;
404 
405  tbl_t tbl;
406 
410  template <typename IT>
411  class iter_base {
412  protected:
413  IT & tbl;
414  size_t cur;
415  iter_base(IT & t, size_t c): tbl(t), cur(c) {};
416  friend class hash_map::iterator;
417  friend class hash_map;
418  public:
419  inline const key_t & key() const {return tbl.get(cur).first;}
420  inline const data_t & value() const {return tbl.get(cur).second;}
421  inline const value_t & operator*() const {return tbl.get(cur);}
422  inline const value_t * operator->() const {return &tbl.get(cur);}
423 
424  template <typename IIT>
425  inline bool operator==(const iter_base<IIT> & o) const {return o.cur == cur;}
426  template <typename IIT>
427  inline bool operator!=(const iter_base<IIT> & o) const {return o.cur != cur;}
428  inline void operator++() {
429  ++cur;
430  while (cur != tbl.end()) {
431  if (tbl.get(cur) != tbl.unused) break;
432  ++cur;
433  }
434  }
435  };
436 public:
438  typedef iter_base<const tbl_t> const_iterator;
439 
443  class iterator: public iter_base<tbl_t> {
444  private:
445  typedef iter_base<tbl_t> p_t;
446  friend class hash_map;
447  inline iterator(tbl_t & tbl, size_t cur): p_t(tbl, cur) {}
448  using p_t::tbl;
449  using p_t::cur;
450  public:
451  inline key_t & key() {return tbl.get(cur)->first;}
452  inline data_t & value() {return tbl.get(cur)->second;}
453  inline value_t & operator*() {return tbl.get(cur);}
454  inline value_t * operator->() {return &tbl.get(cur);}
455  inline operator const_iterator() const {return const_iterator(tbl, cur);}
456  inline bool operator==(const const_iterator & o) const {return o.cur == cur;}
457  inline bool operator!=(const const_iterator & o) const {return o.cur != cur;}
458  };
459 
460 
465  static double memory_coefficient() {
466  return tbl_t::memory_coefficient();
467  }
468 
473  static double memory_overhead() {
474  return tbl_t::memory_overhead() - sizeof(tbl_t) + sizeof(hash_map);
475  }
476 
484  inline hash_map(size_t size=0, const hash_t & hash=hash_t(),
485  const equal_t & equal=equal_t(),
486  value_t u=default_unused<value_t>::v() ):
487  tbl(size, u, key_hash_t(hash), key_equal_t(equal)) {}
488 
493  inline void resize(size_t size) {tbl.resize(size);}
494 
499  inline void erase(const key_t & key) {
500  tbl.erase(value_t(key, tbl.unused.second));
501  }
502 
507  inline void erase(const iterator & iter) {erase(iter.key());}
508 
514  inline bool insert(const key_t & key, const data_t & data) {
515  return tbl.insert(value_t(key, data)).second;
516  }
517 
523  inline data_t & operator[](const key_t & key) {
524  return tbl.get(tbl.insert(value_t(key, tbl.unused.second)).first).second;
525  }
526 
531  inline const data_t & operator[](const key_t & key) const {
532  return tbl.get(tbl.find(key))->second;
533  }
534 
539  inline iterator find(const key_t & key) {
540  return iterator(tbl, tbl.find(value_t(key, tbl.unused.second)));
541  }
542 
547  inline const_iterator find(const key_t & key) const {
548  return const_iterator(tbl, tbl.find(value_t(key, tbl.unused.second)));
549  }
550 
556  inline bool contains(const key_t & key) const {
557  return tbl.find(value_t(key, tbl.unused.second)) != tbl.end();
558  }
559 
563  inline iterator begin() {return iterator(tbl, tbl.begin());}
564 
568  inline const_iterator begin() const {return const_iterator(tbl, tbl.begin());}
569 
573  inline const_iterator cbegin() const {return const_iterator(tbl, tbl.begin());}
574 
578  inline iterator end() {return iterator(tbl, tbl.end());}
579 
583  inline const_iterator end() const {return const_iterator(tbl, tbl.end());}
584 
588  inline const_iterator cend() const {return const_iterator(tbl, tbl.end());}
589 
593  inline size_t size() const {return tbl.size;}
594 
598  inline void clear() {tbl.clear();}
599 };
600 
610 template <typename key_t,
611  typename hash_t=hash<key_t>,
612  typename equal_t=std::equal_to<key_t>,
613  typename index_t=size_t,
614  template <typename, typename, typename, typename> class table_t=linear_probing_hash_table>
615 class hash_set : public linear_memory_base< hash_set<key_t, hash_t, equal_t, index_t, table_t> > {
616 private:
617  typedef table_t<key_t, hash_t, equal_t, index_t> tbl_t;
618  tbl_t tbl;
619  typedef key_t value_t;
620 
624  template <typename IT>
625  class iter_base {
626  protected:
627  IT & tbl;
628  size_t cur;
629  iter_base(IT & t, size_t c): tbl(t), cur(c) {};
630  //friend class hash_set::iterator;
631  friend class hash_set;
632  public:
633  inline const value_t & operator*() const {return tbl.get(cur);}
634  inline const value_t * operator->() const {return &tbl.get(cur);}
635  inline bool operator==(iter_base & o) const {return o.cur == cur;}
636  inline bool operator!=(iter_base & o) const {return o.cur != cur;}
637  inline void operator++() {
638  while (cur != tbl.end()) {
639  ++cur;
640  if (tbl.get(cur) != tbl.unused) break;
641  }
642  }
643  };
644 public:
646  typedef iter_base<const tbl_t> const_iterator;
647 
651  class iterator: public iter_base<tbl_t> {
652  private:
653  typedef iter_base<tbl_t> p_t;
654  friend class hash_set;
655  inline iterator(tbl_t & tbl, size_t cur): p_t(tbl, cur) {}
656  using p_t::tbl;
657  using p_t::cur;
658  public:
659  inline value_t & operator*() {return tbl.get(cur);}
660  inline value_t * operator->() {return &tbl.get(cur);}
661  inline operator const_iterator() const {return const_iterator(tbl, cur);}
662  inline bool operator==(const const_iterator & o) const {return o.cur == cur;}
663  inline bool operator!=(const const_iterator & o) const {return o.cur != cur;}
664  };
665 
666 
671  static double memory_coefficient() {
672  return tbl_t::memory_coefficient();
673  }
674 
679  static double memory_overhead() {
680  return tbl_t::memory_overhead() - sizeof(tbl_t) + sizeof(hash_set);
681  }
682 
690  inline hash_set(size_t size=0,
691  const hash_t & hash=hash_t(), const equal_t & equal=equal_t(),
692  value_t u=default_unused<value_t>::v()):
693  tbl(size, u, hash, equal) {}
694 
699  inline void resize(size_t size) {tbl.resize(size);}
700 
705  inline void erase(const key_t & key) {tbl.erase(key);}
706 
711  inline void erase(const iterator & iter) {erase(iter.key());}
712 
717  inline bool insert(const key_t & key) {
718  return tbl.insert(key).second;
719  }
720 
725  inline iterator find(const key_t & key) {
726  return iterator(tbl, tbl.find(key));
727  }
728 
733  inline const_iterator find(const key_t & key) const {
734  return const_iterator(tbl, tbl.find(key));
735  }
736 
742  inline bool contains(const key_t & key) const {return tbl.find(key) != tbl.end();}
743 
747  inline iterator begin() {return iterator(tbl, tbl.begin());}
748 
752  inline const_iterator begin() const {return const_iterator(tbl, tbl.begin());}
753 
757  inline const_iterator cbegin() const {return const_iterator(tbl, tbl.begin());}
758 
762  inline iterator end() {return iterator(tbl, tbl.end());}
763 
767  inline const_iterator end() const {return const_iterator(tbl, tbl.end());}
768 
772  inline const_iterator cend() const {return const_iterator(tbl, tbl.end());}
773 
777  inline size_t size() const {return tbl.size;}
778 
782  inline void clear() const {tbl.clear();}
783 };
784 
785 
786 template <typename value_t, typename hash_t, typename equal_t, typename index_t>
787 const float linear_probing_hash_table<value_t, hash_t, equal_t, index_t>::sc = 2.0f;
788 
789 template <typename value_t, typename hash_t, typename equal_t, typename index_t>
790 const float chaining_hash_table<value_t, hash_t, equal_t, index_t>::sc = 2.f;
791 
792 }
793 #endif //__TPIE_HASHMAP_H__
Hash table handling hash collisions by chaining.
Definition: hash_map.h:44
void erase(const iterator &iter)
Erase entry from hash set by iterator.
Definition: hash_map.h:711
static double memory_overhead()
Return the memory overhead of the structure.
Definition: hash_map.h:250
const_iterator begin() const
Return const iterator to beginning of map.
Definition: hash_map.h:568
static double memory_overhead()
Return the memory overhead of the structure.
Definition: hash_map.h:679
bool is_prime(size_type i)
Check if i is a prime.
void resize(size_t size)
Resize hash map to given size and remove all entries.
Definition: hash_map.h:493
const_iterator find(const key_t &key) const
Get const iterator to element.
Definition: hash_map.h:733
void clear() const
Clear hash set.
Definition: hash_map.h:782
Base class of data structures with linear memory usage.
Definition: util.h:73
static double memory_coefficient()
Return the memory coefficient of the structure.
Definition: hash_map.h:242
linear_probing_hash_table(size_t ee, value_t u, const hash_t &hash, const equal_t &equal)
Construct a hash table.
Definition: hash_map.h:278
iterator end()
Return iterator to end of set.
Definition: hash_map.h:762
Hash map implementation backed by a template parameterized hash table.
Definition: hash_map.h:377
iterator begin()
Return iterator to beginning of set.
Definition: hash_map.h:747
void resize(size_t element_count)
Resize table to given number of buckets and clear contents.
Definition: hash_map.h:268
size_t begin()
Return first bucket entry in use.
Definition: hash_map.h:143
Non-const iterator type.
Definition: hash_map.h:651
static double memory_coefficient()
Return the memory coefficient of the structure.
Definition: hash_map.h:671
static double memory_coefficient()
Return the memory coefficient of the structure.
Definition: hash_map.h:465
const_iterator cbegin() const
Return const iterator to beginning of set.
Definition: hash_map.h:757
data_t & operator[](const key_t &key)
Look up data by key, creating an unused entry if it does not exist.
Definition: hash_map.h:523
size_t begin() const
Return first bucket entry in use.
Definition: hash_map.h:306
bool contains(const key_t &key) const
Search for key.
Definition: hash_map.h:742
size_t size() const
Return number of keys in set.
Definition: hash_map.h:777
hash_map(size_t size=0, const hash_t &hash=hash_t(), const equal_t &equal=equal_t(), value_t u=default_unused< value_t >::v())
Construct hash map.
Definition: hash_map.h:484
void erase(const key_t &key)
Erase entry from hash map by key.
Definition: hash_map.h:499
Generic internal array with known memory requirements.
bool contains(const key_t &key) const
Search for element with key.
Definition: hash_map.h:556
iterator find(const key_t &key)
Get iterator to element.
Definition: hash_map.h:539
const data_t & operator[](const key_t &key) const
Look up data by key.
Definition: hash_map.h:531
const_iterator cend() const
Return const iterator to end of map.
Definition: hash_map.h:588
Default tabulation-hashing function for integral (size_t-castable) types.
Definition: hash.h:41
chaining_hash_table(size_t ee, value_t u, const hash_t &hash, const equal_t &equal)
Construct a hash table.
Definition: hash_map.h:135
iterator end()
Return iterator to end of map.
Definition: hash_map.h:578
static double memory_overhead()
Return the memory overhead of the structure.
Definition: hash_map.h:83
void erase(const value_t &val)
Erase value from table.
Definition: hash_map.h:343
size_t end() const
Return index greater than any buckets in use.
Definition: hash_map.h:152
void clear()
Clear contents of hash table.
Definition: hash_map.h:106
const_iterator end() const
Return const iterator to end of set.
Definition: hash_map.h:767
Hash set implementation backed by a template parameterized hash table.
Definition: hash_map.h:615
iterator begin()
Return iterator to beginning of map.
Definition: hash_map.h:563
void clear()
Clear contents of hash table.
Definition: hash_map.h:259
bool insert(const key_t &key)
Insert key into set.
Definition: hash_map.h:717
void erase(const iterator &iter)
Erase entry from hash map by iterator.
Definition: hash_map.h:507
Computations related to prime numbers.
value_t unused
Special constant indicating an unused table entry.
Definition: hash_map.h:236
static double memory_coefficient()
Return the memory coefficient of the structure.
Definition: array.h:393
Non-const iterator type.
Definition: hash_map.h:443
void erase(const value_t &val)
Erase value from table.
Definition: hash_map.h:195
void clear()
Clear hash map.
Definition: hash_map.h:598
std::pair< size_t, bool > insert(const value_t &val)
Insert value into hash table.
Definition: hash_map.h:173
void resize(size_t size, const T &elm)
Change the size of the array.
Definition: array.h:485
std::pair< size_t, bool > insert(const value_t &val)
Insert value into hash table.
Definition: hash_map.h:328
void erase(const key_t &key)
Erase entry from hash set by key.
Definition: hash_map.h:705
size_t size
Number of buckets in hash table.
Definition: hash_map.h:233
iter_base< const tbl_t > const_iterator
Const iterator type.
Definition: hash_map.h:646
static double memory_overhead()
Return the memory overhead of the structure.
Definition: array.h:400
iter_base< const tbl_t > const_iterator
Const iterator type.
Definition: hash_map.h:438
static double memory_coefficient()
Return the memory coefficient of the structure.
Definition: hash_map.h:75
size_t find(const value_t &value) const
Find bucket entry containing given value, or end() if not found.
Definition: hash_map.h:158
const_iterator begin() const
Return const iterator to beginning of set.
Definition: hash_map.h:752
hash_set(size_t size=0, const hash_t &hash=hash_t(), const equal_t &equal=equal_t(), value_t u=default_unused< value_t >::v())
Construct hash set.
Definition: hash_map.h:690
Hash table handling hash collisions by linear probing.
Definition: hash_map.h:225
const_iterator cbegin() const
Return const iterator to beginning of map.
Definition: hash_map.h:573
iterator end()
Return an iterator to the end of the array.
Definition: array.h:321
const_iterator end() const
Return const iterator to end of map.
Definition: hash_map.h:583
size_t size
Number of buckets in hash table.
Definition: hash_map.h:66
iterator begin()
Return an iterator to the beginning of the array.
Definition: array.h:307
size_type size() const
Return the size of the array.
Definition: array.h:526
iterator find(const key_t &key)
Get iterator to element.
Definition: hash_map.h:725
static double memory_overhead()
Return the memory overhead of the structure.
Definition: hash_map.h:473
const_iterator find(const key_t &key) const
Get iterator to element.
Definition: hash_map.h:547
size_t find(const value_t &value) const
Find bucket entry containing given value, or end() if not found.
Definition: hash_map.h:286
size_t end() const
Return index greater than any buckets in use.
Definition: hash_map.h:300
value_t unused
Special constant indicating an unused table entry.
Definition: hash_map.h:69
size_t size() const
Return number of elements in map.
Definition: hash_map.h:593
bool insert(const key_t &key, const data_t &data)
Insert data into the hash map.
Definition: hash_map.h:514
const_iterator cend() const
Return const iterator to end of set.
Definition: hash_map.h:772
void resize(size_t z)
Resize table to given number of buckets and clear contents.
Definition: hash_map.h:120
void resize(size_t size)
Resize hash set to given size and remove all entries.
Definition: hash_map.h:699
Default special unused values for standard types.