Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_FixedHashTable_def.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46 
47 #include "Tpetra_Details_FixedHashTable_decl.hpp"
49 #ifdef TPETRA_USE_MURMUR_HASH
50 # include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
51 #endif // TPETRA_USE_MURMUR_HASH
52 #include "Kokkos_ArithTraits.hpp"
53 #include "Teuchos_TypeNameTraits.hpp"
54 #include <type_traits>
55 
56 namespace Tpetra {
57 namespace Details {
58 //
59 // This namespace stores utility functions and Kokkos
60 // functors for use in FixedHashTable construction.
61 //
62 namespace FHT {
63 
64 // Is it worth actually using building the FixedHashTable using
65 // parallel threads, instead of just counting in a sequential loop?
66 //
67 // The parallel version of FixedHashTable construction isn't just a
68 // parallelization of the sequential loops. It incurs additional
69 // overheads. For example, the CountBuckets kernel uses atomic update
70 // instructions to count the number of "buckets" per offsets array
71 // (ptr) entry. Atomic updates have overhead, even if only one thread
72 // issues them. The Kokkos kernels are still correct in that case,
73 // but I would rather not incur overhead then. It might make sense to
74 // set the minimum number of threads to something greater than 1, but
75 // we would need experiments to find out.
76 //
77 // FixedHashTable code should call the nonmember function below, that
78 // has the same name but starts with a lower-case w.
79 template<class ExecSpace>
80 struct WorthBuildingFixedHashTableInParallel {
81  typedef typename ExecSpace::execution_space execution_space;
82 
83  static bool isWorth () {
84  // NOTE: Kokkos::Cuda does NOT have this method. That's why we
85  // need the partial specialization below.
86  return execution_space::max_hardware_threads () > 1;
87  }
88 };
89 
90 #ifdef KOKKOS_HAVE_CUDA
91 template<>
92 struct WorthBuildingFixedHashTableInParallel<Kokkos::Cuda> {
93  // There could be more complicated expressions for whether this is
94  // actually worthwhile, but for now I'll just say that with Cuda, we
95  // will ALWAYS count buckets in parallel (that is, run a Kokkos
96  // parallel kernel).
97  static bool isWorth () {
98  return true;
99  }
100 };
101 #endif // KOKKOS_HAVE_CUDA
102 
103 // Is it worth actually using building the FixedHashTable using
104 // parallel threads, instead of just counting in a sequential loop?
105 //
106 // The parallel version of FixedHashTable construction isn't just a
107 // parallelization of the sequential loops. It incurs additional
108 // overheads. For example, the CountBuckets kernel uses atomic update
109 // instructions to count the number of "buckets" per offsets array
110 // (ptr) entry. Atomic updates have overhead, even if only one thread
111 // issues them. The Kokkos kernels are still correct in that case,
112 // but I would rather not incur overhead then. It might make sense to
113 // set the minimum number of threads to something greater than 1, but
114 // we would need experiments to find out.
115 template<class ExecSpace>
116 bool worthBuildingFixedHashTableInParallel () {
117  return WorthBuildingFixedHashTableInParallel<ExecSpace>::isWorth ();
118 }
119 
120 // If the input kokkos::View<const KeyType*, ArrayLayout,
121 // InputExecSpace, Kokkos::MemoryUnamanged> is NOT accessible from the
122 // OutputExecSpace execution space, make and return a deep copy.
123 // Otherwise, just return the original input.
124 //
125 // The point of this is to avoid unnecessary copies, when the input
126 // array of keys comes in as a Teuchos::ArrayView (which we wrap in an
127 // unmanaged Kokkos::View).
128 template<class KeyType,
129  class ArrayLayout,
130  class InputExecSpace,
131  class OutputExecSpace,
132  const bool mustDeepCopy =
133  ! std::is_same<typename InputExecSpace::memory_space,
134  typename OutputExecSpace::memory_space>::value>
135 struct DeepCopyIfNeeded {
136  // The default implementation is trivial; all the work happens in
137  // partial specializations.
138 };
139 
140 // Specialization for when a deep copy is actually needed.
141 template<class KeyType,
142  class ArrayLayout,
143  class InputExecSpace,
144  class OutputExecSpace>
145 struct DeepCopyIfNeeded<KeyType, ArrayLayout, InputExecSpace, OutputExecSpace, true>
146 {
147  typedef Kokkos::View<const KeyType*, ArrayLayout,
148  InputExecSpace, Kokkos::MemoryUnmanaged> input_view_type;
149  // In this case, a deep copy IS needed. As a result, the output
150  // type is a managed Kokkos::View, which differs from the input
151  // type. Clients must get the correct return type from this struct,
152  // either from the typedef below or from 'auto'. Assigning an
153  // unmanaged View to a managed View is a syntax error.
154  typedef Kokkos::View<const KeyType*, ArrayLayout, OutputExecSpace> output_view_type;
155 
156  static output_view_type copy (const input_view_type& src) {
157  typedef typename output_view_type::non_const_type NC;
158 
159  NC dst (Kokkos::ViewAllocateWithoutInitializing (src.tracker ().label ()),
160  src.dimension_0 ());
161  Kokkos::deep_copy (dst, src);
162  return output_view_type (dst);
163  }
164 };
165 
166 // Specialization if no need to make a deep copy.
167 template<class KeyType,
168  class ArrayLayout,
169  class InputExecSpace,
170  class OutputExecSpace>
171 struct DeepCopyIfNeeded<KeyType, ArrayLayout, InputExecSpace, OutputExecSpace, false> {
172  typedef Kokkos::View<const KeyType*, ArrayLayout,
173  InputExecSpace, Kokkos::MemoryUnmanaged> input_view_type;
174  typedef Kokkos::View<const KeyType*, ArrayLayout, OutputExecSpace,
175  Kokkos::MemoryUnmanaged> output_view_type;
176 
177  static output_view_type copy (const input_view_type& src) {
178  return output_view_type (src);
179  }
180 };
181 
182 //
183 // Functors for FixedHashTable initialization
184 //
185 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
186 // should consider replacing all of these functors with in-line
187 // lambdas. The only issue is that we would need to bake the
188 // execution space into the policy, since the default execution space
189 // might differ from the one Tpetra wants to use.
190 
200 template<class CountsViewType,
201  class KeysViewType,
202  class SizeType = typename KeysViewType::size_type>
204 public:
205  typedef CountsViewType counts_view_type;
206  typedef KeysViewType keys_view_type;
207  typedef typename CountsViewType::execution_space execution_space;
208  typedef typename CountsViewType::memory_space memory_space;
209  typedef SizeType size_type;
210  typedef typename keys_view_type::non_const_value_type key_type;
211  // mfh 21 May 2015: Having a device_type typedef in the functor
212  // along with an execution_space typedef causes compilation issues.
213  // This is because one of Kokkos' partial specializations picks up
214  // on the device_type typedef, and another picks up on the
215  // execution_space typedef. The former is a legacy of a previous
216  // design iteration of Kokkos, which did not separate memory and
217  // execution spaces.
219 
225  CountBuckets (const counts_view_type& counts,
226  const keys_view_type& keys,
227  const size_type size) :
228  counts_ (counts),
229  keys_ (keys),
230  size_ (size)
231  {}
232 
237  KOKKOS_INLINE_FUNCTION void
238  operator () (const size_type& i) const
239  {
240  typedef typename hash_type::result_type hash_value_type;
241 
242  const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
243  Kokkos::atomic_fetch_add (&counts_[hashVal], 1);
244  }
245 
246 private:
248  counts_view_type counts_;
250  keys_view_type keys_;
252  size_type size_;
253 };
254 
265 template<class KeyType>
267  KOKKOS_INLINE_FUNCTION
268  FillPairsResult () :
269  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
270  // min() for a floating-point type returns the minimum _positive_
271  // normalized value. This is different than for integer types.
272  // lowest() is new in C++11 and returns the least value, always
273  // negative for signed finite types.
274  //
275  // mfh 23 May 2015: I have heard reports that
276  // std::numeric_limits<int>::lowest() does not exist with the
277  // Intel compiler. I'm not sure if the users in question actually
278  // enabled C++11. However, it's easy enough to work around this
279  // issue. The standard floating-point types are signed and have a
280  // sign bit, so lowest() is just -max(). For integer types, we
281  // can use min() instead.
282  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
283  ::Kokkos::Details::ArithTraits<KeyType>::min () :
284  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
285  success_ (true)
286  {
287  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
288  "KeyType must be some kind of number type.");
289  }
290 
291  KOKKOS_INLINE_FUNCTION
292  FillPairsResult (const KeyType& initMinKey,
293  const KeyType& initMaxKey) :
294  minKey_ (initMinKey),
295  maxKey_ (initMaxKey),
296  success_ (true)
297  {
298  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
299  "KeyType must be some kind of number type.");
300  }
301 
302  KeyType minKey_;
303  KeyType maxKey_;
304  bool success_;
305 };
306 
336 template<class PairsViewType,
337  class KeysViewType,
338  class CountsViewType,
339  class SizeType = typename KeysViewType::size_type>
340 class FillPairs {
341 public:
342  typedef typename CountsViewType::non_const_type counts_view_type;
343  typedef typename counts_view_type::const_type offsets_view_type;
344 
345  typedef PairsViewType pairs_view_type;
346  typedef typename KeysViewType::const_type keys_view_type;
347  typedef typename offsets_view_type::execution_space execution_space;
348  typedef typename offsets_view_type::memory_space memory_space;
349  typedef SizeType size_type;
350 
351  typedef typename keys_view_type::non_const_value_type key_type;
352  typedef typename pairs_view_type::non_const_value_type pair_type;
353 
355 
356  // mfh 23 May 2015: Having a device_type typedef in the functor
357  // along with an execution_space typedef causes compilation issues.
358  // This is because one of Kokkos' partial specializations picks up
359  // on the device_type typedef, and another picks up on the
360  // execution_space typedef. The former is a legacy of a previous
361  // design iteration of Kokkos, which did not separate memory and
362  // execution spaces.
364 
375  FillPairs (const pairs_view_type& pairs,
376  const counts_view_type& counts,
377  const offsets_view_type& ptr,
378  const keys_view_type& keys,
379  const typename pair_type::second_type startingValue) :
380  pairs_ (pairs),
381  counts_ (counts),
382  ptr_ (ptr),
383  keys_ (keys),
384  size_ (counts.dimension_0 ()),
385  startingValue_ (startingValue),
386  initMinKey_ (::Kokkos::Details::ArithTraits<key_type>::max ()),
387  initMaxKey_ (::Kokkos::Details::ArithTraits<key_type>::is_integer ?
388  ::Kokkos::Details::ArithTraits<key_type>::min () :
389  -::Kokkos::Details::ArithTraits<key_type>::max ())
390  {}
391 
410  FillPairs (const pairs_view_type& pairs,
411  const counts_view_type& counts,
412  const offsets_view_type& ptr,
413  const keys_view_type& keys,
414  const typename pair_type::second_type startingValue,
415  const key_type initMinKey,
416  const key_type initMaxKey) :
417  pairs_ (pairs),
418  counts_ (counts),
419  ptr_ (ptr),
420  keys_ (keys),
421  size_ (counts.dimension_0 ()),
422  startingValue_ (startingValue),
423  initMinKey_ (initMinKey),
424  initMaxKey_ (initMaxKey)
425  {}
426 
428  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
429  {
430  dst.minKey_ = initMinKey_;
431  dst.maxKey_ = initMaxKey_;
432  dst.success_ = true;
433  }
434 
435  KOKKOS_INLINE_FUNCTION void
436  join (volatile value_type& dst,
437  const volatile value_type& src) const
438  {
439  if (src.maxKey_ > dst.maxKey_) {
440  dst.maxKey_ = src.maxKey_;
441  }
442  if (src.minKey_ < dst.minKey_) {
443  dst.minKey_ = src.minKey_;
444  }
445  dst.success_ = dst.success_ && src.success_;
446  }
447 
452  KOKKOS_INLINE_FUNCTION void
453  operator () (const size_type& i, value_type& dst) const
454  {
455  typedef typename hash_type::result_type hash_value_type;
456  typedef typename offsets_view_type::non_const_value_type offset_type;
457  typedef typename pair_type::second_type val_type;
458 
459  const key_type key = keys_[i];
460  if (key > dst.maxKey_) {
461  dst.maxKey_ = key;
462  }
463  if (key < dst.minKey_) {
464  dst.minKey_ = key;
465  }
466  const val_type theVal = startingValue_ + static_cast<val_type> (i);
467  const hash_value_type hashVal = hash_type::hashFunc (key, size_);
468 
469  // Return the old count; decrement afterwards.
470  const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], -1);
471  if (count == 0) {
472  dst.success_ = false; // FAILURE!
473  }
474  else {
475  const offset_type curPos = ptr_[hashVal+1] - count;
476 
477  pairs_[curPos].first = key;
478  pairs_[curPos].second = theVal;
479  }
480  }
481 
482 private:
483  pairs_view_type pairs_;
484  counts_view_type counts_;
485  offsets_view_type ptr_;
486  keys_view_type keys_;
487  size_type size_;
488  typename pair_type::second_type startingValue_;
490  key_type initMinKey_;
492  key_type initMaxKey_;
493 };
494 
518 template<class OffsetsViewType,
519  class PairsViewType,
520  class SizeType = typename OffsetsViewType::size_type>
522 public:
523  typedef typename OffsetsViewType::const_type offsets_view_type;
524  typedef typename PairsViewType::const_type pairs_view_type;
525  typedef typename offsets_view_type::execution_space execution_space;
526  typedef typename offsets_view_type::memory_space memory_space;
527  typedef SizeType size_type;
528 
529  // The result of the check is whether the table has one or more duplicates.
530  typedef int value_type;
531 
536  CheckForDuplicateKeys (const pairs_view_type& pairs,
537  const offsets_view_type& ptr) :
538  pairs_ (pairs),
539  ptr_ (ptr),
540  size_ (ptr_.dimension_0 () == 0 ?
541  size_type (0) :
542  ptr_.dimension_0 () - 1)
543  {}
544 
546  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
547  {
548  dst = 0;
549  }
550 
552  KOKKOS_INLINE_FUNCTION void
553  join (volatile value_type& dst,
554  const volatile value_type& src) const
555  {
556  dst = dst + src > 0?1:0;
557  }
558 
560  KOKKOS_INLINE_FUNCTION void
561  operator () (const size_type& i, value_type& dst) const
562  {
563  typedef typename offsets_view_type::non_const_value_type offset_type;
564  typedef typename pairs_view_type::non_const_value_type pair_type;
565  typedef typename pair_type::first_type key_type;
566 
567  if (dst>0) {
568  return; // we've already found duplicate keys elsewhere
569  }
570  else {
571  const offset_type beg = ptr_[i];
572  const offset_type end = ptr_[i+1];
573  bool foundDuplicateKey = false;
574  // This is an ~ n^2 algorithm in the worst case, where n is the
575  // max number of keys that hash to the same bucket. However, if
576  // the hash function is reasonable, n should be much less than
577  // the total number of keys.
578  for (offset_type j = beg + 1; j < end; ++j) {
579  const key_type curKey = pairs_[j].first;
580  for (offset_type k = beg; k < j; ++k) {
581  if (pairs_[k].first == curKey) {
582  foundDuplicateKey = true;
583  break;
584  }
585  }
586  }
587  dst = (dst>0) || foundDuplicateKey?1:0;
588  }
589  }
590 
591 private:
592  pairs_view_type pairs_;
593  offsets_view_type ptr_;
594  size_type size_;
595 };
596 
597 } // namespace FHT
598 
599 //
600 // Here begins the actual implementation of FixedHashTable.
601 //
602 
603 template<class KeyType, class ValueType, class DeviceType>
604 bool
606 hasKeys () const {
607  // This also works if FixedHashTable has no entries. getKey()
608  // works in that case, but always returns the flag (invalid).
609  //
610  // FIXME (31 May 2015) This only works because vals_ contains no
611  // padding. If we ever pad within a "row" of vals_, we'll have to
612  // change this.
613  return keys_.dimension_0 () == val_.dimension_0 ();
614 }
615 
616 template<class KeyType, class ValueType, class DeviceType>
617 void
619 check () const
620 {
621  // const char prefix[] = "Tpetra::Details::FixedHashTable: ";
622  // const char suffix[] = " Please report this bug to the Tpetra developers.";
623 }
624 
625 template<class KeyType, class ValueType, class DeviceType>
628  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
629  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
630  ::Kokkos::Details::ArithTraits<KeyType>::min () :
631  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
632  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
633  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
634  ::Kokkos::Details::ArithTraits<ValueType>::min () :
635  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
636  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
637  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
638  ::Kokkos::Details::ArithTraits<KeyType>::min () :
639  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
640  contiguousValues_ (true), // trivially
641  checkedForDuplicateKeys_ (true), // it's an empty table; no need to check
642  hasDuplicateKeys_ (false)
643 {
644 #ifdef HAVE_TPETRA_DEBUG
645  check ();
646 #endif // HAVE_TPETRA_DEBUG
647 }
648 
649 template<class KeyType, class ValueType, class DeviceType>
651 FixedHashTable (const keys_type& keys) :
652  keys_ (keys),
653  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
654  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
655  ::Kokkos::Details::ArithTraits<KeyType>::min () :
656  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
657  minVal_ (0),
658  maxVal_ (keys.size () == 0 ?
659  static_cast<ValueType> (0) :
660  static_cast<ValueType> (keys.size () - 1)),
661  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
662  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
663  ::Kokkos::Details::ArithTraits<KeyType>::min () :
664  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
665  contiguousValues_ (true),
666  checkedForDuplicateKeys_ (false),
667  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
668 {
669  const ValueType startingValue = static_cast<ValueType> (0);
670  const KeyType initMinKey = this->minKey_;
671  const KeyType initMaxKey = this->maxKey_;
672  this->init (keys, startingValue, initMinKey, initMaxKey,
673  initMinKey, initMinKey, false);
674 
675 #ifdef HAVE_TPETRA_DEBUG
676  check ();
677 #endif // HAVE_TPETRA_DEBUG
678 }
679 
680 template<class KeyType, class ValueType, class DeviceType>
682 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
683  const bool keepKeys) :
684  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
685  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
686  ::Kokkos::Details::ArithTraits<KeyType>::min () :
687  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
688  minVal_ (0),
689  maxVal_ (keys.size () == 0 ?
690  static_cast<ValueType> (0) :
691  static_cast<ValueType> (keys.size () - 1)),
692  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
693  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
694  ::Kokkos::Details::ArithTraits<KeyType>::min () :
695  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
696  contiguousValues_ (true),
697  checkedForDuplicateKeys_ (false),
698  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
699 {
700  typedef typename keys_type::non_const_type nonconst_keys_type;
701 
702  // mfh 01 May 2015: I don't trust that
703  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
704  // so I ensure this manually.
705  const ValueType startingValue = static_cast<ValueType> (0);
706  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
707  keys.size ());
708  using Kokkos::ViewAllocateWithoutInitializing;
709  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
710  keys_k.dimension_0 ());
711  Kokkos::deep_copy (keys_d, keys_k);
712  const KeyType initMinKey = this->minKey_;
713  const KeyType initMaxKey = this->maxKey_;
714  this->init (keys_d, startingValue, initMinKey, initMaxKey,
715  initMinKey, initMinKey, false);
716  if (keepKeys) {
717  keys_ = keys_d;
718 #ifdef HAVE_TPETRA_DEBUG
719  typedef typename keys_type::size_type size_type;
720  TEUCHOS_TEST_FOR_EXCEPTION
721  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
722  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
723  "keepKeys is true, but on return, keys_.dimension_0() = " <<
724  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
725  ". Please report this bug to the Tpetra developers.");
726 #endif // HAVE_TPETRA_DEBUG
727  }
728 
729 #ifdef HAVE_TPETRA_DEBUG
730  check ();
731 #endif // HAVE_TPETRA_DEBUG
732 }
733 
734 template<class KeyType, class ValueType, class DeviceType>
736 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
737  const ValueType startingValue,
738  const bool keepKeys) :
739  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
740  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
741  ::Kokkos::Details::ArithTraits<KeyType>::min () :
742  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
743  minVal_ (startingValue),
744  maxVal_ (keys.size () == 0 ?
745  startingValue :
746  static_cast<ValueType> (startingValue + keys.size () - 1)),
747  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
748  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
749  ::Kokkos::Details::ArithTraits<KeyType>::min () :
750  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
751  contiguousValues_ (true),
752  checkedForDuplicateKeys_ (false),
753  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
754 {
755  typedef typename keys_type::non_const_type nonconst_keys_type;
756 
757  // mfh 01 May 2015: I don't trust that
758  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
759  // so I ensure this manually.
760  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
761  keys.size ());
762  using Kokkos::ViewAllocateWithoutInitializing;
763  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
764  keys_k.dimension_0 ());
765  Kokkos::deep_copy (keys_d, keys_k);
766 
767  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
768  // min() for a floating-point type returns the minimum _positive_
769  // normalized value. This is different than for integer types.
770  // lowest() is new in C++11 and returns the least value, always
771  // negative for signed finite types.
772  //
773  // mfh 23 May 2015: I have heard reports that
774  // std::numeric_limits<int>::lowest() does not exist with the Intel
775  // compiler. I'm not sure if the users in question actually enabled
776  // C++11. However, it's easy enough to work around this issue. The
777  // standard floating-point types are signed and have a sign bit, so
778  // lowest() is just -max(). For integer types, we can use min()
779  // instead.
780  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
781  ::Kokkos::Details::ArithTraits<KeyType>::min () :
782  -::Kokkos::Details::ArithTraits<KeyType>::max ();
783  this->init (keys_d, startingValue, initMinKey, initMaxKey,
784  initMinKey, initMinKey, false);
785  if (keepKeys) {
786  keys_ = keys_d;
787 #ifdef HAVE_TPETRA_DEBUG
788  typedef typename keys_type::size_type size_type;
789  TEUCHOS_TEST_FOR_EXCEPTION
790  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
791  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
792  "keepKeys is true, but on return, keys_.dimension_0() = " <<
793  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
794  ". Please report this bug to the Tpetra developers.");
795 #endif // HAVE_TPETRA_DEBUG
796  }
797 
798 #ifdef HAVE_TPETRA_DEBUG
799  check ();
800 #endif // HAVE_TPETRA_DEBUG
801 }
802 
803 template<class KeyType, class ValueType, class DeviceType>
806  const KeyType firstContigKey,
807  const KeyType lastContigKey,
808  const ValueType startingValue,
809  const bool keepKeys) :
810  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
811  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
812  ::Kokkos::Details::ArithTraits<KeyType>::min () :
813  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
814  minVal_ (startingValue),
815  maxVal_ (keys.size () == 0 ?
816  startingValue :
817  static_cast<ValueType> (startingValue + keys.size () - 1)),
818  firstContigKey_ (firstContigKey),
819  lastContigKey_ (lastContigKey),
820  contiguousValues_ (true),
821  checkedForDuplicateKeys_ (false),
822  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
823 {
824  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
825  // min() for a floating-point type returns the minimum _positive_
826  // normalized value. This is different than for integer types.
827  // lowest() is new in C++11 and returns the least value, always
828  // negative for signed finite types.
829  //
830  // mfh 23 May 2015: I have heard reports that
831  // std::numeric_limits<int>::lowest() does not exist with the Intel
832  // compiler. I'm not sure if the users in question actually enabled
833  // C++11. However, it's easy enough to work around this issue. The
834  // standard floating-point types are signed and have a sign bit, so
835  // lowest() is just -max(). For integer types, we can use min()
836  // instead.
837  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
838  ::Kokkos::Details::ArithTraits<KeyType>::min () :
839  -::Kokkos::Details::ArithTraits<KeyType>::max ();
840  this->init (keys, startingValue, initMinKey, initMaxKey,
841  firstContigKey, lastContigKey, true);
842  if (keepKeys) {
843  keys_ = keys;
844 #ifdef HAVE_TPETRA_DEBUG
845  typedef typename keys_type::size_type size_type;
846  TEUCHOS_TEST_FOR_EXCEPTION
847  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
848  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
849  "keepKeys is true, but on return, keys_.dimension_0() = " <<
850  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
851  ". Please report this bug to the Tpetra developers.");
852 #endif // HAVE_TPETRA_DEBUG
853  }
854 
855 #ifdef HAVE_TPETRA_DEBUG
856  check ();
857 #endif // HAVE_TPETRA_DEBUG
858 }
859 
860 template<class KeyType, class ValueType, class DeviceType>
862 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
863  const KeyType firstContigKey,
864  const KeyType lastContigKey,
865  const ValueType startingValue,
866  const bool keepKeys) :
867  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
868  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
869  ::Kokkos::Details::ArithTraits<KeyType>::min () :
870  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
871  minVal_ (startingValue),
872  maxVal_ (keys.size () == 0 ?
873  startingValue :
874  static_cast<ValueType> (startingValue + keys.size () - 1)),
875  firstContigKey_ (firstContigKey),
876  lastContigKey_ (lastContigKey),
877  contiguousValues_ (true),
878  checkedForDuplicateKeys_ (false),
879  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
880 {
881  typedef typename keys_type::non_const_type nonconst_keys_type;
882 
883  // mfh 01 May 2015: I don't trust that
884  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
885  // so I ensure this manually.
886  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
887  keys.size ());
888  using Kokkos::ViewAllocateWithoutInitializing;
889  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
890  keys_k.dimension_0 ());
891  Kokkos::deep_copy (keys_d, keys_k);
892 
893  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
894  // min() for a floating-point type returns the minimum _positive_
895  // normalized value. This is different than for integer types.
896  // lowest() is new in C++11 and returns the least value, always
897  // negative for signed finite types.
898  //
899  // mfh 23 May 2015: I have heard reports that
900  // std::numeric_limits<int>::lowest() does not exist with the Intel
901  // compiler. I'm not sure if the users in question actually enabled
902  // C++11. However, it's easy enough to work around this issue. The
903  // standard floating-point types are signed and have a sign bit, so
904  // lowest() is just -max(). For integer types, we can use min()
905  // instead.
906  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
907  ::Kokkos::Details::ArithTraits<KeyType>::min () :
908  -::Kokkos::Details::ArithTraits<KeyType>::max ();
909  this->init (keys_d, startingValue, initMinKey, initMaxKey,
910  firstContigKey, lastContigKey, true);
911  if (keepKeys) {
912  keys_ = keys_d;
913 #ifdef HAVE_TPETRA_DEBUG
914  typedef typename keys_type::size_type size_type;
915  TEUCHOS_TEST_FOR_EXCEPTION
916  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
917  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
918  "keepKeys is true, but on return, keys_.dimension_0() = " <<
919  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
920  ". Please report this bug to the Tpetra developers.");
921 #endif // HAVE_TPETRA_DEBUG
922  }
923 
924 #ifdef HAVE_TPETRA_DEBUG
925  check ();
926 #endif // HAVE_TPETRA_DEBUG
927 }
928 
929 template<class KeyType, class ValueType, class DeviceType>
932  const ValueType startingValue) :
933  keys_ (keys),
934  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
935  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
936  ::Kokkos::Details::ArithTraits<KeyType>::min () :
937  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
938  minVal_ (startingValue),
939  maxVal_ (keys.size () == 0 ?
940  startingValue :
941  static_cast<ValueType> (startingValue + keys.size () - 1)),
942  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
943  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
944  ::Kokkos::Details::ArithTraits<KeyType>::min () :
945  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
946  contiguousValues_ (true),
947  checkedForDuplicateKeys_ (false),
948  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
949 {
950  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
951  // min() for a floating-point type returns the minimum _positive_
952  // normalized value. This is different than for integer types.
953  // lowest() is new in C++11 and returns the least value, always
954  // negative for signed finite types.
955  //
956  // mfh 23 May 2015: I have heard reports that
957  // std::numeric_limits<int>::lowest() does not exist with the Intel
958  // compiler. I'm not sure if the users in question actually enabled
959  // C++11. However, it's easy enough to work around this issue. The
960  // standard floating-point types are signed and have a sign bit, so
961  // lowest() is just -max(). For integer types, we can use min()
962  // instead.
963  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
964  ::Kokkos::Details::ArithTraits<KeyType>::min () :
965  -::Kokkos::Details::ArithTraits<KeyType>::max ();
966  this->init (keys, startingValue, initMinKey, initMaxKey,
967  initMinKey, initMinKey, false);
968 
969 #ifdef HAVE_TPETRA_DEBUG
970  check ();
971 #endif // HAVE_TPETRA_DEBUG
972 }
973 
974 template<class KeyType, class ValueType, class DeviceType>
976 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
977  const Teuchos::ArrayView<const ValueType>& vals) :
978  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
979  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
980  ::Kokkos::Details::ArithTraits<KeyType>::min () :
981  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
982  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
983  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
984  ::Kokkos::Details::ArithTraits<ValueType>::min () :
985  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
986  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
987  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
988  ::Kokkos::Details::ArithTraits<KeyType>::min () :
989  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
990  contiguousValues_ (false),
991  checkedForDuplicateKeys_ (false),
992  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
993 {
994  // mfh 01 May 2015: I don't trust that
995  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
996  // so I ensure this manually.
997  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
998  keys.size ());
999  host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
1000  vals.size ());
1001  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
1002  // min() for a floating-point type returns the minimum _positive_
1003  // normalized value. This is different than for integer types.
1004  // lowest() is new in C++11 and returns the least value, always
1005  // negative for signed finite types.
1006  //
1007  // mfh 23 May 2015: I have heard reports that
1008  // std::numeric_limits<int>::lowest() does not exist with the Intel
1009  // compiler. I'm not sure if the users in question actually enabled
1010  // C++11. However, it's easy enough to work around this issue. The
1011  // standard floating-point types are signed and have a sign bit, so
1012  // lowest() is just -max(). For integer types, we can use min()
1013  // instead.
1014  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
1015  ::Kokkos::Details::ArithTraits<KeyType>::min () :
1016  -::Kokkos::Details::ArithTraits<KeyType>::max ();
1017  this->init (keys_k, vals_k, initMinKey, initMaxKey);
1018 
1019 #ifdef HAVE_TPETRA_DEBUG
1020  check ();
1021 #endif // HAVE_TPETRA_DEBUG
1022 }
1023 
1024 template<class KeyType, class ValueType, class DeviceType>
1025 void
1027 init (const keys_type& keys,
1028  ValueType startingValue,
1029  KeyType initMinKey,
1030  KeyType initMaxKey,
1031  KeyType firstContigKey,
1032  KeyType lastContigKey,
1033  const bool computeInitContigKeys)
1034 {
1035  using Kokkos::subview;
1036  using Kokkos::ViewAllocateWithoutInitializing;
1037  using Teuchos::TypeNameTraits;
1038  typedef typename std::decay<decltype (keys.dimension_0 ()) >::type size_type;
1039  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
1040 
1041  const offset_type numKeys = static_cast<offset_type> (keys.dimension_0 ());
1042  {
1043  const offset_type maxVal = ::Kokkos::Details::ArithTraits<offset_type>::max ();
1044  const size_type maxValST = static_cast<size_type> (maxVal);
1045  TEUCHOS_TEST_FOR_EXCEPTION
1046  (keys.dimension_0 () > maxValST, std::invalid_argument, prefix << "The "
1047  "number of keys " << keys.dimension_0 () << " does not fit in "
1048  "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
1049  "max value is " << maxVal << ". This means that it is not possible to "
1050  "use this constructor.");
1051  }
1052  TEUCHOS_TEST_FOR_EXCEPTION
1053  (static_cast<unsigned long long> (numKeys) >
1054  static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1055  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1056  "keys " << numKeys << " is greater than the maximum representable "
1057  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ". "
1058  "This means that it is not possible to use this constructor.");
1059  TEUCHOS_TEST_FOR_EXCEPTION
1060  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
1061  "This class currently only works when the number of keys is <= INT_MAX = "
1062  << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
1063  "developers.");
1064 
1065  const bool buildInParallel =
1066  FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
1067 
1068  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1069  // could change that by setting up ptr and val as Kokkos::DualView
1070  // instances. If we do that, since we are filling on host for now,
1071  // we want to make sure that we only zero-fill ptr on host
1072  // initially, and that we don't fill val at all. Once we finish
1073  // Kokkos-izing all the set-up kernels, we won't need DualView for
1074  // either ptr or val.
1075 
1076  if (computeInitContigKeys) {
1077  // Find the first and last initial contiguous keys. If we find a
1078  // long sequence of initial contiguous keys, we can save space by
1079  // not storing them explicitly as pairs in the hash table.
1080  //
1081  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
1082  // ("min index such that the difference between the current key and
1083  // the next != 1"), which takes multiple passes over the data. We
1084  // could fuse it with CountBuckets (only update counts on 'final'
1085  // pass). However, we're really just moving this sequential search
1086  // out of Map's constructor here, so there is no loss in doing it
1087  // sequentially for now. Later, we can work on parallelization.
1088  //
1089  // NOTE (mfh 01 Jun 2015, 28 Mar 2016) The code below assumes UVM.
1090  // It is rational to assume UVM here, because this is "sparse"
1091  // access -- we might not need to look at all the entries of keys,
1092  // so it doesn't necessarily make sense to copy the whole thing
1093  // back to host.
1094  if (numKeys > 0) {
1095  firstContigKey_ = keys[0];
1096  // Start with one plus, then decrement at the end. That lets us do
1097  // only one addition per loop iteration, rather than two (if we test
1098  // against lastContigKey + 1 and then increment lastContigKey).
1099  lastContigKey_ = firstContigKey_ + 1;
1100 
1101  // We will only store keys in the table that are not part of the
1102  // initial contiguous sequence. It's possible for the initial
1103  // contiguous sequence to be trivial, which for a nonzero number of
1104  // keys means that the "sequence" has length 1.
1105  for (offset_type k = 1; k < numKeys; ++k) {
1106  if (lastContigKey_ != keys[k]) {
1107  break;
1108  }
1109  ++lastContigKey_;
1110  }
1111  --lastContigKey_;
1112  }
1113  }
1114  else {
1115  firstContigKey_ = firstContigKey;
1116  lastContigKey_ = lastContigKey;
1117  }
1118 
1119  offset_type startIndex;
1120  if (numKeys > 0) {
1121  initMinKey = std::min (initMinKey, firstContigKey_);
1122  initMaxKey = std::max (initMaxKey, lastContigKey_);
1123  startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
1124  } else {
1125  startIndex = 0;
1126  }
1127 
1128  const offset_type theNumKeys = numKeys - startIndex;
1129  const offset_type size = hash_type::getRecommendedSize (theNumKeys);
1130 #ifdef HAVE_TPETRA_DEBUG
1131  TEUCHOS_TEST_FOR_EXCEPTION(
1132  size == 0 && numKeys != 0, std::logic_error,
1133  "Tpetra::Details::FixedHashTable constructor: "
1134  "getRecommendedSize(" << numKeys << ") returned zero, "
1135  "even though the number of keys " << numKeys << " is nonzero. "
1136  "Please report this bug to the Tpetra developers.");
1137 #endif // HAVE_TPETRA_DEBUG
1138  keys_type theKeys =
1139  subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
1140 
1141  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
1142  // fence here, but we do before other allocations.
1143 
1144  // The array of counts must be separate from the array of offsets,
1145  // in order for parallel_scan to work correctly.
1146  typedef typename ptr_type::non_const_type counts_type;
1147  counts_type counts ("FixedHashTable::counts", size);
1148 
1149  //
1150  // Count the number of "buckets" per offsets array (ptr) entry.
1151  //
1152 
1153  // The Kokkos kernel uses atomic update instructions to count the
1154  // number of "buckets" per offsets array (ptr) entry. Atomic
1155  // updates incur overhead, even in the sequential case. The Kokkos
1156  // kernel is still correct in that case, but I would rather not
1157  // incur overhead then.
1158  if (buildInParallel) {
1159  FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
1160  Kokkos::parallel_for (theNumKeys, functor);
1161  }
1162  else {
1163  // Access to counts is not necessarily contiguous, but is
1164  // irregular and likely touches all pages of the array. Thus, it
1165  // probably makes sense to use a host copy explicitly, rather than
1166  // assume UVM.
1167  auto countsHost = Kokkos::create_mirror_view (counts);
1168  // FIXME (mfh 28 Mar 2016) Does create_mirror_view zero-fill?
1169  Kokkos::deep_copy (countsHost, static_cast<offset_type> (0));
1170 
1171  for (offset_type k = 0; k < theNumKeys; ++k) {
1172  typedef typename hash_type::result_type hash_value_type;
1173  const hash_value_type hashVal = hash_type::hashFunc (theKeys[k], size);
1174  ++countsHost[hashVal];
1175  }
1176  Kokkos::deep_copy (counts, countsHost);
1177  }
1178 
1179  // FIXME (mfh 28 Mar 2016) Need a fence here, otherwise SIGSEGV w/
1180  // CUDA when ptr is filled.
1181  execution_space::fence ();
1182 
1183  // Kokkos::View fills with zeros by default.
1184  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size+1);
1185 
1186  // Compute row offsets via prefix sum:
1187  //
1188  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1189  //
1190  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1191  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1192  // formula is ptr[i+1] += ptr[i].
1193  //
1194  // parallel_scan does not incur overhead with Kokkos::Serial, but
1195  // with actual parallel execution spaces, it does require multiple
1196  // passes over the data. Thus, it still makes sense to have a
1197  // sequential fall-back.
1198  if (buildInParallel) {
1200  }
1201  else {
1202  // mfh 28 Mar 2016: We could use UVM here, but it's pretty easy to
1203  // use a host mirror too, so I'll just do that.
1204  typename decltype (ptr)::HostMirror ptrHost = Kokkos::create_mirror_view (ptr);
1205 
1206  ptrHost[0] = 0;
1207  for (offset_type i = 0; i < size; ++i) {
1208  //ptrHost[i+1] += ptrHost[i];
1209  ptrHost[i+1] = ptrHost[i] + counts[i];
1210  }
1211  Kokkos::deep_copy (ptr, ptrHost);
1212  }
1213 
1214  // FIXME (mfh 28 Mar 2016) Need a fence here, otherwise SIGSEGV w/
1215  // CUDA when val is filled.
1216  execution_space::fence ();
1217 
1218  // Allocate the array of (key,value) pairs. Don't fill it with
1219  // zeros, because we will fill it with actual data below.
1220  using Kokkos::ViewAllocateWithoutInitializing;
1221  typedef typename val_type::non_const_type nonconst_val_type;
1222  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1223  theNumKeys);
1224 
1225  // Fill in the hash table's "values" (the (key,value) pairs).
1226  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1227  typename ptr_type::non_const_type> functor_type;
1228  typename functor_type::value_type result (initMinKey, initMaxKey);
1229 
1230  const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1231  if (buildInParallel) {
1232  functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1233  initMinKey, initMaxKey);
1234  Kokkos::parallel_reduce (theNumKeys, functor, result);
1235  }
1236  else {
1237  for (offset_type k = 0; k < theNumKeys; ++k) {
1238  typedef typename hash_type::result_type hash_value_type;
1239  const KeyType key = theKeys[k];
1240  if (key > result.maxKey_) {
1241  result.maxKey_ = key;
1242  }
1243  if (key < result.minKey_) {
1244  result.minKey_ = key;
1245  }
1246  const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1247  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1248 
1249  // Return the old count; decrement afterwards.
1250  //
1251  // NOTE (mfh 28 Mar 2016) This assumes UVM. It might make more
1252  // sense to use a host mirror here, due to the access pattern.
1253  const offset_type count = counts[hashVal];
1254  --counts[hashVal];
1255  if (count == 0) {
1256  result.success_ = false; // FAILURE!
1257  break;
1258  }
1259  else {
1260  // NOTE (mfh 28 Mar 2016) This assumes UVM.
1261  const offset_type curPos = ptr[hashVal+1] - count;
1262 
1263  // NOTE (mfh 28 Mar 2016) This assumes UVM.
1264  val[curPos].first = key;
1265  val[curPos].second = theVal;
1266  }
1267  }
1268  }
1269 
1270  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1271  // reports of exceptions being thrown in Albany.
1272  //
1273  // TEUCHOS_TEST_FOR_EXCEPTION
1274  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1275  // "init: Filling the hash table failed! Please report this bug to the "
1276  // "Tpetra developers.");
1277  (void) result; // prevent build warning (set but unused variable)
1278 
1279  // "Commit" the computed arrays and other computed quantities.
1280  ptr_ = ptr;
1281  val_ = val;
1282  minKey_ = result.minKey_;
1283  maxKey_ = result.maxKey_;
1284  // We've already set firstContigKey_ and lastContigKey_ above.
1285 }
1286 
1287 
1288 template<class KeyType, class ValueType, class DeviceType>
1289 void
1290 FixedHashTable<KeyType, ValueType, DeviceType>::
1291 init (const host_input_keys_type& keys,
1292  const host_input_vals_type& vals,
1293  KeyType initMinKey,
1294  KeyType initMaxKey)
1295 {
1296  const offset_type numKeys = static_cast<offset_type> (keys.dimension_0 ());
1297  TEUCHOS_TEST_FOR_EXCEPTION
1298  (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1299  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1300  "keys " << numKeys << " is greater than the maximum representable "
1301  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ".");
1302  TEUCHOS_TEST_FOR_EXCEPTION
1303  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1304  "Details::FixedHashTable: This class currently only works when the number "
1305  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1306  ", please talk to the Tpetra developers.");
1307 
1308  // There's no need to find the first and last initial contiguous
1309  // keys in this case, because if we reach this init() function, then
1310  // hasContiguousValues() is false and so get() doesn't use the
1311  // initial contiguous sequence of keys.
1312 
1313  const offset_type size = hash_type::getRecommendedSize (numKeys);
1314 #ifdef HAVE_TPETRA_DEBUG
1315  TEUCHOS_TEST_FOR_EXCEPTION(
1316  size == 0 && numKeys != 0, std::logic_error,
1317  "Tpetra::Details::FixedHashTable constructor: "
1318  "getRecommendedSize(" << numKeys << ") returned zero, "
1319  "even though the number of keys " << numKeys << " is nonzero. "
1320  "Please report this bug to the Tpetra developers.");
1321 #endif // HAVE_TPETRA_DEBUG
1322 
1323  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1324  // could change that by setting up ptr and val as Kokkos::DualView
1325  // instances. If we do that, since we are filling on host for now,
1326  // we want to make sure that we only zero-fill ptr on host
1327  // initially, and that we don't fill val at all. Once we finish
1328  // Kokkos-izing all the set-up kernels, we won't need DualView for
1329  // either ptr or val.
1330 
1331  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size + 1);
1332 
1333  // Allocate the array of key,value pairs. Don't waste time filling
1334  // it with zeros, because we will fill it with actual data below.
1335  using Kokkos::ViewAllocateWithoutInitializing;
1336  typedef typename val_type::non_const_type nonconst_val_type;
1337  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1338  numKeys);
1339 
1340  // Compute number of entries in each hash table position.
1341  for (offset_type k = 0; k < numKeys; ++k) {
1342  const typename hash_type::result_type hashVal =
1343  hash_type::hashFunc (keys[k], size);
1344  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1345  ++ptr[hashVal+1];
1346  }
1347 
1348  // Compute row offsets via prefix sum:
1349  //
1350  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1351  //
1352  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1353  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1354  // formula is ptr[i+1] += ptr[i].
1355  for (offset_type i = 0; i < size; ++i) {
1356  ptr[i+1] += ptr[i];
1357  }
1358  //ptr[0] = 0; // We've already done this when initializing ptr above.
1359 
1360  // curRowStart[i] is the offset of the next element in row i.
1361  typename ptr_type::non_const_type curRowStart ("FixedHashTable::curRowStart", size);
1362 
1363  // Fill in the hash table.
1364  FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1365  for (offset_type k = 0; k < numKeys; ++k) {
1366  typedef typename hash_type::result_type hash_value_type;
1367  const KeyType key = keys[k];
1368  if (key > result.maxKey_) {
1369  result.maxKey_ = key;
1370  }
1371  if (key < result.minKey_) {
1372  result.minKey_ = key;
1373  }
1374  const ValueType theVal = vals[k];
1375  if (theVal > maxVal_) {
1376  maxVal_ = theVal;
1377  }
1378  if (theVal < minVal_) {
1379  minVal_ = theVal;
1380  }
1381  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1382 
1383  const offset_type offset = curRowStart[hashVal];
1384  const offset_type curPos = ptr[hashVal] + offset;
1385  if (curPos >= ptr[hashVal+1]) {
1386  result.success_ = false; // FAILURE!
1387  }
1388  else {
1389  val[curPos].first = key;
1390  val[curPos].second = theVal;
1391  ++curRowStart[hashVal];
1392  }
1393  }
1394 
1395  TEUCHOS_TEST_FOR_EXCEPTION
1396  (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1397  "init: Filling the hash table failed! Please report this bug to the "
1398  "Tpetra developers.");
1399 
1400  // "Commit" the computed arrays and other computed quantities.
1401  ptr_ = ptr;
1402  val_ = val;
1403  minKey_ = result.minKey_;
1404  maxKey_ = result.maxKey_;
1405  // We've already assigned to minVal_ and maxVal_ above.
1406 }
1407 
1408 template <class KeyType, class ValueType, class DeviceType>
1409 bool
1410 FixedHashTable<KeyType, ValueType, DeviceType>::
1411 hasDuplicateKeys ()
1412 {
1413  if (! checkedForDuplicateKeys_) {
1414  hasDuplicateKeys_ = checkForDuplicateKeys ();
1415  checkedForDuplicateKeys_ = true;
1416  }
1417  return hasDuplicateKeys_;
1418 }
1419 
1420 template <class KeyType, class ValueType, class DeviceType>
1421 bool
1423 checkForDuplicateKeys () const
1424 {
1425  const offset_type size = this->getSize ();
1426  // It's allowed for the hash table to have a positive number of
1427  // buckets (getSize()), but still store no entries (numPairs()).
1428  // Both cases trivially entail no duplicates.
1429  if (size == 0 || this->numPairs () == 0) {
1430  return false;
1431  }
1432  else {
1433  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1434  functor_type functor (val_, ptr_);
1435  int hasDupKeys = 0;
1436  Kokkos::parallel_reduce (size, functor, hasDupKeys);
1437  return hasDupKeys>0;
1438  }
1439 }
1440 
1441 template <class KeyType, class ValueType, class DeviceType>
1442 std::string
1443 FixedHashTable<KeyType, ValueType, DeviceType>::
1444 description () const
1445 {
1446  std::ostringstream oss;
1447  oss << "FixedHashTable<"
1448  << Teuchos::TypeNameTraits<KeyType>::name () << ","
1449  << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1450  << "{ numKeys: " << val_.dimension_0 ()
1451  << ", tableSize: " << this->getSize () << " }";
1452  return oss.str();
1453 }
1454 
1455 template <class KeyType, class ValueType, class DeviceType>
1456 void
1458 describe (Teuchos::FancyOStream& out,
1459  const Teuchos::EVerbosityLevel verbLevel) const
1460 {
1461  using std::endl;
1462  using std::setw;
1463  using Teuchos::OSTab;
1464  using Teuchos::rcpFromRef;
1465  using Teuchos::TypeNameTraits;
1466  using Teuchos::VERB_DEFAULT;
1467  using Teuchos::VERB_NONE;
1468  using Teuchos::VERB_LOW;
1469  using Teuchos::VERB_EXTREME;
1470 
1471  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1472  // access to ptr_ and val_ from the host.
1473 
1474  Teuchos::EVerbosityLevel vl = verbLevel;
1475  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1476 
1477  if (vl == VERB_NONE) {
1478  // do nothing
1479  }
1480  else if (vl == VERB_LOW) {
1481  out << this->description() << endl;
1482  }
1483  else { // MEDIUM, HIGH or EXTREME
1484  out << "FixedHashTable:" << endl;
1485  {
1486  OSTab tab1 (rcpFromRef (out));
1487 
1488  const std::string label = this->getObjectLabel ();
1489  if (label != "") {
1490  out << "label: " << label << endl;
1491  }
1492  out << "Template parameters:" << endl;
1493  {
1494  OSTab tab2 (rcpFromRef (out));
1495  out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1496  << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1497  }
1498 
1499  const offset_type tableSize = this->getSize ();
1500  const offset_type numKeys = val_.dimension_0 ();
1501 
1502  out << "Table parameters:" << endl;
1503  {
1504  OSTab tab2 (rcpFromRef (out));
1505  out << "numKeys: " << numKeys << endl
1506  << "tableSize: " << tableSize << endl;
1507  }
1508 
1509  if (vl >= VERB_EXTREME) {
1510  out << "Contents: ";
1511  if (tableSize == 0 || numKeys == 0) {
1512  out << "[]" << endl;
1513  } else {
1514  out << "[ " << endl;
1515  {
1516  OSTab tab2 (rcpFromRef (out));
1517  for (offset_type i = 0; i < tableSize; ++i) {
1518  OSTab tab3 (rcpFromRef (out));
1519  out << "[";
1520  for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1521  out << "(" << val_[k].first << "," << val_[k].second << ")";
1522  if (k + 1 < ptr_[i+1]) {
1523  out << ", ";
1524  }
1525  }
1526  out << "]" << endl;
1527  } // for each table position i
1528  }
1529  out << "]" << endl;
1530  } // The table contains entries
1531  } // vl >= VERB_EXTREME
1532  }
1533  out << endl;
1534  } // if vl > VERB_LOW
1535 }
1536 
1537 } // namespace Details
1538 } // namespace Tpetra
1539 
1540 // Macro that explicitly instantiates FixedHashTable for the given local
1541 // ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1542 // template parameters occur in the opposite order of most Tpetra
1543 // classes. This is because FixedHashTable performs global-to-local
1544 // lookup, and the convention in templated C++ lookup tables (such as
1545 // std::map) is <KeyType, ValueType>.
1546 //
1547 // This macro must be explanded within the Tpetra::Details namespace.
1548 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1549  template class FixedHashTable< GO , LO >;
1550 
1551 // Macro that explicitly instantiates FixedHashTable for the given
1552 // local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1553 // types. Note that FixedHashTable's first two template parameters
1554 // occur in the opposite order of most Tpetra classes. This is
1555 // because FixedHashTable performs global-to-local lookup, and the
1556 // convention in templated C++ lookup tables (such as std::map) is
1557 // <KeyType, ValueType>.
1558 //
1559 // This macro must be explanded within the Tpetra::Details namespace.
1560 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1561  template class FixedHashTable< GO , LO , DEVICE >;
1562 
1563 #endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
bool success_
Whether fill succeeded (it can only fail on a bug)
Implementation details of Tpetra.
The hash function for FixedHashTable.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
FixedHashTable()
Default constructor; makes an empty table.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
bool hasKeys() const
Whether it is safe to call getKey().
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void join(volatile value_type &dst, const volatile value_type &src) const
Combine two intermediate reduction results.
Reduction result for FillPairs functor below.
Kokkos::View< const GlobalOrdinal *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.