Tpetra parallel linear algebra  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
51 
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 #include <sstream>
55 #include <stdexcept>
56 
57 namespace Tpetra {
58 namespace KokkosRefactor {
59 namespace Details {
60 
66 namespace Impl {
67 
74 template<class IntegerType,
75  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
76 struct OutOfBounds {
77  static KOKKOS_INLINE_FUNCTION bool
78  test (const IntegerType x,
79  const IntegerType exclusiveUpperBound);
80 };
81 
82 // Partial specialization for the case where IntegerType IS signed.
83 template<class IntegerType>
84 struct OutOfBounds<IntegerType, true> {
85  static KOKKOS_INLINE_FUNCTION bool
86  test (const IntegerType x,
87  const IntegerType exclusiveUpperBound)
88  {
89  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
90  }
91 };
92 
93 // Partial specialization for the case where IntegerType is NOT signed.
94 template<class IntegerType>
95 struct OutOfBounds<IntegerType, false> {
96  static KOKKOS_INLINE_FUNCTION bool
97  test (const IntegerType x,
98  const IntegerType exclusiveUpperBound)
99  {
100  return x >= exclusiveUpperBound;
101  }
102 };
103 
106 template<class IntegerType>
107 KOKKOS_INLINE_FUNCTION bool
108 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
109 {
110  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
111 }
112 
113 } // namespace Impl
114 
115  // Functors for implementing packAndPrepare and unpackAndCombine
116  // through parallel_for
117 
118  template <typename DstView, typename SrcView, typename IdxView>
119  struct PackArraySingleColumn {
120  typedef typename DstView::execution_space execution_space;
121  typedef typename execution_space::size_type size_type;
122 
123  DstView dst;
124  SrcView src;
125  IdxView idx;
126  size_t col;
127 
128  PackArraySingleColumn(const DstView& dst_,
129  const SrcView& src_,
130  const IdxView& idx_,
131  size_t col_) :
132  dst(dst_), src(src_), idx(idx_), col(col_) {}
133 
134  KOKKOS_INLINE_FUNCTION
135  void operator()( const size_type k ) const {
136  dst(k) = src(idx(k), col);
137  }
138 
139  static void pack(const DstView& dst,
140  const SrcView& src,
141  const IdxView& idx,
142  size_t col) {
143  Kokkos::parallel_for( idx.size(),
144  PackArraySingleColumn(dst,src,idx,col) );
145  }
146  };
147 
148  template <typename DstView,
149  typename SrcView,
150  typename IdxView,
151  typename SizeType = typename DstView::execution_space::size_type>
152  class PackArraySingleColumnWithBoundsCheck {
153  private:
154  static_assert (Kokkos::Impl::is_view<DstView>::value,
155  "DstView must be a Kokkos::View.");
156  static_assert (Kokkos::Impl::is_view<SrcView>::value,
157  "SrcView must be a Kokkos::View.");
158  static_assert (Kokkos::Impl::is_view<IdxView>::value,
159  "IdxView must be a Kokkos::View.");
160  static_assert (static_cast<int> (DstView::rank) == 1,
161  "DstView must be a rank-1 Kokkos::View.");
162  static_assert (static_cast<int> (SrcView::rank) == 2,
163  "SrcView must be a rank-2 Kokkos::View.");
164  static_assert (static_cast<int> (IdxView::rank) == 1,
165  "IdxView must be a rank-1 Kokkos::View.");
166  static_assert (std::is_integral<SizeType>::value,
167  "SizeType must be a built-in integer type.");
168  public:
169  typedef SizeType size_type;
171  typedef int value_type;
172 
173  private:
174  DstView dst;
175  SrcView src;
176  IdxView idx;
177  size_type col;
178 
179  public:
180  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
181  const SrcView& src_,
182  const IdxView& idx_,
183  const size_type col_) :
184  dst (dst_), src (src_), idx (idx_), col (col_) {}
185 
186  KOKKOS_INLINE_FUNCTION void
187  operator() (const size_type& k, value_type& result) const {
188  typedef typename IdxView::non_const_value_type index_type;
189 
190  const index_type lclRow = idx(k);
191  if (lclRow < static_cast<index_type> (0) ||
192  lclRow >= static_cast<index_type> (src.dimension_0 ())) {
193  result = 0; // failed!
194  }
195  else {
196  dst(k) = src(lclRow, col);
197  }
198  }
199 
200  KOKKOS_INLINE_FUNCTION
201  void init (value_type& initialResult) const {
202  initialResult = 1; // success
203  }
204 
205  KOKKOS_INLINE_FUNCTION void
206  join (volatile value_type& dstResult,
207  const volatile value_type& srcResult) const
208  {
209  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
210  }
211 
212  static void
213  pack (const DstView& dst,
214  const SrcView& src,
215  const IdxView& idx,
216  const size_type col)
217  {
218  typedef typename DstView::execution_space execution_space;
219  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
220  typedef typename IdxView::non_const_value_type index_type;
221 
222  int result = 1;
223  Kokkos::parallel_reduce (range_type (0, idx.size ()),
224  PackArraySingleColumnWithBoundsCheck (dst, src,
225  idx, col),
226  result);
227  if (result != 1) {
228  // Go back and find the out-of-bounds entries in the index
229  // array. Performance doesn't matter since we are already in
230  // an error state, so we can do this sequentially, on host.
231  auto idx_h = Kokkos::create_mirror_view (idx);
232  Kokkos::deep_copy (idx_h, idx);
233 
234  std::vector<index_type> badIndices;
235  const size_type numInds = idx_h.dimension_0 ();
236  for (size_type k = 0; k < numInds; ++k) {
237  if (idx_h(k) < static_cast<index_type> (0) ||
238  idx_h(k) >= static_cast<index_type> (src.dimension_0 ())) {
239  badIndices.push_back (idx_h(k));
240  }
241  }
242 
243  std::ostringstream os;
244  os << "MultiVector single-column pack kernel had "
245  << badIndices.size () << " out-of bounds index/ices. "
246  "Here they are: [";
247  for (size_t k = 0; k < badIndices.size (); ++k) {
248  os << badIndices[k];
249  if (k + 1 < badIndices.size ()) {
250  os << ", ";
251  }
252  }
253  os << "].";
254  throw std::runtime_error (os.str ());
255  }
256  }
257  };
258 
259 
260  template <typename DstView, typename SrcView, typename IdxView>
261  void
262  pack_array_single_column (const DstView& dst,
263  const SrcView& src,
264  const IdxView& idx,
265  const size_t col,
266  const bool debug = true)
267  {
268  static_assert (Kokkos::Impl::is_view<DstView>::value,
269  "DstView must be a Kokkos::View.");
270  static_assert (Kokkos::Impl::is_view<SrcView>::value,
271  "SrcView must be a Kokkos::View.");
272  static_assert (Kokkos::Impl::is_view<IdxView>::value,
273  "IdxView must be a Kokkos::View.");
274  static_assert (static_cast<int> (DstView::rank) == 1,
275  "DstView must be a rank-1 Kokkos::View.");
276  static_assert (static_cast<int> (SrcView::rank) == 2,
277  "SrcView must be a rank-2 Kokkos::View.");
278  static_assert (static_cast<int> (IdxView::rank) == 1,
279  "IdxView must be a rank-1 Kokkos::View.");
280 
281  if (debug) {
282  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
283  impl_type::pack (dst, src, idx, col);
284  }
285  else {
286  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
287  impl_type::pack (dst, src, idx, col);
288  }
289  }
290 
291  template <typename DstView, typename SrcView, typename IdxView>
292  struct PackArrayMultiColumn {
293  typedef typename DstView::execution_space execution_space;
294  typedef typename execution_space::size_type size_type;
295 
296  DstView dst;
297  SrcView src;
298  IdxView idx;
299  size_t numCols;
300 
301  PackArrayMultiColumn(const DstView& dst_,
302  const SrcView& src_,
303  const IdxView& idx_,
304  size_t numCols_) :
305  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
306 
307  KOKKOS_INLINE_FUNCTION
308  void operator()( const size_type k ) const {
309  const typename IdxView::value_type localRow = idx(k);
310  const size_t offset = k*numCols;
311  for (size_t j = 0; j < numCols; ++j)
312  dst(offset + j) = src(localRow, j);
313  }
314 
315  static void pack(const DstView& dst,
316  const SrcView& src,
317  const IdxView& idx,
318  size_t numCols) {
319  Kokkos::parallel_for( idx.size(),
320  PackArrayMultiColumn(dst,src,idx,numCols) );
321  }
322  };
323 
324  template <typename DstView,
325  typename SrcView,
326  typename IdxView,
327  typename SizeType = typename DstView::execution_space::size_type>
328  class PackArrayMultiColumnWithBoundsCheck {
329  public:
330  typedef SizeType size_type;
332  typedef int value_type;
333 
334  private:
335  DstView dst;
336  SrcView src;
337  IdxView idx;
338  size_type numCols;
339 
340  public:
341  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
342  const SrcView& src_,
343  const IdxView& idx_,
344  const size_type numCols_) :
345  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
346 
347  KOKKOS_INLINE_FUNCTION void
348  operator() (const size_type& k, value_type& result) const {
349  typedef typename IdxView::non_const_value_type index_type;
350 
351  const index_type lclRow = idx(k);
352  if (lclRow < static_cast<index_type> (0) ||
353  lclRow >= static_cast<index_type> (src.dimension_0 ())) {
354  result = 0; // failed!
355  }
356  else {
357  const size_type offset = k*numCols;
358  for (size_type j = 0; j < numCols; ++j) {
359  dst(offset + j) = src(lclRow, j);
360  }
361  }
362  }
363 
364  KOKKOS_INLINE_FUNCTION
365  void init (value_type& initialResult) const {
366  initialResult = 1; // success
367  }
368 
369  KOKKOS_INLINE_FUNCTION void
370  join (volatile value_type& dstResult,
371  const volatile value_type& srcResult) const
372  {
373  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
374  }
375 
376  static void
377  pack (const DstView& dst,
378  const SrcView& src,
379  const IdxView& idx,
380  const size_type numCols)
381  {
382  typedef typename DstView::execution_space execution_space;
383  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
384  typedef typename IdxView::non_const_value_type index_type;
385 
386  int result = 1;
387  Kokkos::parallel_reduce (range_type (0, idx.size ()),
388  PackArrayMultiColumnWithBoundsCheck (dst, src,
389  idx, numCols),
390  result);
391  if (result != 1) {
392  // Go back and find the out-of-bounds entries in the index
393  // array. Performance doesn't matter since we are already in
394  // an error state, so we can do this sequentially, on host.
395  auto idx_h = Kokkos::create_mirror_view (idx);
396  Kokkos::deep_copy (idx_h, idx);
397 
398  std::vector<index_type> badIndices;
399  const size_type numInds = idx_h.dimension_0 ();
400  for (size_type k = 0; k < numInds; ++k) {
401  if (idx_h(k) < static_cast<index_type> (0) ||
402  idx_h(k) >= static_cast<index_type> (src.dimension_0 ())) {
403  badIndices.push_back (idx_h(k));
404  }
405  }
406 
407  std::ostringstream os;
408  os << "MultiVector multiple-column pack kernel had "
409  << badIndices.size () << " out-of bounds index/ices. "
410  "Here they are: [";
411  for (size_t k = 0; k < badIndices.size (); ++k) {
412  os << badIndices[k];
413  if (k + 1 < badIndices.size ()) {
414  os << ", ";
415  }
416  }
417  os << "].";
418  throw std::runtime_error (os.str ());
419  }
420  }
421  };
422 
423 
424  template <typename DstView,
425  typename SrcView,
426  typename IdxView>
427  void
428  pack_array_multi_column (const DstView& dst,
429  const SrcView& src,
430  const IdxView& idx,
431  const size_t numCols,
432  const bool debug = true)
433  {
434  static_assert (Kokkos::Impl::is_view<DstView>::value,
435  "DstView must be a Kokkos::View.");
436  static_assert (Kokkos::Impl::is_view<SrcView>::value,
437  "SrcView must be a Kokkos::View.");
438  static_assert (Kokkos::Impl::is_view<IdxView>::value,
439  "IdxView must be a Kokkos::View.");
440  static_assert (static_cast<int> (DstView::rank) == 1,
441  "DstView must be a rank-1 Kokkos::View.");
442  static_assert (static_cast<int> (SrcView::rank) == 2,
443  "SrcView must be a rank-2 Kokkos::View.");
444  static_assert (static_cast<int> (IdxView::rank) == 1,
445  "IdxView must be a rank-1 Kokkos::View.");
446 
447  if (debug) {
448  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
449  SrcView, IdxView> impl_type;
450  impl_type::pack (dst, src, idx, numCols);
451  }
452  else {
453  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
454  impl_type::pack (dst, src, idx, numCols);
455  }
456  }
457 
458  template <typename DstView, typename SrcView, typename IdxView,
459  typename ColView>
460  struct PackArrayMultiColumnVariableStride {
461  typedef typename DstView::execution_space execution_space;
462  typedef typename execution_space::size_type size_type;
463 
464  DstView dst;
465  SrcView src;
466  IdxView idx;
467  ColView col;
468  size_t numCols;
469 
470  PackArrayMultiColumnVariableStride(const DstView& dst_,
471  const SrcView& src_,
472  const IdxView& idx_,
473  const ColView& col_,
474  size_t numCols_) :
475  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
476 
477  KOKKOS_INLINE_FUNCTION
478  void operator()( const size_type k ) const {
479  const typename IdxView::value_type localRow = idx(k);
480  const size_t offset = k*numCols;
481  for (size_t j = 0; j < numCols; ++j)
482  dst(offset + j) = src(localRow, col(j));
483  }
484 
485  static void pack(const DstView& dst,
486  const SrcView& src,
487  const IdxView& idx,
488  const ColView& col,
489  size_t numCols) {
490  Kokkos::parallel_for( idx.size(),
491  PackArrayMultiColumnVariableStride(
492  dst,src,idx,col,numCols) );
493  }
494  };
495 
496  template <typename DstView,
497  typename SrcView,
498  typename IdxView,
499  typename ColView,
500  typename SizeType = typename DstView::execution_space::size_type>
501  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
502  public:
503  typedef SizeType size_type;
505  typedef Kokkos::pair<int, int> value_type;
506 
507  private:
508  DstView dst;
509  SrcView src;
510  IdxView idx;
511  ColView col;
512  size_type numCols;
513 
514  public:
515  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
516  const SrcView& src_,
517  const IdxView& idx_,
518  const ColView& col_,
519  const size_type numCols_) :
520  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
521 
522  KOKKOS_INLINE_FUNCTION void
523  operator() (const size_type& k, value_type& result) const {
524  typedef typename IdxView::non_const_value_type row_index_type;
525  typedef typename ColView::non_const_value_type col_index_type;
526 
527  const row_index_type lclRow = idx(k);
528  if (lclRow < static_cast<row_index_type> (0) ||
529  lclRow >= static_cast<row_index_type> (src.dimension_0 ())) {
530  result.first = 0; // failed!
531  }
532  else {
533  const size_type offset = k*numCols;
534  for (size_type j = 0; j < numCols; ++j) {
535  const col_index_type lclCol = col(j);
536  if (Impl::outOfBounds<col_index_type> (lclCol, src.dimension_1 ())) {
537  result.second = 0; // failed!
538  }
539  else { // all indices are valid; do the assignment
540  dst(offset + j) = src(lclRow, lclCol);
541  }
542  }
543  }
544  }
545 
546  KOKKOS_INLINE_FUNCTION void
547  init (value_type& initialResult) const {
548  initialResult.first = 1; // success
549  initialResult.second = 1; // success
550  }
551 
552  KOKKOS_INLINE_FUNCTION void
553  join (volatile value_type& dstResult,
554  const volatile value_type& srcResult) const
555  {
556  dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
557  dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
558  }
559 
560  static void
561  pack (const DstView& dst,
562  const SrcView& src,
563  const IdxView& idx,
564  const ColView& col,
565  const size_type numCols)
566  {
567  using Kokkos::parallel_reduce;
568  typedef typename DstView::execution_space execution_space;
569  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
570  typedef typename IdxView::non_const_value_type row_index_type;
571  typedef typename ColView::non_const_value_type col_index_type;
572 
573  Kokkos::pair<int, int> result (1, 1);
574  parallel_reduce (range_type (0, idx.size ()),
575  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src,
576  idx, col,
577  numCols),
578  result);
579  const bool hasBadRows = (result.first != 1);
580  const bool hasBadCols = (result.second != 1);
581  const bool hasErr = hasBadRows || hasBadCols;
582  if (hasErr) {
583  std::ostringstream os; // for error reporting
584 
585  if (hasBadRows) {
586  // Go back and find the out-of-bounds entries in the array of
587  // row indices. Performance doesn't matter since we are already
588  // in an error state, so we can do this sequentially, on host.
589  auto idx_h = Kokkos::create_mirror_view (idx);
590  Kokkos::deep_copy (idx_h, idx);
591 
592  std::vector<row_index_type> badRows;
593  const size_type numInds = idx_h.dimension_0 ();
594  for (size_type k = 0; k < numInds; ++k) {
595  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.dimension_0 ())) {
596  badRows.push_back (idx_h(k));
597  }
598  }
599  os << "MultiVector multiple-column pack kernel had "
600  << badRows.size () << " out-of bounds row index/ices: [";
601  for (size_t k = 0; k < badRows.size (); ++k) {
602  os << badRows[k];
603  if (k + 1 < badRows.size ()) {
604  os << ", ";
605  }
606  }
607  os << "].";
608  } // hasBadRows
609 
610  if (hasBadCols) {
611  // Go back and find the out-of-bounds entries in the array
612  // of column indices. Performance doesn't matter since we
613  // are already in an error state, so we can do this
614  // sequentially, on host.
615  auto col_h = Kokkos::create_mirror_view (col);
616  Kokkos::deep_copy (col_h, col);
617 
618  std::vector<col_index_type> badCols;
619  const size_type numInds = col_h.dimension_0 ();
620  for (size_type k = 0; k < numInds; ++k) {
621  if (Impl::outOfBounds<col_index_type> (col_h(k), src.dimension_1 ())) {
622  badCols.push_back (col_h(k));
623  }
624  }
625 
626  if (hasBadRows) {
627  os << " ";
628  }
629  os << "MultiVector multiple-column pack kernel had "
630  << badCols.size () << " out-of bounds column index/ices: [";
631  for (size_t k = 0; k < badCols.size (); ++k) {
632  os << badCols[k];
633  if (k + 1 < badCols.size ()) {
634  os << ", ";
635  }
636  }
637  os << "].";
638  } // hasBadCols
639 
640  throw std::runtime_error (os.str ());
641  } // hasErr
642  }
643  };
644 
645  template <typename DstView,
646  typename SrcView,
647  typename IdxView,
648  typename ColView>
649  void
650  pack_array_multi_column_variable_stride (const DstView& dst,
651  const SrcView& src,
652  const IdxView& idx,
653  const ColView& col,
654  const size_t numCols,
655  const bool debug = true)
656  {
657  static_assert (Kokkos::Impl::is_view<DstView>::value,
658  "DstView must be a Kokkos::View.");
659  static_assert (Kokkos::Impl::is_view<SrcView>::value,
660  "SrcView must be a Kokkos::View.");
661  static_assert (Kokkos::Impl::is_view<IdxView>::value,
662  "IdxView must be a Kokkos::View.");
663  static_assert (Kokkos::Impl::is_view<ColView>::value,
664  "ColView must be a Kokkos::View.");
665  static_assert (static_cast<int> (DstView::rank) == 1,
666  "DstView must be a rank-1 Kokkos::View.");
667  static_assert (static_cast<int> (SrcView::rank) == 2,
668  "SrcView must be a rank-2 Kokkos::View.");
669  static_assert (static_cast<int> (IdxView::rank) == 1,
670  "IdxView must be a rank-1 Kokkos::View.");
671  static_assert (static_cast<int> (ColView::rank) == 1,
672  "ColView must be a rank-1 Kokkos::View.");
673 
674  if (debug) {
675  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
676  SrcView, IdxView, ColView> impl_type;
677  impl_type::pack (dst, src, idx, col, numCols);
678  }
679  else {
680  typedef PackArrayMultiColumnVariableStride<DstView,
681  SrcView, IdxView, ColView> impl_type;
682  impl_type::pack (dst, src, idx, col, numCols);
683  }
684  }
685 
686  struct InsertOp {
687  template <typename Scalar>
688  KOKKOS_INLINE_FUNCTION
689  void operator() (Scalar& dest, const Scalar& src) const {
690  Kokkos::atomic_assign(&dest, src);
691  }
692  };
693  struct AddOp {
694  template <typename Scalar>
695  KOKKOS_INLINE_FUNCTION
696  void operator() (Scalar& dest, const Scalar& src) const {
697  Kokkos::atomic_add(&dest, src);
698  }
699  };
700  struct AbsMaxOp {
701  // ETP: Is this really what we want? This seems very odd if
702  // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
703  template <typename T>
704  KOKKOS_INLINE_FUNCTION
705  T max(const T& a, const T& b) const { return a > b ? a : b; }
706 
707  template <typename Scalar>
708  KOKKOS_INLINE_FUNCTION
709  void operator() (Scalar& dest, const Scalar& src) const {
710  typedef Kokkos::Details::ArithTraits<Scalar> SCT;
711  Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
712  }
713  };
714 
715  template <typename DstView, typename SrcView, typename IdxView, typename Op>
716  struct UnpackArrayMultiColumn {
717  typedef typename DstView::execution_space execution_space;
718  typedef typename execution_space::size_type size_type;
719 
720  DstView dst;
721  SrcView src;
722  IdxView idx;
723  Op op;
724  size_t numCols;
725 
726  UnpackArrayMultiColumn(const DstView& dst_,
727  const SrcView& src_,
728  const IdxView& idx_,
729  const Op& op_,
730  size_t numCols_) :
731  dst(dst_), src(src_), idx(idx_), op(op_), numCols(numCols_) {}
732 
733  KOKKOS_INLINE_FUNCTION
734  void operator()( const size_type k ) const {
735  const typename IdxView::value_type localRow = idx(k);
736  const size_t offset = k*numCols;
737  for (size_t j = 0; j < numCols; ++j)
738  op( dst(localRow,j), src(offset+j) );
739  }
740 
741  static void unpack(const DstView& dst,
742  const SrcView& src,
743  const IdxView& idx,
744  const Op& op,
745  size_t numCols) {
746  Kokkos::parallel_for( idx.size(),
747  UnpackArrayMultiColumn(dst,src,idx,op,numCols) );
748  }
749  };
750 
751  template <typename DstView,
752  typename SrcView,
753  typename IdxView,
754  typename Op,
755  typename SizeType = typename DstView::execution_space::size_type>
756  class UnpackArrayMultiColumnWithBoundsCheck {
757  static_assert (Kokkos::Impl::is_view<DstView>::value,
758  "DstView must be a Kokkos::View.");
759  static_assert (Kokkos::Impl::is_view<SrcView>::value,
760  "SrcView must be a Kokkos::View.");
761  static_assert (Kokkos::Impl::is_view<IdxView>::value,
762  "IdxView must be a Kokkos::View.");
763  static_assert (static_cast<int> (DstView::rank) == 2,
764  "DstView must be a rank-2 Kokkos::View.");
765  static_assert (static_cast<int> (SrcView::rank) == 1,
766  "SrcView must be a rank-1 Kokkos::View.");
767  static_assert (static_cast<int> (IdxView::rank) == 1,
768  "IdxView must be a rank-1 Kokkos::View.");
769  static_assert (std::is_integral<SizeType>::value,
770  "SizeType must be a built-in integer type.");
771 
772  public:
773  typedef SizeType size_type;
775  typedef int value_type;
776 
777  private:
778  DstView dst;
779  SrcView src;
780  IdxView idx;
781  Op op;
782  size_type numCols;
783 
784  public:
785  UnpackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
786  const SrcView& src_,
787  const IdxView& idx_,
788  const Op& op_,
789  const size_type numCols_) :
790  dst (dst_), src (src_), idx (idx_), op (op_), numCols (numCols_)
791  {}
792 
793  KOKKOS_INLINE_FUNCTION
794  void operator() (const size_type& k, value_type& result) const {
795  typedef typename IdxView::non_const_value_type index_type;
796 
797  const index_type lclRow = idx(k);
798  if (lclRow < static_cast<index_type> (0) ||
799  lclRow >= static_cast<index_type> (dst.dimension_0 ())) {
800  result = 0; // failed!
801  }
802  else {
803  const size_type offset = k*numCols;
804  for (size_type j = 0; j < numCols; ++j)
805  op (dst(lclRow,j), src(offset+j));
806  }
807  }
808 
809  KOKKOS_INLINE_FUNCTION
810  void init (value_type& initialResult) const {
811  initialResult = 1; // success
812  }
813 
814  KOKKOS_INLINE_FUNCTION void
815  join (volatile value_type& dstResult,
816  const volatile value_type& srcResult) const
817  {
818  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
819  }
820 
821  static void
822  unpack (const DstView& dst,
823  const SrcView& src,
824  const IdxView& idx,
825  const Op& op,
826  const size_type numCols)
827  {
828  typedef typename DstView::execution_space execution_space;
829  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
830  typedef typename IdxView::non_const_value_type index_type;
831 
832  int result = 1;
833  Kokkos::parallel_reduce (range_type (0, idx.size ()),
834  UnpackArrayMultiColumnWithBoundsCheck (dst,src,idx,op,numCols),
835  result);
836  if (result != 1) {
837  // Go back and find the out-of-bounds entries in the index
838  // array. Performance doesn't matter since we are already in
839  // an error state, so we can do this sequentially, on host.
840  auto idx_h = Kokkos::create_mirror_view (idx);
841  Kokkos::deep_copy (idx_h, idx);
842 
843  std::vector<index_type> badIndices;
844  const size_type numInds = idx_h.dimension_0 ();
845  for (size_type k = 0; k < numInds; ++k) {
846  if (idx_h(k) < static_cast<index_type> (0) ||
847  idx_h(k) >= static_cast<index_type> (dst.dimension_0 ())) {
848  badIndices.push_back (idx_h(k));
849  }
850  }
851 
852  std::ostringstream os;
853  os << "MultiVector unpack kernel had " << badIndices.size ()
854  << " out-of bounds index/ices. Here they are: [";
855  for (size_t k = 0; k < badIndices.size (); ++k) {
856  os << badIndices[k];
857  if (k + 1 < badIndices.size ()) {
858  os << ", ";
859  }
860  }
861  os << "].";
862  throw std::runtime_error (os.str ());
863  }
864  }
865  };
866 
867  template <typename DstView, typename SrcView, typename IdxView, typename Op>
868  void
869  unpack_array_multi_column (const DstView& dst,
870  const SrcView& src,
871  const IdxView& idx,
872  const Op& op,
873  const size_t numCols,
874  const bool debug = true)
875  {
876  static_assert (Kokkos::Impl::is_view<DstView>::value,
877  "DstView must be a Kokkos::View.");
878  static_assert (Kokkos::Impl::is_view<SrcView>::value,
879  "SrcView must be a Kokkos::View.");
880  static_assert (Kokkos::Impl::is_view<IdxView>::value,
881  "IdxView must be a Kokkos::View.");
882  static_assert (static_cast<int> (DstView::rank) == 2,
883  "DstView must be a rank-2 Kokkos::View.");
884  static_assert (static_cast<int> (SrcView::rank) == 1,
885  "SrcView must be a rank-1 Kokkos::View.");
886  static_assert (static_cast<int> (IdxView::rank) == 1,
887  "IdxView must be a rank-1 Kokkos::View.");
888 
889  if (debug) {
890  typedef UnpackArrayMultiColumnWithBoundsCheck<DstView,
891  SrcView, IdxView, Op> impl_type;
892  impl_type::unpack (dst, src, idx, op, numCols);
893  }
894  else {
895  typedef UnpackArrayMultiColumn<DstView,
896  SrcView, IdxView, Op> impl_type;
897  impl_type::unpack (dst, src, idx, op, numCols);
898  }
899  }
900 
901  template <typename DstView, typename SrcView, typename IdxView,
902  typename ColView, typename Op>
903  struct UnpackArrayMultiColumnVariableStride {
904  typedef typename DstView::execution_space execution_space;
905  typedef typename execution_space::size_type size_type;
906 
907  DstView dst;
908  SrcView src;
909  IdxView idx;
910  ColView col;
911  Op op;
912  size_t numCols;
913 
914  UnpackArrayMultiColumnVariableStride(const DstView& dst_,
915  const SrcView& src_,
916  const IdxView& idx_,
917  const ColView& col_,
918  const Op& op_,
919  size_t numCols_) :
920  dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
921 
922  KOKKOS_INLINE_FUNCTION
923  void operator()( const size_type k ) const {
924  const typename IdxView::value_type localRow = idx(k);
925  const size_t offset = k*numCols;
926  for (size_t j = 0; j < numCols; ++j)
927  op( dst(localRow,col(j)), src(offset+j) );
928  }
929 
930  static void unpack(const DstView& dst,
931  const SrcView& src,
932  const IdxView& idx,
933  const ColView& col,
934  const Op& op,
935  size_t numCols) {
936  Kokkos::parallel_for( idx.size(),
937  UnpackArrayMultiColumnVariableStride(
938  dst,src,idx,col,op,numCols) );
939  }
940  };
941 
942  template <typename DstView,
943  typename SrcView,
944  typename IdxView,
945  typename ColView,
946  typename Op,
947  typename SizeType = typename DstView::execution_space::size_type>
948  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
949  public:
950  typedef SizeType size_type;
952  typedef Kokkos::pair<int, int> value_type;
953 
954  private:
955  DstView dst;
956  SrcView src;
957  IdxView idx;
958  ColView col;
959  Op op;
960  size_type numCols;
961 
962  public:
963  UnpackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
964  const SrcView& src_,
965  const IdxView& idx_,
966  const ColView& col_,
967  const Op& op_,
968  const size_t numCols_) :
969  dst (dst_), src (src_), idx (idx_), col (col_), op (op_),
970  numCols (numCols_)
971  {}
972 
973  KOKKOS_INLINE_FUNCTION void
974  operator() (const size_type& k, value_type& result) const {
975  typedef typename IdxView::non_const_value_type row_index_type;
976  typedef typename ColView::non_const_value_type col_index_type;
977 
978  const row_index_type lclRow = idx(k);
979  if (lclRow < static_cast<row_index_type> (0) ||
980  lclRow >= static_cast<row_index_type> (dst.dimension_0 ())) {
981  result.first = 0; // failed!
982  }
983  else {
984  const size_type offset = k*numCols;
985  for (size_type j = 0; j < numCols; ++j) {
986  const col_index_type lclCol = col(j);
987 
988  if (Impl::outOfBounds<col_index_type> (lclCol, dst.dimension_1 ())) {
989  result.second = 0; // failed!
990  }
991  else { // all indices are valid; apply the op
992  op (dst(lclRow, col(j)), src(offset+j));
993  }
994  }
995  }
996  }
997 
998  KOKKOS_INLINE_FUNCTION void
999  init (value_type& initialResult) const {
1000  initialResult.first = 1; // success
1001  initialResult.second = 1; // success
1002  }
1003 
1004  KOKKOS_INLINE_FUNCTION void
1005  join (volatile value_type& dstResult,
1006  const volatile value_type& srcResult) const
1007  {
1008  dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
1009  dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
1010  }
1011 
1012  static void
1013  unpack (const DstView& dst,
1014  const SrcView& src,
1015  const IdxView& idx,
1016  const ColView& col,
1017  const Op& op,
1018  const size_type numCols)
1019  {
1020  typedef typename DstView::execution_space execution_space;
1021  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1022  typedef typename IdxView::non_const_value_type row_index_type;
1023  typedef typename ColView::non_const_value_type col_index_type;
1024 
1025  Kokkos::pair<int, int> result (1, 1);
1026  Kokkos::parallel_reduce (range_type (0, idx.size ()),
1027  UnpackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
1028  col, op, numCols),
1029  result);
1030 
1031  const bool hasBadRows = (result.first != 1);
1032  const bool hasBadCols = (result.second != 1);
1033  const bool hasErr = hasBadRows || hasBadCols;
1034  if (hasErr) {
1035  std::ostringstream os; // for error reporting
1036 
1037  if (hasBadRows) {
1038  // Go back and find the out-of-bounds entries in the array
1039  // of row indices. Performance doesn't matter since we are
1040  // already in an error state, so we can do this
1041  // sequentially, on host.
1042  auto idx_h = Kokkos::create_mirror_view (idx);
1043  Kokkos::deep_copy (idx_h, idx);
1044 
1045  std::vector<row_index_type> badRows;
1046  const size_type numInds = idx_h.dimension_0 ();
1047  for (size_type k = 0; k < numInds; ++k) {
1048  if (idx_h(k) < static_cast<row_index_type> (0) ||
1049  idx_h(k) >= static_cast<row_index_type> (dst.dimension_0 ())) {
1050  badRows.push_back (idx_h(k));
1051  }
1052  }
1053  os << "MultiVector multiple-column unpack kernel had "
1054  << badRows.size () << " out-of bounds row index/ices: [";
1055  for (size_t k = 0; k < badRows.size (); ++k) {
1056  os << badRows[k];
1057  if (k + 1 < badRows.size ()) {
1058  os << ", ";
1059  }
1060  }
1061  os << "].";
1062  } // hasBadRows
1063 
1064  if (hasBadCols) {
1065  // Go back and find the out-of-bounds entries in the array
1066  // of column indices. Performance doesn't matter since we
1067  // are already in an error state, so we can do this
1068  // sequentially, on host.
1069  auto col_h = Kokkos::create_mirror_view (col);
1070  Kokkos::deep_copy (col_h, col);
1071 
1072  std::vector<col_index_type> badCols;
1073  const size_type numInds = col_h.dimension_0 ();
1074  for (size_type k = 0; k < numInds; ++k) {
1075  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.dimension_1 ())) {
1076  badCols.push_back (col_h(k));
1077  }
1078  }
1079 
1080  if (hasBadRows) {
1081  os << " ";
1082  }
1083  os << "MultiVector multiple-column unpack kernel had "
1084  << badCols.size () << " out-of bounds column index/ices: [";
1085  for (size_t k = 0; k < badCols.size (); ++k) {
1086  os << badCols[k];
1087  if (k + 1 < badCols.size ()) {
1088  os << ", ";
1089  }
1090  }
1091  os << "].";
1092  } // hasBadCols
1093 
1094  throw std::runtime_error (os.str ());
1095  } // hasErr
1096  }
1097  };
1098 
1099  template <typename DstView,
1100  typename SrcView,
1101  typename IdxView,
1102  typename ColView,
1103  typename Op>
1104  void
1105  unpack_array_multi_column_variable_stride (const DstView& dst,
1106  const SrcView& src,
1107  const IdxView& idx,
1108  const ColView& col,
1109  const Op& op,
1110  const size_t numCols,
1111  const bool debug = true)
1112  {
1113  static_assert (Kokkos::Impl::is_view<DstView>::value,
1114  "DstView must be a Kokkos::View.");
1115  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1116  "SrcView must be a Kokkos::View.");
1117  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1118  "IdxView must be a Kokkos::View.");
1119  static_assert (Kokkos::Impl::is_view<ColView>::value,
1120  "ColView must be a Kokkos::View.");
1121  static_assert (static_cast<int> (DstView::rank) == 2,
1122  "DstView must be a rank-2 Kokkos::View.");
1123  static_assert (static_cast<int> (SrcView::rank) == 1,
1124  "SrcView must be a rank-1 Kokkos::View.");
1125  static_assert (static_cast<int> (IdxView::rank) == 1,
1126  "IdxView must be a rank-1 Kokkos::View.");
1127  static_assert (static_cast<int> (ColView::rank) == 1,
1128  "ColView must be a rank-1 Kokkos::View.");
1129 
1130  if (debug) {
1131  typedef UnpackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
1132  SrcView, IdxView, ColView, Op> impl_type;
1133  impl_type::unpack (dst, src, idx, col, op, numCols);
1134  }
1135  else {
1136  typedef UnpackArrayMultiColumnVariableStride<DstView,
1137  SrcView, IdxView, ColView, Op> impl_type;
1138  impl_type::unpack (dst, src, idx, col, op, numCols);
1139  }
1140  }
1141 
1142  template <typename DstView, typename SrcView,
1143  typename DstIdxView, typename SrcIdxView>
1144  struct PermuteArrayMultiColumn {
1145  typedef typename DstView::execution_space execution_space;
1146  typedef typename execution_space::size_type size_type;
1147 
1148  DstView dst;
1149  SrcView src;
1150  DstIdxView dst_idx;
1151  SrcIdxView src_idx;
1152  size_t numCols;
1153 
1154  PermuteArrayMultiColumn(const DstView& dst_,
1155  const SrcView& src_,
1156  const DstIdxView& dst_idx_,
1157  const SrcIdxView& src_idx_,
1158  size_t numCols_) :
1159  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1160  numCols(numCols_) {}
1161 
1162  KOKKOS_INLINE_FUNCTION
1163  void operator()( const size_type k ) const {
1164  const typename DstIdxView::value_type toRow = dst_idx(k);
1165  const typename SrcIdxView::value_type fromRow = src_idx(k);
1166  for (size_t j = 0; j < numCols; ++j)
1167  dst(toRow, j) = src(fromRow, j);
1168  }
1169 
1170  static void permute(const DstView& dst,
1171  const SrcView& src,
1172  const DstIdxView& dst_idx,
1173  const SrcIdxView& src_idx,
1174  size_t numCols) {
1175  const size_type n = std::min( dst_idx.size(), src_idx.size() );
1176  Kokkos::parallel_for(
1177  n, PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) );
1178  }
1179  };
1180 
1181  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1182  // SrcView::Rank == 2
1183  template <typename DstView, typename SrcView,
1184  typename DstIdxView, typename SrcIdxView>
1185  void permute_array_multi_column(const DstView& dst,
1186  const SrcView& src,
1187  const DstIdxView& dst_idx,
1188  const SrcIdxView& src_idx,
1189  size_t numCols) {
1190  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1191  dst, src, dst_idx, src_idx, numCols);
1192  }
1193 
1194  template <typename DstView, typename SrcView,
1195  typename DstIdxView, typename SrcIdxView,
1196  typename DstColView, typename SrcColView>
1197  struct PermuteArrayMultiColumnVariableStride {
1198  typedef typename DstView::execution_space execution_space;
1199  typedef typename execution_space::size_type size_type;
1200 
1201  DstView dst;
1202  SrcView src;
1203  DstIdxView dst_idx;
1204  SrcIdxView src_idx;
1205  DstColView dst_col;
1206  SrcColView src_col;
1207  size_t numCols;
1208 
1209  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1210  const SrcView& src_,
1211  const DstIdxView& dst_idx_,
1212  const SrcIdxView& src_idx_,
1213  const DstColView& dst_col_,
1214  const SrcColView& src_col_,
1215  size_t numCols_) :
1216  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1217  dst_col(dst_col_), src_col(src_col_),
1218  numCols(numCols_) {}
1219 
1220  KOKKOS_INLINE_FUNCTION
1221  void operator()( const size_type k ) const {
1222  const typename DstIdxView::value_type toRow = dst_idx(k);
1223  const typename SrcIdxView::value_type fromRow = src_idx(k);
1224  for (size_t j = 0; j < numCols; ++j)
1225  dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1226  }
1227 
1228  static void permute(const DstView& dst,
1229  const SrcView& src,
1230  const DstIdxView& dst_idx,
1231  const SrcIdxView& src_idx,
1232  const DstColView& dst_col,
1233  const SrcColView& src_col,
1234  size_t numCols) {
1235  const size_type n = std::min( dst_idx.size(), src_idx.size() );
1236  Kokkos::parallel_for(
1237  n, PermuteArrayMultiColumnVariableStride(
1238  dst,src,dst_idx,src_idx,dst_col,src_col,numCols) );
1239  }
1240  };
1241 
1242  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1243  // SrcView::Rank == 2
1244  template <typename DstView, typename SrcView,
1245  typename DstIdxView, typename SrcIdxView,
1246  typename DstColView, typename SrcColView>
1247  void permute_array_multi_column_variable_stride(const DstView& dst,
1248  const SrcView& src,
1249  const DstIdxView& dst_idx,
1250  const SrcIdxView& src_idx,
1251  const DstColView& dst_col,
1252  const SrcColView& src_col,
1253  size_t numCols) {
1254  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1255  DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1256  dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1257  }
1258 
1259 } // Details namespace
1260 } // KokkosRefactor namespace
1261 } // Tpetra namespace
1262 
1263 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Implementation details of Tpetra.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...