49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP 50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP 52 #include "Kokkos_Core.hpp" 53 #include "Kokkos_ArithTraits.hpp" 58 namespace KokkosRefactor {
namespace Impl {

/// \brief Compile-time dispatch for the "is this index out of bounds?" test.
///
/// \tparam IntegerType Built-in integer type of the index being tested.
/// \tparam isSigned Whether \c IntegerType is signed.  Defaults to
///   <tt>std::numeric_limits<IntegerType>::is_signed</tt>, so callers
///   normally leave it unspecified.
///
/// The primary template only declares test(); both possible values of
/// \c isSigned are handled by the partial specializations below.
template<class IntegerType,
         const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
struct OutOfBounds {
  static KOKKOS_INLINE_FUNCTION bool
  test (const IntegerType x,
        const IntegerType exclusiveUpperBound);
};

/// \brief Partial specialization for signed \c IntegerType.
///
/// A signed index can be negative, so both ends of the half-open range
/// [0, exclusiveUpperBound) are checked.
template<class IntegerType>
struct OutOfBounds<IntegerType, true> {
  /// \return true if x < 0 or x >= exclusiveUpperBound, else false.
  static KOKKOS_INLINE_FUNCTION bool
  test (const IntegerType x,
        const IntegerType exclusiveUpperBound)
  {
    return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
  }
};

/// \brief Partial specialization for unsigned \c IntegerType.
///
/// An unsigned value can never be less than zero, so only the upper bound
/// is compared.
template<class IntegerType>
struct OutOfBounds<IntegerType, false> {
  /// \return true if x >= exclusiveUpperBound, else false.
  static KOKKOS_INLINE_FUNCTION bool
  test (const IntegerType x,
        const IntegerType exclusiveUpperBound)
  {
    return x >= exclusiveUpperBound;
  }
};

/// \brief Is x out of bounds?  That is, is x less than zero, or greater
///   than or equal to the given exclusive upper bound?
///
/// \param x [in] Value to test.
/// \param exclusiveUpperBound [in] Upper bound of the valid range; the bound
///   itself is not a valid value.
///
/// \return true if x lies outside [0, exclusiveUpperBound), else false.
template<class IntegerType>
KOKKOS_INLINE_FUNCTION bool
outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
{
  // Signedness is picked up through the default template argument of
  // OutOfBounds, which selects the matching specialization.
  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
}

} // namespace Impl
// Functor that packs a single column of a two-index source view into a
// one-index destination buffer: slot k of dst receives src(idx(k), col).
// No validation of idx(k) is done in the visible code.
// NOTE(review): this listing omits some original lines (member declarations
// and parts of the parameter lists); the comments cover only what is visible.
118 template <
typename DstView,
typename SrcView,
typename IdxView>
119 struct PackArraySingleColumn {
120 typedef typename DstView::execution_space execution_space;
121 typedef typename execution_space::size_type size_type;
// Constructor: stores the destination, source, index list and column.
128 PackArraySingleColumn(
const DstView& dst_,
132 dst(dst_), src(src_), idx(idx_), col(col_) {}
// Per-entry body: copy the entry in row idx(k), column col, into dst(k).
134 KOKKOS_INLINE_FUNCTION
135 void operator()(
const size_type k )
const {
136 dst(k) = src(idx(k), col);
// Launches the functor with Kokkos::parallel_for, one iteration per entry
// of idx.
139 static void pack(
const DstView& dst,
143 Kokkos::parallel_for( idx.size(),
144 PackArraySingleColumn(dst,src,idx,col) );
// Bounds-checking counterpart of the single-column pack functor.  It is a
// reduction functor: operator() receives a per-iteration result of type int,
// and join() produces 0 if either partial result is 0 and 1 otherwise.
// NOTE(review): this listing omits some original lines (member declarations,
// parts of parameter lists, the bodies of some branches); the comments cover
// only what is visible.
148 template <
typename DstView,
151 typename SizeType =
typename DstView::execution_space::size_type>
152 class PackArraySingleColumnWithBoundsCheck {
// Compile-time requirements: all three view arguments are Kokkos::Views,
// DstView and IdxView have rank 1, SrcView has rank 2, and SizeType is a
// built-in integer type.
154 static_assert (Kokkos::Impl::is_view<DstView>::value,
155 "DstView must be a Kokkos::View.");
156 static_assert (Kokkos::Impl::is_view<SrcView>::value,
157 "SrcView must be a Kokkos::View.");
158 static_assert (Kokkos::Impl::is_view<IdxView>::value,
159 "IdxView must be a Kokkos::View.");
160 static_assert (static_cast<int> (DstView::rank) == 1,
161 "DstView must be a rank-1 Kokkos::View.");
162 static_assert (static_cast<int> (SrcView::rank) == 2,
163 "SrcView must be a rank-2 Kokkos::View.");
164 static_assert (static_cast<int> (IdxView::rank) == 1,
165 "IdxView must be a rank-1 Kokkos::View.");
166 static_assert (std::is_integral<SizeType>::value,
167 "SizeType must be a built-in integer type.");
169 typedef SizeType size_type;
// Type of the reduction result; see join() for how values are combined.
171 typedef int value_type;
// Constructor: stores the destination, source, index list and column.
180 PackArraySingleColumnWithBoundsCheck (
const DstView& dst_,
183 const size_type col_) :
184 dst (dst_), src (src_), idx (idx_), col (col_) {}
// Reduction body for entry k: reads the row index idx(k) and tests it
// against the half-open range [0, src.dimension_0()).  The copy
// dst(k) = src(lclRow, col) follows the test.
// NOTE(review): the statements inside the out-of-range branch, and whether
// the copy sits in an else branch, are not in this listing; presumably the
// failure is recorded in `result` - confirm against the full file.
186 KOKKOS_INLINE_FUNCTION
void 187 operator() (
const size_type& k, value_type& result)
const {
188 typedef typename IdxView::non_const_value_type index_type;
190 const index_type lclRow = idx(k);
191 if (lclRow < static_cast<index_type> (0) ||
192 lclRow >= static_cast<index_type> (src.dimension_0 ())) {
196 dst(k) = src(lclRow, col);
// Sets the initial reduction value (body not shown in this listing).
200 KOKKOS_INLINE_FUNCTION
201 void init (value_type& initialResult)
const {
// Combines two partial results: the outcome is 0 as soon as either input
// is 0, and 1 otherwise.
205 KOKKOS_INLINE_FUNCTION
void 206 join (
volatile value_type& dstResult,
207 const volatile value_type& srcResult)
const 209 dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
// Host-side driver: runs the functor with Kokkos::parallel_reduce over
// [0, idx.size()).  The visible error path then scans a mirror of idx,
// collects every index outside [0, src.dimension_0()), formats them into a
// message, and throws std::runtime_error.
// NOTE(review): the condition guarding the error path, and any copy of idx
// into the mirror view, are not in this listing - confirm.
213 pack (
const DstView& dst,
218 typedef typename DstView::execution_space execution_space;
219 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
220 typedef typename IdxView::non_const_value_type index_type;
223 Kokkos::parallel_reduce (range_type (0, idx.size ()),
224 PackArraySingleColumnWithBoundsCheck (dst, src,
231 auto idx_h = Kokkos::create_mirror_view (idx);
// Gather the offending indices so the exception message can list them.
234 std::vector<index_type> badIndices;
235 const size_type numInds = idx_h.dimension_0 ();
236 for (size_type k = 0; k < numInds; ++k) {
237 if (idx_h(k) < static_cast<index_type> (0) ||
238 idx_h(k) >= static_cast<index_type> (src.dimension_0 ())) {
239 badIndices.push_back (idx_h(k));
243 std::ostringstream os;
244 os <<
"MultiVector single-column pack kernel had " 245 << badIndices.size () <<
" out-of bounds index/ices. " 247 for (
size_t k = 0; k < badIndices.size (); ++k) {
// True for every element except the last one.
249 if (k + 1 < badIndices.size ()) {
254 throw std::runtime_error (os.str ());
// Entry point for packing one column.  After checking the view types at
// compile time, it forwards (dst, src, idx, col) to either
// PackArraySingleColumnWithBoundsCheck::pack or PackArraySingleColumn::pack.
// The `debug` parameter defaults to true.
// NOTE(review): the branch that chooses between the two implementations is
// not in this listing; presumably `debug` selects the bounds-checking one -
// confirm against the full file.
260 template <
typename DstView,
typename SrcView,
typename IdxView>
262 pack_array_single_column (
const DstView& dst,
266 const bool debug =
true)
// Compile-time requirements: Kokkos::Views with DstView rank 1, SrcView
// rank 2 and IdxView rank 1.
268 static_assert (Kokkos::Impl::is_view<DstView>::value,
269 "DstView must be a Kokkos::View.");
270 static_assert (Kokkos::Impl::is_view<SrcView>::value,
271 "SrcView must be a Kokkos::View.");
272 static_assert (Kokkos::Impl::is_view<IdxView>::value,
273 "IdxView must be a Kokkos::View.");
274 static_assert (static_cast<int> (DstView::rank) == 1,
275 "DstView must be a rank-1 Kokkos::View.");
276 static_assert (static_cast<int> (SrcView::rank) == 2,
277 "SrcView must be a rank-2 Kokkos::View.");
278 static_assert (static_cast<int> (IdxView::rank) == 1,
279 "IdxView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
282 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
283 impl_type::pack (dst, src, idx, col);
// Implementation without index validation.
286 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
287 impl_type::pack (dst, src, idx, col);
// Functor that packs numCols consecutive columns of selected rows into a
// one-index buffer.  For entry k, the values src(idx(k), 0..numCols-1) are
// written to dst starting at position k*numCols.  No validation of idx(k)
// is done in the visible code.
// NOTE(review): this listing omits some original lines (member declarations
// and parts of the parameter lists).
291 template <
typename DstView,
typename SrcView,
typename IdxView>
292 struct PackArrayMultiColumn {
293 typedef typename DstView::execution_space execution_space;
294 typedef typename execution_space::size_type size_type;
// Constructor: stores the views and the number of columns to pack.
301 PackArrayMultiColumn(
const DstView& dst_,
305 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
// Per-entry body: copy one row's numCols values into the k-th block of dst.
307 KOKKOS_INLINE_FUNCTION
308 void operator()(
const size_type k )
const {
309 const typename IdxView::value_type localRow = idx(k);
// Start of the k-th block of numCols entries in dst.
310 const size_t offset = k*numCols;
311 for (
size_t j = 0; j < numCols; ++j)
312 dst(offset + j) = src(localRow, j);
// Launches the functor with Kokkos::parallel_for, one iteration per entry
// of idx.
315 static void pack(
const DstView& dst,
319 Kokkos::parallel_for( idx.size(),
320 PackArrayMultiColumn(dst,src,idx,numCols) );
// Bounds-checking counterpart of the multi-column pack functor, written as
// a reduction functor with an int result: join() yields 0 if either partial
// result is 0 and 1 otherwise.
// NOTE(review): this listing omits some original lines (member declarations,
// parts of parameter lists, the bodies of some branches).
324 template <
typename DstView,
327 typename SizeType =
typename DstView::execution_space::size_type>
328 class PackArrayMultiColumnWithBoundsCheck {
330 typedef SizeType size_type;
// Type of the reduction result; see join() for how values are combined.
332 typedef int value_type;
// Constructor: stores the views and the number of columns to pack.
341 PackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
344 const size_type numCols_) :
345 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
// Reduction body for entry k: tests idx(k) against [0, src.dimension_0());
// the visible copy loop writes src(lclRow, 0..numCols-1) to dst starting at
// k*numCols.
// NOTE(review): the statements inside the out-of-range branch, and whether
// the copy loop is in an else branch, are not in this listing - confirm.
347 KOKKOS_INLINE_FUNCTION
void 348 operator() (
const size_type& k, value_type& result)
const {
349 typedef typename IdxView::non_const_value_type index_type;
351 const index_type lclRow = idx(k);
352 if (lclRow < static_cast<index_type> (0) ||
353 lclRow >= static_cast<index_type> (src.dimension_0 ())) {
// Start of the k-th block of numCols entries in dst.
357 const size_type offset = k*numCols;
358 for (size_type j = 0; j < numCols; ++j) {
359 dst(offset + j) = src(lclRow, j);
// Sets the initial reduction value (body not shown in this listing).
364 KOKKOS_INLINE_FUNCTION
365 void init (value_type& initialResult)
const {
// Combines two partial results: 0 as soon as either input is 0, else 1.
369 KOKKOS_INLINE_FUNCTION
void 370 join (
volatile value_type& dstResult,
371 const volatile value_type& srcResult)
const 373 dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
// Host-side driver: runs the functor with Kokkos::parallel_reduce over
// [0, idx.size()).  The visible error path scans a mirror of idx, collects
// every index outside [0, src.dimension_0()), and throws std::runtime_error
// with a message built from them.
// NOTE(review): the condition guarding the error path, and any copy of idx
// into the mirror view, are not in this listing - confirm.
377 pack (
const DstView& dst,
380 const size_type numCols)
382 typedef typename DstView::execution_space execution_space;
383 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
384 typedef typename IdxView::non_const_value_type index_type;
387 Kokkos::parallel_reduce (range_type (0, idx.size ()),
388 PackArrayMultiColumnWithBoundsCheck (dst, src,
395 auto idx_h = Kokkos::create_mirror_view (idx);
// Gather the offending indices so the exception message can list them.
398 std::vector<index_type> badIndices;
399 const size_type numInds = idx_h.dimension_0 ();
400 for (size_type k = 0; k < numInds; ++k) {
401 if (idx_h(k) < static_cast<index_type> (0) ||
402 idx_h(k) >= static_cast<index_type> (src.dimension_0 ())) {
403 badIndices.push_back (idx_h(k));
407 std::ostringstream os;
408 os <<
"MultiVector multiple-column pack kernel had " 409 << badIndices.size () <<
" out-of bounds index/ices. " 411 for (
size_t k = 0; k < badIndices.size (); ++k) {
// True for every element except the last one.
413 if (k + 1 < badIndices.size ()) {
418 throw std::runtime_error (os.str ());
// Entry point for packing numCols columns.  After checking the view types
// at compile time, it forwards (dst, src, idx, numCols) to either
// PackArrayMultiColumnWithBoundsCheck::pack or PackArrayMultiColumn::pack.
// The `debug` parameter defaults to true.
// NOTE(review): the branch that chooses between the two implementations is
// not in this listing; presumably `debug` selects the bounds-checking one -
// confirm against the full file.
424 template <
typename DstView,
428 pack_array_multi_column (
const DstView& dst,
431 const size_t numCols,
432 const bool debug =
true)
// Compile-time requirements: Kokkos::Views with DstView rank 1, SrcView
// rank 2 and IdxView rank 1.
434 static_assert (Kokkos::Impl::is_view<DstView>::value,
435 "DstView must be a Kokkos::View.");
436 static_assert (Kokkos::Impl::is_view<SrcView>::value,
437 "SrcView must be a Kokkos::View.");
438 static_assert (Kokkos::Impl::is_view<IdxView>::value,
439 "IdxView must be a Kokkos::View.");
440 static_assert (static_cast<int> (DstView::rank) == 1,
441 "DstView must be a rank-1 Kokkos::View.");
442 static_assert (static_cast<int> (SrcView::rank) == 2,
443 "SrcView must be a rank-2 Kokkos::View.");
444 static_assert (static_cast<int> (IdxView::rank) == 1,
445 "IdxView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
448 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
449 SrcView, IdxView> impl_type;
450 impl_type::pack (dst, src, idx, numCols);
// Implementation without index validation.
453 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
454 impl_type::pack (dst, src, idx, numCols);
// Functor that packs an arbitrary set of columns of selected rows into a
// one-index buffer.  For entry k, the values src(idx(k), col(j)) for
// j = 0..numCols-1 are written to dst starting at position k*numCols.
// No validation of idx(k) or col(j) is done in the visible code.
// NOTE(review): this listing omits some original lines (the last template
// parameter, member declarations and parts of the parameter lists).
458 template <
typename DstView,
typename SrcView,
typename IdxView,
460 struct PackArrayMultiColumnVariableStride {
461 typedef typename DstView::execution_space execution_space;
462 typedef typename execution_space::size_type size_type;
// Constructor: stores the views, the column list and the column count.
470 PackArrayMultiColumnVariableStride(
const DstView& dst_,
475 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
// Per-entry body: copy the listed columns of one row into the k-th block.
477 KOKKOS_INLINE_FUNCTION
478 void operator()(
const size_type k )
const {
479 const typename IdxView::value_type localRow = idx(k);
// Start of the k-th block of numCols entries in dst.
480 const size_t offset = k*numCols;
481 for (
size_t j = 0; j < numCols; ++j)
482 dst(offset + j) = src(localRow, col(j));
// Launches the functor with Kokkos::parallel_for, one iteration per entry
// of idx.
485 static void pack(
const DstView& dst,
490 Kokkos::parallel_for( idx.size(),
491 PackArrayMultiColumnVariableStride(
492 dst,src,idx,col,numCols) );
// Bounds-checking counterpart of the variable-stride pack functor.  The
// reduction result is a pair of ints: pack() below interprets `first` as
// the row-index status and `second` as the column-index status, with any
// value other than 1 meaning an error was seen.
// NOTE(review): this listing omits some original lines (member declarations,
// parts of parameter lists, the bodies of some branches).
496 template <
typename DstView,
500 typename SizeType =
typename DstView::execution_space::size_type>
501 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
503 typedef SizeType size_type;
// Reduction result: (row status, column status).
505 typedef Kokkos::pair<int, int> value_type;
// Constructor: stores the views, the column list and the column count.
515 PackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
519 const size_type numCols_) :
520 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
// Reduction body for entry k: tests idx(k) against [0, src.dimension_0()),
// and tests each col(j) against src.dimension_1() via Impl::outOfBounds.
// The visible copy writes src(lclRow, lclCol) to dst(k*numCols + j).
// NOTE(review): the statements inside the two out-of-range branches, and
// the else structure around the copy, are not in this listing - confirm.
522 KOKKOS_INLINE_FUNCTION
void 523 operator() (
const size_type& k, value_type& result)
const {
524 typedef typename IdxView::non_const_value_type row_index_type;
525 typedef typename ColView::non_const_value_type col_index_type;
527 const row_index_type lclRow = idx(k);
528 if (lclRow < static_cast<row_index_type> (0) ||
529 lclRow >= static_cast<row_index_type> (src.dimension_0 ())) {
// Start of the k-th block of numCols entries in dst.
533 const size_type offset = k*numCols;
534 for (size_type j = 0; j < numCols; ++j) {
535 const col_index_type lclCol = col(j);
536 if (Impl::outOfBounds<col_index_type> (lclCol, src.dimension_1 ())) {
540 dst(offset + j) = src(lclRow, lclCol);
// Initial reduction value: both components start at 1.
546 KOKKOS_INLINE_FUNCTION
void 547 init (value_type& initialResult)
const {
548 initialResult.first = 1;
549 initialResult.second = 1;
// Combines two partial results component-wise: each component becomes 0 as
// soon as either input component is 0, and 1 otherwise.
552 KOKKOS_INLINE_FUNCTION
void 553 join (
volatile value_type& dstResult,
554 const volatile value_type& srcResult)
const 556 dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
557 dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
// Host-side driver: runs the functor with parallel_reduce over
// [0, idx.size()), derives the row/column error flags from the result, and
// in the visible error path lists the offending row and column indices in a
// message thrown as std::runtime_error.
// NOTE(review): the conditions guarding each reporting section, and any
// copies into the mirror views, are not in this listing - confirm.
561 pack (
const DstView& dst,
565 const size_type numCols)
567 using Kokkos::parallel_reduce;
568 typedef typename DstView::execution_space execution_space;
569 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
570 typedef typename IdxView::non_const_value_type row_index_type;
571 typedef typename ColView::non_const_value_type col_index_type;
// Start from "no error" in both components.
573 Kokkos::pair<int, int> result (1, 1);
574 parallel_reduce (range_type (0, idx.size ()),
575 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src,
// Any component different from 1 signals an error of that kind.
579 const bool hasBadRows = (result.first != 1);
580 const bool hasBadCols = (result.second != 1);
581 const bool hasErr = hasBadRows || hasBadCols;
583 std::ostringstream os;
// Row report: collect every row index outside [0, src.dimension_0()).
589 auto idx_h = Kokkos::create_mirror_view (idx);
592 std::vector<row_index_type> badRows;
593 const size_type numInds = idx_h.dimension_0 ();
594 for (size_type k = 0; k < numInds; ++k) {
595 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.dimension_0 ())) {
596 badRows.push_back (idx_h(k));
599 os <<
"MultiVector multiple-column pack kernel had " 600 << badRows.size () <<
" out-of bounds row index/ices: [";
601 for (
size_t k = 0; k < badRows.size (); ++k) {
// True for every element except the last one.
603 if (k + 1 < badRows.size ()) {
// Column report: collect every column index outside [0, src.dimension_1()).
615 auto col_h = Kokkos::create_mirror_view (col);
618 std::vector<col_index_type> badCols;
619 const size_type numInds = col_h.dimension_0 ();
620 for (size_type k = 0; k < numInds; ++k) {
621 if (Impl::outOfBounds<col_index_type> (col_h(k), src.dimension_1 ())) {
622 badCols.push_back (col_h(k));
629 os <<
"MultiVector multiple-column pack kernel had " 630 << badCols.size () <<
" out-of bounds column index/ices: [";
631 for (
size_t k = 0; k < badCols.size (); ++k) {
// True for every element except the last one.
633 if (k + 1 < badCols.size ()) {
640 throw std::runtime_error (os.str ());
// Entry point for packing an arbitrary list of columns.  After checking the
// view types at compile time, it forwards (dst, src, idx, col, numCols) to
// either PackArrayMultiColumnVariableStrideWithBoundsCheck::pack or
// PackArrayMultiColumnVariableStride::pack.  `debug` defaults to true.
// NOTE(review): the branch that chooses between the two implementations is
// not in this listing; presumably `debug` selects the bounds-checking one -
// confirm against the full file.
645 template <
typename DstView,
650 pack_array_multi_column_variable_stride (
const DstView& dst,
654 const size_t numCols,
655 const bool debug =
true)
// Compile-time requirements: Kokkos::Views with DstView rank 1, SrcView
// rank 2, IdxView rank 1 and ColView rank 1.
657 static_assert (Kokkos::Impl::is_view<DstView>::value,
658 "DstView must be a Kokkos::View.");
659 static_assert (Kokkos::Impl::is_view<SrcView>::value,
660 "SrcView must be a Kokkos::View.");
661 static_assert (Kokkos::Impl::is_view<IdxView>::value,
662 "IdxView must be a Kokkos::View.");
663 static_assert (Kokkos::Impl::is_view<ColView>::value,
664 "ColView must be a Kokkos::View.");
665 static_assert (static_cast<int> (DstView::rank) == 1,
666 "DstView must be a rank-1 Kokkos::View.");
667 static_assert (static_cast<int> (SrcView::rank) == 2,
668 "SrcView must be a rank-2 Kokkos::View.");
669 static_assert (static_cast<int> (IdxView::rank) == 1,
670 "IdxView must be a rank-1 Kokkos::View.");
671 static_assert (static_cast<int> (ColView::rank) == 1,
672 "ColView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
675 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
676 SrcView, IdxView, ColView> impl_type;
677 impl_type::pack (dst, src, idx, col, numCols);
// Implementation without index validation.
680 typedef PackArrayMultiColumnVariableStride<DstView,
681 SrcView, IdxView, ColView> impl_type;
682 impl_type::pack (dst, src, idx, col, numCols);
// Combine operation that overwrites: stores src into dest through
// Kokkos::atomic_assign.
// NOTE(review): the header of the enclosing struct is not in this listing.
687 template <
typename Scalar>
688 KOKKOS_INLINE_FUNCTION
689 void operator() (Scalar& dest,
const Scalar& src)
const {
690 Kokkos::atomic_assign(&dest, src);
// Combine operation that accumulates: adds src to dest through
// Kokkos::atomic_add.
// NOTE(review): the header of the enclosing struct is not in this listing.
694 template <
typename Scalar>
695 KOKKOS_INLINE_FUNCTION
696 void operator() (Scalar& dest,
const Scalar& src)
const {
697 Kokkos::atomic_add(&dest, src);
// Helper: returns the larger of a and b (b when they compare equal).
// NOTE(review): the header of the enclosing struct is not in this listing.
703 template <
typename T>
704 KOKKOS_INLINE_FUNCTION
705 T max(
const T& a,
const T& b)
const {
return a > b ? a : b; }
// Combine operation that keeps the larger magnitude: dest is assigned
// max(|dest|, |src|), with magnitudes computed through ArithTraits::abs.
// NOTE(review): dest is read by SCT::abs(dest) before the atomic_assign, so
// the read-compute-write sequence as a whole is not a single atomic
// operation - confirm that concurrent updates to one entry cannot occur.
707 template <
typename Scalar>
708 KOKKOS_INLINE_FUNCTION
709 void operator() (Scalar& dest,
const Scalar& src)
const {
710 typedef Kokkos::Details::ArithTraits<Scalar> SCT;
711 Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
// Functor that unpacks a one-index buffer into numCols consecutive columns
// of selected rows.  For entry k, op is applied to each pair
// (dst(idx(k), j), src(k*numCols + j)) for j = 0..numCols-1; op decides how
// the incoming value is combined with the existing one.  No validation of
// idx(k) is done in the visible code.
// NOTE(review): this listing omits some original lines (member declarations
// and parts of the parameter lists).
715 template <
typename DstView,
typename SrcView,
typename IdxView,
typename Op>
716 struct UnpackArrayMultiColumn {
717 typedef typename DstView::execution_space execution_space;
718 typedef typename execution_space::size_type size_type;
// Constructor: stores the views, the combine operation and the column count.
726 UnpackArrayMultiColumn(
const DstView& dst_,
731 dst(dst_), src(src_), idx(idx_), op(op_), numCols(numCols_) {}
// Per-entry body: combine the k-th block of src into row idx(k) of dst.
733 KOKKOS_INLINE_FUNCTION
734 void operator()(
const size_type k )
const {
735 const typename IdxView::value_type localRow = idx(k);
// Start of the k-th block of numCols entries in src.
736 const size_t offset = k*numCols;
737 for (
size_t j = 0; j < numCols; ++j)
738 op( dst(localRow,j), src(offset+j) );
// Launches the functor with Kokkos::parallel_for, one iteration per entry
// of idx.
741 static void unpack(
const DstView& dst,
746 Kokkos::parallel_for( idx.size(),
747 UnpackArrayMultiColumn(dst,src,idx,op,numCols) );
// Bounds-checking counterpart of the multi-column unpack functor, written
// as a reduction functor with an int result: join() yields 0 if either
// partial result is 0 and 1 otherwise.
// NOTE(review): this listing omits some original lines (member declarations,
// parts of parameter lists, the bodies of some branches).
751 template <
typename DstView,
755 typename SizeType =
typename DstView::execution_space::size_type>
756 class UnpackArrayMultiColumnWithBoundsCheck {
// Compile-time requirements: Kokkos::Views with DstView rank 2, SrcView
// rank 1 and IdxView rank 1; SizeType is a built-in integer type.
757 static_assert (Kokkos::Impl::is_view<DstView>::value,
758 "DstView must be a Kokkos::View.");
759 static_assert (Kokkos::Impl::is_view<SrcView>::value,
760 "SrcView must be a Kokkos::View.");
761 static_assert (Kokkos::Impl::is_view<IdxView>::value,
762 "IdxView must be a Kokkos::View.");
763 static_assert (static_cast<int> (DstView::rank) == 2,
764 "DstView must be a rank-2 Kokkos::View.");
765 static_assert (static_cast<int> (SrcView::rank) == 1,
766 "SrcView must be a rank-1 Kokkos::View.");
767 static_assert (static_cast<int> (IdxView::rank) == 1,
768 "IdxView must be a rank-1 Kokkos::View.");
769 static_assert (std::is_integral<SizeType>::value,
770 "SizeType must be a built-in integer type.");
773 typedef SizeType size_type;
// Type of the reduction result; see join() for how values are combined.
775 typedef int value_type;
// Constructor: stores the views, the combine operation and the column count.
785 UnpackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
789 const size_type numCols_) :
790 dst (dst_), src (src_), idx (idx_), op (op_), numCols (numCols_)
// Reduction body for entry k: tests idx(k) against [0, dst.dimension_0());
// the visible loop applies op to (dst(lclRow, j), src(k*numCols + j)).
// NOTE(review): the statements inside the out-of-range branch, and whether
// the loop is in an else branch, are not in this listing - confirm.
793 KOKKOS_INLINE_FUNCTION
794 void operator() (
const size_type& k, value_type& result)
const {
795 typedef typename IdxView::non_const_value_type index_type;
797 const index_type lclRow = idx(k);
798 if (lclRow < static_cast<index_type> (0) ||
799 lclRow >= static_cast<index_type> (dst.dimension_0 ())) {
// Start of the k-th block of numCols entries in src.
803 const size_type offset = k*numCols;
804 for (size_type j = 0; j < numCols; ++j)
805 op (dst(lclRow,j), src(offset+j));
// Sets the initial reduction value (body not shown in this listing).
809 KOKKOS_INLINE_FUNCTION
810 void init (value_type& initialResult)
const {
// Combines two partial results: 0 as soon as either input is 0, else 1.
814 KOKKOS_INLINE_FUNCTION
void 815 join (
volatile value_type& dstResult,
816 const volatile value_type& srcResult)
const 818 dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
// Host-side driver: runs the functor with Kokkos::parallel_reduce over
// [0, idx.size()).  The visible error path scans a mirror of idx, collects
// every index outside [0, dst.dimension_0()), and throws std::runtime_error
// with a message listing them.
// NOTE(review): the condition guarding the error path, and any copy of idx
// into the mirror view, are not in this listing - confirm.
822 unpack (
const DstView& dst,
826 const size_type numCols)
828 typedef typename DstView::execution_space execution_space;
829 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
830 typedef typename IdxView::non_const_value_type index_type;
833 Kokkos::parallel_reduce (range_type (0, idx.size ()),
834 UnpackArrayMultiColumnWithBoundsCheck (dst,src,idx,op,numCols),
840 auto idx_h = Kokkos::create_mirror_view (idx);
// Gather the offending indices so the exception message can list them.
843 std::vector<index_type> badIndices;
844 const size_type numInds = idx_h.dimension_0 ();
845 for (size_type k = 0; k < numInds; ++k) {
846 if (idx_h(k) < static_cast<index_type> (0) ||
847 idx_h(k) >= static_cast<index_type> (dst.dimension_0 ())) {
848 badIndices.push_back (idx_h(k));
852 std::ostringstream os;
853 os <<
"MultiVector unpack kernel had " << badIndices.size ()
854 <<
" out-of bounds index/ices.  Here they are: [";
855 for (
size_t k = 0; k < badIndices.size (); ++k) {
// True for every element except the last one.
857 if (k + 1 < badIndices.size ()) {
862 throw std::runtime_error (os.str ());
// Entry point for unpacking numCols columns.  After checking the view types
// at compile time, it forwards (dst, src, idx, op, numCols) to either
// UnpackArrayMultiColumnWithBoundsCheck::unpack or
// UnpackArrayMultiColumn::unpack.  `debug` defaults to true.
// NOTE(review): the branch that chooses between the two implementations is
// not in this listing; presumably `debug` selects the bounds-checking one -
// confirm against the full file.
867 template <
typename DstView,
typename SrcView,
typename IdxView,
typename Op>
869 unpack_array_multi_column (
const DstView& dst,
873 const size_t numCols,
874 const bool debug =
true)
// Compile-time requirements: Kokkos::Views with DstView rank 2, SrcView
// rank 1 and IdxView rank 1.
876 static_assert (Kokkos::Impl::is_view<DstView>::value,
877 "DstView must be a Kokkos::View.");
878 static_assert (Kokkos::Impl::is_view<SrcView>::value,
879 "SrcView must be a Kokkos::View.");
880 static_assert (Kokkos::Impl::is_view<IdxView>::value,
881 "IdxView must be a Kokkos::View.");
882 static_assert (static_cast<int> (DstView::rank) == 2,
883 "DstView must be a rank-2 Kokkos::View.");
884 static_assert (static_cast<int> (SrcView::rank) == 1,
885 "SrcView must be a rank-1 Kokkos::View.");
886 static_assert (static_cast<int> (IdxView::rank) == 1,
887 "IdxView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
890 typedef UnpackArrayMultiColumnWithBoundsCheck<DstView,
891 SrcView, IdxView, Op> impl_type;
892 impl_type::unpack (dst, src, idx, op, numCols);
// Implementation without index validation.
895 typedef UnpackArrayMultiColumn<DstView,
896 SrcView, IdxView, Op> impl_type;
897 impl_type::unpack (dst, src, idx, op, numCols);
// Functor that unpacks a one-index buffer into an arbitrary set of columns
// of selected rows.  For entry k, op is applied to each pair
// (dst(idx(k), col(j)), src(k*numCols + j)) for j = 0..numCols-1.
// No validation of idx(k) or col(j) is done in the visible code.
// NOTE(review): this listing omits some original lines (member declarations
// and parts of the parameter lists).
901 template <
typename DstView,
typename SrcView,
typename IdxView,
902 typename ColView,
typename Op>
903 struct UnpackArrayMultiColumnVariableStride {
904 typedef typename DstView::execution_space execution_space;
905 typedef typename execution_space::size_type size_type;
// Constructor: stores the views, the column list, the combine operation and
// the column count.
914 UnpackArrayMultiColumnVariableStride(
const DstView& dst_,
920 dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
// Per-entry body: combine the k-th block of src into the listed columns of
// row idx(k) of dst.
922 KOKKOS_INLINE_FUNCTION
923 void operator()(
const size_type k )
const {
924 const typename IdxView::value_type localRow = idx(k);
// Start of the k-th block of numCols entries in src.
925 const size_t offset = k*numCols;
926 for (
size_t j = 0; j < numCols; ++j)
927 op( dst(localRow,col(j)), src(offset+j) );
// Launches the functor with Kokkos::parallel_for, one iteration per entry
// of idx.
930 static void unpack(
const DstView& dst,
936 Kokkos::parallel_for( idx.size(),
937 UnpackArrayMultiColumnVariableStride(
938 dst,src,idx,col,op,numCols) );
// Bounds-checking counterpart of the variable-stride unpack functor.  The
// reduction result is a pair of ints: unpack() below interprets `first` as
// the row-index status and `second` as the column-index status, with any
// value other than 1 meaning an error was seen.
// NOTE(review): this listing omits some original lines (member declarations,
// parts of parameter lists, the bodies of some branches).
942 template <
typename DstView,
947 typename SizeType =
typename DstView::execution_space::size_type>
948 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
950 typedef SizeType size_type;
// Reduction result: (row status, column status).
952 typedef Kokkos::pair<int, int> value_type;
// Constructor: stores the views, the column list, the combine operation and
// the column count.
// NOTE(review): numCols_ is declared size_t here, whereas the other
// bounds-checking classes in this listing take size_type - confirm whether
// the difference is intentional.
963 UnpackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
968 const size_t numCols_) :
969 dst (dst_), src (src_), idx (idx_), col (col_), op (op_),
// Reduction body for entry k: tests idx(k) against [0, dst.dimension_0()),
// and tests each col(j) against dst.dimension_1() via Impl::outOfBounds.
// The visible update applies op to (dst(lclRow, col(j)), src(k*numCols + j)).
// NOTE(review): the statements inside the two out-of-range branches, and
// the else structure around the update, are not in this listing - confirm.
973 KOKKOS_INLINE_FUNCTION
void 974 operator() (
const size_type& k, value_type& result)
const {
975 typedef typename IdxView::non_const_value_type row_index_type;
976 typedef typename ColView::non_const_value_type col_index_type;
978 const row_index_type lclRow = idx(k);
979 if (lclRow < static_cast<row_index_type> (0) ||
980 lclRow >= static_cast<row_index_type> (dst.dimension_0 ())) {
// Start of the k-th block of numCols entries in src.
984 const size_type offset = k*numCols;
985 for (size_type j = 0; j < numCols; ++j) {
986 const col_index_type lclCol = col(j);
988 if (Impl::outOfBounds<col_index_type> (lclCol, dst.dimension_1 ())) {
// NOTE(review): col(j) is read again here although lclCol already holds it.
992 op (dst(lclRow, col(j)), src(offset+j));
// Initial reduction value: both components start at 1.
998 KOKKOS_INLINE_FUNCTION
void 999 init (value_type& initialResult)
const {
1000 initialResult.first = 1;
1001 initialResult.second = 1;
// Combines two partial results component-wise: each component becomes 0 as
// soon as either input component is 0, and 1 otherwise.
1004 KOKKOS_INLINE_FUNCTION
void 1005 join (
volatile value_type& dstResult,
1006 const volatile value_type& srcResult)
const 1008 dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
1009 dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
// Host-side driver: runs the functor with Kokkos::parallel_reduce over
// [0, idx.size()), derives the row/column error flags from the result, and
// in the visible error path lists the offending row and column indices in a
// message thrown as std::runtime_error.
// NOTE(review): the conditions guarding each reporting section, and any
// copies into the mirror views, are not in this listing - confirm.
1013 unpack (
const DstView& dst,
1018 const size_type numCols)
1020 typedef typename DstView::execution_space execution_space;
1021 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1022 typedef typename IdxView::non_const_value_type row_index_type;
1023 typedef typename ColView::non_const_value_type col_index_type;
// Start from "no error" in both components.
1025 Kokkos::pair<int, int> result (1, 1);
1026 Kokkos::parallel_reduce (range_type (0, idx.size ()),
1027 UnpackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
// Any component different from 1 signals an error of that kind.
1031 const bool hasBadRows = (result.first != 1);
1032 const bool hasBadCols = (result.second != 1);
1033 const bool hasErr = hasBadRows || hasBadCols;
1035 std::ostringstream os;
// Row report: collect every row index outside [0, dst.dimension_0()).
1042 auto idx_h = Kokkos::create_mirror_view (idx);
1045 std::vector<row_index_type> badRows;
1046 const size_type numInds = idx_h.dimension_0 ();
1047 for (size_type k = 0; k < numInds; ++k) {
1048 if (idx_h(k) < static_cast<row_index_type> (0) ||
1049 idx_h(k) >= static_cast<row_index_type> (dst.dimension_0 ())) {
1050 badRows.push_back (idx_h(k));
1053 os <<
"MultiVector multiple-column unpack kernel had " 1054 << badRows.size () <<
" out-of bounds row index/ices: [";
1055 for (
size_t k = 0; k < badRows.size (); ++k) {
// True for every element except the last one.
1057 if (k + 1 < badRows.size ()) {
// Column report: collect every column index outside [0, dst.dimension_1()).
1069 auto col_h = Kokkos::create_mirror_view (col);
1072 std::vector<col_index_type> badCols;
1073 const size_type numInds = col_h.dimension_0 ();
1074 for (size_type k = 0; k < numInds; ++k) {
1075 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.dimension_1 ())) {
1076 badCols.push_back (col_h(k));
1083 os <<
"MultiVector multiple-column unpack kernel had " 1084 << badCols.size () <<
" out-of bounds column index/ices: [";
1085 for (
size_t k = 0; k < badCols.size (); ++k) {
// True for every element except the last one.
1087 if (k + 1 < badCols.size ()) {
1094 throw std::runtime_error (os.str ());
// Entry point for unpacking into an arbitrary list of columns.  After
// checking the view types at compile time, it forwards
// (dst, src, idx, col, op, numCols) to either
// UnpackArrayMultiColumnVariableStrideWithBoundsCheck::unpack or
// UnpackArrayMultiColumnVariableStride::unpack.  `debug` defaults to true.
// NOTE(review): the branch that chooses between the two implementations is
// not in this listing; presumably `debug` selects the bounds-checking one -
// confirm against the full file.
1099 template <
typename DstView,
1105 unpack_array_multi_column_variable_stride (
const DstView& dst,
1110 const size_t numCols,
1111 const bool debug =
true)
// Compile-time requirements: Kokkos::Views with DstView rank 2, SrcView
// rank 1, IdxView rank 1 and ColView rank 1.
1113 static_assert (Kokkos::Impl::is_view<DstView>::value,
1114 "DstView must be a Kokkos::View.");
1115 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1116 "SrcView must be a Kokkos::View.");
1117 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1118 "IdxView must be a Kokkos::View.");
1119 static_assert (Kokkos::Impl::is_view<ColView>::value,
1120 "ColView must be a Kokkos::View.");
1121 static_assert (static_cast<int> (DstView::rank) == 2,
1122 "DstView must be a rank-2 Kokkos::View.");
1123 static_assert (static_cast<int> (SrcView::rank) == 1,
1124 "SrcView must be a rank-1 Kokkos::View.");
1125 static_assert (static_cast<int> (IdxView::rank) == 1,
1126 "IdxView must be a rank-1 Kokkos::View.");
1127 static_assert (static_cast<int> (ColView::rank) == 1,
1128 "ColView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
1131 typedef UnpackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
1132 SrcView, IdxView, ColView, Op> impl_type;
1133 impl_type::unpack (dst, src, idx, col, op, numCols);
// Implementation without index validation.
1136 typedef UnpackArrayMultiColumnVariableStride<DstView,
1137 SrcView, IdxView, ColView, Op> impl_type;
1138 impl_type::unpack (dst, src, idx, col, op, numCols);
// Functor that copies rows between two two-index views.  For entry k, the
// first numCols entries of row src_idx(k) of src are copied into row
// dst_idx(k) of dst.  No validation of the row indices is done in the
// visible code.
// NOTE(review): this listing omits some original lines (member declarations
// and parts of the parameter lists).
1142 template <
typename DstView,
typename SrcView,
1143 typename DstIdxView,
typename SrcIdxView>
1144 struct PermuteArrayMultiColumn {
1145 typedef typename DstView::execution_space execution_space;
1146 typedef typename execution_space::size_type size_type;
// Constructor: stores both views, both row-index lists and the column count.
1154 PermuteArrayMultiColumn(
const DstView& dst_,
1155 const SrcView& src_,
1156 const DstIdxView& dst_idx_,
1157 const SrcIdxView& src_idx_,
1159 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1160 numCols(numCols_) {}
// Per-entry body: copy one source row into one destination row.
1162 KOKKOS_INLINE_FUNCTION
1163 void operator()(
const size_type k )
const {
1164 const typename DstIdxView::value_type toRow = dst_idx(k);
1165 const typename SrcIdxView::value_type fromRow = src_idx(k);
1166 for (
size_t j = 0; j < numCols; ++j)
1167 dst(toRow, j) = src(fromRow, j);
// Launches the functor with Kokkos::parallel_for.  The iteration count is
// the smaller of the two index-list sizes, so every k has both a source and
// a destination row.
1170 static void permute(
const DstView& dst,
1172 const DstIdxView& dst_idx,
1173 const SrcIdxView& src_idx,
1175 const size_type n = std::min( dst_idx.size(), src_idx.size() );
1176 Kokkos::parallel_for(
1177 n, PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) );
// Entry point for the row permutation: forwards all arguments unchanged to
// PermuteArrayMultiColumn::permute.
// NOTE(review): part of the parameter list is not in this listing.
1183 template <
typename DstView,
typename SrcView,
1184 typename DstIdxView,
typename SrcIdxView>
1185 void permute_array_multi_column(
const DstView& dst,
1187 const DstIdxView& dst_idx,
1188 const SrcIdxView& src_idx,
1190 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1191 dst, src, dst_idx, src_idx, numCols);
// Functor that copies rows between two two-index views using separate
// column lists for each side.  For entry k and each j in 0..numCols-1,
// src(src_idx(k), src_col(j)) is copied into dst(dst_idx(k), dst_col(j)).
// No validation of row or column indices is done in the visible code.
// NOTE(review): this listing omits some original lines (member declarations
// and parts of the parameter lists).
1194 template <
typename DstView,
typename SrcView,
1195 typename DstIdxView,
typename SrcIdxView,
1196 typename DstColView,
typename SrcColView>
1197 struct PermuteArrayMultiColumnVariableStride {
1198 typedef typename DstView::execution_space execution_space;
1199 typedef typename execution_space::size_type size_type;
// Constructor: stores both views, both row-index lists, both column lists
// and the column count.
1209 PermuteArrayMultiColumnVariableStride(
const DstView& dst_,
1210 const SrcView& src_,
1211 const DstIdxView& dst_idx_,
1212 const SrcIdxView& src_idx_,
1213 const DstColView& dst_col_,
1214 const SrcColView& src_col_,
1216 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1217 dst_col(dst_col_), src_col(src_col_),
1218 numCols(numCols_) {}
// Per-entry body: copy the listed source columns of one source row into the
// listed destination columns of one destination row.
1220 KOKKOS_INLINE_FUNCTION
1221 void operator()(
const size_type k )
const {
1222 const typename DstIdxView::value_type toRow = dst_idx(k);
1223 const typename SrcIdxView::value_type fromRow = src_idx(k);
1224 for (
size_t j = 0; j < numCols; ++j)
1225 dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
// Launches the functor with Kokkos::parallel_for.  The iteration count is
// the smaller of the two row-index-list sizes.
1228 static void permute(
const DstView& dst,
1230 const DstIdxView& dst_idx,
1231 const SrcIdxView& src_idx,
1232 const DstColView& dst_col,
1233 const SrcColView& src_col,
1235 const size_type n = std::min( dst_idx.size(), src_idx.size() );
1236 Kokkos::parallel_for(
1237 n, PermuteArrayMultiColumnVariableStride(
1238 dst,src,dst_idx,src_idx,dst_col,src_col,numCols) );
// Entry point for the row permutation with per-side column lists: forwards
// all arguments unchanged to PermuteArrayMultiColumnVariableStride::permute.
// NOTE(review): part of the parameter list is not in this listing.
1244 template <
typename DstView,
typename SrcView,
1245 typename DstIdxView,
typename SrcIdxView,
1246 typename DstColView,
typename SrcColView>
1247 void permute_array_multi_column_variable_stride(
const DstView& dst,
1249 const DstIdxView& dst_idx,
1250 const SrcIdxView& src_idx,
1251 const DstColView& dst_col,
1252 const SrcColView& src_col,
1254 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1255 DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1256 dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1263 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upper bound?
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Implementation details of Tpetra.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upper bound?