42 #ifndef TPETRA_MULTIVECTOR_DEF_HPP 43 #define TPETRA_MULTIVECTOR_DEF_HPP 54 #include "Tpetra_Vector.hpp" 55 #include "Tpetra_Details_MultiVectorDistObjectKernels.hpp" 56 #include "Tpetra_Details_gathervPrint.hpp" 57 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp" 59 #include "KokkosCompat_View.hpp" 60 #include "Kokkos_MV_GEMM.hpp" 61 #include "Kokkos_Blas1_MV.hpp" 62 #include "Kokkos_Random.hpp" 64 #ifdef HAVE_TPETRA_INST_FLOAT128 67 template<
class Generator>
68 struct rand<Generator, __float128> {
69 static KOKKOS_INLINE_FUNCTION __float128 max ()
71 return static_cast<__float128
> (1.0);
73 static KOKKOS_INLINE_FUNCTION __float128
78 const __float128 scalingFactor =
79 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
80 static_cast<__float128> (2.0);
81 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
82 const __float128 lowerOrderTerm =
83 static_cast<__float128
> (gen.drand ()) * scalingFactor;
84 return higherOrderTerm + lowerOrderTerm;
86 static KOKKOS_INLINE_FUNCTION __float128
87 draw (Generator& gen,
const __float128& range)
90 const __float128 scalingFactor =
91 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
92 static_cast<__float128> (2.0);
93 const __float128 higherOrderTerm =
94 static_cast<__float128
> (gen.drand (range));
95 const __float128 lowerOrderTerm =
96 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
97 return higherOrderTerm + lowerOrderTerm;
99 static KOKKOS_INLINE_FUNCTION __float128
100 draw (Generator& gen,
const __float128& start,
const __float128& end)
103 const __float128 scalingFactor =
104 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
105 static_cast<__float128> (2.0);
106 const __float128 higherOrderTerm =
107 static_cast<__float128
> (gen.drand (start, end));
108 const __float128 lowerOrderTerm =
109 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
110 return higherOrderTerm + lowerOrderTerm;
114 #endif // HAVE_TPETRA_INST_FLOAT128 132 template<
class ST,
class LO,
class GO,
class NT>
134 allocDualView (
const size_t lclNumRows,
135 const size_t numCols,
136 const bool zeroOut =
true,
137 const bool allowPadding =
false)
139 using Kokkos::AllowPadding;
140 using Kokkos::view_alloc;
141 using Kokkos::WithoutInitializing;
143 typedef typename dual_view_type::t_dev dev_view_type;
148 const std::string label (
"MV::DualView");
158 dev_view_type d_view;
161 d_view = dev_view_type (view_alloc (label, AllowPadding),
162 lclNumRows, numCols);
165 d_view = dev_view_type (view_alloc (label),
166 lclNumRows, numCols);
171 d_view = dev_view_type (view_alloc (label,
174 lclNumRows, numCols);
177 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
178 lclNumRows, numCols);
180 #ifdef HAVE_TPETRA_DEBUG 188 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
189 KokkosBlas::fill (d_view, nan);
190 #endif // HAVE_TPETRA_DEBUG 192 #ifdef HAVE_TPETRA_DEBUG 193 TEUCHOS_TEST_FOR_EXCEPTION
194 (static_cast<size_t> (d_view.dimension_0 ()) != lclNumRows ||
195 static_cast<size_t> (d_view.dimension_1 ()) != numCols, std::logic_error,
196 "allocDualView: d_view's dimensions actual dimensions do not match " 197 "requested dimensions. d_view is " << d_view.dimension_0 () <<
" x " <<
198 d_view.dimension_1 () <<
"; requested " << lclNumRows <<
" x " << numCols
199 <<
". Please report this bug to the Tpetra developers.");
200 #endif // HAVE_TPETRA_DEBUG 202 typename dual_view_type::t_host h_view = Kokkos::create_mirror_view (d_view);
204 dual_view_type dv (d_view, h_view);
209 dv.template modify<typename dev_view_type::memory_space> ();
218 template<
class T,
class ExecSpace>
219 struct MakeUnmanagedView {
232 typedef typename Kokkos::Impl::if_c<
233 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
234 typename ExecSpace::memory_space,
235 Kokkos::HostSpace>::value,
236 typename ExecSpace::device_type,
237 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
238 typedef Kokkos::LayoutLeft array_layout;
239 typedef Kokkos::View<T*, array_layout, host_exec_space,
240 Kokkos::MemoryUnmanaged> view_type;
242 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
244 const size_t numEnt =
static_cast<size_t> (x_in.size ());
248 return view_type (x_in.getRawPtr (), numEnt);
256 template<
class DualViewType>
258 takeSubview (
const DualViewType& X,
260 #ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
261 const Kokkos::Experimental::Impl::ALL_t&,
265 const std::pair<size_t, size_t>& colRng)
267 if (X.dimension_0 () == 0 && X.dimension_1 () != 0) {
268 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
271 return subview (X, Kokkos::ALL (), colRng);
278 template<
class DualViewType>
280 takeSubview (
const DualViewType& X,
281 const std::pair<size_t, size_t>& rowRng,
282 const std::pair<size_t, size_t>& colRng)
284 if (X.dimension_0 () == 0 && X.dimension_1 () != 0) {
285 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
288 return subview (X, rowRng, colRng);
296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
298 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
299 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
300 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
303 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
309 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
312 const size_t numVecs,
313 const bool zeroOut) :
316 const size_t lclNumRows = this->getLocalLength ();
317 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
321 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
325 view_ (source.view_),
326 origView_ (source.origView_),
327 whichVectors_ (source.whichVectors_)
330 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
333 const Teuchos::DataAccess copyOrView) :
335 view_ (source.view_),
336 origView_ (source.origView_),
337 whichVectors_ (source.whichVectors_)
340 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, " 341 "const Teuchos::DataAccess): ";
343 if (copyOrView == Teuchos::Copy) {
347 this->view_ = cpy.view_;
348 this->origView_ = cpy.origView_;
349 this->whichVectors_ = cpy.whichVectors_;
351 else if (copyOrView == Teuchos::View) {
354 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
355 true, std::invalid_argument,
"Second argument 'copyOrView' has an " 356 "invalid value " << copyOrView <<
". Valid values include " 357 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = " 358 << Teuchos::View <<
".");
362 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
370 const char tfecfFuncName[] =
"MultiVector(map,view): ";
375 origView_.stride (stride);
376 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
377 origView_.dimension_0 ();
378 const size_t lclNumRows = this->getLocalLength ();
379 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
380 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's " 381 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
382 <<
". This may also mean that the input view's first dimension (number " 383 "of rows = " << view.dimension_0 () <<
") does not not match the number " 384 "of entries in the Map on the calling process.");
388 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
391 const typename dual_view_type::t_dev& d_view) :
394 using Teuchos::ArrayRCP;
396 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
401 d_view.stride (stride);
402 const size_t LDA = (d_view.dimension_1 () > 1) ? stride[1] :
403 d_view.dimension_0 ();
404 const size_t lclNumRows = this->getLocalLength ();
405 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
406 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::View's " 407 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
408 <<
". This may also mean that the input view's first dimension (number " 409 "of rows = " << d_view.dimension_0 () <<
") does not not match the " 410 "number of entries in the Map on the calling process.");
419 this->
template modify<device_type> ();
423 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
432 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
437 origView_.stride (stride);
438 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
439 origView_.dimension_0 ();
440 const size_t lclNumRows = this->getLocalLength ();
441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
442 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's " 443 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
444 <<
". This may also mean that the input origView's first dimension (number " 445 "of rows = " << origView.dimension_0 () <<
") does not not match the number " 446 "of entries in the Map on the calling process.");
450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
454 const Teuchos::ArrayView<const size_t>& whichVectors) :
458 whichVectors_ (whichVectors.begin (), whichVectors.end ())
461 using Kokkos::subview;
462 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
464 const size_t lclNumRows = map.is_null () ? size_t (0) :
465 map->getNodeNumElements ();
472 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
473 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_0 ()) < lclNumRows,
474 std::invalid_argument,
"view.dimension_0() = " << view.dimension_0 ()
475 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
476 if (whichVectors.size () != 0) {
477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
478 view.dimension_1 () != 0 && view.dimension_1 () == 0,
479 std::invalid_argument,
"view.dimension_1() = 0, but whichVectors.size()" 480 " = " << whichVectors.size () <<
" > 0.");
481 size_t maxColInd = 0;
482 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
483 for (size_type k = 0; k < whichVectors.size (); ++k) {
484 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
485 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
486 std::invalid_argument,
"whichVectors[" << k <<
"] = " 487 "Teuchos::OrdinalTraits<size_t>::invalid().");
488 maxColInd = std::max (maxColInd, whichVectors[k]);
490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
491 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_1 ()) <= maxColInd,
492 std::invalid_argument,
"view.dimension_1() = " << view.dimension_1 ()
493 <<
" <= max(whichVectors) = " << maxColInd <<
".");
499 origView_.stride (stride);
500 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
501 origView_.dimension_0 ();
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
503 LDA < lclNumRows, std::invalid_argument,
504 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
506 if (whichVectors.size () == 1) {
515 const std::pair<size_t, size_t> colRng (whichVectors[0],
516 whichVectors[0] + 1);
517 view_ = takeSubview (view_, ALL (), colRng);
518 origView_ = takeSubview (origView_, ALL (), colRng);
520 whichVectors_.clear ();
524 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
529 const Teuchos::ArrayView<const size_t>& whichVectors) :
532 origView_ (origView),
533 whichVectors_ (whichVectors.begin (), whichVectors.end ())
536 using Kokkos::subview;
537 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
539 const size_t lclNumRows = this->getLocalLength ();
546 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
547 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_0 ()) < lclNumRows,
548 std::invalid_argument,
"view.dimension_0() = " << view.dimension_0 ()
549 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
550 if (whichVectors.size () != 0) {
551 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
552 view.dimension_1 () != 0 && view.dimension_1 () == 0,
553 std::invalid_argument,
"view.dimension_1() = 0, but whichVectors.size()" 554 " = " << whichVectors.size () <<
" > 0.");
555 size_t maxColInd = 0;
556 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
557 for (size_type k = 0; k < whichVectors.size (); ++k) {
558 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
559 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
560 std::invalid_argument,
"whichVectors[" << k <<
"] = " 561 "Teuchos::OrdinalTraits<size_t>::invalid().");
562 maxColInd = std::max (maxColInd, whichVectors[k]);
564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
565 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_1 ()) <= maxColInd,
566 std::invalid_argument,
"view.dimension_1() = " << view.dimension_1 ()
567 <<
" <= max(whichVectors) = " << maxColInd <<
".");
572 origView_.stride (stride);
573 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
574 origView_.dimension_0 ();
575 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
576 LDA < lclNumRows, std::invalid_argument,
"Input DualView's column stride" 577 " = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
579 if (whichVectors.size () == 1) {
588 const std::pair<size_t, size_t> colRng (whichVectors[0],
589 whichVectors[0] + 1);
590 view_ = takeSubview (view_, ALL (), colRng);
591 origView_ = takeSubview (origView_, ALL (), colRng);
593 whichVectors_.clear ();
597 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
600 const Teuchos::ArrayView<const Scalar>& data,
602 const size_t numVecs) :
605 typedef LocalOrdinal LO;
606 typedef GlobalOrdinal GO;
607 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
612 const size_t lclNumRows =
613 map.is_null () ? size_t (0) : map->getNodeNumElements ();
614 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
615 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < " 616 "map->getNodeNumElements() = " << lclNumRows <<
".");
618 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
619 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
620 (static_cast<size_t> (data.size ()) < minNumEntries,
621 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough " 622 "entries, given the input Map and number of vectors in the MultiVector." 623 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + " 624 "map->getNodeNumElements () = " << minNumEntries <<
".");
627 this->view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
628 this->
template modify<device_type> ();
629 auto X_out = this->
template getLocalView<device_type> ();
640 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
641 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
642 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
647 size_t outStrides[8];
648 X_out.stride (outStrides);
649 const size_t outStride = (X_out.dimension_1 () > 1) ? outStrides[1] :
650 X_out.dimension_0 ();
651 if (LDA == outStride) {
657 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
659 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
661 for (
size_t j = 0; j < numVecs; ++j) {
662 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
663 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
669 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
672 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
673 const size_t numVecs) :
677 typedef LocalOrdinal LO;
678 typedef GlobalOrdinal GO;
679 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
681 const size_t lclNumRows =
682 map.is_null () ? size_t (0) : map->getNodeNumElements ();
683 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
684 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
685 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or " 686 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
687 for (
size_t j = 0; j < numVecs; ++j) {
688 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
690 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
691 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = " 692 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
696 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
697 this->
template modify<device_type> ();
698 auto X_out = this->getLocalView<device_type> ();
702 typedef typename decltype (X_out)::array_layout array_layout;
703 typedef typename Kokkos::View<
const IST*,
706 Kokkos::MemoryUnmanaged> input_col_view_type;
708 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
709 for (
size_t j = 0; j < numVecs; ++j) {
710 Teuchos::ArrayView<const IST> X_j_av =
711 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
712 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
713 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
719 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
723 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
726 return whichVectors_.empty ();
729 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
734 if (this->getMap ().is_null ()) {
735 return static_cast<size_t> (0);
737 return this->getMap ()->getNodeNumElements ();
741 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
746 if (this->getMap ().is_null ()) {
747 return static_cast<size_t> (0);
749 return this->getMap ()->getGlobalNumElements ();
753 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
758 if (isConstantStride ()) {
762 origView_.stride (stride);
763 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
764 origView_.dimension_0 ();
768 return static_cast<size_t> (0);
772 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
791 return src->getNumVectors () == this->getNumVectors ();
795 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
799 return this->getNumVectors ();
802 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
806 const size_t numSameIDs,
807 const Kokkos::DualView<const LocalOrdinal*, device_type>& permuteToLIDs,
808 const Kokkos::DualView<const LocalOrdinal*, device_type>& permuteFromLIDs)
811 using KokkosRefactor::Details::permute_array_multi_column;
812 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
813 using Kokkos::Compat::create_const_view;
814 typedef typename dual_view_type::t_dev::memory_space DMS;
815 typedef typename dual_view_type::t_host::memory_space HMS;
817 const char tfecfFuncName[] =
"copyAndPermuteNew: ";
819 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
820 permuteToLIDs.dimension_0 () != permuteFromLIDs.dimension_0 (),
821 std::runtime_error,
"permuteToLIDs.dimension_0() = " 822 << permuteToLIDs.dimension_0 () <<
" != permuteFromLIDs.dimension_0() = " 823 << permuteFromLIDs.dimension_0 () <<
".");
826 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
827 const size_t numCols = this->getNumVectors ();
835 const bool copyOnHost = sourceMV.template need_sync<device_type> ();
838 this->
template sync<Kokkos::HostSpace> ();
839 this->
template modify<Kokkos::HostSpace> ();
842 this->
template sync<device_type> ();
843 this->
template modify<device_type> ();
865 if (numSameIDs > 0) {
866 const std::pair<size_t, size_t> rows (0, numSameIDs);
868 auto tgt_h = this->
template getLocalView<HMS> ();
869 auto src_h = create_const_view (sourceMV.template getLocalView<HMS> ());
871 for (
size_t j = 0; j < numCols; ++j) {
872 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
873 const size_t srcCol =
874 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
876 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
877 auto src_j = Kokkos::subview (src_h, rows, srcCol);
882 auto tgt_d = this->
template getLocalView<DMS> ();
883 auto src_d = create_const_view (sourceMV.template getLocalView<DMS> ());
885 for (
size_t j = 0; j < numCols; ++j) {
886 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
887 const size_t srcCol =
888 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
890 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
891 auto src_j = Kokkos::subview (src_d, rows, srcCol);
907 if (permuteFromLIDs.dimension_0 () == 0 ||
908 permuteToLIDs.dimension_0 () == 0) {
915 Kokkos::DualView<const LocalOrdinal*, device_type> permuteToLIDs_nc =
917 Kokkos::DualView<const LocalOrdinal*, device_type> permuteFromLIDs_nc =
922 const bool nonConstStride =
923 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
929 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
930 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
931 if (nonConstStride) {
932 if (this->whichVectors_.size () == 0) {
933 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
934 tmpTgt.template modify<HMS> ();
935 for (
size_t j = 0; j < numCols; ++j) {
936 tmpTgt.h_view(j) = j;
939 tmpTgt.template sync<DMS> ();
941 tgtWhichVecs = tmpTgt;
944 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
946 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
950 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
951 (static_cast<size_t> (tgtWhichVecs.dimension_0 ()) !=
952 this->getNumVectors (),
953 std::logic_error,
"tgtWhichVecs.dimension_0() = " <<
954 tgtWhichVecs.dimension_0 () <<
" != this->getNumVectors() = " <<
955 this->getNumVectors () <<
".");
957 if (sourceMV.whichVectors_.size () == 0) {
958 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
959 tmpSrc.template modify<HMS> ();
960 for (
size_t j = 0; j < numCols; ++j) {
961 tmpSrc.h_view(j) = j;
964 tmpSrc.template sync<DMS> ();
966 srcWhichVecs = tmpSrc;
969 Teuchos::ArrayView<const size_t> srcWhichVecsT =
970 sourceMV.whichVectors_ ();
972 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
977 (static_cast<size_t> (srcWhichVecs.dimension_0 ()) !=
978 sourceMV.getNumVectors (), std::logic_error,
979 "srcWhichVecs.dimension_0() = " << srcWhichVecs.dimension_0 ()
980 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
985 auto tgt_h = this->
template getLocalView<HMS> ();
986 auto src_h = create_const_view (sourceMV.template getLocalView<HMS> ());
987 permuteToLIDs_nc.template sync<HMS> ();
988 auto permuteToLIDs_h =
989 create_const_view (permuteToLIDs_nc.template view<HMS> ());
990 permuteFromLIDs_nc.template sync<HMS> ();
991 auto permuteFromLIDs_h =
992 create_const_view (permuteFromLIDs_nc.template view<HMS> ());
994 if (nonConstStride) {
997 auto tgtWhichVecs_h =
998 create_const_view (tgtWhichVecs.template view<HMS> ());
999 auto srcWhichVecs_h =
1000 create_const_view (srcWhichVecs.template view<HMS> ());
1001 permute_array_multi_column_variable_stride (tgt_h, src_h,
1005 srcWhichVecs_h, numCols);
1008 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1009 permuteFromLIDs_h, numCols);
1013 auto tgt_d = this->
template getLocalView<DMS> ();
1014 auto src_d = create_const_view (sourceMV.template getLocalView<DMS> ());
1015 permuteToLIDs_nc.template sync<DMS> ();
1016 auto permuteToLIDs_d =
1017 create_const_view (permuteToLIDs_nc.template view<DMS> ());
1018 permuteFromLIDs_nc.template sync<DMS> ();
1019 auto permuteFromLIDs_d =
1020 create_const_view (permuteFromLIDs_nc.template view<DMS> ());
1022 if (nonConstStride) {
1025 auto tgtWhichVecs_d =
1026 create_const_view (tgtWhichVecs.template view<DMS> ());
1027 auto srcWhichVecs_d =
1028 create_const_view (srcWhichVecs.template view<DMS> ());
1029 permute_array_multi_column_variable_stride (tgt_d, src_d,
1033 srcWhichVecs_d, numCols);
1036 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1037 permuteFromLIDs_d, numCols);
1042 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1044 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1045 packAndPrepareNew (
const SrcDistObject& sourceObj,
1046 const Kokkos::DualView<const local_ordinal_type*, device_type> &exportLIDs,
1047 Kokkos::DualView<impl_scalar_type*, device_type>& exports,
1048 const Kokkos::DualView<size_t*, device_type>& ,
1049 size_t& constantNumPackets,
1052 using Kokkos::Compat::create_const_view;
1053 using Kokkos::Compat::getKokkosViewDeepCopy;
1054 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> MV;
1055 typedef impl_scalar_type IST;
1056 typedef typename Kokkos::DualView<IST*, device_type>::t_host::memory_space
1058 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::memory_space
1060 typedef typename Kokkos::DualView<IST*, device_type>::t_host::execution_space
1061 host_execution_space;
1062 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::execution_space
1063 dev_execution_space;
1069 #ifdef HAVE_TPETRA_DEBUG 1070 constexpr
bool debugCheckIndices =
true;
1072 constexpr
bool debugCheckIndices =
false;
1073 #endif // HAVE_TPETRA_DEBUG 1075 const bool printDebugOutput =
false;
1076 if (printDebugOutput) {
1077 std::cerr <<
"$$$ MV::packAndPrepareNew" << std::endl;
1080 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
1095 const bool packOnHost =
1096 exportLIDs.modified_host () > exportLIDs.modified_device ();
1097 auto src_dev = sourceMV.template getLocalView<dev_memory_space> ();
1098 auto src_host = sourceMV.template getLocalView<host_memory_space> ();
1100 if (sourceMV.template need_sync<Kokkos::HostSpace> ()) {
1103 src_host = decltype (src_host) (
"MV::DualView::h_view",
1104 src_dev.dimension_0 (),
1105 src_dev.dimension_1 ());
1110 if (sourceMV.template need_sync<device_type> ()) {
1113 src_dev = decltype (src_dev) (
"MV::DualView::d_view",
1114 src_host.dimension_0 (),
1115 src_host.dimension_1 ());
1120 const size_t numCols = sourceMV.getNumVectors ();
1125 constantNumPackets = numCols;
1129 if (exportLIDs.dimension_0 () == 0) {
1130 if (printDebugOutput) {
1131 std::cerr <<
"$$$ MV::packAndPrepareNew DONE" << std::endl;
1151 if (printDebugOutput) {
1152 std::cerr <<
"$$$ MV::packAndPrepareNew realloc" << std::endl;
1155 const size_t numExportLIDs = exportLIDs.dimension_0 ();
1156 const size_t newExportsSize = numCols * numExportLIDs;
1157 if (static_cast<size_t> (exports.dimension_0 ()) != newExportsSize) {
1158 if (printDebugOutput) {
1159 std::ostringstream os;
1160 const int myRank = this->getMap ()->getComm ()->getRank ();
1161 os <<
"$$$ MV::packAndPrepareNew (Proc " << myRank <<
") realloc " 1162 "exports from " << exports.dimension_0 () <<
" to " << newExportsSize
1164 std::cerr << os.str ();
1167 execution_space::fence ();
1168 exports = Kokkos::DualView<impl_scalar_type*, device_type> (
"exports", newExportsSize);
1169 execution_space::fence ();
1176 exports.template modify<host_memory_space> ();
1179 exports.template modify<dev_memory_space> ();
1195 if (sourceMV.isConstantStride ()) {
1196 using KokkosRefactor::Details::pack_array_single_column;
1197 if (printDebugOutput) {
1198 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols=1 const stride" << std::endl;
1201 pack_array_single_column (exports.template view<host_memory_space> (),
1202 create_const_view (src_host),
1203 exportLIDs.template view<host_memory_space> (),
1208 pack_array_single_column (exports.template view<dev_memory_space> (),
1209 create_const_view (src_dev),
1210 exportLIDs.template view<dev_memory_space> (),
1216 using KokkosRefactor::Details::pack_array_single_column;
1217 if (printDebugOutput) {
1218 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols=1 nonconst stride" << std::endl;
1221 pack_array_single_column (exports.template view<host_memory_space> (),
1222 create_const_view (src_host),
1223 exportLIDs.template view<host_memory_space> (),
1224 sourceMV.whichVectors_[0],
1228 pack_array_single_column (exports.template view<dev_memory_space> (),
1229 create_const_view (src_dev),
1230 exportLIDs.template view<dev_memory_space> (),
1231 sourceMV.whichVectors_[0],
1237 if (sourceMV.isConstantStride ()) {
1238 using KokkosRefactor::Details::pack_array_multi_column;
1239 if (printDebugOutput) {
1240 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols>1 const stride" << std::endl;
1243 pack_array_multi_column (exports.template view<host_memory_space> (),
1244 create_const_view (src_host),
1245 exportLIDs.template view<host_memory_space> (),
1250 pack_array_multi_column (exports.template view<dev_memory_space> (),
1251 create_const_view (src_dev),
1252 exportLIDs.template view<dev_memory_space> (),
1258 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1259 if (printDebugOutput) {
1260 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols>1 nonconst stride" << std::endl;
1263 pack_array_multi_column_variable_stride (exports.template view<host_memory_space> (),
1264 create_const_view (src_host),
1265 exportLIDs.template view<host_memory_space> (),
1266 getKokkosViewDeepCopy<host_execution_space> (sourceMV.whichVectors_ ()),
1271 pack_array_multi_column_variable_stride (exports.template view<dev_memory_space> (),
1272 create_const_view (src_dev),
1273 exportLIDs.template view<dev_memory_space> (),
1274 getKokkosViewDeepCopy<dev_execution_space> (sourceMV.whichVectors_ ()),
1281 if (printDebugOutput) {
1282 std::cerr <<
"$$$ MV::packAndPrepareNew DONE" << std::endl;
1287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1289 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1290 unpackAndCombineNew (
const Kokkos::DualView<const local_ordinal_type*, device_type>& importLIDs,
1291 const Kokkos::DualView<const impl_scalar_type*, device_type>& imports,
1292 const Kokkos::DualView<const size_t*, device_type>& ,
1293 const size_t constantNumPackets,
1297 using KokkosRefactor::Details::unpack_array_multi_column;
1298 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1299 using Kokkos::Compat::getKokkosViewDeepCopy;
1301 typedef typename Kokkos::DualView<IST*, device_type>::t_host::memory_space
1303 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::memory_space
1305 const char tfecfFuncName[] =
"unpackAndCombineNew: ";
1306 const char suffix[] =
" Please report this bug to the Tpetra developers.";
1312 #ifdef HAVE_TPETRA_DEBUG 1313 constexpr
bool debugCheckIndices =
true;
1315 constexpr
bool debugCheckIndices =
false;
1316 #endif // HAVE_TPETRA_DEBUG 1319 if (importLIDs.dimension_0 () == 0) {
1323 const size_t numVecs = getNumVectors ();
1324 #ifdef HAVE_TPETRA_DEBUG 1325 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1326 static_cast<size_t> (imports.dimension_0 ()) !=
1327 numVecs * importLIDs.dimension_0 (),
1329 "imports.dimension_0() = " << imports.dimension_0 ()
1330 <<
" != getNumVectors() * importLIDs.dimension_0() = " << numVecs
1331 <<
" * " << importLIDs.dimension_0 () <<
" = " 1332 << numVecs * importLIDs.dimension_0 () <<
".");
1334 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1335 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1336 "constantNumPackets input argument must be nonzero.");
1338 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1339 (static_cast<size_t> (numVecs) !=
1340 static_cast<size_t> (constantNumPackets),
1341 std::runtime_error,
"constantNumPackets must equal numVecs.");
1342 #endif // HAVE_TPETRA_DEBUG 1349 const bool unpackOnHost =
1350 imports.modified_host () > imports.modified_device ();
1351 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1352 (unpackOnHost && importLIDs.modified_host () < importLIDs.modified_device (),
1353 std::logic_error,
"The 'imports' buffer was last modified on host, " 1354 "but importLIDs was last modified on device." << suffix);
1355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1356 (! unpackOnHost && importLIDs.modified_host () > importLIDs.modified_device (),
1357 std::logic_error,
"The 'imports' buffer was last modified on device, " 1358 "but importLIDs was last modified on host." << suffix);
1365 this->
template sync<host_memory_space> ();
1366 this->
template modify<host_memory_space> ();
1369 this->
template sync<dev_memory_space> ();
1370 this->
template modify<dev_memory_space> ();
1372 auto X_d = this->
template getLocalView<dev_memory_space> ();
1373 auto X_h = this->
template getLocalView<host_memory_space> ();
1374 auto imports_d = imports.template view<dev_memory_space> ();
1375 auto imports_h = imports.template view<host_memory_space> ();
1376 auto importLIDs_d = importLIDs.template view<dev_memory_space> ();
1377 auto importLIDs_h = importLIDs.template view<host_memory_space> ();
1379 Kokkos::DualView<size_t*, device_type> whichVecs;
1380 if (! isConstantStride ()) {
1381 Kokkos::View<
const size_t*, host_memory_space,
1382 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1384 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1386 whichVecs.template modify<host_memory_space> ();
1390 whichVecs.template modify<dev_memory_space> ();
1394 auto whichVecs_d = whichVecs.template view<dev_memory_space> ();
1395 auto whichVecs_h = whichVecs.template view<host_memory_space> ();
1405 if (numVecs > 0 && importLIDs.dimension_0 () > 0) {
1411 const auto op = KokkosRefactor::Details::InsertOp ();
1413 if (isConstantStride ()) {
1415 unpack_array_multi_column (X_h, imports_h, importLIDs_h, op,
1416 numVecs, debugCheckIndices);
1420 unpack_array_multi_column (X_d, imports_d, importLIDs_d, op,
1421 numVecs, debugCheckIndices);
1426 unpack_array_multi_column_variable_stride (X_h, imports_h,
1433 unpack_array_multi_column_variable_stride (X_d, imports_d,
1441 else if (CM ==
ADD) {
1442 const auto op = KokkosRefactor::Details::AddOp ();
1444 if (isConstantStride ()) {
1446 unpack_array_multi_column (X_h, imports_h, importLIDs_h, op,
1447 numVecs, debugCheckIndices);
1450 unpack_array_multi_column (X_d, imports_d, importLIDs_d, op,
1451 numVecs, debugCheckIndices);
1456 unpack_array_multi_column_variable_stride (X_h, imports_h,
1463 unpack_array_multi_column_variable_stride (X_d, imports_d,
1472 const auto op = KokkosRefactor::Details::AbsMaxOp ();
1474 if (isConstantStride ()) {
1476 unpack_array_multi_column (X_h, imports_h, importLIDs_h, op,
1477 numVecs, debugCheckIndices);
1480 unpack_array_multi_column (X_d, imports_d, importLIDs_d, op,
1481 numVecs, debugCheckIndices);
1486 unpack_array_multi_column_variable_stride (X_h, imports_h,
1493 unpack_array_multi_column_variable_stride (X_d, imports_d,
1502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1504 std::invalid_argument,
"Invalid CombineMode: " << CM <<
". Valid " 1505 "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
1510 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1515 if (isConstantStride ()) {
1516 return static_cast<size_t> (view_.dimension_1 ());
1518 return static_cast<size_t> (whichVectors_.size ());
1524 template<
class RV,
class XMV>
1526 lclDotImpl (
const RV& dotsOut,
1529 const size_t lclNumRows,
1530 const size_t numVecs,
1531 const Teuchos::ArrayView<const size_t>& whichVecsX,
1532 const Teuchos::ArrayView<const size_t>& whichVecsY,
1533 const bool constantStrideX,
1534 const bool constantStrideY)
1537 using Kokkos::subview;
1538 typedef typename RV::non_const_value_type dot_type;
1539 #ifdef HAVE_TPETRA_DEBUG 1540 const char prefix[] =
"Tpetra::MultiVector::lclDotImpl: ";
1541 #endif // HAVE_TPETRA_DEBUG 1543 static_assert (Kokkos::Impl::is_view<RV>::value,
1544 "Tpetra::MultiVector::lclDotImpl: " 1545 "The first argument dotsOut is not a Kokkos::View.");
1546 static_assert (RV::rank == 1,
"Tpetra::MultiVector::lclDotImpl: " 1547 "The first argument dotsOut must have rank 1.");
1548 static_assert (Kokkos::Impl::is_view<XMV>::value,
1549 "Tpetra::MultiVector::lclDotImpl: The type of the 2nd and " 1550 "3rd arguments (X_lcl and Y_lcl) is not a Kokkos::View.");
1551 static_assert (XMV::rank == 2,
"Tpetra::MultiVector::lclDotImpl: " 1552 "X_lcl and Y_lcl must have rank 2.");
1557 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
1558 const std::pair<size_t, size_t> colRng (0, numVecs);
1559 RV theDots = subview (dotsOut, colRng);
1560 XMV X = subview (X_lcl, rowRng, Kokkos::ALL());
1561 XMV Y = subview (Y_lcl, rowRng, Kokkos::ALL());
1563 #ifdef HAVE_TPETRA_DEBUG 1564 if (lclNumRows != 0) {
1565 TEUCHOS_TEST_FOR_EXCEPTION
1566 (X.dimension_0 () != lclNumRows, std::logic_error, prefix <<
1567 "X.dimension_0() = " << X.dimension_0 () <<
" != lclNumRows " 1568 "= " << lclNumRows <<
". " 1569 "Please report this bug to the Tpetra developers.");
1570 TEUCHOS_TEST_FOR_EXCEPTION
1571 (Y.dimension_0 () != lclNumRows, std::logic_error, prefix <<
1572 "Y.dimension_0() = " << Y.dimension_0 () <<
" != lclNumRows " 1573 "= " << lclNumRows <<
". " 1574 "Please report this bug to the Tpetra developers.");
1578 TEUCHOS_TEST_FOR_EXCEPTION
1580 (X.dimension_0 () != lclNumRows || X.dimension_1 () != numVecs),
1581 std::logic_error, prefix <<
"X is " << X.dimension_0 () <<
" x " <<
1582 X.dimension_1 () <<
" (constant stride), which differs from the " 1583 "local dimensions " << lclNumRows <<
" x " << numVecs <<
". " 1584 "Please report this bug to the Tpetra developers.");
1585 TEUCHOS_TEST_FOR_EXCEPTION
1586 (! constantStrideX &&
1587 (X.dimension_0 () != lclNumRows || X.dimension_1 () < numVecs),
1588 std::logic_error, prefix <<
"X is " << X.dimension_0 () <<
" x " <<
1589 X.dimension_1 () <<
" (NOT constant stride), but the local " 1590 "dimensions are " << lclNumRows <<
" x " << numVecs <<
". " 1591 "Please report this bug to the Tpetra developers.");
1592 TEUCHOS_TEST_FOR_EXCEPTION
1594 (Y.dimension_0 () != lclNumRows || Y.dimension_1 () != numVecs),
1595 std::logic_error, prefix <<
"Y is " << Y.dimension_0 () <<
" x " <<
1596 Y.dimension_1 () <<
" (constant stride), which differs from the " 1597 "local dimensions " << lclNumRows <<
" x " << numVecs <<
". " 1598 "Please report this bug to the Tpetra developers.");
1599 TEUCHOS_TEST_FOR_EXCEPTION
1600 (! constantStrideY &&
1601 (Y.dimension_0 () != lclNumRows || Y.dimension_1 () < numVecs),
1602 std::logic_error, prefix <<
"Y is " << Y.dimension_0 () <<
" x " <<
1603 Y.dimension_1 () <<
" (NOT constant stride), but the local " 1604 "dimensions are " << lclNumRows <<
" x " << numVecs <<
". " 1605 "Please report this bug to the Tpetra developers.");
1607 #endif // HAVE_TPETRA_DEBUG 1609 if (lclNumRows == 0) {
1610 const dot_type zero = Kokkos::Details::ArithTraits<dot_type>::zero ();
1614 if (constantStrideX && constantStrideY) {
1615 if(X.dimension_1() == 1) {
1616 typename RV::non_const_value_type result =
1617 KokkosBlas::dot (Kokkos::subview(X,Kokkos::ALL(),0),
1618 Kokkos::subview(Y,Kokkos::ALL(),0));
1621 KokkosBlas::dot (theDots, X, Y);
1629 for (
size_t k = 0; k < numVecs; ++k) {
1630 const size_t X_col = constantStrideX ? k : whichVecsX[k];
1631 const size_t Y_col = constantStrideY ? k : whichVecsY[k];
1632 KokkosBlas::dot (subview (theDots, k), subview (X, ALL (), X_col),
1633 subview (Y, ALL (), Y_col));
1641 gblDotImpl (
const RV& dotsOut,
1642 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1643 const bool distributed)
1645 using Teuchos::REDUCE_MAX;
1646 using Teuchos::REDUCE_SUM;
1647 using Teuchos::reduceAll;
1648 typedef typename RV::non_const_value_type dot_type;
1650 const size_t numVecs = dotsOut.dimension_0 ();
1665 if (distributed && ! comm.is_null ()) {
1671 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1673 const dot_type*
const lclSum = lclDots.ptr_on_device ();
1674 dot_type*
const gblSum = dotsOut.ptr_on_device ();
1675 const int nv =
static_cast<int> (numVecs);
1676 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1681 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1683 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1685 const Kokkos::View<dot_type*, device_type>& dots)
const 1687 using Kokkos::create_mirror_view;
1688 using Kokkos::subview;
1689 using Teuchos::Comm;
1690 using Teuchos::null;
1693 typedef Kokkos::View<dot_type*, device_type> RV;
1695 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1697 const size_t numVecs = this->getNumVectors ();
1701 const size_t lclNumRows = this->getLocalLength ();
1702 const size_t numDots =
static_cast<size_t> (dots.dimension_0 ());
1704 #ifdef HAVE_TPETRA_DEBUG 1706 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1707 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1708 ! compat, std::invalid_argument,
"Tpetra::MultiVector::dot: *this is " 1709 "not compatible with the input MultiVector A. We only test for this " 1710 "in a debug build.");
1712 #endif // HAVE_TPETRA_DEBUG 1721 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1723 "MultiVectors do not have the same local length. " 1724 "this->getLocalLength() = " << lclNumRows <<
" != " 1726 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1728 "MultiVectors must have the same number of columns (vectors). " 1729 "this->getNumVectors() = " << numVecs <<
" != " 1731 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1732 numDots != numVecs, std::runtime_error,
1733 "The output array 'dots' must have the same number of entries as the " 1734 "number of columns (vectors) in *this and A. dots.dimension_0() = " <<
1735 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1737 const std::pair<size_t, size_t> colRng (0, numVecs);
1738 RV dotsOut = subview (dots, colRng);
1739 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1740 this->getMap ()->getComm ();
1745 const bool useHostVersion = A.template need_sync<device_type> ();
1746 if (useHostVersion) {
1749 typedef typename dual_view_type::t_host XMV;
1750 typedef typename XMV::memory_space cur_memory_space;
1754 const_cast<MV*
> (
this)->
template sync<cur_memory_space> ();
1755 auto thisView = this->
template getLocalView<cur_memory_space> ();
1756 auto A_view = A.template getLocalView<cur_memory_space> ();
1758 lclDotImpl<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1761 auto dotsOutHost = Kokkos::create_mirror_view (dotsOut);
1763 gblDotImpl (dotsOutHost, comm, this->isDistributed ());
1768 typedef typename dual_view_type::t_dev XMV;
1769 typedef typename XMV::memory_space cur_memory_space;
1775 const_cast<MV*
> (
this)->
template sync<cur_memory_space> ();
1776 auto thisView = this->
template getLocalView<cur_memory_space> ();
1777 auto A_view = A.template getLocalView<cur_memory_space> ();
1779 lclDotImpl<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1782 gblDotImpl (dotsOut, comm, this->isDistributed ());
1787 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1791 const Teuchos::ArrayView<dot_type>& dots)
const 1793 typedef Kokkos::View<dot_type*, device_type> dev_dots_view_type;
1794 typedef MakeUnmanagedView<dot_type, device_type> view_getter_type;
1795 typedef typename view_getter_type::view_type host_dots_view_type;
1797 const size_t numDots =
static_cast<size_t> (dots.size ());
1798 host_dots_view_type dotsHostView (dots.getRawPtr (), numDots);
1799 dev_dots_view_type dotsDevView (
"MV::dot tmp", numDots);
1800 this->dot (A, dotsDevView);
1805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1808 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const 1810 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1811 typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1812 typedef typename view_getter_type::view_type host_norms_view_type;
1814 const size_t numNorms =
static_cast<size_t> (norms.size ());
1815 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1816 dev_norms_view_type normsDevView (
"MV::norm2 tmp", numNorms);
1817 this->norm2 (normsDevView);
1822 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1825 norm2 (
const Kokkos::View<mag_type*, device_type>& norms)
const 1827 this->normImpl (norms, NORM_TWO);
1831 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1832 void TPETRA_DEPRECATED
1835 const Teuchos::ArrayView<mag_type> &norms)
const 1838 using Kokkos::subview;
1839 using Teuchos::Comm;
1840 using Teuchos::null;
1842 using Teuchos::reduceAll;
1843 using Teuchos::REDUCE_SUM;
1844 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
1845 typedef Kokkos::Details::ArithTraits<mag_type> ATM;
1846 typedef Kokkos::View<mag_type*, device_type> norms_view_type;
1848 const char tfecfFuncName[] =
"normWeighted: ";
1850 const size_t numVecs = this->getNumVectors ();
1851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1852 static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error,
1853 "norms.size() = " << norms.size () <<
" != this->getNumVectors() = " 1857 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1858 ! OneW && weights.
getNumVectors () != numVecs, std::runtime_error,
1859 "The input MultiVector of weights must contain either one column, " 1860 "or must have the same number of columns as *this. " 1862 <<
" and this->getNumVectors() = " << numVecs <<
".");
1864 #ifdef HAVE_TPETRA_DEBUG 1865 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1866 ! this->getMap ()->isCompatible (*weights.
getMap ()), std::runtime_error,
1867 "MultiVectors do not have compatible Maps:" << std::endl
1868 <<
"this->getMap(): " << std::endl << *this->getMap()
1869 <<
"weights.getMap(): " << std::endl << *weights.
getMap() << std::endl);
1871 const size_t lclNumRows = this->getLocalLength ();
1872 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1874 "MultiVectors do not have the same local length.");
1875 #endif // HAVE_TPETRA_DEBUG 1877 norms_view_type lclNrms (
"lclNrms", numVecs);
1880 const_cast<MV*
> (
this)->
template sync<device_type> ();
1881 const_cast<MV&
> (weights).
template sync<device_type> ();
1883 auto X_lcl = this->
template getLocalView<device_type> ();
1884 auto W_lcl = weights.template getLocalView<device_type> ();
1886 if (isConstantStride () && ! OneW) {
1887 KokkosBlas::nrm2w_squared (lclNrms, X_lcl, W_lcl);
1890 for (
size_t j = 0; j < numVecs; ++j) {
1891 const size_t X_col = this->isConstantStride () ? j :
1892 this->whichVectors_[j];
1893 const size_t W_col = OneW ?
static_cast<size_t> (0) :
1895 KokkosBlas::nrm2w_squared (subview (lclNrms, j),
1896 subview (X_lcl, ALL (), X_col),
1897 subview (W_lcl, ALL (), W_col));
1902 ATM::one () /
static_cast<mag_type> (this->getGlobalLength ());
1903 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
1904 Teuchos::null : this->getMap ()->getComm ();
1906 if (! comm.is_null () && this->isDistributed ()) {
1908 reduceAll<int, mag_type> (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
1909 lclNrms.ptr_on_device (), norms.getRawPtr ());
1910 for (
size_t k = 0; k < numVecs; ++k) {
1911 norms[k] = ATM::sqrt (norms[k] * OneOverN);
1915 typename norms_view_type::HostMirror lclNrms_h =
1916 Kokkos::create_mirror_view (lclNrms);
1918 for (
size_t k = 0; k < numVecs; ++k) {
1919 norms[k] = ATM::sqrt (ATS::magnitude (lclNrms_h(k)) * OneOverN);
1925 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1928 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const 1930 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1931 typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1932 typedef typename view_getter_type::view_type host_norms_view_type;
1934 const size_t numNorms =
static_cast<size_t> (norms.size ());
1935 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1936 dev_norms_view_type normsDevView (
"MV::norm1 tmp", numNorms);
1937 this->norm1 (normsDevView);
1942 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1945 norm1 (
const Kokkos::View<mag_type*, device_type>& norms)
const 1947 this->normImpl (norms, NORM_ONE);
1951 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1954 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const 1956 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1957 typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1958 typedef typename view_getter_type::view_type host_norms_view_type;
1960 const size_t numNorms =
static_cast<size_t> (norms.size ());
1961 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1962 dev_norms_view_type normsDevView (
"MV::normInf tmp", numNorms);
1963 this->normInf (normsDevView);
1968 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1971 normInf (
const Kokkos::View<mag_type*, device_type>& norms)
const 1973 this->normImpl (norms, NORM_INF);
1985 template<
class RV,
class XMV>
1987 lclNormImpl (
const RV& normsOut,
1989 const size_t lclNumRows,
1990 const size_t numVecs,
1991 const Teuchos::ArrayView<const size_t>& whichVecs,
1992 const bool constantStride,
1993 const EWhichNormImpl whichNorm)
1996 using Kokkos::subview;
1997 typedef typename RV::non_const_value_type mag_type;
1999 static_assert (Kokkos::Impl::is_view<RV>::value,
2000 "Tpetra::MultiVector::lclNormImpl: " 2001 "The first argument RV is not a Kokkos::View.");
2002 static_assert (RV::rank == 1,
"Tpetra::MultiVector::lclNormImpl: " 2003 "The first argument normsOut must have rank 1.");
2004 static_assert (Kokkos::Impl::is_view<XMV>::value,
2005 "Tpetra::MultiVector::lclNormImpl: " 2006 "The second argument X_lcl is not a Kokkos::View.");
2007 static_assert (XMV::rank == 2,
"Tpetra::MultiVector::lclNormImpl: " 2008 "The second argument X_lcl must have rank 2.");
2013 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2014 const std::pair<size_t, size_t> colRng (0, numVecs);
2015 RV theNorms = subview (normsOut, colRng);
2016 XMV X = subview (X_lcl, rowRng, Kokkos::ALL());
2021 TEUCHOS_TEST_FOR_EXCEPTION(
2022 lclNumRows != 0 && constantStride && ( \
2023 ( X.dimension_0 () != lclNumRows ) ||
2024 ( X.dimension_1 () != numVecs ) ),
2025 std::logic_error,
"Constant Stride X's dimensions are " << X.dimension_0 () <<
" x " 2026 << X.dimension_1 () <<
", which differ from the local dimensions " 2027 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to " 2028 "the Tpetra developers.");
2030 TEUCHOS_TEST_FOR_EXCEPTION(
2031 lclNumRows != 0 && !constantStride && ( \
2032 ( X.dimension_0 () != lclNumRows ) ||
2033 ( X.dimension_1 () < numVecs ) ),
2034 std::logic_error,
"Strided X's dimensions are " << X.dimension_0 () <<
" x " 2035 << X.dimension_1 () <<
", which are incompatible with the local dimensions " 2036 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to " 2037 "the Tpetra developers.");
2039 if (lclNumRows == 0) {
2040 const mag_type zeroMag = Kokkos::Details::ArithTraits<mag_type>::zero ();
2044 if (constantStride) {
2045 if (whichNorm == IMPL_NORM_INF) {
2046 KokkosBlas::nrmInf (theNorms, X);
2048 else if (whichNorm == IMPL_NORM_ONE) {
2049 KokkosBlas::nrm1 (theNorms, X);
2051 else if (whichNorm == IMPL_NORM_TWO) {
2052 KokkosBlas::nrm2_squared (theNorms, X);
2055 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"Should never get here!");
2064 for (
size_t k = 0; k < numVecs; ++k) {
2065 const size_t X_col = constantStride ? k : whichVecs[k];
2066 if (whichNorm == IMPL_NORM_INF) {
2067 KokkosBlas::nrmInf (subview (theNorms, k),
2068 subview (X, ALL (), X_col));
2070 else if (whichNorm == IMPL_NORM_ONE) {
2071 KokkosBlas::nrm1 (subview (theNorms, k),
2072 subview (X, ALL (), X_col));
2074 else if (whichNorm == IMPL_NORM_TWO) {
2075 KokkosBlas::nrm2_squared (subview (theNorms, k),
2076 subview (X, ALL (), X_col));
2079 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"Should never get here!");
2088 gblNormImpl (
const RV& normsOut,
2089 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2090 const bool distributed,
2091 const EWhichNormImpl whichNorm)
2093 using Teuchos::REDUCE_MAX;
2094 using Teuchos::REDUCE_SUM;
2095 using Teuchos::reduceAll;
2096 typedef typename RV::non_const_value_type mag_type;
2098 const size_t numVecs = normsOut.dimension_0 ();
2113 if (distributed && ! comm.is_null ()) {
2119 RV lclNorms (
"MV::normImpl lcl", numVecs);
2121 const mag_type*
const lclSum = lclNorms.ptr_on_device ();
2122 mag_type*
const gblSum = normsOut.ptr_on_device ();
2123 const int nv =
static_cast<int> (numVecs);
2124 if (whichNorm == IMPL_NORM_INF) {
2125 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
2127 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2131 if (whichNorm == IMPL_NORM_TWO) {
2137 const bool inHostMemory =
2138 Kokkos::Impl::is_same<
typename RV::memory_space,
2139 typename RV::host_mirror_space::memory_space>::value;
2141 for (
size_t j = 0; j < numVecs; ++j) {
2142 normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
2150 KokkosBlas::Impl::SquareRootFunctor<RV> f (normsOut);
2151 Kokkos::parallel_for (numVecs, f);
2158 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2160 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2161 normImpl (
const Kokkos::View<mag_type*, device_type>& norms,
2164 using Kokkos::create_mirror_view;
2165 using Kokkos::subview;
2166 using Teuchos::Comm;
2167 using Teuchos::null;
2170 typedef Kokkos::View<mag_type*, device_type> RV;
2172 const size_t numVecs = this->getNumVectors ();
2176 const size_t lclNumRows = this->getLocalLength ();
2177 const size_t numNorms =
static_cast<size_t> (norms.dimension_0 ());
2178 TEUCHOS_TEST_FOR_EXCEPTION(
2179 numNorms < numVecs, std::runtime_error,
"Tpetra::MultiVector::normImpl: " 2180 "'norms' must have at least as many entries as the number of vectors in " 2181 "*this. norms.dimension_0() = " << numNorms <<
" < this->getNumVectors()" 2182 " = " << numVecs <<
".");
2184 const std::pair<size_t, size_t> colRng (0, numVecs);
2185 RV normsOut = subview (norms, colRng);
2187 EWhichNormImpl lclNormType;
2188 if (whichNorm == NORM_ONE) {
2189 lclNormType = IMPL_NORM_ONE;
2190 }
else if (whichNorm == NORM_TWO) {
2191 lclNormType = IMPL_NORM_TWO;
2193 lclNormType = IMPL_NORM_INF;
2196 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2197 this->getMap ()->getComm ();
2200 const bool useHostVersion = this->
template need_sync<device_type> ();
2201 if (useHostVersion) {
2204 typedef typename dual_view_type::t_host XMV;
2205 typedef typename XMV::memory_space cur_memory_space;
2207 auto thisView = this->
template getLocalView<cur_memory_space> ();
2208 lclNormImpl<RV, XMV> (normsOut, thisView, lclNumRows, numVecs,
2209 this->whichVectors_, this->isConstantStride (),
2211 auto normsOutHost = Kokkos::create_mirror_view (normsOut);
2213 gblNormImpl (normsOutHost, comm, this->isDistributed (), lclNormType);
2218 typedef typename dual_view_type::t_dev XMV;
2219 typedef typename XMV::memory_space cur_memory_space;
2221 auto thisView = this->
template getLocalView<cur_memory_space> ();
2222 lclNormImpl<RV, XMV> (normsOut, thisView, lclNumRows, numVecs,
2223 this->whichVectors_, this->isConstantStride (),
2225 gblNormImpl (normsOut, comm, this->isDistributed (), lclNormType);
2229 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2232 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const 2236 using Kokkos::subview;
2237 using Teuchos::Comm;
2239 using Teuchos::reduceAll;
2240 using Teuchos::REDUCE_SUM;
2241 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2243 const size_t lclNumRows = this->getLocalLength ();
2244 const size_t numVecs = this->getNumVectors ();
2245 const size_t numMeans =
static_cast<size_t> (means.size ());
2247 TEUCHOS_TEST_FOR_EXCEPTION(
2248 numMeans != numVecs, std::runtime_error,
2249 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2250 <<
" != this->getNumVectors() = " << numVecs <<
".");
2252 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2253 const std::pair<size_t, size_t> colRng (0, numVecs);
2258 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2260 typename local_view_type::HostMirror::array_layout,
2262 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2263 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2265 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2266 this->getMap ()->getComm ();
2269 const bool useHostVersion = this->
template need_sync<device_type> ();
2270 if (useHostVersion) {
2272 auto X_lcl = subview (this->
template getLocalView<Kokkos::HostSpace> (),
2273 rowRng, Kokkos::ALL ());
2275 typename local_view_type::HostMirror lclSums (
"MV::meanValue tmp", numVecs);
2276 if (isConstantStride ()) {
2277 KokkosBlas::sum (lclSums, X_lcl);
2280 for (
size_t j = 0; j < numVecs; ++j) {
2281 const size_t col = whichVectors_[j];
2282 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2289 if (! comm.is_null () && this->isDistributed ()) {
2290 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2291 lclSums.ptr_on_device (), meansOut.ptr_on_device ());
2299 auto X_lcl = subview (this->
template getLocalView<device_type> (),
2300 rowRng, Kokkos::ALL ());
2303 local_view_type lclSums (
"MV::meanValue tmp", numVecs);
2304 if (isConstantStride ()) {
2305 KokkosBlas::sum (lclSums, X_lcl);
2308 for (
size_t j = 0; j < numVecs; ++j) {
2309 const size_t col = whichVectors_[j];
2310 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2318 if (! comm.is_null () && this->isDistributed ()) {
2319 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2320 lclSums.ptr_on_device (), meansOut.ptr_on_device ());
2331 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2332 for (
size_t k = 0; k < numMeans; ++k) {
2333 meansOut(k) = meansOut(k) * OneOverN;
2338 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2344 typedef Kokkos::Details::ArithTraits<IST> ATS;
2345 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2346 typedef typename pool_type::generator_type generator_type;
2348 const IST max = Kokkos::rand<generator_type, IST>::max ();
2349 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2351 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2355 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2361 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2372 const uint64_t myRank =
2373 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2374 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2375 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2377 pool_type rand_pool (seed);
2378 const IST max =
static_cast<IST
> (maxVal);
2379 const IST min =
static_cast<IST
> (minVal);
2381 this->
template modify<device_type> ();
2382 auto thisView = this->getLocalView<device_type> ();
2384 if (isConstantStride ()) {
2385 Kokkos::fill_random (thisView, rand_pool, min, max);
2388 const size_t numVecs = getNumVectors ();
2389 for (
size_t k = 0; k < numVecs; ++k) {
2390 const size_t col = whichVectors_[k];
2391 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2392 Kokkos::fill_random (X_k, rand_pool, min, max);
2398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2405 using Kokkos::subview;
2406 typedef typename device_type::memory_space DMS;
2407 typedef Kokkos::HostSpace HMS;
2412 const size_t lclNumRows = this->getLocalLength ();
2413 const size_t numVecs = this->getNumVectors ();
2414 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2415 const std::pair<size_t, size_t> colRng (0, numVecs);
2421 const bool useHostVersion = this->
template need_sync<device_type> ();
2423 if (! useHostVersion) {
2424 this->
template modify<DMS> ();
2425 auto X = subview (this->
template getLocalView<DMS> (), rowRng, ALL ());
2427 auto X_0 = subview (X, ALL (), static_cast<size_t> (0));
2430 else if (isConstantStride ()) {
2434 for (
size_t k = 0; k < numVecs; ++k) {
2435 const size_t col = whichVectors_[k];
2436 auto X_k = subview (X, ALL (), col);
2442 this->
template modify<HMS> ();
2443 auto X = subview (this->
template getLocalView<HMS> (), rowRng, ALL ());
2445 auto X_0 = subview (X, ALL (), static_cast<size_t> (0));
2448 else if (isConstantStride ()) {
2452 for (
size_t k = 0; k < numVecs; ++k) {
2453 const size_t col = whichVectors_[k];
2454 auto X_k = subview (X, ALL (), col);
2462 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2467 using Teuchos::ArrayRCP;
2468 using Teuchos::Comm;
2475 TEUCHOS_TEST_FOR_EXCEPTION(
2476 ! this->isConstantStride (), std::logic_error,
2477 "Tpetra::MultiVector::replaceMap: This method does not currently work " 2478 "if the MultiVector is a column view of another MultiVector (that is, if " 2479 "isConstantStride() == false).");
2509 #ifdef HAVE_TEUCHOS_DEBUG 2523 #endif // HAVE_TEUCHOS_DEBUG 2525 if (this->getMap ().is_null ()) {
2530 TEUCHOS_TEST_FOR_EXCEPTION(
2531 newMap.is_null (), std::invalid_argument,
2532 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. " 2533 "This probably means that the input Map is incorrect.");
2537 const size_t newNumRows = newMap->getNodeNumElements ();
2538 const size_t origNumRows = view_.dimension_0 ();
2539 const size_t numCols = this->getNumVectors ();
2541 if (origNumRows != newNumRows || view_.dimension_1 () != numCols) {
2542 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
2545 else if (newMap.is_null ()) {
2548 const size_t newNumRows =
static_cast<size_t> (0);
2549 const size_t numCols = this->getNumVectors ();
2550 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
2553 this->map_ = newMap;
2556 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2562 if (theAlpha == Kokkos::Details::ArithTraits<impl_scalar_type>::one ()) {
2565 const size_t lclNumRows = this->getLocalLength ();
2566 const size_t numVecs = this->getNumVectors ();
2567 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2568 const std::pair<size_t, size_t> colRng (0, numVecs);
2576 const bool useHostVersion = this->
template need_sync<device_type> ();
2577 if (useHostVersion) {
2579 Kokkos::subview (this->
template getLocalView<Kokkos::HostSpace> (),
2580 rowRng, Kokkos::ALL ());
2581 if (isConstantStride ()) {
2582 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2585 for (
size_t k = 0; k < numVecs; ++k) {
2586 const size_t Y_col = this->isConstantStride () ? k :
2587 this->whichVectors_[k];
2588 auto Y_k = Kokkos::subview (Y_lcl, Kokkos::ALL (), Y_col);
2589 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2595 Kokkos::subview (this->
template getLocalView<device_type> (),
2596 rowRng, Kokkos::ALL ());
2597 if (isConstantStride ()) {
2598 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2601 for (
size_t k = 0; k < numVecs; ++k) {
2602 const size_t Y_col = this->isConstantStride () ? k :
2603 this->whichVectors_[k];
2604 auto Y_k = Kokkos::subview (Y_lcl, Kokkos::ALL (), Y_col);
2605 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2612 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2615 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2617 const size_t numVecs = this->getNumVectors ();
2618 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2619 TEUCHOS_TEST_FOR_EXCEPTION(
2620 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::" 2621 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = " 2625 typedef Kokkos::DualView<impl_scalar_type*, device_type> k_alphas_type ;
2626 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2627 k_alphas.template modify<typename k_alphas_type::host_mirror_space> ();
2628 for (
size_t i = 0; i < numAlphas; ++i) {
2631 k_alphas.template sync<typename k_alphas_type::memory_space> ();
2633 this->scale (k_alphas.d_view);
2636 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2639 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2642 using Kokkos::subview;
2644 const size_t lclNumRows = this->getLocalLength ();
2645 const size_t numVecs = this->getNumVectors ();
2646 TEUCHOS_TEST_FOR_EXCEPTION(
2647 static_cast<size_t> (alphas.dimension_0 ()) != numVecs,
2648 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): " 2649 "alphas.dimension_0() = " << alphas.dimension_0 ()
2650 <<
" != this->getNumVectors () = " << numVecs <<
".");
2651 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2652 const std::pair<size_t, size_t> colRng (0, numVecs);
2662 const bool useHostVersion = this->
template need_sync<device_type> ();
2663 if (useHostVersion) {
2666 auto alphas_h = Kokkos::create_mirror_view (alphas);
2667 typedef typename decltype (alphas_h)::memory_space cur_memory_space;
2671 Kokkos::subview (this->
template getLocalView<cur_memory_space> (),
2672 rowRng, Kokkos::ALL ());
2673 if (isConstantStride ()) {
2674 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2677 for (
size_t k = 0; k < numVecs; ++k) {
2678 const size_t Y_col = this->isConstantStride () ? k :
2679 this->whichVectors_[k];
2680 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2683 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2689 Kokkos::subview (this->
template getLocalView<device_type> (),
2690 rowRng, Kokkos::ALL());
2691 if (isConstantStride ()) {
2692 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2695 for (
size_t k = 0; k < numVecs; ++k) {
2696 const size_t Y_col = this->isConstantStride () ? k :
2697 this->whichVectors_[k];
2698 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2704 KokkosBlas::scal (Y_k, alphas(k), Y_k);
2710 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2717 using Kokkos::subview;
2718 const char tfecfFuncName[] =
"scale: ";
2720 const size_t lclNumRows = getLocalLength ();
2721 const size_t numVecs = getNumVectors ();
2723 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2725 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = " 2727 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2729 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = " 2733 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2734 const std::pair<size_t, size_t> colRng (0, numVecs);
2736 typedef typename dual_view_type::t_dev dev_view_type;
2737 typedef typename dual_view_type::t_host host_view_type;
2740 const bool useHostVersion = this->
template need_sync<device_type> ();
2741 if (useHostVersion) {
2745 typedef typename host_view_type::memory_space cur_memory_space;
2747 this->
template sync<cur_memory_space> ();
2748 this->
template modify<cur_memory_space> ();
2749 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
2750 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
2751 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2752 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2755 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2759 for (
size_t k = 0; k < numVecs; ++k) {
2760 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2762 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2763 auto X_k = subview (X_lcl, ALL (), X_col);
2765 KokkosBlas::scal (Y_k, theAlpha, X_k);
2770 typedef typename dev_view_type::memory_space cur_memory_space;
2772 this->
template sync<cur_memory_space> ();
2773 this->
template modify<cur_memory_space> ();
2774 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
2775 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
2776 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2777 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2780 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2784 for (
size_t k = 0; k < numVecs; ++k) {
2785 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2787 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2788 auto X_k = subview (X_lcl, ALL (), X_col);
2790 KokkosBlas::scal (Y_k, theAlpha, X_k);
2797 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2803 const char tfecfFuncName[] =
"reciprocal: ";
2805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2807 "MultiVectors do not have the same local length. " 2808 "this->getLocalLength() = " << getLocalLength ()
2810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2811 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2812 ": MultiVectors do not have the same number of columns (vectors). " 2813 "this->getNumVectors() = " << getNumVectors ()
2814 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2818 const size_t numVecs = getNumVectors ();
2820 this->
template sync<device_type> ();
2821 this->
template modify<device_type> ();
2827 const_cast<MV&
> (A).
template sync<device_type> ();
2829 auto this_view_dev = this->
template getLocalView<device_type> ();
2830 auto A_view_dev = A.template getLocalView<device_type> ();
2833 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2837 using Kokkos::subview;
2838 typedef Kokkos::View<impl_scalar_type*, device_type> view_type;
2840 for (
size_t k = 0; k < numVecs; ++k) {
2841 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2842 view_type vector_k = subview (this_view_dev, ALL (), this_col);
2843 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2844 view_type vector_Ak = subview (A_view_dev, ALL (), A_col);
2845 KokkosBlas::reciprocal (vector_k, vector_Ak);
2849 catch (std::runtime_error &e) {
2850 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
2851 ": Caught exception from Kokkos: " << e.what () << std::endl);
2856 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2862 const char tfecfFuncName[] =
"abs";
2863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2865 ": MultiVectors do not have the same local length. " 2866 "this->getLocalLength() = " << getLocalLength ()
2868 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2869 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2870 ": MultiVectors do not have the same number of columns (vectors). " 2871 "this->getNumVectors() = " << getNumVectors ()
2872 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2876 const size_t numVecs = getNumVectors ();
2878 this->
template sync<device_type> ();
2879 this->
template modify<device_type> ();
2885 const_cast<MV&
> (A).
template sync<device_type> ();
2887 auto this_view_dev = this->
template getLocalView<device_type> ();
2888 auto A_view_dev = A.template getLocalView<device_type> ();
2891 KokkosBlas::abs (this_view_dev, A_view_dev);
2895 using Kokkos::subview;
2896 typedef Kokkos::View<impl_scalar_type*, device_type> view_type;
2898 for (
size_t k=0; k < numVecs; ++k) {
2899 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2900 view_type vector_k = subview (this_view_dev, ALL (), this_col);
2901 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2902 view_type vector_Ak = subview (A_view_dev, ALL (), A_col);
2903 KokkosBlas::abs (vector_k, vector_Ak);
2909 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2917 using Kokkos::subview;
2918 const char tfecfFuncName[] =
"update: ";
2920 const size_t lclNumRows = getLocalLength ();
2921 const size_t numVecs = getNumVectors ();
2923 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2925 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = " 2927 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2929 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = " 2934 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2935 const std::pair<size_t, size_t> colRng (0, numVecs);
2937 typedef typename dual_view_type::t_dev dev_view_type;
2938 typedef typename dual_view_type::t_host host_view_type;
2941 const bool useHostVersion = this->
template need_sync<device_type> ();
2942 if (useHostVersion) {
2946 typedef typename host_view_type::memory_space cur_memory_space;
2948 this->
template sync<cur_memory_space> ();
2949 this->
template modify<cur_memory_space> ();
2950 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
2951 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2952 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
2953 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2956 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2960 for (
size_t k = 0; k < numVecs; ++k) {
2961 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2963 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2964 auto X_k = subview (X_lcl, ALL (), X_col);
2966 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2973 typedef typename dev_view_type::memory_space cur_memory_space;
2974 this->
template sync<cur_memory_space> ();
2975 this->
template modify<cur_memory_space> ();
2976 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
2977 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2978 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
2979 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2982 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2986 for (
size_t k = 0; k < numVecs; ++k) {
2987 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2989 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2990 auto X_k = subview (X_lcl, ALL (), X_col);
2992 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2998 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3005 const Scalar& gamma)
3008 using Kokkos::subview;
3010 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3012 const size_t lclNumRows = this->getLocalLength ();
3013 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3015 "The input MultiVector A has " << A.
getLocalLength () <<
" local " 3016 "row(s), but this MultiVector has " << lclNumRows <<
" local " 3018 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3020 "The input MultiVector B has " << B.
getLocalLength () <<
" local " 3021 "row(s), but this MultiVector has " << lclNumRows <<
" local " 3023 const size_t numVecs = getNumVectors ();
3024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3026 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), " 3027 "but this MultiVector has " << numVecs <<
" column(s).");
3028 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3030 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), " 3031 "but this MultiVector has " << numVecs <<
" column(s).");
3041 this->
template sync<device_type> ();
3042 const_cast<MV&
> (A).
template sync<device_type> ();
3043 const_cast<MV&
> (B).
template sync<device_type> ();
3046 this->
template modify<device_type> ();
3048 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3049 const std::pair<size_t, size_t> colRng (0, numVecs);
3054 auto C_lcl = subview (this->
template getLocalView<device_type> (), rowRng, ALL ());
3055 auto A_lcl = subview (A.template getLocalView<device_type> (), rowRng, ALL ());
3056 auto B_lcl = subview (B.template getLocalView<device_type> (), rowRng, ALL ());
3059 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3064 for (
size_t k = 0; k < numVecs; ++k) {
3065 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3068 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3069 theBeta, subview (B_lcl, rowRng, B_col),
3070 theGamma, subview (C_lcl, rowRng, this_col));
3075 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3076 Teuchos::ArrayRCP<const Scalar>
3081 using Kokkos::subview;
3088 const_cast<MV*
> (
this)->
template sync<Kokkos::HostSpace> ();
3091 auto hostView = this->
template getLocalView<Kokkos::HostSpace> ();
3094 const size_t col = isConstantStride () ? j : whichVectors_[j];
3095 auto hostView_j = subview (hostView, ALL (), col);
3098 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3099 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3101 #ifdef HAVE_TPETRA_DEBUG 3102 TEUCHOS_TEST_FOR_EXCEPTION
3103 (static_cast<size_t> (hostView_j.dimension_0 ()) <
3104 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3105 "Tpetra::MultiVector::getData: hostView_j.dimension_0() = " 3106 << hostView_j.dimension_0 () <<
" < dataAsArcp.size() = " 3107 << dataAsArcp.size () <<
". " 3108 "Please report this bug to the Tpetra developers.");
3109 #endif // HAVE_TPETRA_DEBUG 3111 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3115 Teuchos::ArrayRCP<Scalar>
3120 using Kokkos::subview;
3127 const_cast<MV*
> (
this)->
template sync<Kokkos::HostSpace> ();
3131 this->
template modify<Kokkos::HostSpace> ();
3134 auto hostView = this->
template getLocalView<Kokkos::HostSpace> ();
3137 const size_t col = isConstantStride () ? j : whichVectors_[j];
3138 auto hostView_j = subview (hostView, ALL (), col);
3141 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3142 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3144 #ifdef HAVE_TPETRA_DEBUG 3145 TEUCHOS_TEST_FOR_EXCEPTION
3146 (static_cast<size_t> (hostView_j.dimension_0 ()) <
3147 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3148 "Tpetra::MultiVector::getDataNonConst: hostView_j.dimension_0() = " 3149 << hostView_j.dimension_0 () <<
" < dataAsArcp.size() = " 3150 << dataAsArcp.size () <<
". " 3151 "Please report this bug to the Tpetra developers.");
3152 #endif // HAVE_TPETRA_DEBUG 3154 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3162 if (
this != &source) {
3163 base_type::operator= (source);
3169 view_ = source.
view_;
3185 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3186 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3188 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const 3195 bool contiguous =
true;
3196 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3197 for (
size_t j = 1; j < numCopyVecs; ++j) {
3198 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3203 if (contiguous && numCopyVecs > 0) {
3204 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3207 RCP<const MV> X_sub = this->subView (cols);
3208 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3214 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3215 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3222 RCP<const MV> X_sub = this->subView (colRng);
3223 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3228 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3232 return origView_.dimension_0 ();
3235 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3239 return origView_.dimension_1 ();
3242 template <
class Scalar,
class LO,
class GO,
class Node, const
bool classic>
3246 const size_t offset) :
3250 using Kokkos::subview;
3254 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3259 const int myRank = X.
getMap ()->getComm ()->getRank ();
3260 TEUCHOS_TEST_FOR_EXCEPTION(
3262 prefix <<
"Invalid input Map. The input Map owns " << newNumRows <<
3263 " entries on process " << myRank <<
". offset = " << offset <<
". " 3265 " rows on this process.");
3268 #ifdef HAVE_TPETRA_DEBUG 3269 const size_t strideBefore =
3274 X.template getLocalView<Kokkos::HostSpace> ().ptr_on_device ();
3275 #endif // HAVE_TPETRA_DEBUG 3277 const std::pair<size_t, size_t> origRowRng (offset, X.
origView_.dimension_0 ());
3278 const std::pair<size_t, size_t> rowRng (offset, offset + newNumRows);
3298 if (newOrigView.dimension_0 () == 0 &&
3299 newOrigView.dimension_1 () != X.
origView_.dimension_1 ()) {
3300 newOrigView = allocDualView<Scalar, LO, GO, Node> (size_t (0),
3303 if (newView.dimension_0 () == 0 &&
3304 newView.dimension_1 () != X.
view_.dimension_1 ()) {
3305 newView = allocDualView<Scalar, LO, GO, Node> (size_t (0),
3310 MV (Teuchos::rcp (
new map_type (subMap)), newView, newOrigView) :
3313 #ifdef HAVE_TPETRA_DEBUG 3316 static_cast<size_t> (0);
3320 X.template getLocalView<Kokkos::HostSpace> ().ptr_on_device ();
3322 const size_t strideRet = subViewMV.isConstantStride () ?
3323 subViewMV.getStride () :
3324 static_cast<size_t> (0);
3325 const size_t lclNumRowsRet = subViewMV.getLocalLength ();
3326 const size_t numColsRet = subViewMV.getNumVectors ();
3328 const char suffix[] =
". This should never happen. Please report this " 3329 "bug to the Tpetra developers.";
3331 TEUCHOS_TEST_FOR_EXCEPTION(
3333 std::logic_error, prefix <<
"Returned MultiVector has a number of rows " 3334 "different than the number of local indices in the input Map. " 3335 "lclNumRowsRet: " << lclNumRowsRet <<
", subMap.getNodeNumElements(): " 3337 TEUCHOS_TEST_FOR_EXCEPTION(
3338 strideBefore != strideAfter || lclNumRowsBefore != lclNumRowsAfter ||
3339 numColsBefore != numColsAfter || hostPtrBefore != hostPtrAfter,
3340 std::logic_error, prefix <<
"Original MultiVector changed dimensions, " 3341 "stride, or host pointer after taking offset view. strideBefore: " <<
3342 strideBefore <<
", strideAfter: " << strideAfter <<
", lclNumRowsBefore: " 3343 << lclNumRowsBefore <<
", lclNumRowsAfter: " << lclNumRowsAfter <<
3344 ", numColsBefore: " << numColsBefore <<
", numColsAfter: " <<
3345 numColsAfter <<
", hostPtrBefore: " << hostPtrBefore <<
", hostPtrAfter: " 3346 << hostPtrAfter << suffix);
3347 TEUCHOS_TEST_FOR_EXCEPTION(
3348 strideBefore != strideRet, std::logic_error, prefix <<
"Returned " 3349 "MultiVector has different stride than original MultiVector. " 3350 "strideBefore: " << strideBefore <<
", strideRet: " << strideRet <<
3351 ", numColsBefore: " << numColsBefore <<
", numColsRet: " << numColsRet
3353 TEUCHOS_TEST_FOR_EXCEPTION(
3354 numColsBefore != numColsRet, std::logic_error,
3355 prefix <<
"Returned MultiVector has a different number of columns than " 3356 "original MultiVector. numColsBefore: " << numColsBefore <<
", " 3357 "numColsRet: " << numColsRet << suffix);
3358 #endif // HAVE_TPETRA_DEBUG 3363 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3364 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3367 const size_t offset)
const 3370 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3373 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3374 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3377 const size_t offset)
3380 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3383 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3384 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3386 subView (
const Teuchos::ArrayView<const size_t>& cols)
const 3388 using Teuchos::Array;
3392 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3393 TEUCHOS_TEST_FOR_EXCEPTION(
3394 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView" 3395 "(const Teuchos::ArrayView<const size_t>&): The input array cols must " 3396 "contain at least one entry, but cols.size() = " << cols.size ()
3401 bool contiguous =
true;
3402 for (
size_t j = 1; j < numViewCols; ++j) {
3403 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3409 if (numViewCols == 0) {
3411 return rcp (
new MV (this->getMap (), numViewCols));
3414 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3418 if (isConstantStride ()) {
3419 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3422 Array<size_t> newcols (cols.size ());
3423 for (
size_t j = 0; j < numViewCols; ++j) {
3424 newcols[j] = whichVectors_[cols[j]];
3426 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3431 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3432 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3437 using Kokkos::subview;
3438 using Teuchos::Array;
3442 const char tfecfFuncName[] =
"subView(Range1D): ";
3444 const size_t lclNumRows = this->getLocalLength ();
3445 const size_t numVecs = this->getNumVectors ();
3449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3450 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3451 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = " 3453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3454 numVecs != 0 && colRng.size () != 0 &&
3455 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3456 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3457 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3458 "," << colRng.ubound () <<
"] exceeds the valid range of column indices " 3459 "[0, " << numVecs <<
"].");
3461 RCP<const MV> X_ret;
3471 const std::pair<size_t, size_t> rows (0, lclNumRows);
3472 if (colRng.size () == 0) {
3473 const std::pair<size_t, size_t> cols (0, 0);
3475 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3479 if (isConstantStride ()) {
3480 const std::pair<size_t, size_t> cols (colRng.lbound (),
3481 colRng.ubound () + 1);
3483 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3486 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3489 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3490 whichVectors_[0] + colRng.ubound () + 1);
3492 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3495 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3496 whichVectors_.begin () + colRng.ubound () + 1);
3497 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3502 #ifdef HAVE_TPETRA_DEBUG 3503 using Teuchos::Comm;
3504 using Teuchos::outArg;
3505 using Teuchos::REDUCE_MIN;
3506 using Teuchos::reduceAll;
3508 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
3509 this->getMap ()->getComm ();
3510 if (! comm.is_null ()) {
3514 if (X_ret.is_null ()) {
3517 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3518 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3519 lclSuccess != 1, std::logic_error,
"X_ret (the subview of this " 3520 "MultiVector; the return value of this method) is null on some MPI " 3521 "process in this MultiVector's communicator. This should never " 3522 "happen. Please report this bug to the Tpetra developers.");
3524 if (! X_ret.is_null () &&
3525 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3528 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3530 lclSuccess != 1, std::logic_error,
3531 "X_ret->getNumVectors() != colRng.size(), on at least one MPI process " 3532 "in this MultiVector's communicator. This should never happen. " 3533 "Please report this bug to the Tpetra developers.");
3535 #endif // HAVE_TPETRA_DEBUG 3541 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3542 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3547 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3551 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3552 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3557 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3561 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3562 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3567 using Kokkos::subview;
3571 #ifdef HAVE_TPETRA_DEBUG 3572 const char tfecfFuncName[] =
"getVector(NonConst): ";
3573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3574 this->vectorIndexOutOfRange (j), std::runtime_error,
"Input index j (== " 3575 << j <<
") exceeds valid range [0, " << this->getNumVectors ()
3577 #endif // HAVE_TPETRA_DEBUG 3578 const size_t jj = this->isConstantStride () ?
3579 static_cast<size_t> (j) :
3580 static_cast<size_t> (this->whichVectors_[j]);
3581 const std::pair<size_t, size_t> rng (jj, jj+1);
3582 return rcp (
new V (this->getMap (),
3583 takeSubview (this->view_, ALL (), rng),
3588 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3589 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3594 return Teuchos::rcp_const_cast<V> (this->getVector (j));
3598 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3601 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const 3603 typedef decltype (this->
template getLocalView<device_type> ())
3605 typedef decltype (this->
template getLocalView<Kokkos::HostSpace> ())
3608 typedef Kokkos::View<IST*,
typename host_view_type::array_layout,
3609 Kokkos::HostSpace, Kokkos::MemoryUnmanaged> input_col_type;
3610 const char tfecfFuncName[] =
"get1dCopy: ";
3612 const size_t numRows = this->getLocalLength ();
3613 const size_t numCols = this->getNumVectors ();
3614 const std::pair<size_t, size_t> rowRange (0, numRows);
3616 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3617 LDA < numRows, std::runtime_error,
3618 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3619 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3620 numRows > static_cast<size_t> (0) &&
3621 numCols > static_cast<size_t> (0) &&
3622 static_cast<size_t> (A.size ()) < LDA * (numCols - 1) + numRows,
3624 "A.size() = " << A.size () <<
", but its size must be at least " 3625 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3640 const bool useHostVersion = this->
template need_sync<device_type> ();
3642 dev_view_type srcView_dev;
3643 host_view_type srcView_host;
3644 if (useHostVersion) {
3645 srcView_host = this->
template getLocalView<Kokkos::HostSpace> ();
3648 srcView_dev = this->
template getLocalView<device_type> ();
3651 for (
size_t j = 0; j < numCols; ++j) {
3652 const size_t srcCol =
3653 this->isConstantStride () ? j : this->whichVectors_[j];
3654 const size_t dstCol = j;
3655 IST*
const dstColRaw =
3656 reinterpret_cast<IST*
> (A.getRawPtr () + LDA * dstCol);
3657 input_col_type dstColView (dstColRaw, numRows);
3659 if (useHostVersion) {
3660 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3661 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3662 dstColView.dimension_0 () != srcColView_host.dimension_0 (),
3663 std::logic_error,
": srcColView and dstColView_host have different " 3664 "dimensions. Please report this bug to the Tpetra developers.");
3668 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3669 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3670 dstColView.dimension_0 () != srcColView_dev.dimension_0 (),
3671 std::logic_error,
": srcColView and dstColView_dev have different " 3672 "dimensions. Please report this bug to the Tpetra developers.");
3679 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3682 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const 3685 const char tfecfFuncName[] =
"get2dCopy: ";
3686 const size_t numRows = this->getLocalLength ();
3687 const size_t numCols = this->getNumVectors ();
3689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3690 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3691 std::runtime_error,
"Input array of pointers must contain as many " 3692 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = " 3693 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3695 if (numRows != 0 && numCols != 0) {
3697 for (
size_t j = 0; j < numCols; ++j) {
3698 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3699 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3700 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of " 3701 "the input array of arrays is not long enough to fit all entries in " 3702 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3703 <<
" < getLocalLength() = " << numRows <<
".");
3707 for (
size_t j = 0; j < numCols; ++j) {
3708 Teuchos::RCP<const V> X_j = this->getVector (j);
3709 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3710 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3715 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3716 Teuchos::ArrayRCP<const Scalar>
3720 if (getLocalLength () == 0 || getNumVectors () == 0) {
3721 return Teuchos::null;
3725 TEUCHOS_TEST_FOR_EXCEPTION(
3726 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::" 3727 "get1dView: This MultiVector does not have constant stride, so it is " 3728 "not possible to view its data as a single array. You may check " 3729 "whether a MultiVector has constant stride by calling " 3730 "isConstantStride().");
3737 const_cast<MV*
> (
this)->
template sync<Kokkos::HostSpace> ();
3740 auto X_lcl = this->
template getLocalView<Kokkos::HostSpace> ();
3741 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3742 Kokkos::Compat::persistingView (X_lcl);
3743 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3747 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3748 Teuchos::ArrayRCP<Scalar>
3752 if (getLocalLength () == 0 || getNumVectors () == 0) {
3753 return Teuchos::null;
3755 TEUCHOS_TEST_FOR_EXCEPTION
3756 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::" 3757 "get1dViewNonConst: This MultiVector does not have constant stride, " 3758 "so it is not possible to view its data as a single array. You may " 3759 "check whether a MultiVector has constant stride by calling " 3760 "isConstantStride().");
3764 this->
template sync<Kokkos::HostSpace> ();
3767 auto X_lcl = this->
template getLocalView<Kokkos::HostSpace> ();
3768 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3769 Kokkos::Compat::persistingView (X_lcl);
3770 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3774 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3775 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3783 this->sync<Kokkos::HostSpace> ();
3787 this->modify<Kokkos::HostSpace> ();
3789 const size_t myNumRows = this->getLocalLength ();
3790 const size_t numCols = this->getNumVectors ();
3791 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3796 auto X_lcl = this->getLocalView<Kokkos::HostSpace> ();
3798 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3799 for (
size_t j = 0; j < numCols; ++j) {
3800 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3801 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3802 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3803 Kokkos::Compat::persistingView (X_lcl_j);
3804 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3809 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3810 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3823 const_cast<this_type*
> (
this)->sync<Kokkos::HostSpace> ();
3825 const size_t myNumRows = this->getLocalLength ();
3826 const size_t numCols = this->getNumVectors ();
3827 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3832 auto X_lcl = this->getLocalView<Kokkos::HostSpace> ();
3834 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3835 for (
size_t j = 0; j < numCols; ++j) {
3836 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3837 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3838 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3839 Kokkos::Compat::persistingView (X_lcl_j);
3840 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
3845 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3849 Teuchos::ETransp transB,
3850 const Scalar& alpha,
3855 using Teuchos::CONJ_TRANS;
3856 using Teuchos::NO_TRANS;
3857 using Teuchos::TRANS;
3860 using Teuchos::rcpFromRef;
3861 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
3862 typedef Teuchos::ScalarTraits<Scalar> STS;
3864 const char errPrefix[] =
"Tpetra::MultiVector::multiply: ";
3900 #ifdef HAVE_TPETRA_DEBUG 3901 if (! this->getMap ().is_null () && ! this->getMap ()->getComm ().is_null ()) {
3907 const bool lclBad = this->getLocalLength () != A_nrows ||
3908 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
3910 const int lclGood = lclBad ? 0 : 1;
3913 using Teuchos::REDUCE_MIN;
3914 using Teuchos::reduceAll;
3915 using Teuchos::outArg;
3917 auto comm = this->getMap ()->getComm ();
3918 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3920 TEUCHOS_TEST_FOR_EXCEPTION
3921 (gblGood != 1, std::runtime_error, errPrefix <<
"Local dimensions of " 3922 "*this, op(A), and op(B) are not consistent on at least one process " 3923 "in this object's communicator.");
3925 #endif // HAVE_TPETRA_DEBUG 3929 const bool C_is_local = ! this->isDistributed ();
3931 const bool Case1 = C_is_local && A_is_local && B_is_local;
3933 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
3934 transA != NO_TRANS &&
3937 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
3941 TEUCHOS_TEST_FOR_EXCEPTION(
3942 ! Case1 && ! Case2 && ! Case3, std::runtime_error, errPrefix
3943 <<
"Multiplication of op(A) and op(B) into *this is not a " 3944 "supported use case.");
3946 if (beta != STS::zero () && Case2) {
3952 const int myRank = this->getMap ()->getComm ()->getRank ();
3954 beta_local = ATS::zero ();
3963 if (! isConstantStride ()) {
3964 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
3966 C_tmp = rcp (
this,
false);
3969 RCP<const MV> A_tmp;
3971 A_tmp = rcp (
new MV (A, Teuchos::Copy));
3973 A_tmp = rcpFromRef (A);
3976 RCP<const MV> B_tmp;
3978 B_tmp = rcp (
new MV (B, Teuchos::Copy));
3980 B_tmp = rcpFromRef (B);
3983 TEUCHOS_TEST_FOR_EXCEPTION(
3984 ! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
3985 ! A_tmp->isConstantStride (), std::logic_error, errPrefix
3986 <<
"Failed to make temporary constant-stride copies of MultiVectors.");
3991 const size_t A_lclNumRows = A_tmp->getLocalLength ();
3992 const size_t A_numVecs = A_tmp->getNumVectors ();
3993 auto A_lcl = A_tmp->template getLocalView<device_type> ();
3994 auto A_sub = Kokkos::subview (A_lcl,
3995 std::make_pair (
size_t (0), A_lclNumRows),
3996 std::make_pair (
size_t (0), A_numVecs));
3997 const size_t B_lclNumRows = B_tmp->getLocalLength ();
3998 const size_t B_numVecs = B_tmp->getNumVectors ();
3999 auto B_lcl = B_tmp->template getLocalView<device_type> ();
4000 auto B_sub = Kokkos::subview (B_lcl,
4001 std::make_pair (
size_t (0), B_lclNumRows),
4002 std::make_pair (
size_t (0), B_numVecs));
4003 const size_t C_lclNumRows = C_tmp->getLocalLength ();
4004 const size_t C_numVecs = C_tmp->getNumVectors ();
4005 auto C_lcl = C_tmp->template getLocalView<device_type> ();
4006 auto C_sub = Kokkos::subview (C_lcl,
4007 std::make_pair (
size_t (0), C_lclNumRows),
4008 std::make_pair (
size_t (0), C_numVecs));
4009 gemm_type::GEMM (transA, transB, alpha, A_sub, B_sub, beta_local, C_sub);
4012 if (! isConstantStride ()) {
4017 A_tmp = Teuchos::null;
4018 B_tmp = Teuchos::null;
4026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4035 using Kokkos::subview;
4038 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4040 const size_t lclNumRows = this->getLocalLength ();
4041 const size_t numVecs = this->getNumVectors ();
4043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4045 std::runtime_error,
"MultiVectors do not have the same local length.");
4046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4047 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors" 4048 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4057 this->
template sync<device_type> ();
4058 this->
template modify<device_type> ();
4059 const_cast<V&
> (A).
template sync<device_type> ();
4060 const_cast<MV&
> (B).
template sync<device_type> ();
4061 auto this_view = this->
template getLocalView<device_type> ();
4062 auto A_view = A.template getLocalView<device_type> ();
4063 auto B_view = B.template getLocalView<device_type> ();
4071 KokkosBlas::mult (scalarThis,
4074 subview (A_view, ALL (), 0),
4078 for (
size_t j = 0; j < numVecs; ++j) {
4079 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4081 KokkosBlas::mult (scalarThis,
4082 subview (this_view, ALL (), C_col),
4084 subview (A_view, ALL (), 0),
4085 subview (B_view, ALL (), B_col));
4090 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4095 using Teuchos::reduceAll;
4096 using Teuchos::REDUCE_SUM;
4097 typedef decltype (this->
template getLocalView<device_type> ())
4099 typedef decltype (this->
template getLocalView<Kokkos::HostSpace> ())
4102 TEUCHOS_TEST_FOR_EXCEPTION
4103 (this->isDistributed (), std::runtime_error,
4104 "Tpetra::MultiVector::reduce should only be called with locally " 4105 "replicated or otherwise not distributed MultiVector objects.");
4106 const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ());
4107 if (comm.getSize () == 1) {
4111 const size_t lclNumRows = getLocalLength ();
4112 const size_t numCols = getNumVectors ();
4113 const size_t totalAllocSize = lclNumRows * numCols;
4120 TEUCHOS_TEST_FOR_EXCEPTION(
4121 lclNumRows > static_cast<size_t> (std::numeric_limits<int>::max ()),
4122 std::runtime_error,
"Tpetra::MultiVector::reduce: On Process " <<
4123 comm.getRank () <<
", the number of local rows " << lclNumRows <<
4124 " does not fit in int.");
4133 const bool useHostVersion = this->
template need_sync<device_type> ();
4135 dev_view_type srcView_dev;
4136 host_view_type srcView_host;
4137 if (useHostVersion) {
4138 srcView_host = this->
template getLocalView<Kokkos::HostSpace> ();
4139 if (lclNumRows != static_cast<size_t> (srcView_host.dimension_0 ())) {
4141 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
4142 srcView_host = Kokkos::subview (srcView_host, rowRng, Kokkos::ALL ());
4146 srcView_dev = this->
template getLocalView<device_type> ();
4147 if (lclNumRows != static_cast<size_t> (srcView_dev.dimension_0 ())) {
4149 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
4150 srcView_dev = Kokkos::subview (srcView_dev, rowRng, Kokkos::ALL ());
4158 const bool contig = isConstantStride () && getStride () == lclNumRows;
4159 dev_view_type srcBuf_dev;
4160 host_view_type srcBuf_host;
4161 if (useHostVersion) {
4163 srcBuf_host = srcView_host;
4166 srcBuf_host = decltype (srcBuf_host) (
"srcBuf", lclNumRows, numCols);
4172 srcBuf_dev = srcView_dev;
4175 srcBuf_dev = decltype (srcBuf_dev) (
"srcBuf", lclNumRows, numCols);
4186 const bool correct =
4187 (useHostVersion &&
static_cast<size_t> (srcBuf_host.size ()) >= totalAllocSize) ||
4188 (! useHostVersion &&
static_cast<size_t> (srcBuf_dev.size ()) >= totalAllocSize);
4189 TEUCHOS_TEST_FOR_EXCEPTION
4190 (! correct, std::logic_error,
"Tpetra::MultiVector::reduce: Violated " 4191 "invariant of temporary source buffer construction. Please report " 4192 "this bug to the Tpetra developers.");
4200 dev_view_type tgtBuf_dev;
4201 host_view_type tgtBuf_host;
4202 if (useHostVersion) {
4203 tgtBuf_host = host_view_type (
"tgtBuf", lclNumRows, numCols);
4206 tgtBuf_dev = dev_view_type (
"tgtBuf", lclNumRows, numCols);
4209 const int reduceCount =
static_cast<int> (totalAllocSize);
4210 if (useHostVersion) {
4211 TEUCHOS_TEST_FOR_EXCEPTION
4212 (static_cast<size_t> (tgtBuf_host.size ()) < totalAllocSize,
4213 std::logic_error,
"Tpetra::MultiVector::reduce: tgtBuf_host.size() = " 4214 << tgtBuf_host.size () <<
" < lclNumRows*numCols = " << totalAllocSize
4215 <<
". Please report this bug to the Tpetra developers.");
4216 reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
4217 srcBuf_host.ptr_on_device (),
4218 tgtBuf_host.ptr_on_device ());
4221 TEUCHOS_TEST_FOR_EXCEPTION
4222 (static_cast<size_t> (tgtBuf_dev.size ()) < totalAllocSize,
4223 std::logic_error,
"Tpetra::MultiVector::reduce: tgtBuf_dev.size() = " 4224 << tgtBuf_dev.size () <<
" < lclNumRows*numCols = " << totalAllocSize
4225 <<
". Please report this bug to the Tpetra developers.");
4226 reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
4227 srcBuf_dev.ptr_on_device (),
4228 tgtBuf_dev.ptr_on_device ());
4232 if (useHostVersion) {
4233 this->
template modify<Kokkos::HostSpace> ();
4234 if (contig || isConstantStride ()) {
4238 for (
size_t j = 0; j < numCols; ++j) {
4239 auto X_j_out = Kokkos::subview (srcView_host, Kokkos::ALL (), j);
4240 auto X_j_in = Kokkos::subview (tgtBuf_host, Kokkos::ALL (), j);
4246 this->
template modify<device_type> ();
4247 if (contig || isConstantStride ()) {
4251 for (
size_t j = 0; j < numCols; ++j) {
4252 auto X_j_out = Kokkos::subview (srcView_dev, Kokkos::ALL (), j);
4253 auto X_j_in = Kokkos::subview (tgtBuf_dev, Kokkos::ALL (), j);
4263 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4270 #ifdef HAVE_TPETRA_DEBUG 4271 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4272 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4273 TEUCHOS_TEST_FOR_EXCEPTION(
4274 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4276 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4277 <<
" is invalid. The range of valid row indices on this process " 4278 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4279 <<
", " << maxLocalIndex <<
"].");
4280 TEUCHOS_TEST_FOR_EXCEPTION(
4281 vectorIndexOutOfRange(col),
4283 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4284 <<
" of the multivector is invalid.");
4286 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4287 view_.h_view (lclRow, colInd) = ScalarValue;
4291 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4297 const bool atomic)
const 4299 #ifdef HAVE_TPETRA_DEBUG 4300 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4301 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4302 TEUCHOS_TEST_FOR_EXCEPTION(
4303 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4305 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4306 <<
" is invalid. The range of valid row indices on this process " 4307 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4308 <<
", " << maxLocalIndex <<
"].");
4309 TEUCHOS_TEST_FOR_EXCEPTION(
4310 vectorIndexOutOfRange(col),
4312 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4313 <<
" of the multivector is invalid.");
4315 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4317 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4320 view_.h_view (lclRow, colInd) += value;
4325 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4334 const LocalOrdinal MyRow = this->map_->getLocalElement (gblRow);
4335 #ifdef HAVE_TPETRA_DEBUG 4336 TEUCHOS_TEST_FOR_EXCEPTION(
4337 MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4339 "Tpetra::MultiVector::replaceGlobalValue: Global row index " << gblRow
4340 <<
"is not present on this process " 4341 << this->getMap ()->getComm ()->getRank () <<
".");
4342 TEUCHOS_TEST_FOR_EXCEPTION(
4343 vectorIndexOutOfRange (col), std::runtime_error,
4344 "Tpetra::MultiVector::replaceGlobalValue: Vector index " << col
4345 <<
" of the multivector is invalid.");
4346 #endif // HAVE_TPETRA_DEBUG 4347 this->replaceLocalValue (MyRow, col, ScalarValue);
4350 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4356 const bool atomic)
const 4360 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4361 #ifdef HAVE_TEUCHOS_DEBUG 4362 TEUCHOS_TEST_FOR_EXCEPTION(
4363 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4365 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4366 <<
"is not present on this process " 4367 << this->getMap ()->getComm ()->getRank () <<
".");
4368 TEUCHOS_TEST_FOR_EXCEPTION(
4369 vectorIndexOutOfRange(col),
4371 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4372 <<
" of the multivector is invalid.");
4374 this->sumIntoLocalValue (lclRow, col, value, atomic);
4377 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4379 Teuchos::ArrayRCP<T>
4385 typename dual_view_type::array_layout,
4387 const size_t col = isConstantStride () ? j : whichVectors_[j];
4388 col_dual_view_type X_col =
4389 Kokkos::subview (view_, Kokkos::ALL (), col);
4390 return Kokkos::Compat::persistingView (X_col.d_view);
4393 template <
class Scalar,
4395 class GlobalOrdinal,
4404 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4409 using Teuchos::TypeNameTraits;
4411 std::ostringstream out;
4412 out <<
"\"" << className <<
"\": {";
4413 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4414 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4415 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4416 <<
", Node" << Node::name ()
4418 if (this->getObjectLabel () !=
"") {
4419 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4421 out <<
", numRows: " << this->getGlobalLength ();
4422 if (className !=
"Tpetra::Vector") {
4423 out <<
", numCols: " << this->getNumVectors ()
4424 <<
", isConstantStride: " << this->isConstantStride ();
4426 if (this->isConstantStride ()) {
4427 out <<
", columnStride: " << this->getStride ();
4434 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4439 return this->descriptionImpl (
"Tpetra::MultiVector");
4442 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4447 typedef LocalOrdinal LO;
4450 if (vl <= Teuchos::VERB_LOW) {
4451 return std::string ();
4453 auto map = this->getMap ();
4454 if (map.is_null ()) {
4455 return std::string ();
4457 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4458 auto outp = Teuchos::getFancyOStream (outStringP);
4459 Teuchos::FancyOStream& out = *outp;
4460 auto comm = map->getComm ();
4461 const int myRank = comm->getRank ();
4462 const int numProcs = comm->getSize ();
4464 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4465 Teuchos::OSTab tab1 (out);
4468 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4469 out <<
"Local number of rows: " << lclNumRows << endl;
4471 if (vl > Teuchos::VERB_MEDIUM) {
4474 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4475 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4477 if (this->isConstantStride ()) {
4478 out <<
"Column stride: " << this->getStride () << endl;
4481 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4485 auto X_dual = this->getDualView ();
4486 typename dual_view_type::t_host X_host;
4487 if (X_dual.template need_sync<Kokkos::HostSpace> ()) {
4493 auto X_dev = this->
template getLocalView<device_type> ();
4494 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4496 X_host = X_host_copy;
4502 X_host = this->
template getLocalView<Kokkos::HostSpace> ();
4506 out <<
"Values: " << endl
4508 const LO numCols =
static_cast<LO
> (this->getNumVectors ());
4510 for (LO i = 0; i < lclNumRows; ++i) {
4512 if (i + 1 < lclNumRows) {
4518 for (LO i = 0; i < lclNumRows; ++i) {
4519 for (LO j = 0; j < numCols; ++j) {
4521 if (j + 1 < numCols) {
4525 if (i + 1 < lclNumRows) {
4535 return outStringP->str ();
4538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4542 const std::string& className,
4543 const Teuchos::EVerbosityLevel verbLevel)
const 4545 using Teuchos::TypeNameTraits;
4546 using Teuchos::VERB_DEFAULT;
4547 using Teuchos::VERB_NONE;
4548 using Teuchos::VERB_LOW;
4550 const Teuchos::EVerbosityLevel vl =
4551 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4553 if (vl == VERB_NONE) {
4560 auto map = this->getMap ();
4561 if (map.is_null ()) {
4564 auto comm = map->getComm ();
4565 if (comm.is_null ()) {
4569 const int myRank = comm->getRank ();
4578 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4582 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4583 out <<
"\"" << className <<
"\":" << endl;
4584 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4586 out <<
"Template parameters:" << endl;
4587 Teuchos::OSTab tab2 (out);
4588 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4589 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4590 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4591 <<
"Node: " << Node::name () << endl;
4593 if (this->getObjectLabel () !=
"") {
4594 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4596 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4597 if (className !=
"Tpetra::Vector") {
4598 out <<
"Number of columns: " << this->getNumVectors () << endl;
4605 if (vl > VERB_LOW) {
4606 const std::string lclStr = this->localDescribeToString (vl);
4611 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4615 const Teuchos::EVerbosityLevel verbLevel)
const 4617 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4620 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4625 replaceMap (newMap);
4628 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4633 typedef LocalOrdinal LO;
4635 typedef typename dual_view_type::host_mirror_space::device_type HMDT;
4636 const bool debug =
false;
4638 TEUCHOS_TEST_FOR_EXCEPTION(
4640 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4641 "Tpetra::deep_copy: Global dimensions of the two Tpetra::MultiVector " 4642 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4643 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions [" 4644 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4646 TEUCHOS_TEST_FOR_EXCEPTION(
4647 this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4648 "Tpetra::deep_copy: The local row counts of the two Tpetra::MultiVector " 4649 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) " 4650 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4652 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4653 std::cout <<
"*** MultiVector::assign: ";
4657 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4658 std::cout <<
"Both *this and src have constant stride" << std::endl;
4662 const bool useHostVersion = src.template need_sync<device_type> ();
4664 if (useHostVersion) {
4665 this->
template modify<HMDT> ();
4667 Details::localDeepCopyConstStride (this->
template getLocalView<HMDT> (),
4668 src.template getLocalView<HMDT> ());
4669 this->
template sync<DT> ();
4672 this->
template modify<DT> ();
4674 Details::localDeepCopyConstStride (this->
template getLocalView<DT> (),
4675 src.template getLocalView<DT> ());
4676 this->
template sync<HMDT> ();
4680 if (this->isConstantStride ()) {
4681 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4682 std::cout <<
"Only *this has constant stride";
4685 const LO numWhichVecs =
static_cast<LO
> (src.
whichVectors_.size ());
4686 const std::string whichVecsLabel (
"MV::deep_copy::whichVecs");
4693 const bool useHostVersion = src.template need_sync<device_type> ();
4694 if (useHostVersion) {
4695 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4696 std::cout <<
"; Copy from host version of src" << std::endl;
4702 typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4703 whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4704 for (LO i = 0; i < numWhichVecs; ++i) {
4710 Details::localDeepCopy (this->
template getLocalView<HMDT> (),
4711 src.template getLocalView<HMDT> (),
4712 true,
false, srcWhichVecs, srcWhichVecs);
4714 this->
template sync<DT> ();
4717 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4718 std::cout <<
"; Copy from device version of src" << std::endl;
4724 typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4725 whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4726 srcWhichVecs.template modify<HMDT> ();
4727 for (LO i = 0; i < numWhichVecs; ++i) {
4728 srcWhichVecs.h_view(i) =
static_cast<LO
> (src.
whichVectors_[i]);
4731 srcWhichVecs.template sync<DT> ();
4734 this->
template modify<DT> ();
4739 Details::localDeepCopy (this->
template getLocalView<DT> (),
4740 src.template getLocalView<DT> (),
4741 true,
false, srcWhichVecs.d_view,
4742 srcWhichVecs.d_view);
4745 this->
template sync<HMDT> ();
4751 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4752 std::cout <<
"Only src has constant stride" << std::endl;
4756 const bool useHostVersion = src.template need_sync<device_type> ();
4757 if (useHostVersion) {
4762 typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4763 const LO numWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
4764 whichvecs_type whichVecs (
"MV::deep_copy::whichVecs", numWhichVecs);
4765 for (LO i = 0; i < numWhichVecs; ++i) {
4766 whichVecs(i) =
static_cast<LO
> (this->whichVectors_[i]);
4770 Details::localDeepCopy (this->
template getLocalView<HMDT> (),
4771 src.template getLocalView<HMDT> (),
4774 whichVecs, whichVecs);
4779 this->
template sync<DT> ();
4784 typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4785 const std::string whichVecsLabel (
"MV::deep_copy::whichVecs");
4786 const LO numWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
4787 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs);
4788 whichVecs.template modify<HMDT> ();
4789 for (LO i = 0; i < numWhichVecs; ++i) {
4790 whichVecs.h_view(i) = this->whichVectors_[i];
4793 whichVecs.template sync<DT> ();
4796 Details::localDeepCopy (this->
template getLocalView<DT> (),
4797 src.template getLocalView<DT> (),
4800 whichVecs.d_view, whichVecs.d_view);
4806 this->
template sync<HMDT> ();
4810 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4811 std::cout <<
"Neither *this nor src has constant stride" << std::endl;
4815 const bool useHostVersion = src.template need_sync<device_type> ();
4816 if (useHostVersion) {
4817 const LO dstNumWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
4818 Kokkos::View<LO*, HMDT> whichVectorsDst (
"dstWhichVecs", dstNumWhichVecs);
4819 for (LO i = 0; i < dstNumWhichVecs; ++i) {
4820 whichVectorsDst(i) = this->whichVectors_[i];
4824 const LO srcNumWhichVecs =
static_cast<LO
> (src.
whichVectors_.size ());
4825 Kokkos::View<LO*, HMDT> whichVectorsSrc (
"srcWhichVecs", srcNumWhichVecs);
4826 for (LO i = 0; i < srcNumWhichVecs; ++i) {
4832 Details::localDeepCopy (this->
template getLocalView<HMDT> (),
4833 src.template getLocalView<HMDT> (),
4836 whichVectorsDst, whichVectorsSrc);
4843 this->
template sync<HMDT> ();
4848 const LO dstNumWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
4849 Kokkos::DualView<LO*, DT> whichVecsDst (
"MV::deep_copy::whichVecsDst",
4851 whichVecsDst.template modify<HMDT> ();
4852 for (LO i = 0; i < dstNumWhichVecs; ++i) {
4853 whichVecsDst.h_view(i) =
static_cast<LO
> (this->whichVectors_[i]);
4856 whichVecsDst.template sync<DT> ();
4862 const LO srcNumWhichVecs =
static_cast<LO
> (src.
whichVectors_.size ());
4863 Kokkos::DualView<LO*, DT> whichVecsSrc (
"MV::deep_copy::whichVecsSrc",
4865 whichVecsSrc.template modify<HMDT> ();
4866 for (LO i = 0; i < srcNumWhichVecs; ++i) {
4867 whichVecsSrc.h_view(i) =
static_cast<LO
> (src.
whichVectors_[i]);
4870 whichVecsSrc.template sync<DT> ();
4874 Details::localDeepCopy (this->
template getLocalView<DT> (),
4875 src.template getLocalView<DT> (),
4878 whichVecsDst.d_view, whichVecsSrc.d_view);
4885 template <
class Scalar,
class LO,
class GO,
class NT, const
bool classic>
4886 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT, classic> >
4891 return Teuchos::rcp (
new MV (map, numVectors));
4894 template <
class ST,
class LO,
class GO,
class NT, const
bool classic>
4895 MultiVector<ST, LO, GO, NT, classic>
4912 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \ 4913 template class MultiVector< SCALAR , LO , GO , NODE >; \ 4914 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \ 4915 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); 4917 #endif // TPETRA_MULTIVECTOR_DEF_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t getStride() const
Stride between columns in the multivector.
device_type::execution_space execution_space
The Kokkos execution space.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
MultiVector< ST, LO, GO, NT, classic > createCopy(const MultiVector< ST, LO, GO, NT, classic > &src)
Return a deep copy of the given MultiVector.
MultiVector< ST, LO, GO, NT, classic > createCopy(const MultiVector< ST, LO, GO, NT, classic > &src)
Return a deep copy of the given MultiVector.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
One or more distributed dense vectors.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
EWhichNormImpl
Input argument for localNormImpl() (which see).
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
Node::device_type device_type
The Kokkos Device type.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
size_t global_size_t
Global size_t object.
Insert new values that don't currently exist.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
bool isConstantStride() const
Whether this multivector has constant stride between columns.
size_t getLocalLength() const
Local number of rows on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Replace existing values with new values.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, typename execution_space::execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
Describes a parallel distribution of objects over processes.
size_t getNumVectors() const
Number of columns in the multivector.
Class that provides GEMM for a particular Kokkos Device.
A distributed dense vector.
Stand-alone utility functions and macros.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &src)
Copy the contents of src into *this (deep copy).
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
EWhichNorm
Input argument for normImpl() (which see).
bool isDistributed() const
Whether this is a globally distributed object.
dual_view_type origView_
The "original view" of the MultiVector's data.