42 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 50 #include "Teuchos_TimeMonitor.hpp" 64 #if defined(__CUDACC__) 66 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 67 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 68 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 70 #elif defined(__GNUC__) 72 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 73 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 74 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 76 #else // some other compiler 79 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 80 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1 81 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 82 #endif // defined(__CUDACC__), defined(__GNUC__) 86 namespace Experimental {
91 template<
class AlphaCoeffType,
93 class MatrixValuesType,
97 class BcrsApplyNoTrans1VecTeamFunctor {
99 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
100 "MatrixValuesType must be a Kokkos::View.");
101 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
102 "OutVecType must be a Kokkos::View.");
103 static_assert (Kokkos::Impl::is_view<InVecType>::value,
104 "InVecType must be a Kokkos::View.");
105 static_assert (std::is_same<MatrixValuesType,
106 typename MatrixValuesType::const_type>::value,
107 "MatrixValuesType must be a const Kokkos::View.");
108 static_assert (std::is_same<OutVecType,
109 typename OutVecType::non_const_type>::value,
110 "OutVecType must be a nonconst Kokkos::View.");
111 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
112 "InVecType must be a const Kokkos::View.");
113 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
114 "MatrixValuesType must be a rank-1 Kokkos::View.");
115 static_assert (static_cast<int> (InVecType::rank) == 1,
116 "InVecType must be a rank-1 Kokkos::View.");
117 static_assert (static_cast<int> (OutVecType::rank) == 1,
118 "OutVecType must be a rank-1 Kokkos::View.");
119 typedef typename MatrixValuesType::non_const_value_type
scalar_type;
120 typedef typename GraphType::device_type device_type;
121 typedef typename device_type::execution_space execution_space;
122 typedef typename execution_space::scratch_memory_space shmem_space;
126 typedef typename std::remove_const<typename GraphType::data_type>::type
129 typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,
138 void setX (
const InVecType& X) { X_ = X; }
146 void setY (
const OutVecType& Y) { Y_ = Y; }
152 getScratchSizePerTeam ()
const 156 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
157 return blockSize_ *
sizeof (y_value_type);
164 getScratchSizePerThread ()
const 168 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
169 return blockSize_ *
sizeof (y_value_type);
174 getNumLclMeshRows ()
const 176 return ptr_.dimension_0 () == 0 ?
178 static_cast<local_ordinal_type> (ptr_.dimension_0 () - 1);
192 BcrsApplyNoTrans1VecTeamFunctor (
const typename std::decay<AlphaCoeffType>::type& alpha,
193 const GraphType& graph,
194 const MatrixValuesType& val,
195 const local_ordinal_type blockSize,
197 const typename std::decay<BetaCoeffType>::type& beta,
199 const local_ordinal_type rowsPerTeam = defaultRowsPerTeam) :
201 ptr_ (graph.row_map),
202 ind_ (graph.entries),
204 blockSize_ (blockSize),
208 rowsPerTeam_ (rowsPerTeam)
215 KOKKOS_INLINE_FUNCTION
void 216 operator () (
const typename policy_type::member_type& member)
const 222 using Kokkos::Details::ArithTraits;
228 using Kokkos::parallel_for;
229 using Kokkos::subview;
231 typedef typename decltype (ptr_)::non_const_value_type offset_type;
232 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
233 shmem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
235 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
238 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
240 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
243 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
246 const LO leagueRank = member.league_rank();
251 shared_array_type threadLocalMem =
252 shared_array_type (member.thread_scratch (1), blockSize_ * rowsPerTeam_);
257 const LO numLclMeshRows = getNumLclMeshRows ();
258 const LO rowBeg = leagueRank * rowsPerTeam_;
259 const LO rowTmp = rowBeg + rowsPerTeam_;
260 const LO rowEnd = rowTmp < numLclMeshRows ? rowTmp : numLclMeshRows;
271 parallel_for (Kokkos::TeamThreadRange (member, rowBeg, rowEnd),
272 [&] (
const LO& lclRow) {
274 out_little_vec_type Y_tlm (threadLocalMem.ptr_on_device () + blockSize_ * member.team_rank (), blockSize_);
276 const offset_type Y_ptBeg = lclRow * blockSize_;
277 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
279 subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
280 if (beta_ == ArithTraits<BetaCoeffType>::zero ()) {
281 FILL (Y_tlm, ArithTraits<BetaCoeffType>::zero ());
283 else if (beta_ == ArithTraits<BetaCoeffType>::one ()) {
291 if (alpha_ != ArithTraits<AlphaCoeffType>::zero ()) {
292 const offset_type blkBeg = ptr_[lclRow];
293 const offset_type blkEnd = ptr_[lclRow+1];
295 const offset_type bs2 = blockSize_ * blockSize_;
296 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
298 little_block_type A_cur (val_.ptr_on_device () + absBlkOff * bs2,
299 blockSize_, blockSize_);
300 const offset_type X_blkCol = ind_[absBlkOff];
301 const offset_type X_ptBeg = X_blkCol * blockSize_;
302 const offset_type X_ptEnd = X_ptBeg + blockSize_;
304 subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
306 GEMV (alpha_, A_cur, X_cur, Y_tlm);
314 typename std::decay<AlphaCoeffType>::type alpha_;
315 typename GraphType::row_map_type::const_type ptr_;
316 typename GraphType::entries_type::const_type ind_;
317 MatrixValuesType val_;
320 typename std::decay<BetaCoeffType>::type beta_;
326 template<
class AlphaCoeffType,
328 class MatrixValuesType,
332 class BcrsApplyNoTrans1VecFunctor {
334 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
335 "MatrixValuesType must be a Kokkos::View.");
336 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
337 "OutVecType must be a Kokkos::View.");
338 static_assert (Kokkos::Impl::is_view<InVecType>::value,
339 "InVecType must be a Kokkos::View.");
340 static_assert (std::is_same<MatrixValuesType,
341 typename MatrixValuesType::const_type>::value,
342 "MatrixValuesType must be a const Kokkos::View.");
343 static_assert (std::is_same<OutVecType,
344 typename OutVecType::non_const_type>::value,
345 "OutVecType must be a nonconst Kokkos::View.");
346 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
347 "InVecType must be a const Kokkos::View.");
348 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
349 "MatrixValuesType must be a rank-1 Kokkos::View.");
350 static_assert (static_cast<int> (InVecType::rank) == 1,
351 "InVecType must be a rank-1 Kokkos::View.");
352 static_assert (static_cast<int> (OutVecType::rank) == 1,
353 "OutVecType must be a rank-1 Kokkos::View.");
354 typedef typename MatrixValuesType::non_const_value_type
scalar_type;
357 typedef typename GraphType::device_type device_type;
360 typedef typename std::remove_const<typename GraphType::data_type>::type
363 typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,
364 typename device_type::execution_space,
372 void setX (
const InVecType& X) { X_ = X; }
380 void setY (
const OutVecType& Y) { Y_ = Y; }
383 BcrsApplyNoTrans1VecFunctor (
const typename std::decay<AlphaCoeffType>::type& alpha,
384 const GraphType& graph,
385 const MatrixValuesType& val,
386 const local_ordinal_type blockSize,
388 const typename std::decay<BetaCoeffType>::type& beta,
389 const OutVecType& Y) :
391 ptr_ (graph.row_map),
392 ind_ (graph.entries),
394 blockSize_ (blockSize),
400 KOKKOS_INLINE_FUNCTION
void 401 operator () (
const local_ordinal_type& lclRow)
const 407 using Kokkos::Details::ArithTraits;
413 using Kokkos::parallel_for;
414 using Kokkos::subview;
415 typedef typename decltype (ptr_)::non_const_value_type offset_type;
416 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
419 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
422 const offset_type Y_ptBeg = lclRow * blockSize_;
423 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
424 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
428 if (beta_ == ArithTraits<BetaCoeffType>::zero ()) {
429 FILL (Y_cur, ArithTraits<BetaCoeffType>::zero ());
431 else if (beta_ != ArithTraits<BetaCoeffType>::one ()) {
435 if (alpha_ != ArithTraits<AlphaCoeffType>::zero ()) {
436 const offset_type blkBeg = ptr_[lclRow];
437 const offset_type blkEnd = ptr_[lclRow+1];
439 const offset_type bs2 = blockSize_ * blockSize_;
440 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
442 little_block_type A_cur (val_.ptr_on_device () + absBlkOff * bs2,
443 blockSize_, blockSize_);
444 const offset_type X_blkCol = ind_[absBlkOff];
445 const offset_type X_ptBeg = X_blkCol * blockSize_;
446 const offset_type X_ptEnd = X_ptBeg + blockSize_;
447 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
449 GEMV (alpha_, A_cur, X_cur, Y_cur);
455 typename std::decay<AlphaCoeffType>::type alpha_;
456 typename GraphType::row_map_type::const_type ptr_;
457 typename GraphType::entries_type::const_type ind_;
458 MatrixValuesType val_;
461 typename std::decay<BetaCoeffType>::type beta_;
465 template<
class AlphaCoeffType,
467 class MatrixValuesType,
468 class InMultiVecType,
470 class OutMultiVecType>
472 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
473 const GraphType& graph,
474 const MatrixValuesType& val,
475 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
476 const InMultiVecType& X,
477 const BetaCoeffType& beta,
478 const OutMultiVecType& Y
480 ,
const typename std::remove_const<typename GraphType::data_type>::type rowsPerTeam = 20
484 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
485 "MatrixValuesType must be a Kokkos::View.");
486 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
487 "OutMultiVecType must be a Kokkos::View.");
488 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
489 "InMultiVecType must be a Kokkos::View.");
490 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
491 "MatrixValuesType must be a rank-1 Kokkos::View.");
492 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
493 "OutMultiVecType must be a rank-2 Kokkos::View.");
494 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
495 "InMultiVecType must be a rank-2 Kokkos::View.");
497 typedef typename MatrixValuesType::const_type matrix_values_type;
498 typedef typename OutMultiVecType::non_const_type out_multivec_type;
499 typedef typename InMultiVecType::const_type in_multivec_type;
500 typedef typename std::decay<AlphaCoeffType>::type alpha_type;
501 typedef typename std::decay<BetaCoeffType>::type beta_type;
502 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
504 const LO numLocalMeshRows = graph.row_map.dimension_0 () == 0 ?
505 static_cast<LO
> (0) :
506 static_cast<LO> (graph.row_map.dimension_0 () - 1);
507 const LO numVecs = Y.dimension_1 ();
508 if (numLocalMeshRows == 0 || numVecs == 0) {
515 in_multivec_type X_in = X;
516 out_multivec_type Y_out = Y;
521 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
522 typedef decltype (
Kokkos::subview (Y_out,
Kokkos::ALL (), 0)) out_vec_type;
524 typedef BcrsApplyNoTrans1VecTeamFunctor<alpha_type, GraphType,
525 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
527 typedef BcrsApplyNoTrans1VecFunctor<alpha_type, GraphType,
528 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
530 typedef typename functor_type::policy_type policy_type;
532 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
533 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
535 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0, rowsPerTeam);
536 const LO numTeams = functor.getNumTeams ();
537 policy_type policy (numTeams, Kokkos::AUTO ());
543 const LO scratchSizePerTeam = functor.getScratchSizePerTeam ();
544 const LO scratchSizePerThread = functor.getScratchSizePerThread ();
546 policy.set_scratch_size (level,
547 Kokkos::PerTeam (scratchSizePerTeam),
548 Kokkos::PerThread (scratchSizePerThread));
551 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
552 policy_type policy (0, numLocalMeshRows);
556 Kokkos::parallel_for (policy, functor);
559 for (LO j = 1; j < numVecs; ++j) {
560 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
561 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
564 Kokkos::parallel_for (policy, functor);
574 template<
class Scalar,
class LO,
class GO,
class Node>
575 class GetLocalDiagCopy {
577 typedef typename Node::device_type device_type;
578 typedef size_t diag_offset_type;
579 typedef Kokkos::View<
const size_t*, device_type,
580 Kokkos::MemoryUnmanaged> diag_offsets_type;
581 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
583 typedef typename local_graph_type::row_map_type row_offsets_type;
584 typedef typename ::Tpetra::Experimental::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
585 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
586 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
589 GetLocalDiagCopy (
const diag_type& diag,
590 const values_type& val,
591 const diag_offsets_type& diagOffsets,
592 const row_offsets_type& ptr,
593 const LO blockSize) :
595 diagOffsets_ (diagOffsets),
597 blockSize_ (blockSize),
598 offsetPerBlock_ (blockSize_*blockSize_),
602 KOKKOS_INLINE_FUNCTION
void 603 operator() (
const LO& lclRowInd)
const 608 const size_t absOffset = ptr_[lclRowInd];
611 const size_t relOffset = diagOffsets_[lclRowInd];
614 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
618 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
619 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
620 const_little_block_type;
621 const_little_block_type D_in (val_.ptr_on_device () + pointOffset,
622 blockSize_, blockSize_);
623 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
629 diag_offsets_type diagOffsets_;
630 row_offsets_type ptr_;
637 template<
class Scalar,
class LO,
class GO,
class Node>
639 BlockCrsMatrix<Scalar, LO, GO, Node>::
640 markLocalErrorAndGetStream ()
642 * (this->localError_) =
true;
643 if ((*errs_).is_null ()) {
644 *errs_ = Teuchos::rcp (
new std::ostringstream ());
649 template<
class Scalar,
class LO,
class GO,
class Node>
654 blockSize_ (static_cast<LO> (0)),
659 localError_ (new bool (false)),
660 errs_ (new
Teuchos::RCP<std::ostringstream> ())
664 template<
class Scalar,
class LO,
class GO,
class Node>
667 const LO blockSize) :
670 rowMeshMap_ (* (graph.getRowMap ())),
671 blockSize_ (blockSize),
675 offsetPerBlock_ (blockSize * blockSize),
676 localError_ (new bool (false)),
677 errs_ (new
Teuchos::RCP<std::ostringstream> ())
679 TEUCHOS_TEST_FOR_EXCEPTION(
680 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 681 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 682 "rows (isSorted() is false). This class assumes sorted rows.");
684 graphRCP_ = Teuchos::rcpFromRef(graph_);
690 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
691 TEUCHOS_TEST_FOR_EXCEPTION(
692 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 693 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
694 " <= 0. The block size must be positive.");
700 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
701 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
704 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
709 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
710 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
713 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
719 val_ = decltype (val_) (
"val", numValEnt);
722 template<
class Scalar,
class LO,
class GO,
class Node>
727 const LO blockSize) :
730 rowMeshMap_ (* (graph.getRowMap ())),
731 domainPointMap_ (domainPointMap),
732 rangePointMap_ (rangePointMap),
733 blockSize_ (blockSize),
737 offsetPerBlock_ (blockSize * blockSize),
738 localError_ (new bool (false)),
739 errs_ (new
Teuchos::RCP<std::ostringstream> ())
741 TEUCHOS_TEST_FOR_EXCEPTION(
742 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 743 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 744 "rows (isSorted() is false). This class assumes sorted rows.");
746 graphRCP_ = Teuchos::rcpFromRef(graph_);
752 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
753 TEUCHOS_TEST_FOR_EXCEPTION(
754 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 755 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
756 " <= 0. The block size must be positive.");
759 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
760 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
763 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
768 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
769 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
772 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
778 val_ = decltype (val_) (
"val", numValEnt);
781 template<
class Scalar,
class LO,
class GO,
class Node>
782 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
787 return Teuchos::rcp (
new map_type (domainPointMap_));
790 template<
class Scalar,
class LO,
class GO,
class Node>
791 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
796 return Teuchos::rcp (
new map_type (rangePointMap_));
799 template<
class Scalar,
class LO,
class GO,
class Node>
800 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
804 return graph_.getRowMap();
807 template<
class Scalar,
class LO,
class GO,
class Node>
808 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
812 return graph_.getColMap();
815 template<
class Scalar,
class LO,
class GO,
class Node>
820 return graph_.getGlobalNumRows();
823 template<
class Scalar,
class LO,
class GO,
class Node>
828 return graph_.getNodeNumRows();
831 template<
class Scalar,
class LO,
class GO,
class Node>
836 return graph_.getNodeMaxNumRowEntries();
839 template<
class Scalar,
class LO,
class GO,
class Node>
844 Teuchos::ETransp mode,
849 TEUCHOS_TEST_FOR_EXCEPTION(
850 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
851 std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 852 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, " 853 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
857 const LO blockSize = getBlockSize ();
859 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
860 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
862 catch (std::invalid_argument& e) {
863 TEUCHOS_TEST_FOR_EXCEPTION(
864 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 865 "apply: Either the input MultiVector X or the output MultiVector Y " 866 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's " 867 "graph. BlockMultiVector's constructor threw the following exception: " 876 const_cast<this_type*
> (
this)->applyBlock (X_view, Y_view, mode, alpha, beta);
877 }
catch (std::invalid_argument& e) {
878 TEUCHOS_TEST_FOR_EXCEPTION(
879 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 880 "apply: The implementation method applyBlock complained about having " 881 "an invalid argument. It reported the following: " << e.what ());
882 }
catch (std::logic_error& e) {
883 TEUCHOS_TEST_FOR_EXCEPTION(
884 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 885 "apply: The implementation method applyBlock complained about a " 886 "possible bug in its implementation. It reported the following: " 888 }
catch (std::exception& e) {
889 TEUCHOS_TEST_FOR_EXCEPTION(
890 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 891 "apply: The implementation method applyBlock threw an exception which " 892 "is neither std::invalid_argument nor std::logic_error, but is a " 893 "subclass of std::exception. It reported the following: " 896 TEUCHOS_TEST_FOR_EXCEPTION(
897 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 898 "apply: The implementation method applyBlock threw an exception which " 899 "is not an instance of a subclass of std::exception. This probably " 900 "indicates a bug. Please report this to the Tpetra developers.");
904 template<
class Scalar,
class LO,
class GO,
class Node>
909 Teuchos::ETransp mode,
913 TEUCHOS_TEST_FOR_EXCEPTION(
915 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: " 916 "X and Y have different block sizes. X.getBlockSize() = " 920 if (mode == Teuchos::NO_TRANS) {
921 applyBlockNoTrans (X, Y, alpha, beta);
922 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
923 applyBlockTrans (X, Y, mode, alpha, beta);
925 TEUCHOS_TEST_FOR_EXCEPTION(
926 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 927 "applyBlock: Invalid 'mode' argument. Valid values are " 928 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
932 template<
class Scalar,
class LO,
class GO,
class Node>
937 #ifdef HAVE_TPETRA_DEBUG 938 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::setAllToScalar: ";
939 #endif // HAVE_TPETRA_DEBUG 941 if (this->
template need_sync<device_type> ()) {
945 #ifdef HAVE_TPETRA_DEBUG 946 TEUCHOS_TEST_FOR_EXCEPTION
947 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
948 prefix <<
"The matrix's values need sync on both device and host.");
949 #endif // HAVE_TPETRA_DEBUG 950 this->
template modify<Kokkos::HostSpace> ();
953 else if (this->
template need_sync<Kokkos::HostSpace> ()) {
957 #ifdef HAVE_TPETRA_DEBUG 958 TEUCHOS_TEST_FOR_EXCEPTION
959 (this->
template need_sync<device_type> (), std::runtime_error,
960 prefix <<
"The matrix's values need sync on both host and device.");
961 #endif // HAVE_TPETRA_DEBUG 962 this->
template modify<device_type> ();
966 this->
template modify<device_type> ();
971 template<
class Scalar,
class LO,
class GO,
class Node>
977 const LO numColInds)
const 979 #ifdef HAVE_TPETRA_DEBUG 980 const char prefix[] =
981 "Tpetra::Experimental::BlockCrsMatrix::replaceLocalValues: ";
982 #endif // HAVE_TPETRA_DEBUG 984 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
989 return static_cast<LO
> (0);
993 const size_t absRowBlockOffset = ptrHost_[localRowInd];
994 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
995 const LO perBlockSize = this->offsetPerBlock ();
1000 #ifdef HAVE_TPETRA_DEBUG 1001 TEUCHOS_TEST_FOR_EXCEPTION
1002 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1003 prefix <<
"The matrix's data were last modified on device, but have " 1004 "not been sync'd to host. Please sync to host (by calling " 1005 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1007 #endif // HAVE_TPETRA_DEBUG 1013 auto vals_host_out =
1014 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1017 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1018 const LO relBlockOffset =
1019 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1020 if (relBlockOffset != LINV) {
1029 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1033 vals_host_out_raw + absBlockOffset * perBlockSize;
1038 for (LO i = 0; i < perBlockSize; ++i) {
1039 A_old[i] = A_new[i];
1041 hint = relBlockOffset + 1;
1048 template <
class Scalar,
class LO,
class GO,
class Node>
1052 Kokkos::MemoryUnmanaged>& offsets)
const 1054 graph_.getLocalDiagOffsets (offsets);
1057 template <
class Scalar,
class LO,
class GO,
class Node>
1058 void TPETRA_DEPRECATED
1067 const size_t lclNumRows = graph_.getNodeNumRows ();
1068 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
1069 offsets.resize (lclNumRows);
1075 typedef typename device_type::memory_space
memory_space;
1076 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1081 Kokkos::MemoryUnmanaged> output_type;
1082 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1083 graph_.getLocalDiagOffsets (offsetsOut);
1086 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1087 graph_.getLocalDiagOffsets (offsetsTmp);
1088 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1089 Kokkos::MemoryUnmanaged> output_type;
1090 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1095 template <
class Scalar,
class LO,
class GO,
class Node>
1101 Kokkos::MemoryUnmanaged>& D_inv,
1102 const Scalar& omega,
1107 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1109 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1110 const LO numLocalMeshRows =
1111 static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1118 const LO blockSize = getBlockSize ();
1119 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1120 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1124 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1125 if (direction == Forward) {
1127 rowEnd = numLocalMeshRows+1;
1130 else if (direction == Backward) {
1131 rowBegin = numLocalMeshRows;
1135 else if (direction == Symmetric) {
1136 this->localGaussSeidel (B, X, D_inv, omega, Forward);
1137 this->localGaussSeidel (B, X, D_inv, omega, Backward);
1141 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1142 const Scalar minus_omega = -omega;
1145 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1146 const LO actlRow = lclRow - 1;
1149 COPY (B_cur, X_lcl);
1150 SCAL (omega, X_lcl);
1152 const size_t meshBeg = ptrHost_[actlRow];
1153 const size_t meshEnd = ptrHost_[actlRow+1];
1154 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1155 const LO meshCol = indHost_[absBlkOff];
1157 getConstLocalBlockFromAbsOffset (absBlkOff);
1161 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1163 GEMV (alpha, A_cur, X_cur, X_lcl);
1169 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1171 FILL (X_update, zero);
1172 GEMV (one, D_lcl, X_lcl, X_update);
1176 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1177 for (LO j = 0; j < numVecs; ++j) {
1178 LO actlRow = lclRow-1;
1181 COPY (B_cur, X_lcl);
1182 SCAL (omega, X_lcl);
1184 const size_t meshBeg = ptrHost_[actlRow];
1185 const size_t meshEnd = ptrHost_[actlRow+1];
1186 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1187 const LO meshCol = indHost_[absBlkOff];
1189 getConstLocalBlockFromAbsOffset (absBlkOff);
1193 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1194 GEMV (alpha, A_cur, X_cur, X_lcl);
1197 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1199 FILL (X_update, zero);
1200 GEMV (one, D_lcl, X_lcl, X_update);
1206 template <
class Scalar,
class LO,
class GO,
class Node>
1212 const Scalar& dampingFactor,
1214 const int numSweeps,
1215 const bool zeroInitialGuess)
const 1219 TEUCHOS_TEST_FOR_EXCEPTION(
1220 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1221 "gaussSeidelCopy: Not implemented.");
1224 template <
class Scalar,
class LO,
class GO,
class Node>
1230 const Teuchos::ArrayView<LO>& rowIndices,
1231 const Scalar& dampingFactor,
1233 const int numSweeps,
1234 const bool zeroInitialGuess)
const 1238 TEUCHOS_TEST_FOR_EXCEPTION(
1239 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1240 "reorderedGaussSeidelCopy: Not implemented.");
1243 template <
class Scalar,
class LO,
class GO,
class Node>
1244 void TPETRA_DEPRECATED
1247 const Teuchos::ArrayView<const size_t>& offsets)
const 1249 using Teuchos::ArrayRCP;
1250 using Teuchos::ArrayView;
1251 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1253 const size_t myNumRows = rowMeshMap_.getNodeNumElements();
1254 const LO* columnIndices;
1257 Teuchos::Array<LO> cols(1);
1260 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_,
ZERO);
1261 for (
size_t i = 0; i < myNumRows; ++i) {
1263 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1267 getLocalRowView (i, columnIndices, vals, numColumns);
1268 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1274 template <
class Scalar,
class LO,
class GO,
class Node>
1278 Kokkos::MemoryUnmanaged>& diag,
1280 Kokkos::MemoryUnmanaged>& offsets)
const 1282 using Kokkos::parallel_for;
1284 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1286 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1287 const LO blockSize = this->getBlockSize ();
1288 TEUCHOS_TEST_FOR_EXCEPTION
1289 (static_cast<LO> (diag.dimension_0 ()) < lclNumMeshRows ||
1290 static_cast<LO> (diag.dimension_1 ()) < blockSize ||
1291 static_cast<LO> (diag.dimension_2 ()) < blockSize,
1292 std::invalid_argument, prefix <<
1293 "The input Kokkos::View is not big enough to hold all the data.");
1294 TEUCHOS_TEST_FOR_EXCEPTION
1295 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1296 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of " 1297 "diagonal blocks " << lclNumMeshRows <<
".");
1299 #ifdef HAVE_TPETRA_DEBUG 1300 TEUCHOS_TEST_FOR_EXCEPTION
1301 (this->
template need_sync<device_type> (), std::runtime_error,
1302 prefix <<
"The matrix's data were last modified on host, but have " 1303 "not been sync'd to device. Please sync to device (by calling " 1304 "sync<device_type>() on this matrix) before calling this method.");
1305 #endif // HAVE_TPETRA_DEBUG 1307 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1308 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1316 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1318 parallel_for (policy_type (0, lclNumMeshRows),
1319 functor_type (diag, vals_dev, offsets,
1320 graph_.getLocalGraph ().row_map, blockSize_));
1324 template <
class Scalar,
class LO,
class GO,
class Node>
1328 Kokkos::MemoryUnmanaged>& diag,
1329 const Teuchos::ArrayView<const size_t>& offsets)
const 1332 using Kokkos::parallel_for;
1334 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1336 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1337 const LO blockSize = this->getBlockSize ();
1338 TEUCHOS_TEST_FOR_EXCEPTION
1339 (static_cast<LO> (diag.dimension_0 ()) < lclNumMeshRows ||
1340 static_cast<LO> (diag.dimension_1 ()) < blockSize ||
1341 static_cast<LO> (diag.dimension_2 ()) < blockSize,
1342 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 1343 "The input Kokkos::View is not big enough to hold all the data.");
1344 TEUCHOS_TEST_FOR_EXCEPTION
1345 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1346 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 1347 "offsets.size() = " << offsets.size () <<
" < local number of diagonal " 1348 "blocks " << lclNumMeshRows <<
".");
1352 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1353 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1354 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1355 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1361 template<
class Scalar,
class LO,
class GO,
class Node>
1366 const Scalar vals[],
1367 const LO numColInds)
const 1369 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1374 return static_cast<LO
> (0);
1378 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1379 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1380 const LO perBlockSize = this->offsetPerBlock ();
1385 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1386 const LO relBlockOffset =
1387 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1388 if (relBlockOffset != LINV) {
1389 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1391 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1393 getConstLocalBlockFromInput (vIn, pointOffset);
1395 Impl::absMax (A_old, A_new);
1396 hint = relBlockOffset + 1;
1404 template<
class Scalar,
class LO,
class GO,
class Node>
1409 const Scalar vals[],
1410 const LO numColInds)
const 1412 #ifdef HAVE_TPETRA_DEBUG 1413 const char prefix[] =
1414 "Tpetra::Experimental::BlockCrsMatrix::sumIntoLocalValues: ";
1415 #endif // HAVE_TPETRA_DEBUG 1417 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1422 return static_cast<LO
> (0);
1427 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1428 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1429 const LO perBlockSize = this->offsetPerBlock ();
1434 #ifdef HAVE_TPETRA_DEBUG 1435 TEUCHOS_TEST_FOR_EXCEPTION
1436 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1437 prefix <<
"The matrix's data were last modified on device, but have not " 1438 "been sync'd to host. Please sync to host (by calling " 1439 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1440 #endif // HAVE_TPETRA_DEBUG 1446 auto vals_host_out =
1447 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1450 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1451 const LO relBlockOffset =
1452 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1453 if (relBlockOffset != LINV) {
1462 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1466 vals_host_out_raw + absBlockOffset * perBlockSize;
1471 for (LO i = 0; i < perBlockSize; ++i) {
1472 A_old[i] += A_new[i];
1474 hint = relBlockOffset + 1;
1481 template<
class Scalar,
class LO,
class GO,
class Node>
1489 #ifdef HAVE_TPETRA_DEBUG 1490 const char prefix[] =
1491 "Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: ";
1492 #endif // HAVE_TPETRA_DEBUG 1494 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1498 return Teuchos::OrdinalTraits<LO>::invalid ();
1501 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1502 colInds = indHost_.ptr_on_device () + absBlockOffsetStart;
1504 #ifdef HAVE_TPETRA_DEBUG 1505 TEUCHOS_TEST_FOR_EXCEPTION
1506 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1507 prefix <<
"The matrix's data were last modified on device, but have " 1508 "not been sync'd to host. Please sync to host (by calling " 1509 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1511 #endif // HAVE_TPETRA_DEBUG 1517 auto vals_host_out =
1518 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1521 absBlockOffsetStart * offsetPerBlock ();
1522 vals =
reinterpret_cast<Scalar*
> (vOut);
1524 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1529 template<
class Scalar,
class LO,
class GO,
class Node>
1533 const Teuchos::ArrayView<LO>& Indices,
1534 const Teuchos::ArrayView<Scalar>& Values,
1535 size_t &NumEntries)
const 1540 getLocalRowView(LocalRow,colInds,vals,numInds);
1541 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1542 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1543 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold " 1544 << numInds <<
" row entries");
1546 for (LO i=0; i<numInds; ++i) {
1547 Indices[i] = colInds[i];
1549 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1550 Values[i] = vals[i];
1552 NumEntries = numInds;
1555 template<
class Scalar,
class LO,
class GO,
class Node>
1559 ptrdiff_t offsets[],
1561 const LO numColInds)
const 1563 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1568 return static_cast<LO
> (0);
1571 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1575 for (LO k = 0; k < numColInds; ++k) {
1576 const LO relBlockOffset =
1577 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1578 if (relBlockOffset != LINV) {
1579 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1580 hint = relBlockOffset + 1;
1588 template<
class Scalar,
class LO,
class GO,
class Node>
1592 const ptrdiff_t offsets[],
1593 const Scalar vals[],
1594 const LO numOffsets)
const 1596 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1601 return static_cast<LO
> (0);
1605 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1606 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1607 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1608 size_t pointOffset = 0;
1611 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1612 const size_t relBlockOffset = offsets[k];
1613 if (relBlockOffset != STINV) {
1614 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1616 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1618 getConstLocalBlockFromInput (vIn, pointOffset);
1619 COPY (A_new, A_old);
1627 template<
class Scalar,
class LO,
class GO,
class Node>
1631 const ptrdiff_t offsets[],
1632 const Scalar vals[],
1633 const LO numOffsets)
const 1635 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1640 return static_cast<LO
> (0);
1644 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1645 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1646 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1647 size_t pointOffset = 0;
1650 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1651 const size_t relBlockOffset = offsets[k];
1652 if (relBlockOffset != STINV) {
1653 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1655 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1657 getConstLocalBlockFromInput (vIn, pointOffset);
1658 Impl::absMax (A_old, A_new);
1666 template<
class Scalar,
class LO,
class GO,
class Node>
1670 const ptrdiff_t offsets[],
1671 const Scalar vals[],
1672 const LO numOffsets)
const 1674 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1679 return static_cast<LO
> (0);
1684 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1685 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1686 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1687 size_t pointOffset = 0;
1690 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1691 const size_t relBlockOffset = offsets[k];
1692 if (relBlockOffset != STINV) {
1693 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1695 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1697 getConstLocalBlockFromInput (vIn, pointOffset);
1699 AXPY (ONE, A_new, A_old);
1707 template<
class Scalar,
class LO,
class GO,
class Node>
1712 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1713 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1714 return static_cast<LO
> (0);
1716 return static_cast<LO
> (numEntInGraph);
1720 template<
class Scalar,
class LO,
class GO,
class Node>
1725 const Teuchos::ETransp mode,
1735 TEUCHOS_TEST_FOR_EXCEPTION(
1736 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 1737 "transpose and conjugate transpose modes are not yet implemented.");
1740 template<
class Scalar,
class LO,
class GO,
class Node>
1742 BlockCrsMatrix<Scalar, LO, GO, Node>::
1743 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1744 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1752 const Scalar zero = STS::zero ();
1753 const Scalar one = STS::one ();
1754 RCP<const import_type>
import = graph_.getImporter ();
1756 RCP<const export_type> theExport = graph_.getExporter ();
1760 TEUCHOS_TEST_FOR_EXCEPTION(
1761 X.mv_.getCopyOrView () != Teuchos::View, std::invalid_argument,
1762 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 1763 "The input BlockMultiVector X has deep copy semantics, " 1764 "not view semantics (as it should).");
1765 TEUCHOS_TEST_FOR_EXCEPTION(
1766 Y.mv_.getCopyOrView () != Teuchos::View, std::invalid_argument,
1767 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 1768 "The output BlockMultiVector Y has deep copy semantics, " 1769 "not view semantics (as it should).");
1771 if (alpha == zero) {
1774 }
else if (beta != one) {
1778 const BMV* X_colMap = NULL;
1779 if (
import.is_null ()) {
1782 }
catch (std::exception& e) {
1783 TEUCHOS_TEST_FOR_EXCEPTION(
1784 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1785 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1786 "operator= threw an exception: " << std::endl << e.what ());
1795 if ((*X_colMap_).is_null () ||
1796 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1797 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1798 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1799 static_cast<LO
> (X.getNumVectors ())));
1801 #ifdef HAVE_TPETRA_BCRS_DO_POINT_IMPORT 1802 if (pointImporter_->is_null ()) {
1805 const auto domainPointMap = rcp (
new typename BMV::map_type (domainPointMap_));
1806 const auto colPointMap = rcp (
new typename BMV::map_type (
1807 BMV::makePointMap (*graph_.getColMap(),
1809 *pointImporter_ = rcp (
new typename crs_graph_type::import_type (
1810 domainPointMap, colPointMap));
1812 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1819 X_colMap = &(**X_colMap_);
1820 }
catch (std::exception& e) {
1821 TEUCHOS_TEST_FOR_EXCEPTION(
1822 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1823 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1824 "operator= threw an exception: " << std::endl << e.what ());
1828 BMV* Y_rowMap = NULL;
1829 if (theExport.is_null ()) {
1831 }
else if ((*Y_rowMap_).is_null () ||
1832 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1833 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1834 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1835 static_cast<LO
> (X.getNumVectors ())));
1837 Y_rowMap = &(**Y_rowMap_);
1838 }
catch (std::exception& e) {
1839 TEUCHOS_TEST_FOR_EXCEPTION(
1840 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1841 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1842 "operator= threw an exception: " << std::endl << e.what ());
1847 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1849 catch (std::exception& e) {
1850 TEUCHOS_TEST_FOR_EXCEPTION
1851 (
true, std::runtime_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1852 "applyBlockNoTrans: localApplyBlockNoTrans threw an exception: " 1856 TEUCHOS_TEST_FOR_EXCEPTION
1857 (
true, std::runtime_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1858 "applyBlockNoTrans: localApplyBlockNoTrans threw some exception " 1859 "that is not a subclass of std::exception.");
1862 if (! theExport.is_null ()) {
1868 template<
class Scalar,
class LO,
class GO,
class Node>
1870 BlockCrsMatrix<Scalar, LO, GO, Node>::
1871 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1872 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1876 using Tpetra::Experimental::Impl::bcrsLocalApplyNoTrans;
1878 const impl_scalar_type alpha_impl = alpha;
1879 const auto graph = this->graph_.getLocalGraph ();
1880 const impl_scalar_type beta_impl = beta;
1881 const LO blockSize = this->getBlockSize ();
1883 auto X_mv = X.getMultiVectorView ();
1884 auto Y_mv = Y.getMultiVectorView ();
1885 Y_mv.template modify<device_type> ();
1887 auto X_lcl = X_mv.template getLocalView<device_type> ();
1888 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1889 auto val = this->val_.template view<device_type> ();
1891 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1895 template<
class Scalar,
class LO,
class GO,
class Node>
1897 BlockCrsMatrix<Scalar, LO, GO, Node>::
1898 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1899 const LO colIndexToFind,
1900 const LO hint)
const 1902 const size_t absStartOffset = ptrHost_[localRowIndex];
1903 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1904 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1906 const LO*
const curInd = indHost_.ptr_on_device () + absStartOffset;
1909 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1916 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1921 const LO maxNumEntriesForLinearSearch = 32;
1922 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1927 const LO* beg = curInd;
1928 const LO* end = curInd + numEntriesInRow;
1929 std::pair<const LO*, const LO*> p =
1930 std::equal_range (beg, end, colIndexToFind);
1931 if (p.first != p.second) {
1933 relOffset =
static_cast<LO
> (p.first - beg);
1937 for (LO k = 0; k < numEntriesInRow; ++k) {
1938 if (colIndexToFind == curInd[k]) {
1948 template<
class Scalar,
class LO,
class GO,
class Node>
1950 BlockCrsMatrix<Scalar, LO, GO, Node>::
1951 offsetPerBlock ()
const 1953 return offsetPerBlock_;
1956 template<
class Scalar,
class LO,
class GO,
class Node>
1958 BlockCrsMatrix<Scalar, LO, GO, Node>::
1959 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1960 const size_t pointOffset)
const 1963 const LO rowStride = blockSize_;
1964 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1967 template<
class Scalar,
class LO,
class GO,
class Node>
1969 BlockCrsMatrix<Scalar, LO, GO, Node>::
1970 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1971 const size_t pointOffset)
const 1974 const LO rowStride = blockSize_;
1975 return little_block_type (val + pointOffset, blockSize_, rowStride);
1978 template<
class Scalar,
class LO,
class GO,
class Node>
1980 BlockCrsMatrix<Scalar, LO, GO, Node>::
1981 getConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const 1983 #ifdef HAVE_TPETRA_DEBUG 1984 const char prefix[] =
1985 "Tpetra::Experimental::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1986 #endif // HAVE_TPETRA_DEBUG 1988 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
1992 return const_little_block_type ();
1995 #ifdef HAVE_TPETRA_DEBUG 1996 TEUCHOS_TEST_FOR_EXCEPTION
1997 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1998 prefix <<
"The matrix's data were last modified on device, but have " 1999 "not been sync'd to host. Please sync to host (by calling " 2000 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 2002 #endif // HAVE_TPETRA_DEBUG 2003 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2008 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2010 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
2011 const impl_scalar_type* vals_host_raw = vals_host.ptr_on_device ();
2013 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2017 template<
class Scalar,
class LO,
class GO,
class Node>
2019 BlockCrsMatrix<Scalar, LO, GO, Node>::
2020 getConstLocalBlockFromRelOffset (
const LO lclMeshRow,
2021 const size_t relMeshOffset)
const 2023 typedef impl_scalar_type IST;
2025 const LO* lclColInds = NULL;
2026 Scalar* lclVals = NULL;
2029 LO err = this->getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2034 return const_little_block_type ();
2037 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2038 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
2039 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
2044 template<
class Scalar,
class LO,
class GO,
class Node>
2046 BlockCrsMatrix<Scalar, LO, GO, Node>::
2047 getNonConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const 2049 #ifdef HAVE_TPETRA_DEBUG 2050 const char prefix[] =
2051 "Tpetra::Experimental::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2052 #endif // HAVE_TPETRA_DEBUG 2054 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
2058 return little_block_type ();
2061 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2062 #ifdef HAVE_TPETRA_DEBUG 2063 TEUCHOS_TEST_FOR_EXCEPTION
2064 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
2065 prefix <<
"The matrix's data were last modified on device, but have " 2066 "not been sync'd to host. Please sync to host (by calling " 2067 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 2069 #endif // HAVE_TPETRA_DEBUG 2073 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2075 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
2076 impl_scalar_type* vals_host_raw = vals_host.ptr_on_device ();
2077 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2081 template<
class Scalar,
class LO,
class GO,
class Node>
2083 BlockCrsMatrix<Scalar, LO, GO, Node>::
2084 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const 2086 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2087 const LO relBlockOffset =
2088 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2090 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2091 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2092 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2095 return little_block_type ();
2109 template<
class Scalar,
class LO,
class GO,
class Node>
2119 std::ostream& err = markLocalErrorAndGetStream ();
2120 err <<
"checkSizes: The source object of the Import or Export " 2121 "must be a BlockCrsMatrix with the same template parameters as the " 2122 "target object." << endl;
2127 if (src->getBlockSize () != this->getBlockSize ()) {
2128 std::ostream& err = markLocalErrorAndGetStream ();
2129 err <<
"checkSizes: The source and target objects of the Import or " 2130 <<
"Export must have the same block sizes. The source's block " 2131 <<
"size = " << src->getBlockSize () <<
" != the target's block " 2132 <<
"size = " << this->getBlockSize () <<
"." << endl;
2134 if (! src->graph_.isFillComplete ()) {
2135 std::ostream& err = markLocalErrorAndGetStream ();
2136 err <<
"checkSizes: The source object of the Import or Export is " 2137 "not fill complete. Both source and target objects must be fill " 2138 "complete." << endl;
2140 if (! this->graph_.isFillComplete ()) {
2141 std::ostream& err = markLocalErrorAndGetStream ();
2142 err <<
"checkSizes: The target object of the Import or Export is " 2143 "not fill complete. Both source and target objects must be fill " 2144 "complete." << endl;
2146 if (src->graph_.getColMap ().is_null ()) {
2147 std::ostream& err = markLocalErrorAndGetStream ();
2148 err <<
"checkSizes: The source object of the Import or Export does " 2149 "not have a column Map. Both source and target objects must have " 2150 "column Maps." << endl;
2152 if (this->graph_.getColMap ().is_null ()) {
2153 std::ostream& err = markLocalErrorAndGetStream ();
2154 err <<
"checkSizes: The target object of the Import or Export does " 2155 "not have a column Map. Both source and target objects must have " 2156 "column Maps." << endl;
2160 return ! (* (this->localError_));
2163 template<
class Scalar,
class LO,
class GO,
class Node>
2168 const Teuchos::ArrayView<const LO>& permuteToLIDs,
2169 const Teuchos::ArrayView<const LO>& permuteFromLIDs)
2173 const bool debug =
false;
2176 std::ostringstream os;
2177 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2178 os <<
"Proc " << myRank <<
": copyAndPermute: " 2179 <<
"numSameIDs: " << numSameIDs
2180 <<
", permuteToLIDs.size(): " << permuteToLIDs.size ()
2181 <<
", permuteFromLIDs.size(): " << permuteFromLIDs.size ()
2183 std::cerr << os.str ();
2189 if (* (this->localError_)) {
2190 std::ostream& err = this->markLocalErrorAndGetStream ();
2191 err <<
"copyAndPermute: The target object of the Import or Export is " 2192 "already in an error state." << endl;
2195 if (permuteToLIDs.size () != permuteFromLIDs.size ()) {
2196 std::ostream& err = this->markLocalErrorAndGetStream ();
2197 err <<
"copyAndPermute: permuteToLIDs.size() = " << permuteToLIDs.size ()
2198 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
"." 2205 std::ostream& err = this->markLocalErrorAndGetStream ();
2206 err <<
"copyAndPermute: The source object of the Import or Export is " 2207 "either not a BlockCrsMatrix, or does not have the right template " 2208 "parameters. checkSizes() should have caught this. " 2209 "Please report this bug to the Tpetra developers." << endl;
2212 if (* (src->localError_)) {
2213 std::ostream& err = this->markLocalErrorAndGetStream ();
2214 err <<
"copyAndPermute: The source object of the Import or Export is " 2215 "already in an error state." << endl;
2219 bool lclErr =
false;
2220 #ifdef HAVE_TPETRA_DEBUG 2221 std::set<LO> invalidSrcCopyRows;
2222 std::set<LO> invalidDstCopyRows;
2223 std::set<LO> invalidDstCopyCols;
2224 std::set<LO> invalidDstPermuteCols;
2225 std::set<LO> invalidPermuteFromRows;
2226 #endif // HAVE_TPETRA_DEBUG 2235 #ifdef HAVE_TPETRA_DEBUG 2236 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2237 #endif // HAVE_TPETRA_DEBUG 2238 const map_type& dstRowMap = * (this->graph_.getRowMap ());
2239 const map_type& srcColMap = * (src->graph_.getColMap ());
2240 const map_type& dstColMap = * (this->graph_.getColMap ());
2241 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
2242 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.size ());
2245 std::ostringstream os;
2246 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2247 os <<
"Proc " << myRank <<
": copyAndPermute: " 2248 <<
"canUseLocalColumnIndices: " 2249 << (canUseLocalColumnIndices ?
"true" :
"false")
2250 <<
", numPermute: " << numPermute
2252 std::cerr << os.str ();
2255 if (canUseLocalColumnIndices) {
2257 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2258 #ifdef HAVE_TPETRA_DEBUG 2261 invalidSrcCopyRows.insert (localRow);
2264 #endif // HAVE_TPETRA_DEBUG 2266 const LO* lclSrcCols;
2271 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2274 #ifdef HAVE_TPETRA_DEBUG 2275 (void) invalidSrcCopyRows.insert (localRow);
2276 #endif // HAVE_TPETRA_DEBUG 2279 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries);
2280 if (err != numEntries) {
2283 #ifdef HAVE_TPETRA_DEBUG 2284 invalidDstCopyRows.insert (localRow);
2285 #endif // HAVE_TPETRA_DEBUG 2293 for (LO k = 0; k < numEntries; ++k) {
2296 #ifdef HAVE_TPETRA_DEBUG 2297 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2298 #endif // HAVE_TPETRA_DEBUG 2307 for (
size_t k = 0; k < numPermute; ++k) {
2308 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
2309 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
2311 const LO* lclSrcCols;
2314 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2317 #ifdef HAVE_TPETRA_DEBUG 2318 invalidPermuteFromRows.insert (srcLclRow);
2319 #endif // HAVE_TPETRA_DEBUG 2322 err = this->replaceLocalValues (dstLclRow, lclSrcCols, vals, numEntries);
2323 if (err != numEntries) {
2325 #ifdef HAVE_TPETRA_DEBUG 2326 for (LO c = 0; c < numEntries; ++c) {
2328 invalidDstPermuteCols.insert (lclSrcCols[c]);
2331 #endif // HAVE_TPETRA_DEBUG 2338 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2339 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2342 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2343 const LO* lclSrcCols;
2350 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2351 }
catch (std::exception& e) {
2353 std::ostringstream os;
2354 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2355 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2356 << localRow <<
", src->getLocalRowView() threw an exception: " 2358 std::cerr << os.str ();
2365 #ifdef HAVE_TPETRA_DEBUG 2366 invalidSrcCopyRows.insert (localRow);
2367 #endif // HAVE_TPETRA_DEBUG 2370 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2373 std::ostringstream os;
2374 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2375 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2376 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = " 2377 << maxNumEnt << endl;
2378 std::cerr << os.str ();
2384 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2385 for (LO j = 0; j < numEntries; ++j) {
2387 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2389 #ifdef HAVE_TPETRA_DEBUG 2390 invalidDstCopyCols.insert (lclDstColsView[j]);
2391 #endif // HAVE_TPETRA_DEBUG 2395 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2396 }
catch (std::exception& e) {
2398 std::ostringstream os;
2399 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2400 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2401 << localRow <<
", this->replaceLocalValues() threw an exception: " 2403 std::cerr << os.str ();
2407 if (err != numEntries) {
2410 std::ostringstream os;
2411 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2412 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" " 2413 "localRow " << localRow <<
", this->replaceLocalValues " 2414 "returned " << err <<
" instead of numEntries = " 2415 << numEntries << endl;
2416 std::cerr << os.str ();
2424 for (
size_t k = 0; k < numPermute; ++k) {
2425 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
2426 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
2428 const LO* lclSrcCols;
2433 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2434 }
catch (std::exception& e) {
2436 std::ostringstream os;
2437 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2438 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" " 2439 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2440 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2441 std::cerr << os.str ();
2448 #ifdef HAVE_TPETRA_DEBUG 2449 invalidPermuteFromRows.insert (srcLclRow);
2450 #endif // HAVE_TPETRA_DEBUG 2453 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2459 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2460 for (LO j = 0; j < numEntries; ++j) {
2462 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2464 #ifdef HAVE_TPETRA_DEBUG 2465 invalidDstPermuteCols.insert (lclDstColsView[j]);
2466 #endif // HAVE_TPETRA_DEBUG 2469 err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2470 if (err != numEntries) {
2479 std::ostream& err = this->markLocalErrorAndGetStream ();
2480 #ifdef HAVE_TPETRA_DEBUG 2481 err <<
"copyAndPermute: The graph structure of the source of the " 2482 "Import or Export must be a subset of the graph structure of the " 2484 err <<
"invalidSrcCopyRows = [";
2485 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2486 it != invalidSrcCopyRows.end (); ++it) {
2488 typename std::set<LO>::const_iterator itp1 = it;
2490 if (itp1 != invalidSrcCopyRows.end ()) {
2494 err <<
"], invalidDstCopyRows = [";
2495 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2496 it != invalidDstCopyRows.end (); ++it) {
2498 typename std::set<LO>::const_iterator itp1 = it;
2500 if (itp1 != invalidDstCopyRows.end ()) {
2504 err <<
"], invalidDstCopyCols = [";
2505 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2506 it != invalidDstCopyCols.end (); ++it) {
2508 typename std::set<LO>::const_iterator itp1 = it;
2510 if (itp1 != invalidDstCopyCols.end ()) {
2514 err <<
"], invalidDstPermuteCols = [";
2515 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2516 it != invalidDstPermuteCols.end (); ++it) {
2518 typename std::set<LO>::const_iterator itp1 = it;
2520 if (itp1 != invalidDstPermuteCols.end ()) {
2524 err <<
"], invalidPermuteFromRows = [";
2525 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2526 it != invalidPermuteFromRows.end (); ++it) {
2528 typename std::set<LO>::const_iterator itp1 = it;
2530 if (itp1 != invalidPermuteFromRows.end ()) {
2534 err <<
"]" << std::endl;
2536 err <<
"copyAndPermute: The graph structure of the source of the " 2537 "Import or Export must be a subset of the graph structure of the " 2538 "target." << std::endl;
2539 #endif // HAVE_TPETRA_DEBUG 2543 std::ostringstream os;
2544 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2545 const bool lclSuccess = ! (* (this->localError_));
2546 os <<
"*** Proc " << myRank <<
": copyAndPermute " 2547 << (lclSuccess ?
"succeeded" :
"FAILED");
2551 os <<
": error messages: " << this->errorMessages ();
2553 std::cerr << os.str ();
2577 template<
class LO,
class GO,
class D>
2579 packRowCount (
const size_t numEnt,
2580 const size_t numBytesPerValue,
2581 const size_t blkSize)
2593 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2594 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2595 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2596 return numEntLen + gidsLen + valsLen;
2610 template<
class ST,
class LO,
class GO,
class D>
2613 const size_t offset,
2614 const size_t numBytes,
2615 const size_t numBytesPerValue)
2617 using Kokkos::subview;
2619 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
2620 typedef typename input_buffer_type::size_type size_type;
2622 if (numBytes == 0) {
2624 return static_cast<size_t> (0);
2628 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2629 #ifdef HAVE_TPETRA_DEBUG 2630 TEUCHOS_TEST_FOR_EXCEPTION(
2631 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2632 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2634 #endif // HAVE_TPETRA_DEBUG 2635 const std::pair<size_type, size_type> rng (offset, offset + theNumBytes);
2636 input_buffer_type inBuf = subview (imports, rng);
2637 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2638 #ifdef HAVE_TPETRA_DEBUG 2639 TEUCHOS_TEST_FOR_EXCEPTION(
2640 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2641 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2644 (void) actualNumBytes;
2645 #endif // HAVE_TPETRA_DEBUG 2646 return static_cast<size_t> (numEntLO);
2657 template<
class ST,
class LO,
class GO,
class D>
2660 const size_t offset,
2661 const size_t numEnt,
2664 const size_t numBytesPerValue,
2665 const size_t blockSize)
2667 using Kokkos::subview;
2674 typedef typename PackTraits<LO, D>::output_buffer_type output_buffer_type;
2675 typedef typename output_buffer_type::size_type size_type;
2676 typedef typename std::pair<size_type, size_type> pair_type;
2682 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2685 const LO numEntLO =
static_cast<size_t> (numEnt);
2687 const size_t numEntBeg = offset;
2688 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2689 const size_t gidsBeg = numEntBeg + numEntLen;
2690 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2691 const size_t valsBeg = gidsBeg + gidsLen;
2692 const size_t valsLen = numScalarEnt * numBytesPerValue;
2694 output_buffer_type numEntOut =
2695 subview (exports, pair_type (numEntBeg, numEntBeg + numEntLen));
2696 output_buffer_type gidsOut =
2697 subview (exports, pair_type (gidsBeg, gidsBeg + gidsLen));
2698 output_buffer_type valsOut =
2699 subview (exports, pair_type (valsBeg, valsBeg + valsLen));
2701 size_t numBytesOut = 0;
2702 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2703 numBytesOut += PackTraits<GO, D>::packArray (gidsOut, gidsIn, numEnt);
2704 numBytesOut += PackTraits<ST, D>::packArray (valsOut, valsIn, numScalarEnt);
2706 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2707 TEUCHOS_TEST_FOR_EXCEPTION(
2708 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2709 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2710 << expectedNumBytes <<
".");
2715 template<
class ST,
class LO,
class GO,
class D>
2720 const size_t offset,
2721 const size_t numBytes,
2722 const size_t numEnt,
2723 const size_t numBytesPerValue,
2724 const size_t blockSize)
2726 using Kokkos::subview;
2733 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
2734 typedef typename input_buffer_type::size_type size_type;
2735 typedef typename std::pair<size_type, size_type> pair_type;
2737 if (numBytes == 0) {
2741 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2742 TEUCHOS_TEST_FOR_EXCEPTION(
2743 static_cast<size_t> (imports.dimension_0 ()) <= offset,
2744 std::logic_error,
"unpackRow: imports.dimension_0() = " 2745 << imports.dimension_0 () <<
" <= offset = " << offset <<
".");
2746 TEUCHOS_TEST_FOR_EXCEPTION(
2747 static_cast<size_t> (imports.dimension_0 ()) < offset + numBytes,
2748 std::logic_error,
"unpackRow: imports.dimension_0() = " 2749 << imports.dimension_0 () <<
" < offset + numBytes = " 2750 << (offset + numBytes) <<
".");
2755 const size_t numEntBeg = offset;
2756 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2757 const size_t gidsBeg = numEntBeg + numEntLen;
2758 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2759 const size_t valsBeg = gidsBeg + gidsLen;
2760 const size_t valsLen = numScalarEnt * numBytesPerValue;
2762 input_buffer_type numEntIn =
2763 subview (imports, pair_type (numEntBeg, numEntBeg + numEntLen));
2764 input_buffer_type gidsIn =
2765 subview (imports, pair_type (gidsBeg, gidsBeg + gidsLen));
2766 input_buffer_type valsIn =
2767 subview (imports, pair_type (valsBeg, valsBeg + valsLen));
2769 size_t numBytesOut = 0;
2771 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2772 TEUCHOS_TEST_FOR_EXCEPTION(
2773 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2774 "unpackRow: Expected number of entries " << numEnt
2775 <<
" != actual number of entries " << numEntOut <<
".");
2777 numBytesOut += PackTraits<GO, D>::unpackArray (gidsOut, gidsIn, numEnt);
2778 numBytesOut += PackTraits<ST, D>::unpackArray (valsOut, valsIn, numScalarEnt);
2780 TEUCHOS_TEST_FOR_EXCEPTION(
2781 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 2782 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
2783 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2784 TEUCHOS_TEST_FOR_EXCEPTION(
2785 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2786 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2787 << expectedNumBytes <<
".");
2792 template<
class Scalar,
class LO,
class GO,
class Node>
2794 BlockCrsMatrix<Scalar, LO, GO, Node>::
2796 const Teuchos::ArrayView<const LO>& exportLIDs,
2797 Teuchos::Array<packet_type>& exports,
2798 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
2799 size_t& constantNumPackets,
2804 using Kokkos::MemoryUnmanaged;
2805 using Kokkos::subview;
2808 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
2809 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2810 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
2811 const bool debug =
false;
2814 std::ostringstream os;
2815 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2816 os <<
"Proc " << myRank <<
": packAndPrepare: exportLIDs.size() = " 2817 << exportLIDs.size () <<
", numPacketsPerLID.size() = " 2818 << numPacketsPerLID.size () << endl;
2819 std::cerr << os.str ();
2822 if (* (this->localError_)) {
2823 std::ostream& err = this->markLocalErrorAndGetStream ();
2824 err <<
"packAndPrepare: The target object of the Import or Export is " 2825 "already in an error state." << endl;
2829 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2832 std::ostream& err = this->markLocalErrorAndGetStream ();
2833 err <<
"packAndPrepare: The source (input) object of the Import or " 2834 "Export is either not a BlockCrsMatrix, or does not have the right " 2835 "template parameters. checkSizes() should have caught this. " 2836 "Please report this bug to the Tpetra developers." << endl;
2840 const crs_graph_type& srcGraph = src->graph_;
2841 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2842 const size_type numExportLIDs = exportLIDs.size ();
2844 if (numExportLIDs != numPacketsPerLID.size ()) {
2845 std::ostream& err = this->markLocalErrorAndGetStream ();
2846 err <<
"packAndPrepare: exportLIDs.size() = " << numExportLIDs
2847 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.size ()
2860 constantNumPackets = 0;
2865 size_t totalNumBytes = 0;
2866 size_t totalNumEntries = 0;
2867 size_t maxRowLength = 0;
2868 for (size_type i = 0; i < numExportLIDs; ++i) {
2869 const LO lclRow = exportLIDs[i];
2870 size_t numEnt = srcGraph.getNumEntriesInLocalRow (lclRow);
2877 if (numEnt == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2880 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2884 size_t numBytesPerValue = 0;
2890 Scalar* valsRaw = NULL;
2891 const LO* lidsRaw = NULL;
2892 LO actualNumEnt = 0;
2894 src->getLocalRowView (lclRow, lidsRaw, valsRaw, actualNumEnt);
2896 if (numEnt != static_cast<size_t> (actualNumEnt)) {
2897 std::ostream& err = this->markLocalErrorAndGetStream ();
2898 err <<
"packAndPrepare: Local row " << i <<
" claims to have " <<
2899 numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 2900 "has " << actualNumEnt <<
" entry/ies. This should never happen. " 2901 "Please report this bug to the Tpetra developers." << endl;
2904 if (errCode == Teuchos::OrdinalTraits<LO>::invalid ()) {
2905 std::ostream& err = this->markLocalErrorAndGetStream ();
2906 err <<
"packAndPrepare: Local row " << i <<
" is not in the row Map " 2907 "of the source object on the calling process." << endl;
2911 const ST* valsRawST =
2912 const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2913 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2918 numBytesPerValue = PackTraits<ST, HES>::packValueCount (vals(0));
2921 const size_t numBytes =
2922 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue, blockSize);
2923 numPacketsPerLID[i] = numBytes;
2924 totalNumBytes += numBytes;
2925 totalNumEntries += numEnt;
2926 maxRowLength = std::max (maxRowLength, numEnt);
2930 const int myRank = graph_.getComm ()->getRank ();
2931 std::ostringstream os;
2932 os <<
"Proc " << myRank <<
": packAndPrepare: totalNumBytes = " 2933 << totalNumBytes << endl;
2934 std::cerr << os.str ();
2940 exports.resize (totalNumBytes);
2941 if (totalNumEntries > 0) {
2942 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
2954 const map_type& srcColMap = * (srcGraph.getColMap ());
2957 View<GO*, HES> gblColInds;
2960 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
2964 for (size_type i = 0; i < numExportLIDs; ++i) {
2965 const LO lclRowInd = exportLIDs[i];
2966 const LO* lclColIndsRaw;
2972 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
2973 const size_t numEnt =
static_cast<size_t> (numEntLO);
2974 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2975 View<const LO*, HES, MemoryUnmanaged> lclColInds (lclColIndsRaw, numEnt);
2976 const ST* valsRawST =
const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2977 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2982 const size_t numBytesPerValue = numEnt == 0 ?
2983 static_cast<size_t> (0) :
2984 PackTraits<ST, HES>::packValueCount (vals(0));
2987 for (
size_t j = 0; j < numEnt; ++j) {
2988 gblColInds(j) = srcColMap.getGlobalElement (lclColInds(j));
2992 const size_t numBytes =
2993 packRowForBlockCrs<ST, LO, GO, HES> (exportsK, offset, numEnt, gblColInds,
2994 vals, numBytesPerValue, blockSize);
2999 if (offset != totalNumBytes) {
3000 std::ostream& err = this->markLocalErrorAndGetStream ();
3001 err <<
"packAndPreapre: At end of method, the final offset (in bytes) " 3002 << offset <<
" does not equal the total number of bytes packed " 3003 << totalNumBytes <<
". " 3004 <<
"Please report this bug to the Tpetra developers." << endl;
3010 std::ostringstream os;
3011 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3012 const bool lclSuccess = ! (* (this->localError_));
3013 os <<
"*** Proc " << myRank <<
": packAndPrepare " 3014 << (lclSuccess ?
"succeeded" :
"FAILED")
3015 <<
" (totalNumEntries = " << totalNumEntries <<
") ***" << endl;
3016 std::cerr << os.str ();
3021 template<
class Scalar,
class LO,
class GO,
class Node>
3023 BlockCrsMatrix<Scalar, LO, GO, Node>::
3024 unpackAndCombine (
const Teuchos::ArrayView<const LO>& importLIDs,
3025 const Teuchos::ArrayView<const packet_type>& imports,
3026 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
3033 using Kokkos::MemoryUnmanaged;
3034 using Kokkos::subview;
3037 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
3038 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
3039 typedef std::pair<typename View<int*, HES>::size_type,
3040 typename View<int*, HES>::size_type> pair_type;
3041 typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
3042 typedef View<LO*, HES, MemoryUnmanaged> lids_out_type;
3043 typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
3044 typedef typename PackTraits<GO, HES>::input_buffer_type input_buffer_type;
3045 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::unpackAndCombine: ";
3046 const bool debug =
false;
3049 std::ostringstream os;
3050 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3051 os <<
"Proc " << myRank <<
": unpackAndCombine" << endl;
3052 std::cerr << os.str ();
3058 if (* (this->localError_)) {
3059 std::ostream& err = this->markLocalErrorAndGetStream ();
3060 err << prefix <<
"The target object of the Import or Export is " 3061 "already in an error state." << endl;
3064 if (importLIDs.size () != numPacketsPerLID.size ()) {
3065 std::ostream& err = this->markLocalErrorAndGetStream ();
3066 err << prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 3067 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
"." << endl;
3071 std::ostream& err = this->markLocalErrorAndGetStream ();
3072 err << prefix <<
"Invalid CombineMode value " << CM <<
". Valid " 3073 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO." 3081 const map_type& tgtColMap = * (this->graph_.getColMap ());
3083 const size_type numImportLIDs = importLIDs.size ();
3084 if (CM ==
ZERO || numImportLIDs == 0) {
3086 std::ostringstream os;
3087 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3088 os <<
"Proc " << myRank <<
": unpackAndCombine: Nothing to do" << endl;
3089 std::cerr << os.str ();
3095 std::ostringstream os;
3096 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3097 os <<
"Proc " << myRank <<
": unpackAndCombine: Getting ready" << endl;
3098 std::cerr << os.str ();
3101 input_buffer_type importsK (imports.getRawPtr (), imports.size ());
3102 const size_t blockSize = this->getBlockSize ();
3103 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
3104 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3106 size_t numBytesPerValue;
3118 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
3125 View<GO*, HES> gblColInds;
3126 View<LO*, HES> lclColInds;
3127 View<ST*, HES> vals;
3141 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt,
"gids");
3142 lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt,
"lids");
3143 vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumScalarEnt,
"vals");
3147 bool errorDuringUnpack =
false;
3148 for (size_type i = 0; i < numImportLIDs; ++i) {
3149 const size_t numBytes = numPacketsPerLID[i];
3150 if (numBytes == 0) {
3153 const size_t numEnt =
3154 unpackRowCount<ST, LO, GO, HES> (importsK, offset, numBytes,
3156 if (numEnt > maxRowNumEnt) {
3157 errorDuringUnpack =
true;
3158 #ifdef HAVE_TPETRA_DEBUG 3159 std::ostream& err = this->markLocalErrorAndGetStream ();
3160 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3161 <<
" > maxRowNumEnt = " << maxRowNumEnt << endl;
3162 #endif // HAVE_TPETRA_DEBUG 3166 const size_t numScalarEnt = numEnt * blockSize * blockSize;
3167 const LO lclRow = importLIDs[i];
3169 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
3170 vals_out_type valsOut = subview (vals, pair_type (0, numScalarEnt));
3172 const size_t numBytesOut =
3173 unpackRowForBlockCrs<ST, LO, GO, HES> (gidsOut, valsOut, importsK,
3174 offset, numBytes, numEnt,
3175 numBytesPerValue, blockSize);
3176 if (numBytes != numBytesOut) {
3177 errorDuringUnpack =
true;
3178 #ifdef HAVE_TPETRA_DEBUG 3179 std::ostream& err = this->markLocalErrorAndGetStream ();
3180 err << prefix <<
"At i = " << i <<
", numBytes = " << numBytes
3181 <<
" != numBytesOut = " << numBytesOut <<
".";
3182 #endif // HAVE_TPETRA_DEBUG 3187 lids_out_type lidsOut = subview (lclColInds, pair_type (0, numEnt));
3188 for (
size_t k = 0; k < numEnt; ++k) {
3189 lidsOut(k) = tgtColMap.getLocalElement (gidsOut(k));
3190 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3191 errorDuringUnpack =
true;
3192 #ifdef HAVE_TPETRA_DEBUG 3193 std::ostream& err = this->markLocalErrorAndGetStream ();
3194 err << prefix <<
"At i = " << i <<
", GID " << gidsOut(k)
3195 <<
" is not owned by the calling process.";
3196 #endif // HAVE_TPETRA_DEBUG 3203 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.ptr_on_device ());
3204 const Scalar*
const valsRaw =
3205 reinterpret_cast<const Scalar*
> (
const_cast<const ST*
> (valsOut.ptr_on_device ()));
3207 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3209 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3210 }
else if (CM ==
ABSMAX) {
3211 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3214 if (static_cast<LO> (numEnt) != numCombd) {
3215 errorDuringUnpack =
true;
3216 #ifdef HAVE_TPETRA_DEBUG 3217 std::ostream& err = this->markLocalErrorAndGetStream ();
3218 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3219 <<
" != numCombd = " << numCombd <<
".";
3220 #endif // HAVE_TPETRA_DEBUG 3228 if (errorDuringUnpack) {
3229 std::ostream& err = this->markLocalErrorAndGetStream ();
3230 err << prefix <<
"Unpacking failed.";
3231 #ifndef HAVE_TPETRA_DEBUG 3232 err <<
" Please run again with a debug build to get more verbose " 3233 "diagnostic output.";
3234 #endif // ! HAVE_TPETRA_DEBUG 3239 std::ostringstream os;
3240 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3241 const bool lclSuccess = ! (* (this->localError_));
3242 os <<
"*** Proc " << myRank <<
": unpackAndCombine " 3243 << (lclSuccess ?
"succeeded" :
"FAILED")
3245 std::cerr << os.str ();
3250 template<
class Scalar,
class LO,
class GO,
class Node>
3254 using Teuchos::TypeNameTraits;
3255 std::ostringstream os;
3256 os <<
"\"Tpetra::BlockCrsMatrix\": { " 3257 <<
"Template parameters: { " 3258 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3259 <<
"LO: " << TypeNameTraits<LO>::name ()
3260 <<
"GO: " << TypeNameTraits<GO>::name ()
3261 <<
"Node: " << TypeNameTraits<Node>::name ()
3263 <<
", Label: \"" << this->getObjectLabel () <<
"\"" 3264 <<
", Global dimensions: [" 3265 << graph_.getDomainMap ()->getGlobalNumElements () <<
", " 3266 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" 3267 <<
", Block size: " << getBlockSize ()
3273 template<
class Scalar,
class LO,
class GO,
class Node>
3277 const Teuchos::EVerbosityLevel verbLevel)
const 3279 using Teuchos::ArrayRCP;
3280 using Teuchos::CommRequest;
3281 using Teuchos::FancyOStream;
3282 using Teuchos::getFancyOStream;
3283 using Teuchos::ireceive;
3284 using Teuchos::isend;
3285 using Teuchos::outArg;
3286 using Teuchos::TypeNameTraits;
3287 using Teuchos::VERB_DEFAULT;
3288 using Teuchos::VERB_NONE;
3289 using Teuchos::VERB_LOW;
3290 using Teuchos::VERB_MEDIUM;
3292 using Teuchos::VERB_EXTREME;
3294 using Teuchos::wait;
3296 #ifdef HAVE_TPETRA_DEBUG 3297 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::describe: ";
3298 #endif // HAVE_TPETRA_DEBUG 3301 const Teuchos::EVerbosityLevel vl =
3302 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3304 if (vl == VERB_NONE) {
3309 Teuchos::OSTab tab0 (out);
3311 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3312 Teuchos::OSTab tab1 (out);
3314 out <<
"Template parameters:" << endl;
3316 Teuchos::OSTab tab2 (out);
3317 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3318 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3319 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3320 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3322 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3323 <<
"Global dimensions: [" 3324 << graph_.getDomainMap ()->getGlobalNumElements () <<
", " 3325 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3327 const LO blockSize = getBlockSize ();
3328 out <<
"Block size: " << blockSize << endl;
3331 if (vl >= VERB_MEDIUM) {
3332 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3333 const int myRank = comm.getRank ();
3335 out <<
"Row Map:" << endl;
3337 getRowMap()->describe (out, vl);
3339 if (! getColMap ().is_null ()) {
3340 if (getColMap() == getRowMap()) {
3342 out <<
"Column Map: same as row Map" << endl;
3347 out <<
"Column Map:" << endl;
3349 getColMap ()->describe (out, vl);
3352 if (! getDomainMap ().is_null ()) {
3353 if (getDomainMap () == getRowMap ()) {
3355 out <<
"Domain Map: same as row Map" << endl;
3358 else if (getDomainMap () == getColMap ()) {
3360 out <<
"Domain Map: same as column Map" << endl;
3365 out <<
"Domain Map:" << endl;
3367 getDomainMap ()->describe (out, vl);
3370 if (! getRangeMap ().is_null ()) {
3371 if (getRangeMap () == getDomainMap ()) {
3373 out <<
"Range Map: same as domain Map" << endl;
3376 else if (getRangeMap () == getRowMap ()) {
3378 out <<
"Range Map: same as row Map" << endl;
3383 out <<
"Range Map: " << endl;
3385 getRangeMap ()->describe (out, vl);
3390 if (vl >= VERB_EXTREME) {
3396 const_cast<this_type*
> (
this)->
template sync<Kokkos::HostSpace> ();
3398 #ifdef HAVE_TPETRA_DEBUG 3399 TEUCHOS_TEST_FOR_EXCEPTION
3400 (this->
template need_sync<Kokkos::HostSpace> (), std::logic_error,
3401 prefix <<
"Right after sync to host, the matrix claims that it needs " 3402 "sync to host. Please report this bug to the Tpetra developers.");
3403 #endif // HAVE_TPETRA_DEBUG 3405 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3406 const int myRank = comm.getRank ();
3407 const int numProcs = comm.getSize ();
3410 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3411 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3412 FancyOStream& os = *osPtr;
3413 os <<
"Process " << myRank <<
":" << endl;
3414 Teuchos::OSTab tab2 (os);
3416 const map_type& meshRowMap = * (graph_.getRowMap ());
3417 const map_type& meshColMap = * (graph_.getColMap ());
3422 os <<
"Row " << meshGblRow <<
": {";
3424 const LO* lclColInds = NULL;
3425 Scalar* vals = NULL;
3427 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds);
3429 for (LO k = 0; k < numInds; ++k) {
3432 os <<
"Col " << gblCol <<
": [";
3433 for (LO i = 0; i < blockSize; ++i) {
3434 for (LO j = 0; j < blockSize; ++j) {
3435 os << vals[blockSize*blockSize*k + i*blockSize + j];
3436 if (j + 1 < blockSize) {
3440 if (i + 1 < blockSize) {
3445 if (k + 1 < numInds) {
3455 out << lclOutStrPtr->str ();
3456 lclOutStrPtr = Teuchos::null;
3459 const int sizeTag = 1337;
3460 const int dataTag = 1338;
3462 ArrayRCP<char> recvDataBuf;
3466 for (
int p = 1; p < numProcs; ++p) {
3469 ArrayRCP<size_t> recvSize (1);
3471 RCP<CommRequest<int> > recvSizeReq =
3472 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3473 wait<int> (comm, outArg (recvSizeReq));
3474 const size_t numCharsToRecv = recvSize[0];
3481 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3482 recvDataBuf.resize (numCharsToRecv + 1);
3484 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3486 RCP<CommRequest<int> > recvDataReq =
3487 ireceive<int, char> (recvData, p, dataTag, comm);
3488 wait<int> (comm, outArg (recvDataReq));
3493 recvDataBuf[numCharsToRecv] =
'\0';
3494 out << recvDataBuf.getRawPtr ();
3496 else if (myRank == p) {
3500 const std::string stringToSend = lclOutStrPtr->str ();
3501 lclOutStrPtr = Teuchos::null;
3504 const size_t numCharsToSend = stringToSend.size ();
3505 ArrayRCP<size_t> sendSize (1);
3506 sendSize[0] = numCharsToSend;
3507 RCP<CommRequest<int> > sendSizeReq =
3508 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3509 wait<int> (comm, outArg (sendSizeReq));
3517 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3518 RCP<CommRequest<int> > sendDataReq =
3519 isend<int, char> (sendData, 0, dataTag, comm);
3520 wait<int> (comm, outArg (sendDataReq));
3526 template<
class Scalar,
class LO,
class GO,
class Node>
3527 Teuchos::RCP<const Teuchos::Comm<int> >
3531 return graph_.getComm();
3534 template<
class Scalar,
class LO,
class GO,
class Node>
3539 return graph_.getNode();
3543 template<
class Scalar,
class LO,
class GO,
class Node>
3548 return graph_.getGlobalNumCols();
3551 template<
class Scalar,
class LO,
class GO,
class Node>
3556 return graph_.getNodeNumCols();
3559 template<
class Scalar,
class LO,
class GO,
class Node>
3564 return graph_.getIndexBase();
3567 template<
class Scalar,
class LO,
class GO,
class Node>
3572 return graph_.getGlobalNumEntries();
3575 template<
class Scalar,
class LO,
class GO,
class Node>
3580 return graph_.getNodeNumEntries();
3583 template<
class Scalar,
class LO,
class GO,
class Node>
3588 return graph_.getNumEntriesInGlobalRow(globalRow);
3591 template<
class Scalar,
class LO,
class GO,
class Node>
3596 return graph_.getGlobalNumDiags();
3599 template<
class Scalar,
class LO,
class GO,
class Node>
3604 return graph_.getNodeNumDiags();
3607 template<
class Scalar,
class LO,
class GO,
class Node>
3612 return graph_.getGlobalMaxNumRowEntries();
3615 template<
class Scalar,
class LO,
class GO,
class Node>
3620 return graph_.hasColMap();
3623 template<
class Scalar,
class LO,
class GO,
class Node>
3628 return graph_.isLowerTriangular();
3631 template<
class Scalar,
class LO,
class GO,
class Node>
3636 return graph_.isUpperTriangular();
3639 template<
class Scalar,
class LO,
class GO,
class Node>
3644 return graph_.isLocallyIndexed();
3647 template<
class Scalar,
class LO,
class GO,
class Node>
3652 return graph_.isGloballyIndexed();
3655 template<
class Scalar,
class LO,
class GO,
class Node>
3660 return graph_.isFillComplete ();
3663 template<
class Scalar,
class LO,
class GO,
class Node>
3672 template<
class Scalar,
class LO,
class GO,
class Node>
3676 const Teuchos::ArrayView<GO> &Indices,
3677 const Teuchos::ArrayView<Scalar> &Values,
3678 size_t &NumEntries)
const 3680 TEUCHOS_TEST_FOR_EXCEPTION(
3681 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: " 3682 "This class doesn't support global matrix indexing.");
3686 template<
class Scalar,
class LO,
class GO,
class Node>
3690 Teuchos::ArrayView<const GO> &indices,
3691 Teuchos::ArrayView<const Scalar> &values)
const 3693 TEUCHOS_TEST_FOR_EXCEPTION(
3694 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 3695 "This class doesn't support global matrix indexing.");
3699 template<
class Scalar,
class LO,
class GO,
class Node>
3703 Teuchos::ArrayView<const LO> &indices,
3704 Teuchos::ArrayView<const Scalar> &values)
const 3706 TEUCHOS_TEST_FOR_EXCEPTION(
3707 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: " 3708 "This class doesn't support local matrix indexing.");
3713 template<
class Scalar,
class LO,
class GO,
class Node>
3718 #ifdef HAVE_TPETRA_DEBUG 3719 const char prefix[] =
3720 "Tpetra::Experimental::BlockCrsMatrix::getLocalDiagCopy: ";
3721 #endif // HAVE_TPETRA_DEBUG 3723 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3725 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3726 graph_.getLocalDiagOffsets (diagOffsets);
3729 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3732 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3734 #ifdef HAVE_TPETRA_DEBUG 3735 TEUCHOS_TEST_FOR_EXCEPTION
3736 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
3737 prefix <<
"The matrix's data were last modified on device, but have " 3738 "not been sync'd to host. Please sync to host (by calling " 3739 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 3741 #endif // HAVE_TPETRA_DEBUG 3747 auto vals_host_out =
3748 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
3749 Scalar* vals_host_out_raw =
3750 reinterpret_cast<Scalar*
> (vals_host_out.ptr_on_device ());
3753 size_t rowOffset = 0;
3755 LO bs = getBlockSize();
3756 for(
size_t r=0; r<getNodeNumRows(); r++)
3759 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3760 for(
int b=0; b<bs; b++)
3765 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3768 diag.template sync<memory_space> ();
3771 template<
class Scalar,
class LO,
class GO,
class Node>
3776 TEUCHOS_TEST_FOR_EXCEPTION(
3777 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::leftScale: " 3778 "not implemented.");
3782 template<
class Scalar,
class LO,
class GO,
class Node>
3787 TEUCHOS_TEST_FOR_EXCEPTION(
3788 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::rightScale: " 3789 "not implemented.");
3793 template<
class Scalar,
class LO,
class GO,
class Node>
3794 Teuchos::RCP<const Tpetra::RowGraph<LO, GO, Node> >
3801 template<
class Scalar,
class LO,
class GO,
class Node>
3806 TEUCHOS_TEST_FOR_EXCEPTION(
3807 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: " 3808 "not implemented.");
3819 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \ 3820 template class Experimental::BlockCrsMatrix< S, LO, GO, NODE >; 3822 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP virtual Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual void copyAndPermute(const Tpetra::SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Kokkos::View< impl_scalar_type *, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_vec_type
The type used to access nonconst vector blocks.
size_t getNodeNumEntries() const
Returns the local number of entries in the graph.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
global_size_t getGlobalNumRows() const
get the global number of block rows
Kokkos::View< impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LocalOrdinal getMaxLocalIndex() const
The maximum local index on the calling process.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map associated with the domain of this graph.
One or more distributed dense vectors.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
MultiVector for multiple degrees of freedom per mesh point.
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
virtual Teuchos::RCP< const Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
std::string description() const
One-line description of this object.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual size_t getNodeNumDiags() const
The number of local diagonal entries, based on global row/column index comparisons.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
bool locallySameAs(const Map< LocalOrdinal, GlobalOrdinal, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
virtual bool isLowerTriangular() const
Whether this matrix is lower triangular.
virtual global_size_t getGlobalNumDiags() const
The number of global diagonal entries, based on global row/column index comparisons.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Kokkos::View< char *, D, Kokkos::MemoryUnmanaged > output_buffer_type
The type of an output buffer of bytes.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map associated with the domain of this graph.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
size_t global_size_t
Global size_t object.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Insert new values that don't currently exist.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const MultiVector< Scalar, LO, GO, Node > &B, const MultiVector< Scalar, LO, GO, Node > &D, const Teuchos::ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< Node > getNode() const
The Kokkos Node instance.
Sets up and executes a communication plan for a Tpetra DistObject.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const MultiVector< Scalar, LO, GO, Node > &B, const MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
size_t getNodeMaxNumRowEntries() const
The maximum number of entries across all rows/columns on this node.
Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node >::mag_type mag_type
Type of a norm result.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
virtual void leftScale(const Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
Kokkos::View< const char *, D, Kokkos::MemoryUnmanaged > input_buffer_type
The type of an input buffer of bytes.
Replace old value with maximum of magnitudes of old and new values.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Abstract base class for objects that can be the source of an Import or Export operation.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all nodes.
double scalar_type
Default value of Scalar template parameter.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
Replace existing values with new values.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
Kokkos::View< const impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
Replace old values with zero.
virtual bool hasColMap() const
Whether this matrix has a well-defined column map.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
virtual bool isUpperTriangular() const
Whether this matrix is upper triangular.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
virtual void rightScale(const Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
virtual GO getIndexBase() const
The index base for global indices in this matrix.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D_inv, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
A distributed dense vector.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
device_type::memory_space memory_space
The Kokkos memory space that this class uses.
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
virtual bool isFillComplete() const
Whether fillComplete() has been called.
BlockMultiVector< Scalar, LO, GO, Node >::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
Declaration of Tpetra::Experimental::BlockCrsMatrix.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
LocalOrdinal getMinLocalIndex() const
The minimum local index.
size_t getNodeNumRows() const
get the local number of block rows
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
local_graph_type getLocalGraph() const
Get the local graph.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.