42 #ifndef TPETRA_IMPORT_UTIL2_HPP 43 #define TPETRA_IMPORT_UTIL2_HPP 50 #include "Tpetra_ConfigDefs.hpp" 52 #include "Tpetra_Import.hpp" 53 #include "Tpetra_HashTable.hpp" 54 #include "Tpetra_Map.hpp" 56 #include "Tpetra_Distributor.hpp" 57 #include "Kokkos_DualView.hpp" 58 #include <Teuchos_Array.hpp> 70 template<
class T,
class D>
71 Kokkos::View<T*, D, Kokkos::MemoryUnmanaged>
72 getNonconstView (
const Teuchos::ArrayView<T>& x)
74 typedef Kokkos::View<T*, D, Kokkos::MemoryUnmanaged> view_type;
75 typedef typename view_type::size_type size_type;
76 const size_type numEnt =
static_cast<size_type
> (x.size ());
77 return view_type (x.getRawPtr (), numEnt);
80 template<
class T,
class D>
81 Kokkos::View<const T*, D, Kokkos::MemoryUnmanaged>
82 getConstView (
const Teuchos::ArrayView<const T>& x)
84 typedef Kokkos::View<const T*, D, Kokkos::MemoryUnmanaged> view_type;
85 typedef typename view_type::size_type size_type;
86 const size_type numEnt =
static_cast<size_type
> (x.size ());
87 return view_type (x.getRawPtr (), numEnt);
93 struct GetHostExecSpace {
94 typedef typename Space::execution_space execution_space;
95 typedef typename Kokkos::View<int*, execution_space>::HostMirror::execution_space host_execution_space;
101 namespace Import_Util {
117 template<
typename Scalar,
118 typename LocalOrdinal,
119 typename GlobalOrdinal,
123 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
124 Kokkos::DualView<char*, typename Node::device_type>& exports,
125 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
126 size_t& constantNumPackets,
128 const Teuchos::ArrayView<const int>& SourcePids);
145 template<
typename Scalar,
146 typename LocalOrdinal,
147 typename GlobalOrdinal,
151 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
152 const Teuchos::ArrayView<const char> &imports,
153 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
154 size_t constantNumPackets,
158 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
159 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs);
175 template<
typename Scalar,
176 typename LocalOrdinal,
177 typename GlobalOrdinal,
181 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
182 const Teuchos::ArrayView<const char>& imports,
183 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
184 size_t constantNumPackets,
188 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
189 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
190 size_t TargetNumRows,
191 size_t TargetNumNonzeros,
193 const Teuchos::ArrayView<size_t>& rowPointers,
194 const Teuchos::ArrayView<GlobalOrdinal>& columnIndices,
195 const Teuchos::ArrayView<Scalar>& values,
196 const Teuchos::ArrayView<const int>& SourcePids,
197 Teuchos::Array<int>& TargetPids);
201 template<
typename Scalar,
typename Ordinal>
204 const Teuchos::ArrayView<Ordinal>& CRS_colind,
205 const Teuchos::ArrayView<Scalar>&CRS_vals);
211 template<
typename Scalar,
typename Ordinal>
214 const Teuchos::ArrayView<Ordinal>& CRS_colind,
215 const Teuchos::ArrayView<Scalar>& CRS_vals);
232 template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
235 const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID,
236 const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID,
238 const Teuchos::ArrayView<const int> &owningPids,
239 Teuchos::Array<int> &remotePids,
269 template<
class LO,
class GO,
class D>
271 packRowCount (
const size_t numEnt,
272 const size_t numBytesPerValue)
285 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
286 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
287 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (lid);
288 const size_t valsLen = numEnt * numBytesPerValue;
289 return numEntLen + gidsLen + pidsLen + valsLen;
293 template<
class LO,
class D>
297 const size_t numBytes,
298 const size_t numBytesPerValue)
300 using Kokkos::subview;
302 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
303 typedef typename input_buffer_type::size_type size_type;
307 return static_cast<size_t> (0);
311 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
312 #ifdef HAVE_TPETRA_DEBUG 313 TEUCHOS_TEST_FOR_EXCEPTION(
314 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 315 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
317 #endif // HAVE_TPETRA_DEBUG 318 const std::pair<size_type, size_type> rng (offset, offset + theNumBytes);
319 input_buffer_type inBuf = subview (imports, rng);
320 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
321 (void)actualNumBytes;
322 #ifdef HAVE_TPETRA_DEBUG 323 TEUCHOS_TEST_FOR_EXCEPTION(
324 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 325 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
327 #endif // HAVE_TPETRA_DEBUG 328 return static_cast<size_t> (numEntLO);
333 template<
class ST,
class LO,
class GO,
class D>
341 const size_t numBytesPerValue)
343 using Kokkos::subview;
350 typedef typename PackTraits<LO, D>::output_buffer_type output_buffer_type;
351 typedef typename output_buffer_type::size_type size_type;
352 typedef typename std::pair<size_type, size_type> pair_type;
360 const LO numEntLO =
static_cast<size_t> (numEnt);
363 const size_t numEntBeg = offset;
364 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
365 const size_t gidsBeg = numEntBeg + numEntLen;
366 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
367 const size_t pidsBeg = gidsBeg + gidsLen;
368 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (pid);
369 const size_t valsBeg = pidsBeg + pidsLen;
370 const size_t valsLen = numEnt * numBytesPerValue;
372 output_buffer_type numEntOut =
373 subview (exports, pair_type (numEntBeg, numEntBeg + numEntLen));
374 output_buffer_type gidsOut =
375 subview (exports, pair_type (gidsBeg, gidsBeg + gidsLen));
376 output_buffer_type pidsOut =
377 subview (exports, pair_type (pidsBeg, pidsBeg + pidsLen));
378 output_buffer_type valsOut =
379 subview (exports, pair_type (valsBeg, valsBeg + valsLen));
381 size_t numBytesOut = 0;
382 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
383 numBytesOut += PackTraits<GO, D>::packArray (gidsOut, gidsIn, numEnt);
384 numBytesOut += PackTraits<int, D>::packArray (pidsOut, pidsIn, numEnt);
385 numBytesOut += PackTraits<ST, D>::packArray (valsOut, valsIn, numEnt);
387 const size_t expectedNumBytes = numEntLen + gidsLen + pidsLen + valsLen;
388 TEUCHOS_TEST_FOR_EXCEPTION(
389 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 390 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 391 << expectedNumBytes <<
".");
397 template<
class ST,
class LO,
class GO,
class D>
404 const size_t numBytes,
406 const size_t numBytesPerValue)
408 using Kokkos::subview;
415 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
416 typedef typename input_buffer_type::size_type size_type;
417 typedef typename std::pair<size_type, size_type> pair_type;
423 TEUCHOS_TEST_FOR_EXCEPTION(
424 static_cast<size_t> (imports.dimension_0 ()) <= offset, std::logic_error,
425 "unpackRow: imports.dimension_0() = " << imports.dimension_0 () <<
426 " <= offset = " << offset <<
".");
427 TEUCHOS_TEST_FOR_EXCEPTION(
428 static_cast<size_t> (imports.dimension_0 ()) < offset + numBytes,
429 std::logic_error,
"unpackRow: imports.dimension_0() = " 430 << imports.dimension_0 () <<
" < offset + numBytes = " 431 << (offset + numBytes) <<
".");
437 const size_t numEntBeg = offset;
438 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
439 const size_t gidsBeg = numEntBeg + numEntLen;
440 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
441 const size_t pidsBeg = gidsBeg + gidsLen;
442 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (pid);
443 const size_t valsBeg = pidsBeg + pidsLen;
444 const size_t valsLen = numEnt * numBytesPerValue;
446 input_buffer_type numEntIn = subview (imports, pair_type (numEntBeg, numEntBeg + numEntLen));
447 input_buffer_type gidsIn = subview (imports, pair_type (gidsBeg, gidsBeg + gidsLen));
448 input_buffer_type pidsIn = subview (imports, pair_type (pidsBeg, pidsBeg + pidsLen));
449 input_buffer_type valsIn = subview (imports, pair_type (valsBeg, valsBeg + valsLen));
451 size_t numBytesOut = 0;
453 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
454 TEUCHOS_TEST_FOR_EXCEPTION(
455 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
456 "unpackRow: Expected number of entries " << numEnt
457 <<
" != actual number of entries " << numEntOut <<
".");
459 numBytesOut += PackTraits<GO, D>::unpackArray (gidsOut, gidsIn, numEnt);
460 numBytesOut += PackTraits<int, D>::unpackArray (pidsOut, pidsIn, numEnt);
461 numBytesOut += PackTraits<ST, D>::unpackArray (valsOut, valsIn, numEnt);
463 TEUCHOS_TEST_FOR_EXCEPTION(
464 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 465 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
466 const size_t expectedNumBytes = numEntLen + gidsLen + pidsLen + valsLen;
467 TEUCHOS_TEST_FOR_EXCEPTION(
468 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 469 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 470 << expectedNumBytes <<
".");
482 template<
class ST,
class DT,
483 const bool outputIsHostMemory =
484 std::is_same<typename DT::memory_space, Kokkos::HostSpace>::value>
485 struct Get1DConstViewOfUnmanagedHostArray {};
487 template<
class ST,
class DT>
488 struct Get1DConstViewOfUnmanagedHostArray<ST, DT, true> {
489 typedef Kokkos::View<const ST*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> output_view_type;
491 static output_view_type
492 getView (
const char [],
const ST* x_raw,
const size_t x_len)
496 return output_view_type (x_raw, x_len);
500 template<
class ST,
class DT>
501 struct Get1DConstViewOfUnmanagedHostArray<ST, DT, false> {
502 typedef Kokkos::View<const ST*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> input_view_type;
503 typedef Kokkos::View<const ST*, DT> output_view_type;
505 static output_view_type
506 getView (
const char label[],
const ST* x_raw,
const size_t x_len)
508 input_view_type x_in (x_raw, x_len);
515 Kokkos::View<ST*, DT> x_out (std::string (label), x_len);
521 template<
class ST,
class DT>
522 typename Get1DConstViewOfUnmanagedHostArray<ST, DT>::output_view_type
523 get1DConstViewOfUnmanagedHostArray (
const char label[],
const ST* x_raw,
const size_t x_len)
525 return Get1DConstViewOfUnmanagedHostArray<ST, DT>::getView (label, x_raw, x_len);
532 namespace Import_Util {
534 template<
typename Scalar,
535 typename LocalOrdinal,
536 typename GlobalOrdinal,
540 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
541 Kokkos::DualView<char*, typename Node::device_type>& exports,
542 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
543 size_t& constantNumPackets,
545 const Teuchos::ArrayView<const int>& SourcePids)
548 using Kokkos::MemoryUnmanaged;
549 using Kokkos::subview;
551 using Teuchos::Array;
552 using Teuchos::ArrayView;
555 typedef LocalOrdinal LO;
556 typedef GlobalOrdinal GO;
558 typedef typename matrix_type::impl_scalar_type ST;
559 typedef typename Node::device_type device_type;
560 typedef typename device_type::execution_space execution_space;
566 typedef typename Kokkos::DualView<char*, device_type>::t_host::device_type HDS;
568 typedef typename ArrayView<const LO>::size_type size_type;
569 typedef std::pair<typename View<int*, HDS>::size_type,
570 typename View<int*, HDS>::size_type> pair_type;
571 const char prefix[] =
"Tpetra::Import_Util::packAndPrepareWithOwningPIDs: ";
578 TEUCHOS_TEST_FOR_EXCEPTION(
580 prefix <<
"SourceMatrix must be locally indexed.");
581 TEUCHOS_TEST_FOR_EXCEPTION(
582 SourceMatrix.
getColMap ().is_null (), std::logic_error,
583 prefix <<
"The source matrix claims to be locally indexed, but its column " 584 "Map is null. This should never happen. Please report this bug to the " 585 "Tpetra developers.");
586 const size_type numExportLIDs = exportLIDs.size ();
587 TEUCHOS_TEST_FOR_EXCEPTION(
588 numExportLIDs != numPacketsPerLID.size (), std::invalid_argument, prefix
589 <<
"exportLIDs.size() = " << numExportLIDs <<
"!= numPacketsPerLID.size() " 590 <<
" = " << numPacketsPerLID.size () <<
".");
591 TEUCHOS_TEST_FOR_EXCEPTION(
592 static_cast<size_t> (SourcePids.size ()) != SourceMatrix.
getColMap ()->getNodeNumElements (),
593 std::invalid_argument, prefix <<
"SourcePids.size() = " 594 << SourcePids.size ()
595 <<
"!= SourceMatrix.getColMap()->getNodeNumElements() = " 596 << SourceMatrix.
getColMap ()->getNodeNumElements () <<
".");
601 constantNumPackets = 0;
606 size_t totalNumBytes = 0;
607 size_t totalNumEntries = 0;
608 size_t maxRowLength = 0;
609 for (size_type i = 0; i < numExportLIDs; ++i) {
610 const LO lclRow = exportLIDs[i];
615 size_t numBytesPerValue = 0;
621 ArrayView<const Scalar> valsView;
622 ArrayView<const LO> lidsView;
624 const ST* valsViewRaw =
reinterpret_cast<const ST*
> (valsView.getRawPtr ());
625 View<const ST*, Kokkos::HostSpace, MemoryUnmanaged> valsViewK (valsViewRaw, valsView.size ());
626 TEUCHOS_TEST_FOR_EXCEPTION(
627 static_cast<size_t> (valsViewK.dimension_0 ()) != numEnt,
628 std::logic_error, prefix <<
"Local row " << i <<
" claims to have " 629 << numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 630 "has " << valsViewK.dimension_0 () <<
" entry/ies. This should never " 631 "happen. Please report this bug to the Tpetra developers.");
640 numBytesPerValue = PackTraits<ST, Kokkos::HostSpace>::packValueCount (valsViewK(0));
646 const size_t numBytes =
647 packRowCount<LO, GO, Kokkos::HostSpace> (numEnt, numBytesPerValue);
648 numPacketsPerLID[i] = numBytes;
649 totalNumBytes += numBytes;
650 totalNumEntries += numEnt;
651 maxRowLength = std::max (maxRowLength, numEnt);
657 if (totalNumEntries > 0) {
658 if (static_cast<size_t> (exports.dimension_0 ()) !=
659 static_cast<size_t> (totalNumBytes)) {
663 execution_space::fence ();
664 exports = Kokkos::DualView<char*, device_type> (
"exports", totalNumBytes);
665 execution_space::fence ();
670 exports.template modify<Kokkos::HostSpace> ();
684 auto exports_h = exports.template view<Kokkos::HostSpace> ();
693 const map_type& colMap = * (SourceMatrix.
getColMap ());
696 typename View<GO*, device_type>::HostMirror gids;
697 typename View<int*, device_type>::HostMirror pids;
701 gids = PackTraits<GO, HDS>::allocateArray (gid, maxRowLength,
"gids");
702 pids = PackTraits<int, HDS>::allocateArray (pid, maxRowLength,
"pids");
705 for (size_type i = 0; i < numExportLIDs; i++) {
706 const LO lclRow = exportLIDs[i];
709 ArrayView<const Scalar> valsView;
710 ArrayView<const LO> lidsView;
712 const ST* valsViewRaw =
reinterpret_cast<const ST*
> (valsView.getRawPtr ());
719 auto valsViewK = get1DConstViewOfUnmanagedHostArray<ST, HDS> (
"valsViewK", valsViewRaw,
static_cast<size_t> (valsView.size ()));
720 const size_t numEnt =
static_cast<size_t> (valsViewK.dimension_0 ());
729 const size_t numBytesPerValue = numEnt == 0 ?
730 static_cast<size_t> (0) :
731 PackTraits<ST, Kokkos::HostSpace>::packValueCount (valsViewK(0));
734 auto gidsView = subview (gids, pair_type (0, numEnt));
735 auto pidsView = subview (pids, pair_type (0, numEnt));
736 for (
size_t k = 0; k < numEnt; ++k) {
737 gidsView(k) = colMap.getGlobalElement (lidsView[k]);
738 pidsView(k) = SourcePids[lidsView[k]];
742 const size_t numBytes =
743 packRow<ST, LO, GO, HDS> (exports_h, offset, numEnt,
744 gidsView, pidsView, valsViewK,
750 #ifdef HAVE_TPETRA_DEBUG 751 TEUCHOS_TEST_FOR_EXCEPTION(
752 offset != totalNumBytes, std::logic_error, prefix <<
"At end of method, " 753 "the final offset (in bytes) " << offset <<
" does not equal the total " 754 "number of bytes packed " << totalNumBytes <<
". Please report this bug " 755 "to the Tpetra developers.");
756 #endif // HAVE_TPETRA_DEBUG 760 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
763 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
764 const Teuchos::ArrayView<const char> &imports,
765 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
766 size_t constantNumPackets,
770 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
771 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
773 using Kokkos::MemoryUnmanaged;
775 typedef LocalOrdinal LO;
776 typedef GlobalOrdinal GO;
778 typedef typename matrix_type::impl_scalar_type ST;
779 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
780 typedef typename Node::execution_space execution_space;
781 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
782 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
784 TEUCHOS_TEST_FOR_EXCEPTION(
785 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
786 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size () <<
" != " 787 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
791 TEUCHOS_TEST_FOR_EXCEPTION(
792 ! locallyIndexed, std::invalid_argument, prefix <<
"The input CrsMatrix " 793 "'SourceMatrix' must be locally indexed.");
794 TEUCHOS_TEST_FOR_EXCEPTION(
795 importLIDs.size () != numPacketsPerLID.size (), std::invalid_argument,
796 prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 797 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
".");
799 View<const char*, HES, MemoryUnmanaged> importsK (imports.getRawPtr (),
806 for (
size_t sourceLID = 0; sourceLID < numSameIDs; ++sourceLID) {
807 const LO srcLID =
static_cast<LO
> (sourceLID);
812 const size_type numPermuteToLIDs = permuteToLIDs.size ();
813 for (size_type p = 0; p < numPermuteToLIDs; ++p) {
819 const size_type numImportLIDs = importLIDs.size ();
820 for (size_type i = 0; i < numImportLIDs; ++i) {
821 const size_t numBytes = numPacketsPerLID[i];
825 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset,
826 numBytes,
sizeof (ST));
833 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
836 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
837 const Teuchos::ArrayView<const char>& imports,
838 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
839 const size_t constantNumPackets,
842 const size_t numSameIDs,
843 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
844 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
845 size_t TargetNumRows,
846 size_t TargetNumNonzeros,
847 const int MyTargetPID,
848 const Teuchos::ArrayView<size_t>& CSR_rowptr,
849 const Teuchos::ArrayView<GlobalOrdinal>& CSR_colind,
850 const Teuchos::ArrayView<Scalar>& CSR_vals,
851 const Teuchos::ArrayView<const int>& SourcePids,
852 Teuchos::Array<int>& TargetPids)
855 using Kokkos::MemoryUnmanaged;
856 using Kokkos::subview;
858 using Teuchos::ArrayRCP;
859 using Teuchos::ArrayView;
861 using Teuchos::av_reinterpret_cast;
862 using Teuchos::outArg;
863 using Teuchos::REDUCE_MAX;
864 using Teuchos::reduceAll;
865 typedef LocalOrdinal LO;
866 typedef GlobalOrdinal GO;
867 typedef typename Node::execution_space execution_space;
868 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
870 typedef typename matrix_type::impl_scalar_type ST;
872 typedef typename ArrayView<const LO>::size_type size_type;
875 const char prefix[] =
"Tpetra::Import_Util::unpackAndCombineIntoCrsArrays: ";
877 const size_t N = TargetNumRows;
878 const size_t mynnz = TargetNumNonzeros;
881 const int MyPID = MyTargetPID;
883 TEUCHOS_TEST_FOR_EXCEPTION(
884 TargetNumRows + 1 != static_cast<size_t> (CSR_rowptr.size ()),
885 std::invalid_argument, prefix <<
"CSR_rowptr.size() = " <<
886 CSR_rowptr.size () <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
887 TEUCHOS_TEST_FOR_EXCEPTION(
888 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
889 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
890 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
891 const size_type numImportLIDs = importLIDs.size ();
892 TEUCHOS_TEST_FOR_EXCEPTION(
893 numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
894 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != " 895 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
898 View<const char*, HES, MemoryUnmanaged> importsK (imports.getRawPtr (),
901 for (
size_t i = 0; i< N+1; ++i) {
906 for (
size_t i = 0; i < numSameIDs; ++i) {
911 size_t numPermuteIDs = permuteToLIDs.size ();
912 for (
size_t i = 0; i < numPermuteIDs; ++i) {
913 CSR_rowptr[permuteToLIDs[i]] =
920 for (size_type k = 0; k < numImportLIDs; ++k) {
921 const size_t numBytes = numPacketsPerLID[k];
925 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset,
926 numBytes,
sizeof (ST));
927 CSR_rowptr[importLIDs[k]] += numEnt;
934 Teuchos::Array<size_t> NewStartRow (N + 1);
937 size_t last_len = CSR_rowptr[0];
939 for (
size_t i = 1; i < N+1; ++i) {
940 size_t new_len = CSR_rowptr[i];
941 CSR_rowptr[i] = last_len + CSR_rowptr[i-1];
942 NewStartRow[i] = CSR_rowptr[i];
946 TEUCHOS_TEST_FOR_EXCEPTION(
947 CSR_rowptr[N] != mynnz, std::invalid_argument, prefix <<
"CSR_rowptr[last]" 948 " = " << CSR_rowptr[N] <<
"!= mynnz = " << mynnz <<
".");
951 if (static_cast<size_t> (TargetPids.size ()) != mynnz) {
952 TargetPids.resize (mynnz);
954 TargetPids.assign (mynnz, -1);
957 ArrayRCP<const size_t> Source_rowptr_RCP;
958 ArrayRCP<const LO> Source_colind_RCP;
959 ArrayRCP<const Scalar> Source_vals_RCP;
960 SourceMatrix.getAllValues (Source_rowptr_RCP, Source_colind_RCP, Source_vals_RCP);
961 ArrayView<const size_t> Source_rowptr = Source_rowptr_RCP ();
962 ArrayView<const LO> Source_colind = Source_colind_RCP ();
963 ArrayView<const Scalar> Source_vals = Source_vals_RCP ();
965 const map_type& sourceColMap = * (SourceMatrix.
getColMap());
966 ArrayView<const GO> globalColElts = sourceColMap.getNodeElementList();
969 for (
size_t i = 0; i < numSameIDs; ++i) {
970 size_t FromRow = Source_rowptr[i];
971 size_t ToRow = CSR_rowptr[i];
972 NewStartRow[i] += Source_rowptr[i+1] - Source_rowptr[i];
974 for (
size_t j = Source_rowptr[i]; j < Source_rowptr[i+1]; ++j) {
975 CSR_vals[ToRow + j - FromRow] = Source_vals[j];
976 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
977 TargetPids[ToRow + j - FromRow] =
978 (SourcePids[Source_colind[j]] != MyPID) ?
979 SourcePids[Source_colind[j]] : -1;
983 size_t numBytesPerValue = 0;
984 if (PackTraits<ST, HES>::compileTimeSize) {
986 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
1001 if (Source_rowptr.size () == 0 || Source_rowptr[Source_rowptr.size () - 1] == 0) {
1002 numBytesPerValue = PackTraits<ST, HES>::packValueCount (CSR_vals[0]);
1005 numBytesPerValue = PackTraits<ST, HES>::packValueCount (Source_vals[0]);
1007 size_t lclNumBytesPerValue = numBytesPerValue;
1008 Teuchos::RCP<const Teuchos::Comm<int> > comm = SourceMatrix.
getComm ();
1009 reduceAll<int, size_t> (*comm, REDUCE_MAX, lclNumBytesPerValue,
1010 outArg (numBytesPerValue));
1014 for (
size_t i = 0; i < numPermuteIDs; ++i) {
1015 LO FromLID = permuteFromLIDs[i];
1016 size_t FromRow = Source_rowptr[FromLID];
1017 size_t ToRow = CSR_rowptr[permuteToLIDs[i]];
1019 NewStartRow[permuteToLIDs[i]] += Source_rowptr[FromLID+1]-Source_rowptr[FromLID];
1021 for (
size_t j = Source_rowptr[FromLID]; j < Source_rowptr[FromLID+1]; ++j) {
1022 CSR_vals[ToRow + j - FromRow] = Source_vals[j];
1023 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
1024 TargetPids[ToRow + j - FromRow] =
1025 (SourcePids[Source_colind[j]] != MyPID) ?
1026 SourcePids[Source_colind[j]] : -1;
1031 if (imports.size () > 0) {
1033 #ifdef HAVE_TPETRA_DEBUG 1037 for (
size_t i = 0; i < static_cast<size_t> (numImportLIDs); ++i) {
1038 const size_t numBytes = numPacketsPerLID[i];
1039 if (numBytes == 0) {
1046 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset, numBytes,
1048 const LO lclRow = importLIDs[i];
1049 const size_t StartRow = NewStartRow[lclRow];
1050 NewStartRow[lclRow] += numEnt;
1052 View<GO*, HES, MemoryUnmanaged> gidsOut =
1053 getNonconstView<GO, HES> (CSR_colind (StartRow, numEnt));
1054 View<int*, HES, MemoryUnmanaged> pidsOut =
1055 getNonconstView<int, HES> (TargetPids (StartRow, numEnt));
1056 ArrayView<Scalar> valsOutS = CSR_vals (StartRow, numEnt);
1057 View<ST*, HES, MemoryUnmanaged> valsOut =
1058 getNonconstView<ST, HES> (av_reinterpret_cast<ST> (valsOutS));
1060 const size_t numBytesOut =
1061 unpackRow<ST, LO, GO, HES> (gidsOut, pidsOut, valsOut, importsK,
1062 offset, numBytes, numEnt, numBytesPerValue);
1063 if (numBytesOut != numBytes) {
1064 #ifdef HAVE_TPETRA_DEBUG 1071 for (
size_t j = 0; j < numEnt; ++j) {
1072 const int pid = pidsOut[j];
1073 pidsOut[j] = (pid != MyPID) ? pid : -1;
1077 #ifdef HAVE_TPETRA_DEBUG 1078 TEUCHOS_TEST_FOR_EXCEPTION(
1079 offset != static_cast<size_t> (imports.size ()), std::logic_error, prefix
1080 <<
"After unpacking and counting all the import packets, the final offset" 1081 " in bytes " << offset <<
" != imports.size() = " << imports.size () <<
1082 ". Please report this bug to the Tpetra developers.");
1083 TEUCHOS_TEST_FOR_EXCEPTION(
1084 lclErr != 0, std::logic_error, prefix <<
"numBytes != numBytesOut " 1085 "somewhere in unpack loop. This should never happen. " 1086 "Please report this bug to the Tpetra developers.");
1087 #endif // HAVE_TPETRA_DEBUG 1092 template<
typename Scalar,
typename Ordinal>
1095 const Teuchos::ArrayView<Ordinal> & CRS_colind,
1096 const Teuchos::ArrayView<Scalar> &CRS_vals)
1101 size_t NumRows = CRS_rowptr.size()-1;
1102 size_t nnz = CRS_colind.size();
1104 for(
size_t i = 0; i < NumRows; i++){
1105 size_t start=CRS_rowptr[i];
1106 if(start >= nnz)
continue;
1108 Scalar* locValues = &CRS_vals[start];
1109 size_t NumEntries = CRS_rowptr[i+1] - start;
1110 Ordinal* locIndices = &CRS_colind[start];
1112 Ordinal n = NumEntries;
1116 Ordinal max = n - m;
1117 for(Ordinal j = 0; j < max; j++) {
1118 for(Ordinal k = j; k >= 0; k-=m) {
1119 if(locIndices[k+m] >= locIndices[k])
1121 Scalar dtemp = locValues[k+m];
1122 locValues[k+m] = locValues[k];
1123 locValues[k] = dtemp;
1124 Ordinal itemp = locIndices[k+m];
1125 locIndices[k+m] = locIndices[k];
1126 locIndices[k] = itemp;
1135 template<
typename Scalar,
typename Ordinal>
1138 const Teuchos::ArrayView<Ordinal> & CRS_colind,
1139 const Teuchos::ArrayView<Scalar> &CRS_vals)
1146 size_t NumRows = CRS_rowptr.size()-1;
1147 size_t nnz = CRS_colind.size();
1148 size_t new_curr=CRS_rowptr[0], old_curr=CRS_rowptr[0];
1150 for(
size_t i = 0; i < NumRows; i++){
1151 size_t start=CRS_rowptr[i];
1152 if(start >= nnz)
continue;
1154 Scalar* locValues = &CRS_vals[start];
1155 size_t NumEntries = CRS_rowptr[i+1] - start;
1156 Ordinal* locIndices = &CRS_colind[start];
1159 Ordinal n = NumEntries;
1163 Ordinal max = n - m;
1164 for(Ordinal j = 0; j < max; j++) {
1165 for(Ordinal k = j; k >= 0; k-=m) {
1166 if(locIndices[k+m] >= locIndices[k])
1168 Scalar dtemp = locValues[k+m];
1169 locValues[k+m] = locValues[k];
1170 locValues[k] = dtemp;
1171 Ordinal itemp = locIndices[k+m];
1172 locIndices[k+m] = locIndices[k];
1173 locIndices[k] = itemp;
1180 for(
size_t j=CRS_rowptr[i]; j < CRS_rowptr[i+1]; j++) {
1181 if(j > CRS_rowptr[i] && CRS_colind[j]==CRS_colind[new_curr-1]) {
1182 CRS_vals[new_curr-1] += CRS_vals[j];
1184 else if(new_curr==j) {
1188 CRS_colind[new_curr] = CRS_colind[j];
1189 CRS_vals[new_curr] = CRS_vals[j];
1194 CRS_rowptr[i] = old_curr;
1198 CRS_rowptr[NumRows] = new_curr;
1201 template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1204 const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
1205 const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
1207 const Teuchos::ArrayView<const int> &owningPIDs,
1208 Teuchos::Array<int> &remotePIDs,
1212 typedef LocalOrdinal LO;
1213 typedef GlobalOrdinal GO;
1216 const char prefix[] =
"lowCommunicationMakeColMapAndReindex: ";
1220 const map_type& domainMap = *domainMapRCP;
1226 Teuchos::Array<bool> LocalGIDs;
1227 if (numDomainElements > 0) {
1228 LocalGIDs.resize (numDomainElements,
false);
1239 const size_t numMyRows = rowptr.size () - 1;
1240 const int hashsize = std::max (static_cast<int> (numMyRows), 100);
1243 Teuchos::Array<GO> RemoteGIDList;
1244 RemoteGIDList.reserve (hashsize);
1245 Teuchos::Array<int> PIDList;
1246 PIDList.reserve (hashsize);
1257 size_t NumLocalColGIDs = 0;
1258 LO NumRemoteColGIDs = 0;
1259 for (
size_t i = 0; i < numMyRows; ++i) {
1260 for(
size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
1261 const GO GID = colind_GID[j];
1263 const LO LID = domainMap.getLocalElement (GID);
1265 const bool alreadyFound = LocalGIDs[LID];
1266 if (! alreadyFound) {
1267 LocalGIDs[LID] =
true;
1270 colind_LID[j] = LID;
1273 const LO hash_value = RemoteGIDs.
get (GID);
1274 if (hash_value == -1) {
1275 const int PID = owningPIDs[j];
1276 TEUCHOS_TEST_FOR_EXCEPTION(
1277 PID == -1, std::invalid_argument, prefix <<
"Cannot figure out if " 1279 colind_LID[j] =
static_cast<LO
> (numDomainElements + NumRemoteColGIDs);
1280 RemoteGIDs.
add (GID, NumRemoteColGIDs);
1281 RemoteGIDList.push_back (GID);
1282 PIDList.push_back (PID);
1286 colind_LID[j] =
static_cast<LO
> (numDomainElements + hash_value);
1294 if (domainMap.getComm ()->getSize () == 1) {
1297 TEUCHOS_TEST_FOR_EXCEPTION(
1298 NumRemoteColGIDs != 0, std::runtime_error, prefix <<
"There is only one " 1299 "process in the domain Map's communicator, which means that there are no " 1300 "\"remote\" indices. Nevertheless, some column indices are not in the " 1302 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1306 colMap = domainMapRCP;
1313 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1314 Teuchos::Array<GO> ColIndices;
1315 GO* RemoteColIndices = NULL;
1316 if (numMyCols > 0) {
1317 ColIndices.resize (numMyCols);
1318 if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1319 RemoteColIndices = &ColIndices[NumLocalColGIDs];
1323 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1324 RemoteColIndices[i] = RemoteGIDList[i];
1328 Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
1329 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1330 RemotePermuteIDs[i]=i;
1337 ColIndices.begin () + NumLocalColGIDs,
1338 RemotePermuteIDs.begin ());
1344 remotePIDs = PIDList;
1353 LO StartCurrent = 0, StartNext = 1;
1354 while (StartNext < NumRemoteColGIDs) {
1355 if (PIDList[StartNext]==PIDList[StartNext-1]) {
1359 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
1360 ColIndices.begin () + NumLocalColGIDs + StartNext,
1361 RemotePermuteIDs.begin () + StartCurrent);
1362 StartCurrent = StartNext;
1366 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
1367 ColIndices.begin () + NumLocalColGIDs + StartNext,
1368 RemotePermuteIDs.begin () + StartCurrent);
1371 Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
1372 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1373 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
1377 bool use_local_permute =
false;
1378 Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
1390 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getNodeElementList();
1391 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1392 if (NumLocalColGIDs > 0) {
1394 std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
1395 ColIndices.begin ());
1399 LO NumLocalAgain = 0;
1400 use_local_permute =
true;
1401 for (
size_t i = 0; i < numDomainElements; ++i) {
1403 LocalPermuteIDs[i] = NumLocalAgain;
1404 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
1407 TEUCHOS_TEST_FOR_EXCEPTION(
1408 static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1409 std::runtime_error, prefix <<
"Local ID count test failed.");
1413 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1414 colMap = rcp (
new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
1415 domainMap.getComm (), domainMap.getNode ()));
1418 for (
size_t i = 0; i < numMyRows; ++i) {
1419 for (
size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
1420 const LO ID = colind_LID[j];
1421 if (static_cast<size_t> (ID) < numDomainElements) {
1422 if (use_local_permute) {
1423 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
1430 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
1442 #include "Tpetra_CrsMatrix.hpp" 1444 #endif // TPETRA_IMPORT_UTIL_HPP void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void unpackAndCombineIntoCrsArrays(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, int MyTargetPID, const Teuchos::ArrayView< size_t > &rowPointers, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices, const Teuchos::ArrayView< Scalar > &values, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3)
Sort the first array, and apply the same permutation to the second and third arrays.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Declaration of the Tpetra::CrsMatrix class.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
size_t unpackAndCombineWithOwningPIDsCount(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Special version of Tpetra::CrsMatrix::unpackAndCombine that also unpacks owning process ranks...
Kokkos::View< char *, D, Kokkos::MemoryUnmanaged > output_buffer_type
The type of an output buffer of bytes.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowPointers, const Teuchos::ArrayView< LocalOrdinal > &columnIndices_LID, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices_GID, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::ArrayView< const int > &owningPids, Teuchos::Array< int > &remotePids, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
size_t global_size_t
Global size_t object.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
void packAndPrepareWithOwningPIDs(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Kokkos::DualView< char *, typename Node::device_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor, const Teuchos::ArrayView< const int > &SourcePids)
Special version of Tpetra::CrsMatrix::packAndPrepare that also packs owning process ranks...
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
Kokkos::View< const char *, D, Kokkos::MemoryUnmanaged > input_buffer_type
The type of an input buffer of bytes.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
Describes a parallel distribution of objects over processes.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
Stand-alone utility functions and macros.
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.