41 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP 42 #define TPETRA_MATRIXMATRIX_DEF_HPP 45 #include "Teuchos_VerboseObject.hpp" 46 #include "Teuchos_Array.hpp" 48 #include "Tpetra_ConfigDefs.hpp" 49 #include "Tpetra_CrsMatrix.hpp" 51 #include "Tpetra_RowMatrixTransposer.hpp" 52 #include "Tpetra_ConfigDefs.hpp" 53 #include "Tpetra_Map.hpp" 54 #include "Tpetra_Export.hpp" 58 #include "Teuchos_FancyOStream.hpp" 67 namespace MatrixMatrix{
75 template <
class Scalar,
85 bool call_FillComplete_on_result,
86 const std::string& label,
87 const Teuchos::RCP<Teuchos::ParameterList>& params)
92 typedef LocalOrdinal LO;
93 typedef GlobalOrdinal GO;
101 #ifdef HAVE_TPETRA_MMM_TIMINGS 102 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
103 using Teuchos::TimeMonitor;
104 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Setup"))));
107 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
112 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
113 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
118 RCP<const crs_matrix_type> Aprime = null;
122 RCP<const crs_matrix_type> Bprime = null;
132 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
134 bool use_optimized_ATB =
false;
135 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
136 use_optimized_ATB =
true;
138 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later. 139 use_optimized_ATB =
false;
142 if (!use_optimized_ATB && transposeA) {
143 transposer_type transposer(rcpFromRef (A));
144 Aprime = transposer.createTranspose();
147 Aprime = rcpFromRef(A);
151 transposer_type transposer(rcpFromRef (B));
152 Bprime = transposer.createTranspose();
155 Bprime = rcpFromRef(B);
165 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
166 prefix <<
"ERROR, inner dimensions of op(A) and op(B) " 167 "must match for matrix-matrix product. op(A) is " 168 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
174 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
175 prefix <<
"ERROR, dimensions of result C must " 177 <<
" rows, should have at least " << Aouter << std::endl);
189 int numProcs = A.
getComm()->getSize();
193 crs_matrix_struct_type Aview;
194 crs_matrix_struct_type Bview;
196 RCP<const map_type> targetMap_A = Aprime->getRowMap();
197 RCP<const map_type> targetMap_B = Bprime->getRowMap();
199 #ifdef HAVE_TPETRA_MMM_TIMINGS 200 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All I&X"))));
206 if (!use_optimized_ATB) {
207 RCP<const import_type> dummyImporter;
208 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
214 targetMap_B = Aprime->getColMap();
217 if (!use_optimized_ATB)
218 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label, params);
220 #ifdef HAVE_TPETRA_MMM_TIMINGS 221 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Multiply"))));
225 if (use_optimized_ATB) {
226 MMdetails::mult_AT_B_newmatrix(A, B, C, label);
228 }
else if (call_FillComplete_on_result && newFlag) {
229 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label);
231 }
else if (call_FillComplete_on_result) {
232 MMdetails::mult_A_B_reuse(Aview, Bview, C, label);
238 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
240 MMdetails::mult_A_B(Aview, Bview, crsmat, label);
242 #ifdef HAVE_TPETRA_MMM_TIMINGS 243 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All FillComplete"))));
245 if (call_FillComplete_on_result) {
253 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
259 template <
class Scalar,
268 bool call_FillComplete_on_result,
269 const std::string& label,
270 const Teuchos::RCP<Teuchos::ParameterList>& params)
274 typedef LocalOrdinal LO;
275 typedef GlobalOrdinal GO;
282 #ifdef HAVE_TPETRA_MMM_TIMINGS 283 std::string prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
284 using Teuchos::TimeMonitor;
285 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm+std::string(
"Jacobi All Setup"))));
288 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
293 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
294 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
296 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
297 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
306 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
307 prefix <<
"ERROR, inner dimensions of op(A) and op(B) " 308 "must match for matrix-matrix product. op(A) is " 309 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
315 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
316 prefix <<
"ERROR, dimensions of result C must " 318 <<
" rows, should have at least "<< Aouter << std::endl);
330 int numProcs = A.
getComm()->getSize();
334 crs_matrix_struct_type Aview;
335 crs_matrix_struct_type Bview;
337 RCP<const map_type> targetMap_A = Aprime->getRowMap();
338 RCP<const map_type> targetMap_B = Bprime->getRowMap();
340 #ifdef HAVE_TPETRA_MMM_TIMINGS 341 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All I&X"))));
345 RCP<const import_type> dummyImporter;
346 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
false, label);
351 targetMap_B = Aprime->getColMap();
354 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label);
356 #ifdef HAVE_TPETRA_MMM_TIMINGS 357 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All Multiply"))));
361 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
364 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
366 if (call_FillComplete_on_result && newFlag) {
367 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label);
369 }
else if (call_FillComplete_on_result) {
370 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label);
373 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
396 template <
class Scalar,
407 using Teuchos::Array;
411 typedef LocalOrdinal LO;
412 typedef GlobalOrdinal GO;
417 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
419 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
420 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. " 421 "(Result matrix B is not required to be isFillComplete()).");
422 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
423 prefix <<
"ERROR, input matrix B must not be fill complete!");
424 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
425 prefix <<
"ERROR, input matrix B must not have static graph!");
427 prefix <<
"ERROR, input matrix B must not be locally indexed!");
429 prefix <<
"ERROR, input matrix B must have a dynamic profile!");
432 RCP<const crs_matrix_type> Aprime = null;
434 transposer_type transposer(rcpFromRef (A));
435 Aprime = transposer.createTranspose();
437 Aprime = rcpFromRef(A);
445 if (scalarB != Teuchos::ScalarTraits<SC>::one())
450 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
451 for (LO i = 0; (size_t)i < numMyRows; ++i) {
452 row = B.
getRowMap()->getGlobalElement(i);
453 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
455 if (scalarA != Teuchos::ScalarTraits<SC>::one())
456 for (
size_t j = 0; j < a_numEntries; ++j)
457 a_vals[j] *= scalarA;
468 template <
class Scalar,
472 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
474 const bool transposeA,
477 const bool transposeB,
481 const Teuchos::RCP<Teuchos::ParameterList>& params)
484 using Teuchos::rcpFromRef;
485 using Teuchos::rcp_implicit_cast;
486 using Teuchos::rcp_dynamic_cast;
489 typedef LocalOrdinal LO;
490 typedef GlobalOrdinal GO;
496 const std::string prefix =
"TpetraExt::MatrixMatrix::add(): ";
499 prefix <<
"A and B must both be fill complete.");
501 #ifdef HAVE_TPETRA_DEBUG 504 const bool domainMapsSame =
508 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
509 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
511 const bool rangeMapsSame =
515 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
516 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
518 #endif // HAVE_TPETRA_DEBUG 521 RCP<const crs_matrix_type> Aprime;
523 transposer_type transposer (rcpFromRef (A));
524 Aprime = transposer.createTranspose ();
527 Aprime = rcpFromRef (A);
530 #ifdef HAVE_TPETRA_DEBUG 531 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
532 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
533 #endif // HAVE_TPETRA_DEBUG 536 RCP<const crs_matrix_type> Bprime;
538 transposer_type transposer (rcpFromRef (B));
539 Bprime = transposer.createTranspose ();
542 Bprime = rcpFromRef (B);
545 #ifdef HAVE_TPETRA_DEBUG 546 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
547 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
549 TEUCHOS_TEST_FOR_EXCEPTION(
550 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
551 prefix <<
"Aprime and Bprime must both be fill complete. " 552 "Please report this bug to the Tpetra developers.");
553 #endif // HAVE_TPETRA_DEBUG 555 RCP<row_matrix_type> C =
556 Bprime->add (alpha, *rcp_implicit_cast<const row_matrix_type> (Aprime),
557 beta, domainMap, rangeMap, params);
559 return rcp_dynamic_cast<crs_matrix_type> (C);
563 template <
class Scalar,
577 using Teuchos::Array;
578 using Teuchos::ArrayRCP;
579 using Teuchos::ArrayView;
582 using Teuchos::rcp_dynamic_cast;
583 using Teuchos::rcpFromRef;
584 using Teuchos::tuple;
587 typedef Teuchos::ScalarTraits<Scalar> STS;
595 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
597 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
598 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
600 TEUCHOS_TEST_FOR_EXCEPTION(
602 prefix <<
"Both input matrices must be fill complete before calling this function.");
604 #ifdef HAVE_TPETRA_DEBUG 606 const bool domainMapsSame =
610 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
611 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
613 const bool rangeMapsSame =
617 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
618 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
620 #endif // HAVE_TPETRA_DEBUG 623 RCP<const crs_matrix_type> Aprime;
625 transposer_type theTransposer (rcpFromRef (A));
626 Aprime = theTransposer.createTranspose ();
628 Aprime = rcpFromRef (A);
631 #ifdef HAVE_TPETRA_DEBUG 632 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
633 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
634 #endif // HAVE_TPETRA_DEBUG 637 RCP<const crs_matrix_type> Bprime;
639 transposer_type theTransposer (rcpFromRef (B));
640 Bprime = theTransposer.createTranspose ();
642 Bprime = rcpFromRef (B);
645 #ifdef HAVE_TPETRA_DEBUG 646 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
647 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
648 #endif // HAVE_TPETRA_DEBUG 651 if (! C.is_null ()) {
652 C->setAllToScalar (STS::zero ());
660 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
661 RCP<const map_type> rowMap = Aprime->getRowMap ();
663 RCP<const crs_graph_type> A_graph =
664 rcp_dynamic_cast<
const crs_graph_type> (Aprime->getGraph (),
true);
665 #ifdef HAVE_TPETRA_DEBUG 666 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
667 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. " 668 "Please report this bug to the Tpetra developers.");
669 #endif // HAVE_TPETRA_DEBUG 670 RCP<const crs_graph_type> B_graph =
671 rcp_dynamic_cast<
const crs_graph_type> (Bprime->getGraph (),
true);
672 #ifdef HAVE_TPETRA_DEBUG 673 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
674 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. " 675 "Please report this bug to the Tpetra developers.");
676 #endif // HAVE_TPETRA_DEBUG 677 RCP<const map_type> A_colMap = A_graph->getColMap ();
678 #ifdef HAVE_TPETRA_DEBUG 679 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
680 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. " 681 "Please report this bug to the Tpetra developers.");
682 #endif // HAVE_TPETRA_DEBUG 683 RCP<const map_type> B_colMap = B_graph->getColMap ();
684 #ifdef HAVE_TPETRA_DEBUG 685 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
686 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. " 687 "Please report this bug to the Tpetra developers.");
688 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
690 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. " 691 "Please report this bug to the Tpetra developers.");
692 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
694 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. " 695 "Please report this bug to the Tpetra developers.");
696 #endif // HAVE_TPETRA_DEBUG 699 RCP<const import_type> sumImport =
700 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
701 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
709 ArrayView<const LocalOrdinal> A_local_ind;
710 Array<GlobalOrdinal> A_global_ind;
711 ArrayView<const LocalOrdinal> B_local_ind;
712 Array<GlobalOrdinal> B_global_ind;
714 const size_t localNumRows = rowMap->getNodeNumElements ();
715 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
718 size_t maxNumEntriesPerRow = 0;
719 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
721 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
722 const size_type A_numEnt = A_local_ind.size ();
723 if (A_numEnt > A_global_ind.size ()) {
724 A_global_ind.resize (A_numEnt);
727 for (size_type k = 0; k < A_numEnt; ++k) {
728 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
732 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
733 const size_type B_numEnt = B_local_ind.size ();
734 if (B_numEnt > B_global_ind.size ()) {
735 B_global_ind.resize (B_numEnt);
738 for (size_type k = 0; k < B_numEnt; ++k) {
739 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
743 const size_t curNumEntriesPerRow =
744 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
745 B_global_ind.begin (), B_global_ind.end ());
746 numEntriesPerRow[localRow] = curNumEntriesPerRow;
747 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
754 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow,
StaticProfile));
759 ArrayView<const Scalar> A_val;
760 ArrayView<const Scalar> B_val;
762 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
763 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
764 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
766 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
768 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
770 for (size_type k = 0; k < A_local_ind.size (); ++k) {
771 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
775 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
777 for (size_type k = 0; k < B_local_ind.size (); ++k) {
778 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
781 const size_t curNumEntries = numEntriesPerRow[localRow];
782 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
783 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
784 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
788 A_val.begin (), A_val.end (),
789 B_global_ind.begin (), B_global_ind.end (),
790 B_val.begin (), B_val.end (),
791 C_global_ind.begin (), C_val.begin (),
792 std::plus<Scalar> ());
794 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
795 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
798 C->replaceLocalValues (localRow, C_local_ind, C_val);
803 RCP<const map_type> domainMap = A_graph->getDomainMap ();
804 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
805 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
813 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), null));
820 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
824 #ifdef HAVE_TPETRA_DEBUG 825 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
826 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
827 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
828 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
829 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
830 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
831 #endif // HAVE_TPETRA_DEBUG 833 Array<RCP<const crs_matrix_type> > Mat =
834 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
835 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
838 for (
int k = 0; k < 2; ++k) {
839 Array<GlobalOrdinal> Indices;
840 Array<Scalar> Values;
848 #ifdef HAVE_TPETRA_DEBUG 849 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
850 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
851 #endif // HAVE_TPETRA_DEBUG 852 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
853 #ifdef HAVE_TPETRA_DEBUG 854 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
855 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
856 #endif // HAVE_TPETRA_DEBUG 858 const size_t localNumRows = Mat[k]->getNodeNumRows ();
859 for (
size_t i = 0; i < localNumRows; ++i) {
860 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
861 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
862 if (numEntries > 0) {
863 Indices.resize (numEntries);
864 Values.resize (numEntries);
865 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
867 if (scalar[k] != STS::one ()) {
868 for (
size_t j = 0; j < numEntries; ++j) {
869 Values[j] *= scalar[k];
873 if (C->isFillComplete ()) {
874 C->sumIntoGlobalValues (globalRow, Indices, Values);
876 C->insertGlobalValues (globalRow, Indices, Values);
889 template <
class TransferType>
890 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer,
const std::string &label) {
891 if (Transfer.is_null())
894 const Distributor & Distor = Transfer->getDistributor();
895 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
897 size_t rows_send = Transfer->getNumExportIDs();
898 size_t rows_recv = Transfer->getNumRemoteIDs();
900 size_t round1_send = Transfer->getNumExportIDs() *
sizeof(size_t);
901 size_t round1_recv = Transfer->getNumRemoteIDs() *
sizeof(size_t);
902 size_t num_send_neighbors = Distor.getNumSends();
903 size_t num_recv_neighbors = Distor.getNumReceives();
904 size_t round2_send, round2_recv;
905 Distor.getLastDoStatistics(round2_send,round2_recv);
907 int myPID = Comm->getRank();
908 int NumProcs = Comm->getSize();
915 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
916 size_t gstats_min[8], gstats_max[8];
918 double lstats_avg[8], gstats_avg[8];
919 for(
int i=0; i<8; i++)
920 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
922 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
923 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
924 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
927 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
928 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
929 (int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
930 printf(
"%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
931 (int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
932 (int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
938 template<
class Scalar,
942 void mult_AT_B_newmatrix(
943 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
944 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
945 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
946 const std::string & label,
947 const Teuchos::RCP<Teuchos::ParameterList>& params)
953 typedef LocalOrdinal LO;
954 typedef GlobalOrdinal GO;
956 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
957 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
959 #ifdef HAVE_TPETRA_MMM_TIMINGS 960 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
961 using Teuchos::TimeMonitor;
962 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T Transpose"))));
968 transposer_type transposer (rcpFromRef (A));
969 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = transposer.createTransposeLocal();
974 #ifdef HAVE_TPETRA_MMM_TIMINGS 975 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T I&X"))));
979 crs_matrix_struct_type Aview;
980 crs_matrix_struct_type Bview;
981 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
982 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,
true, label);
983 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label);
985 #ifdef HAVE_TPETRA_MMM_TIMINGS 986 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
989 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
992 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
993 if (needs_final_export)
996 Ctemp = rcp(&C,
false);
999 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label);
1004 #ifdef HAVE_TPETRA_MMM_TIMINGS 1005 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1008 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,
false);
1009 if (needs_final_export) {
1010 Teuchos::ParameterList labelList;
1011 labelList.set(
"Timer Label", label);
1012 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1013 B.getDomainMap(),A.getDomainMap(),rcp(&labelList,
false));
1015 #ifdef HAVE_TPETRA_MMM_STATISTICS 1016 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1021 template<
class Scalar,
1023 class GlobalOrdinal,
1026 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1027 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1028 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1029 const std::string& label,
1030 const Teuchos::RCP<Teuchos::ParameterList>& params)
1032 using Teuchos::Array;
1033 using Teuchos::ArrayRCP;
1034 using Teuchos::ArrayView;
1035 using Teuchos::OrdinalTraits;
1036 using Teuchos::null;
1038 typedef Teuchos::ScalarTraits<Scalar> STS;
1040 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1041 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1043 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1044 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1046 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList();
1047 ArrayView<const GlobalOrdinal> bcols_import = null;
1048 if (Bview.importColMap != null) {
1049 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1050 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1052 bcols_import = Bview.importColMap->getNodeElementList();
1055 size_t C_numCols = C_lastCol - C_firstCol +
1056 OrdinalTraits<LocalOrdinal>::one();
1057 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1058 OrdinalTraits<LocalOrdinal>::one();
1060 if (C_numCols_import > C_numCols)
1061 C_numCols = C_numCols_import;
1063 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1064 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1065 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1067 Array<Scalar> C_row_i = dwork;
1068 Array<GlobalOrdinal> C_cols = iwork;
1069 Array<size_t> c_index = iwork2;
1070 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1071 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1073 size_t C_row_i_length, j, k, last_index;
1076 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1077 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1078 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1079 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1081 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1082 Aview.colMap->getMaxLocalIndex(); i++)
1087 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1088 Aview.colMap->getMaxLocalIndex(); i++) {
1089 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1090 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1091 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1092 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1102 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1103 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1104 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1105 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1106 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1107 ArrayView<const Scalar> Avals, Bvals, Ivals;
1108 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1109 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1110 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1111 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1112 if(!Bview.importMatrix.is_null()) {
1113 Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1114 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1117 bool C_filled = C.isFillComplete();
1119 for (
size_t i = 0; i < C_numCols; i++)
1120 c_index[i] = OrdinalTraits<size_t>::invalid();
1123 size_t Arows = Aview.rowMap->getNodeNumElements();
1124 for(
size_t i=0; i<Arows; ++i) {
1128 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1134 C_row_i_length = OrdinalTraits<size_t>::zero();
1136 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1137 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1138 Scalar Aval = Avals[k];
1139 if (Aval == STS::zero())
1142 if (Ak == LO_INVALID)
1145 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1146 LocalOrdinal col = Bcolind[j];
1149 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1152 C_row_i[C_row_i_length] = Aval*Bvals[j];
1153 C_cols[C_row_i_length] = col;
1154 c_index[col] = C_row_i_length;
1158 C_row_i[c_index[col]] += Aval*Bvals[j];
1163 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1164 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1165 C_cols[ii] = bcols[C_cols[ii]];
1166 combined_index[ii] = C_cols[ii];
1167 combined_values[ii] = C_row_i[ii];
1169 last_index = C_row_i_length;
1175 C_row_i_length = OrdinalTraits<size_t>::zero();
1177 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1178 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1179 Scalar Aval = Avals[k];
1180 if (Aval == STS::zero())
1183 if (Ak!=LO_INVALID)
continue;
1185 Ak = Acol2Irow[Acolind[k]];
1186 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1187 LocalOrdinal col = Icolind[j];
1190 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1193 C_row_i[C_row_i_length] = Aval*Ivals[j];
1194 C_cols[C_row_i_length] = col;
1195 c_index[col] = C_row_i_length;
1200 C_row_i[c_index[col]] += Aval*Ivals[j];
1205 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1206 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1207 C_cols[ii] = bcols_import[C_cols[ii]];
1208 combined_index[last_index] = C_cols[ii];
1209 combined_values[last_index] = C_row_i[ii];
1216 C.sumIntoGlobalValues(
1218 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1219 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1221 C.insertGlobalValues(
1223 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1224 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1229 template<
class Scalar,
1231 class GlobalOrdinal,
1233 void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1234 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1235 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1237 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1238 Mview.maxNumRowEntries = Mview.indices[0].size();
1240 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1241 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1242 Mview.maxNumRowEntries = Mview.indices[i].size();
1247 template<
class CrsMatrixType>
1248 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1250 size_t Aest = 100, Best=100;
1251 if (A.getNodeNumEntries() > 0)
1252 Aest = (A.getNodeNumRows() > 0)? A.getNodeNumEntries()/A.getNodeNumEntries() : 100;
1253 if (B.getNodeNumEntries() > 0)
1254 Best = (B.getNodeNumRows() > 0) ? B.getNodeNumEntries()/B.getNodeNumEntries() : 100;
1256 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1257 nnzperrow *= nnzperrow;
1259 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1268 template<
class Scalar,
1270 class GlobalOrdinal,
1272 void mult_A_B_newmatrix(
1273 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1274 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1275 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1276 const std::string& label,
1277 const Teuchos::RCP<Teuchos::ParameterList>& params)
1279 using Teuchos::Array;
1280 using Teuchos::ArrayRCP;
1281 using Teuchos::ArrayView;
1286 typedef LocalOrdinal LO;
1287 typedef GlobalOrdinal GO;
1290 typedef Import<LO,GO,NO> import_type;
1291 typedef Map<LO,GO,NO> map_type;
1293 #ifdef HAVE_TPETRA_MMM_TIMINGS 1294 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1295 using Teuchos::TimeMonitor;
1296 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1298 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1299 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1302 RCP<const import_type> Cimport;
1303 RCP<const map_type> Ccolmap;
1304 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1305 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1306 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1313 Array<LO> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
1315 if (Bview.importMatrix.is_null()) {
1318 Ccolmap = Bview.colMap;
1320 for (
size_t i = 0; i < Bview.colMap->getNodeNumElements(); i++)
1321 Bcol2Ccol[i] = Teuchos::as<LO>(i);
1332 if (!Bimport.is_null() && !Iimport.is_null())
1333 Cimport = Bimport->setUnion(*Iimport);
1335 else if (!Bimport.is_null() && Iimport.is_null())
1336 Cimport = Bimport->setUnion();
1338 else if (Bimport.is_null() && !Iimport.is_null())
1339 Cimport = Iimport->setUnion();
1342 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1344 Ccolmap = Cimport->getTargetMap();
1349 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1350 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1357 Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
1358 ArrayView<const GO> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
1359 ArrayView<const GO> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
1361 for (
size_t i = 0; i < Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
1362 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1363 for (
size_t i = 0; i < Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
1364 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1367 #ifdef HAVE_TPETRA_MMM_TIMINGS 1368 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
1372 size_t m = Aview.origMatrix->getNodeNumRows();
1373 size_t n = Ccolmap->getNodeNumElements();
1376 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1377 ArrayRCP<size_t> Crowptr_RCP;
1378 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1379 ArrayRCP<LO> Ccolind_RCP;
1380 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1381 ArrayRCP<SC> Cvals_RCP;
1388 Aview.origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
1389 Bview.origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
1390 if (!Bview.importMatrix.is_null())
1391 Bview.importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
1398 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1399 ArrayView<const LO> Acolind, Bcolind, Icolind;
1400 ArrayView<const SC> Avals, Bvals, Ivals;
1401 ArrayView<size_t> Crowptr;
1402 ArrayView<LO> Ccolind;
1403 ArrayView<SC> Cvals;
1404 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1405 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1406 if (!Bview.importMatrix.is_null()) {
1407 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1417 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
1418 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1419 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1420 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1438 Array<LO> targetMapToOrigRow (Aview.colMap->getNodeNumElements(), LO_INVALID);
1439 Array<LO> targetMapToImportRow(Aview.colMap->getNodeNumElements(), LO_INVALID);
1441 for (LO i = Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1442 LO B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1443 if (B_LID != LO_INVALID) {
1444 targetMapToOrigRow[i] = B_LID;
1446 LO I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1447 targetMapToImportRow[i] = I_LID;
1451 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1461 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1462 Array<size_t> c_status(n, ST_INVALID);
1472 size_t CSR_ip = 0, OLD_ip = 0;
1473 for (
size_t i = 0; i < m; i++) {
1476 Crowptr[i] = CSR_ip;
1479 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
1480 LO Aik = Acolind[k];
1482 if (Aval == SC_ZERO)
1485 if (targetMapToOrigRow[Aik] != LO_INVALID) {
1492 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
1495 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
1496 LO Bkj = Bcolind[j];
1497 LO Cij = Bcol2Ccol[Bkj];
1499 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
1501 c_status[Cij] = CSR_ip;
1502 Ccolind[CSR_ip] = Cij;
1503 Cvals[CSR_ip] = Aval*Bvals[j];
1507 Cvals[c_status[Cij]] += Aval*Bvals[j];
1518 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
1519 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
1520 LO Ikj = Icolind[j];
1521 LO Cij = Icol2Ccol[Ikj];
1523 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
1525 c_status[Cij] = CSR_ip;
1526 Ccolind[CSR_ip] = Cij;
1527 Cvals[CSR_ip] = Aval*Ivals[j];
1531 Cvals[c_status[Cij]] += Aval*Ivals[j];
1538 if (CSR_ip + n > CSR_alloc) {
1540 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1541 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1546 Crowptr[m] = CSR_ip;
1549 Cvals_RCP .resize(CSR_ip);
1550 Ccolind_RCP.resize(CSR_ip);
1552 #ifdef HAVE_TPETRA_MMM_TIMINGS 1553 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort"))));
1560 C.replaceColMap(Ccolmap);
1566 Import_Util::sortCrsEntries(Crowptr_RCP(), Ccolind_RCP(), Cvals_RCP());
1568 C.setAllValues(Crowptr_RCP, Ccolind_RCP, Cvals_RCP);
1570 #ifdef HAVE_TPETRA_MMM_TIMINGS 1571 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC"))));
1582 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport);
1587 template<
class Scalar,
1589 class GlobalOrdinal,
1591 void mult_A_B_reuse(
1592 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1593 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1594 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1595 const std::string& label,
1596 const Teuchos::RCP<Teuchos::ParameterList>& params)
1598 using Teuchos::Array;
1599 using Teuchos::ArrayRCP;
1600 using Teuchos::ArrayView;
1605 typedef LocalOrdinal LO;
1606 typedef GlobalOrdinal GO;
1609 typedef Import<LO,GO,NO> import_type;
1610 typedef Map<LO,GO,NO> map_type;
1612 #ifdef HAVE_TPETRA_MMM_TIMINGS 1613 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1614 using Teuchos::TimeMonitor;
1615 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
1617 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1618 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1621 RCP<const import_type> Cimport = C.getGraph()->getImporter();
1622 RCP<const map_type> Ccolmap = C.getColMap();
1624 Array<LO> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
1628 ArrayView<const GO> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
1629 for (
size_t i = 0; i < Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
1630 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1632 if (!Bview.importMatrix.is_null()) {
1633 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1634 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1636 Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
1637 ArrayView<const GO> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
1638 for (
size_t i = 0; i < Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
1639 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1643 #ifdef HAVE_TPETRA_MMM_TIMINGS 1644 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
1648 size_t m = Aview.origMatrix->getNodeNumRows();
1649 size_t n = Ccolmap->getNodeNumElements();
1652 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP, Crowptr_RCP;
1653 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP, Ccolind_RCP;
1654 ArrayRCP<const SC> Avals_RCP, Bvals_RCP, Ivals_RCP, Cvals_RCP;
1656 Aview.origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
1657 Bview.origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
1658 if (!Bview.importMatrix.is_null())
1659 Bview.importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
1660 C.getAllValues(Crowptr_RCP, Ccolind_RCP, Cvals_RCP);
1663 ArrayView<const size_t> Arowptr, Browptr, Irowptr, Crowptr;
1664 ArrayView<const LO> Acolind, Bcolind, Icolind, Ccolind;
1665 ArrayView<const SC> Avals, Bvals, Ivals;
1666 ArrayView<SC> Cvals;
1667 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1668 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1669 if (!Bview.importMatrix.is_null()) {
1670 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1672 Crowptr = Crowptr_RCP(); Ccolind = Ccolind_RCP(); Cvals = (Teuchos::arcp_const_cast<SC>(Cvals_RCP))();
1675 Array<LO> targetMapToOrigRow (Aview.colMap->getNodeNumElements(), LO_INVALID);
1676 Array<LO> targetMapToImportRow(Aview.colMap->getNodeNumElements(), LO_INVALID);
1678 for (LO i = Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1679 LO B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1680 if (B_LID != LO_INVALID) {
1681 targetMapToOrigRow[i] = B_LID;
1683 LO I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1684 targetMapToImportRow[i] = I_LID;
1688 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1695 Array<size_t> c_status(n, ST_INVALID);
1698 size_t CSR_ip = 0, OLD_ip = 0;
1699 for (
size_t i = 0; i < m; i++) {
1703 OLD_ip = Crowptr[i];
1704 CSR_ip = Crowptr[i+1];
1705 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
1706 c_status[Ccolind[k]] = k;
1712 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
1713 LO Aik = Acolind[k];
1715 if (Aval == SC_ZERO)
1718 if (targetMapToOrigRow[Aik] != LO_INVALID) {
1720 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
1722 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
1723 LO Bkj = Bcolind[j];
1724 LO Cij = Bcol2Ccol[Bkj];
1726 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
1727 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
1728 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
1730 Cvals[c_status[Cij]] += Aval * Bvals[j];
1735 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
1736 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
1737 LO Ikj = Icolind[j];
1738 LO Cij = Icol2Ccol[Ikj];
1740 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
1741 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
1742 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
1744 Cvals[c_status[Cij]] += Aval * Ivals[j];
1750 C.fillComplete(C.getDomainMap(), C.getRangeMap());
1755 template<
class Scalar,
1757 class GlobalOrdinal,
1759 void jacobi_A_B_newmatrix(
1761 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
1762 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1763 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1764 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1765 const std::string& label,
1766 const Teuchos::RCP<Teuchos::ParameterList>& params)
1768 using Teuchos::Array;
1769 using Teuchos::ArrayRCP;
1770 using Teuchos::ArrayView;
1775 typedef LocalOrdinal LO;
1776 typedef GlobalOrdinal GO;
1779 typedef Import<LO,GO,NO> import_type;
1780 typedef Map<LO,GO,NO> map_type;
1782 #ifdef HAVE_TPETRA_MMM_TIMINGS 1783 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1784 using Teuchos::TimeMonitor;
1785 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
1787 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1788 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1792 RCP<const import_type> Cimport;
1793 RCP<const map_type> Ccolmap;
1794 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1795 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1802 Array<LO> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
1804 if (Bview.importMatrix.is_null()) {
1807 Ccolmap = Bview.colMap;
1809 for (
size_t i = 0; i < Bview.colMap->getNodeNumElements(); i++)
1810 Bcol2Ccol[i] = Teuchos::as<LO>(i);
1821 if (!Bimport.is_null() && !Iimport.is_null()){
1822 Cimport = Bimport->setUnion(*Iimport);
1823 Ccolmap = Cimport->getTargetMap();
1825 }
else if (!Bimport.is_null() && Iimport.is_null()) {
1826 Cimport = Bimport->setUnion();
1828 }
else if(Bimport.is_null() && !Iimport.is_null()) {
1829 Cimport = Iimport->setUnion();
1832 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
1834 Ccolmap = Cimport->getTargetMap();
1836 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1837 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
1844 Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
1845 ArrayView<const GO> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
1846 ArrayView<const GO> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
1848 for (
size_t i = 0; i < Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
1849 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1850 for (
size_t i = 0; i < Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
1851 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1854 #ifdef HAVE_TPETRA_MMM_TIMINGS 1855 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SerialCore"))));
1859 size_t m = Aview.origMatrix->getNodeNumRows();
1860 size_t n = Ccolmap->getNodeNumElements();
1863 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1864 ArrayRCP<size_t> Crowptr_RCP;
1865 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1866 ArrayRCP<LO> Ccolind_RCP;
1867 ArrayRCP<const SC> Avals_RCP, Bvals_RCP, Ivals_RCP;
1868 ArrayRCP<SC> Cvals_RCP;
1869 ArrayRCP<const SC> Dvals_RCP;
1876 Aview.origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
1877 Bview.origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
1878 if (!Bview.importMatrix.is_null())
1879 Bview.importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
1883 Dvals_RCP = Dinv.getData();
1890 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1891 ArrayView<const LO> Acolind, Bcolind, Icolind;
1892 ArrayView<const SC> Avals, Bvals, Ivals;
1893 ArrayView<size_t> Crowptr;
1894 ArrayView<LO> Ccolind;
1895 ArrayView<SC> Cvals;
1896 ArrayView<const SC> Dvals;
1897 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1898 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1899 if (!Bview.importMatrix.is_null()) {
1900 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1902 Dvals = Dvals_RCP();
1909 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1910 Array<size_t> c_status(n, ST_INVALID);
1919 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
1920 size_t CSR_ip = 0, OLD_ip = 0;
1921 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1922 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1923 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1941 Array<LO> targetMapToOrigRow (Aview.colMap->getNodeNumElements(), LO_INVALID);
1942 Array<LO> targetMapToImportRow(Aview.colMap->getNodeNumElements(), LO_INVALID);
1944 for (LO i = Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1945 LO B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1946 if (B_LID != LO_INVALID) {
1947 targetMapToOrigRow[i] = B_LID;
1949 LO I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1950 targetMapToImportRow[i] = I_LID;
1954 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1968 for (
size_t i = 0; i < m; i++) {
1971 Crowptr[i] = CSR_ip;
1975 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
1976 Scalar Bval = Bvals[j];
1977 if (Bval == SC_ZERO)
1979 LO Bij = Bcolind[j];
1980 LO Cij = Bcol2Ccol[Bij];
1983 c_status[Cij] = CSR_ip;
1984 Ccolind[CSR_ip] = Cij;
1985 Cvals[CSR_ip] = Bvals[j];
1990 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
1991 LO Aik = Acolind[k];
1993 if (Aval == SC_ZERO)
1996 if (targetMapToOrigRow[Aik] != LO_INVALID) {
1998 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
2000 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2001 LO Bkj = Bcolind[j];
2002 LO Cij = Bcol2Ccol[Bkj];
2004 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2006 c_status[Cij] = CSR_ip;
2007 Ccolind[CSR_ip] = Cij;
2008 Cvals[CSR_ip] = - omega * Dval* Aval * Bvals[j];
2012 Cvals[c_status[Cij]] -= omega * Dval* Aval * Bvals[j];
2018 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
2019 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2020 LO Ikj = Icolind[j];
2021 LO Cij = Icol2Ccol[Ikj];
2023 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2025 c_status[Cij] = CSR_ip;
2026 Ccolind[CSR_ip] = Cij;
2027 Cvals[CSR_ip] = - omega * Dval* Aval * Ivals[j];
2030 Cvals[c_status[Cij]] -= omega * Dval* Aval * Ivals[j];
2037 if (CSR_ip + n > CSR_alloc) {
2039 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
2040 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
2045 Crowptr[m] = CSR_ip;
2048 Cvals_RCP .resize(CSR_ip);
2049 Ccolind_RCP.resize(CSR_ip);
2052 #ifdef HAVE_TPETRA_MMM_TIMINGS 2053 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort"))));
2060 C.replaceColMap(Ccolmap);
2066 Import_Util::sortCrsEntries(Crowptr_RCP(), Ccolind_RCP(), Cvals_RCP());
2068 C.setAllValues(Crowptr_RCP, Ccolind_RCP, Cvals_RCP);
2070 #ifdef HAVE_TPETRA_MMM_TIMINGS 2071 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC"))));
2082 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport);
2087 template<
class Scalar,
2089 class GlobalOrdinal,
2091 void jacobi_A_B_reuse(
2093 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2094 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2095 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2096 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2097 const std::string& label,
2098 const Teuchos::RCP<Teuchos::ParameterList>& params)
2100 using Teuchos::Array;
2101 using Teuchos::ArrayRCP;
2102 using Teuchos::ArrayView;
2107 typedef LocalOrdinal LO;
2108 typedef GlobalOrdinal GO;
2111 typedef Import<LO,GO,NO> import_type;
2112 typedef Map<LO,GO,NO> map_type;
2114 #ifdef HAVE_TPETRA_MMM_TIMINGS 2115 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2116 using Teuchos::TimeMonitor;
2117 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2119 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2120 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2123 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2124 RCP<const map_type> Ccolmap = C.getColMap();
2126 Array<LO> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
2128 if (Bview.importMatrix.is_null()) {
2132 for (
size_t i = 0; i < Bview.colMap->getNodeNumElements(); i++)
2133 Bcol2Ccol[i] = Teuchos::as<LO>(i);
2136 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2137 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2140 Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
2141 ArrayView<const GO> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
2142 ArrayView<const GO> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
2144 for (
size_t i = 0; i < Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
2145 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
2146 for (
size_t i = 0; i < Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
2147 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
2150 #ifdef HAVE_TPETRA_MMM_TIMINGS 2151 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
2155 size_t m = Aview.origMatrix->getNodeNumRows();
2156 size_t n = Ccolmap->getNodeNumElements();
2159 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP, Crowptr_RCP;
2160 ArrayRCP<const LO> Acolind_RCP, Bcolind_RCP, Icolind_RCP, Ccolind_RCP;
2161 ArrayRCP<const SC> Avals_RCP, Bvals_RCP, Ivals_RCP, Cvals_RCP, Dvals_RCP;
2163 Aview.origMatrix->getAllValues(Arowptr_RCP, Acolind_RCP, Avals_RCP);
2164 Bview.origMatrix->getAllValues(Browptr_RCP, Bcolind_RCP, Bvals_RCP);
2165 if (!Bview.importMatrix.is_null())
2166 Bview.importMatrix->getAllValues(Irowptr_RCP, Icolind_RCP, Ivals_RCP);
2167 C.getAllValues(Crowptr_RCP, Ccolind_RCP, Cvals_RCP);
2168 Dvals_RCP = Dinv.getData();
2171 ArrayView<const size_t> Arowptr, Browptr, Irowptr, Crowptr;
2172 ArrayView<const LO> Acolind, Bcolind, Icolind, Ccolind;
2173 ArrayView<const SC> Avals, Bvals, Ivals;
2174 ArrayView<SC> Cvals;
2175 ArrayView<const SC> Dvals;
2176 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
2177 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
2178 if (!Bview.importMatrix.is_null()) {
2179 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
2181 Crowptr = Crowptr_RCP(); Ccolind = Ccolind_RCP(); Cvals = (Teuchos::arcp_const_cast<SC>(Cvals_RCP))();
2182 Dvals = Dvals_RCP();
2185 Array<LO> targetMapToOrigRow (Aview.colMap->getNodeNumElements(), LO_INVALID);
2186 Array<LO> targetMapToImportRow(Aview.colMap->getNodeNumElements(), LO_INVALID);
2188 for (LO i = Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
2189 LO B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
2190 if (B_LID != LO_INVALID) {
2191 targetMapToOrigRow[i] = B_LID;
2193 LO I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
2194 targetMapToImportRow[i] = I_LID;
2198 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2205 Array<size_t> c_status(n, ST_INVALID);
2208 size_t CSR_ip = 0, OLD_ip = 0;
2209 for (
size_t i = 0; i < m; i++) {
2213 OLD_ip = Crowptr[i];
2214 CSR_ip = Crowptr[i+1];
2215 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2216 c_status[Ccolind[k]] = k;
2225 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2226 Scalar Bval = Bvals[j];
2227 if (Bval == SC_ZERO)
2229 LO Bij = Bcolind[j];
2230 LO Cij = Bcol2Ccol[Bij];
2232 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2233 std::runtime_error,
"Trying to insert a new entry into a static graph");
2235 Cvals[c_status[Cij]] = Bvals[j];
2239 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2240 LO Aik = Acolind[k];
2242 if (Aval == SC_ZERO)
2245 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2247 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
2249 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2250 LO Bkj = Bcolind[j];
2251 LO Cij = Bcol2Ccol[Bkj];
2253 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2254 std::runtime_error,
"Trying to insert a new entry into a static graph");
2256 Cvals[c_status[Cij]] -= omega * Dval* Aval * Bvals[j];
2261 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
2262 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2263 LO Ikj = Icolind[j];
2264 LO Cij = Icol2Ccol[Ikj];
2266 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2267 std::runtime_error,
"Trying to insert a new entry into a static graph");
2269 Cvals[c_status[Cij]] -= omega * Dval* Aval * Ivals[j];
2275 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2279 template<
class Scalar,
2281 class GlobalOrdinal,
2283 void import_and_extract_views(
2284 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
2285 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
2286 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2287 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
2288 bool userAssertsThereAreNoRemotes,
2289 const std::string& label,
2290 const Teuchos::RCP<Teuchos::ParameterList>& params)
2292 using Teuchos::Array;
2293 using Teuchos::ArrayView;
2296 using Teuchos::null;
2299 typedef LocalOrdinal LO;
2300 typedef GlobalOrdinal GO;
2303 typedef Map<LO,GO,NO> map_type;
2304 typedef Import<LO,GO,NO> import_type;
2305 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
2307 #ifdef HAVE_TPETRA_MMM_TIMINGS 2308 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2309 using Teuchos::TimeMonitor;
2310 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
2319 Aview.deleteContents();
2321 Aview.origMatrix = rcp(&A,
false);
2322 Aview.origRowMap = A.getRowMap();
2323 Aview.rowMap = targetMap;
2324 Aview.colMap = A.getColMap();
2325 Aview.domainMap = A.getDomainMap();
2326 Aview.importColMap = null;
2329 if (userAssertsThereAreNoRemotes)
2332 RCP<const import_type> importer;
2333 if (params != null && params->isParameter(
"importer")) {
2334 importer = params->get<RCP<const import_type> >(
"importer");
2337 #ifdef HAVE_TPETRA_MMM_TIMINGS 2338 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
2343 RCP<const map_type> rowMap = A.getRowMap(), remoteRowMap;
2344 size_t numRemote = 0;
2346 if (!prototypeImporter.is_null() &&
2347 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
2348 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
2350 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
2351 numRemote = prototypeImporter->getNumRemoteIDs();
2353 Array<GO> remoteRows(numRemote);
2354 for (
size_t i = 0; i < numRemote; i++)
2355 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
2357 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
2358 rowMap->getIndexBase(), rowMap->getComm(), rowMap->getNode()));
2361 }
else if (prototypeImporter.is_null()) {
2363 ArrayView<const GO> rows = targetMap->getNodeElementList();
2364 size_t numRows = targetMap->getNodeNumElements();
2366 Array<GO> remoteRows(numRows);
2367 for(
size_t i = 0; i < numRows; ++i) {
2368 const LO mlid = rowMap->getLocalElement(rows[i]);
2370 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
2371 remoteRows[numRemote++] = rows[i];
2373 remoteRows.resize(numRemote);
2374 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
2375 rowMap->getIndexBase(), rowMap->getComm(), rowMap->getNode()));
2383 const int numProcs = rowMap->getComm()->getSize();
2385 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
2386 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
2395 #ifdef HAVE_TPETRA_MMM_TIMINGS 2396 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
2400 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
2402 if (globalMaxNumRemote > 0) {
2403 #ifdef HAVE_TPETRA_MMM_TIMINGS 2404 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
2408 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
2410 importer = rcp(
new import_type(rowMap, remoteRowMap));
2412 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
2416 params->set(
"importer", importer);
2419 if (importer != null) {
2420 #ifdef HAVE_TPETRA_MMM_TIMINGS 2421 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
2425 Teuchos::ParameterList labelList;
2426 labelList.set(
"Timer Label", label);
2427 Aview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
2428 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
2430 #ifdef HAVE_TPETRA_MMM_STATISTICS 2431 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
2435 #ifdef HAVE_TPETRA_MMM_TIMINGS 2436 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
2440 Aview.importColMap = Aview.importMatrix->getColMap();
2455 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 2458 void MatrixMatrix::Multiply( \ 2459 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2461 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2463 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 2464 bool call_FillComplete_on_result, \ 2465 const std::string & label, \ 2466 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 2469 void MatrixMatrix::Jacobi( \ 2471 const Vector< SCALAR, LO, GO, NODE > & Dinv, \ 2472 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2473 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2474 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 2475 bool call_FillComplete_on_result, \ 2476 const std::string & label, \ 2477 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 2480 void MatrixMatrix::Add( \ 2481 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2484 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2487 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \ 2490 void MatrixMatrix::Add( \ 2491 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \ 2494 CrsMatrix<SCALAR, LO, GO, NODE>& B, \ 2498 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \ 2499 MatrixMatrix::add (const SCALAR & alpha, \ 2500 const bool transposeA, \ 2501 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 2502 const SCALAR & beta, \ 2503 const bool transposeB, \ 2504 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 2505 const Teuchos::RCP<const Map< LO , GO , NODE > >& domainMap, \ 2506 const Teuchos::RCP<const Map< LO , GO , NODE > >& rangeMap, \ 2507 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 2511 #endif // TPETRA_MATRIXMATRIX_DEF_HPP ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix's graph, as a RowGraph.
bool isFillActive() const
Whether the matrix is not fill complete.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
size_t global_size_t
Global size_t object.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
bool isFillComplete() const
Whether the matrix is fill complete.
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Utility functions for packing and unpacking sparse matrix entries.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
Describes a parallel distribution of objects over processes.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.