Xpetra_MatrixMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Xpetra: A linear algebra interface package
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 
48 #ifndef PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_HPP_
49 #define PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_HPP_
50 
51 #include "Xpetra_ConfigDefs.hpp"
52 
54 #include "Xpetra_CrsMatrixWrap.hpp"
55 #include "Xpetra_MapExtractor.hpp"
56 #include "Xpetra_Map.hpp"
57 #include "Xpetra_MatrixFactory.hpp"
58 #include "Xpetra_Matrix.hpp"
60 #include "Xpetra_StridedMap.hpp"
61 
62 #ifdef HAVE_XPETRA_EPETRA
64 #endif
65 
66 #ifdef HAVE_XPETRA_EPETRAEXT
67 #include <EpetraExt_MatrixMatrix.h>
68 #include <EpetraExt_RowMatrixOut.h>
69 #include <Epetra_RowMatrixTransposer.h>
70 #endif // HAVE_XPETRA_EPETRAEXT
71 
72 #ifdef HAVE_XPETRA_TPETRA
73 #include <TpetraExt_MatrixMatrix.hpp>
74 #include <MatrixMarket_Tpetra.hpp>
77 #include <Xpetra_TpetraVector.hpp>
78 #endif // HAVE_XPETRA_TPETRA
79 
80 namespace Xpetra {
81 
88  template <class Scalar,
89  class LocalOrdinal = int,
90  class GlobalOrdinal = LocalOrdinal,
92  class Helpers {
93 #include "Xpetra_UseShortNames.hpp"
94 
95  public:
96 
97 #ifdef HAVE_XPETRA_EPETRA
98  static RCP<const Epetra_CrsMatrix> Op2EpetraCrs(RCP<Matrix> Op) {
99  // Get the underlying Epetra Mtx
100  RCP<const CrsMatrixWrap> crsOp = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(Op);
101  TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Xpetra::Exceptions::BadCast,
102  "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
103 
104  RCP<const CrsMatrix> tmp_CrsMtx = crsOp->getCrsMatrix();
105  const RCP<const EpetraCrsMatrixT<GO,NO> > &tmp_ECrsMtx = Teuchos::rcp_dynamic_cast<const EpetraCrsMatrixT<GO,NO> >(tmp_CrsMtx);
106  TEUCHOS_TEST_FOR_EXCEPTION(tmp_ECrsMtx == Teuchos::null, Exceptions::BadCast,
107  "Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed");
108 
109  return tmp_ECrsMtx->getEpetra_CrsMatrix();
110  }
111 
112  static RCP<Epetra_CrsMatrix> Op2NonConstEpetraCrs(RCP<Matrix> Op) {
113  RCP<Epetra_CrsMatrix> A;
114  // Get the underlying Epetra Mtx
115  RCP<const CrsMatrixWrap> crsOp = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(Op);
116  TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Exceptions::BadCast,
117  "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
118 
119  RCP<const CrsMatrix> tmp_CrsMtx = crsOp->getCrsMatrix();
120  const RCP<const EpetraCrsMatrixT<GO,NO> > &tmp_ECrsMtx = Teuchos::rcp_dynamic_cast<const EpetraCrsMatrixT<GO,NO> >(tmp_CrsMtx);
121  TEUCHOS_TEST_FOR_EXCEPTION(tmp_ECrsMtx == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed");
122 
123  return tmp_ECrsMtx->getEpetra_CrsMatrixNonConst();
124  }
125 
126  static const Epetra_CrsMatrix& Op2EpetraCrs(const Matrix& Op) {
127  // Get the underlying Epetra Mtx
128  try {
129  const CrsMatrixWrap& crsOp = dynamic_cast<const CrsMatrixWrap&>(Op);
130  RCP<const CrsMatrix> tmp_CrsMtx = crsOp.getCrsMatrix();
131  const RCP<const EpetraCrsMatrixT<GO,NO> > &tmp_ECrsMtx = Teuchos::rcp_dynamic_cast<const EpetraCrsMatrixT<GO,NO> >(tmp_CrsMtx);
132  TEUCHOS_TEST_FOR_EXCEPTION(tmp_ECrsMtx == Teuchos::null, Xpetra::Exceptions::BadCast,
133  "Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed");
134 
135  return *tmp_ECrsMtx->getEpetra_CrsMatrix();
136 
137  } catch(...) {
138  throw(Xpetra::Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"));
139  }
140  }
141 
142  static Epetra_CrsMatrix& Op2NonConstEpetraCrs(const Matrix& Op) {
143  RCP<Epetra_CrsMatrix> A;
144  // Get the underlying Epetra Mtx
145  try {
146  const CrsMatrixWrap& crsOp = dynamic_cast<const CrsMatrixWrap&>(Op);
147  RCP<const CrsMatrix> tmp_CrsMtx = crsOp.getCrsMatrix();
148  const RCP<const EpetraCrsMatrixT<GO,NO> > &tmp_ECrsMtx = Teuchos::rcp_dynamic_cast<const EpetraCrsMatrixT<GO,NO> >(tmp_CrsMtx);
149  TEUCHOS_TEST_FOR_EXCEPTION(tmp_ECrsMtx == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::CrsMatrix to Xpetra::EpetraCrsMatrix failed");
150 
151  return *Teuchos::rcp_const_cast<Epetra_CrsMatrix>(tmp_ECrsMtx->getEpetra_CrsMatrix());
152 
153  } catch(...) {
154  throw(Xpetra::Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"));
155  }
156  }
157 #endif // HAVE_XPETRA_EPETRA
158 
159 #ifdef HAVE_XPETRA_TPETRA
160  static RCP<const Tpetra::CrsMatrix<SC,LO,GO,NO> > Op2TpetraCrs(RCP<Matrix> Op) {
161  // Get the underlying Tpetra Mtx
162  RCP<const CrsMatrixWrap> crsOp = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(Op);
163  TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
164 
165  RCP<const CrsMatrix> tmp_CrsMtx = crsOp->getCrsMatrix();
166  const RCP<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> > &tmp_ECrsMtx = Teuchos::rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> >(tmp_CrsMtx);
167  TEUCHOS_TEST_FOR_EXCEPTION(tmp_ECrsMtx == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
168 
169  return tmp_ECrsMtx->getTpetra_CrsMatrix();
170  }
171 
172  static RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > Op2NonConstTpetraCrs(RCP<Matrix> Op) {
173  // Get the underlying Tpetra Mtx
174  RCP<const CrsMatrixWrap> crsOp = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(Op);
175  TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
176  RCP<const CrsMatrix> tmp_CrsMtx = crsOp->getCrsMatrix();
177  const RCP<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> > &tmp_ECrsMtx = Teuchos::rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> >(tmp_CrsMtx);
178  TEUCHOS_TEST_FOR_EXCEPTION(tmp_ECrsMtx == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
179 
180  return tmp_ECrsMtx->getTpetra_CrsMatrixNonConst();
181  }
182 
183  static const Tpetra::CrsMatrix<SC,LO,GO,NO> & Op2TpetraCrs(const Matrix& Op) {
184  // Get the underlying Tpetra Mtx
185  try{
186  const CrsMatrixWrap& crsOp = dynamic_cast<const CrsMatrixWrap&>(Op);
187  RCP<const CrsMatrix> tmp_CrsMtx = crsOp.getCrsMatrix();
188  const RCP<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> > &tmp_TCrsMtx = Teuchos::rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> >(tmp_CrsMtx);
189  TEUCHOS_TEST_FOR_EXCEPTION(tmp_TCrsMtx == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
190 
191  return *tmp_TCrsMtx->getTpetra_CrsMatrix();
192 
193  } catch (...) {
194  throw(Xpetra::Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"));
195  }
196  }
197 
198  static Tpetra::CrsMatrix<SC,LO,GO,NO> & Op2NonConstTpetraCrs(const Matrix& Op) {
199  // Get the underlying Tpetra Mtx
200  try{
201  const CrsMatrixWrap& crsOp = dynamic_cast<const CrsMatrixWrap& >(Op);
202  RCP<const CrsMatrix> tmp_CrsMtx = crsOp.getCrsMatrix();
203  const RCP<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> > &tmp_TCrsMtx = Teuchos::rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<SC,LO,GO,NO> >(tmp_CrsMtx);
204  TEUCHOS_TEST_FOR_EXCEPTION(tmp_TCrsMtx == Teuchos::null, Xpetra::Exceptions::BadCast, "Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
205 
206  return *Teuchos::rcp_const_cast<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >(tmp_TCrsMtx->getTpetra_CrsMatrix());
207 
208  } catch (...) {
209  throw(Xpetra::Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"));
210  }
211  }
212 #endif // HAVE_XPETRA_TPETRA
213 
214  };
215 
216  template <class Scalar,
217  class LocalOrdinal /*= int*/,
218  class GlobalOrdinal /*= LocalOrdinal*/,
219  class Node /*= KokkosClassic::DefaultNode::DefaultNodeType*/>
220  class MatrixMatrix {
221 #undef XPETRA_MATRIXMATRIX_SHORT
222 #include "Xpetra_UseShortNames.hpp"
223 
224  public:
225 
250  static void Multiply(const Matrix& A, bool transposeA,
251  const Matrix& B, bool transposeB,
252  Matrix& C,
253  bool call_FillComplete_on_result = true,
254  bool doOptimizeStorage = true,
255  const std::string & label = std::string(),
256  const RCP<ParameterList>& params = null) {
257 
258  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
259  Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
260  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
261  Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
262 
263  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
264  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
265 
266  bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
267 
268  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
269 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
270  throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply only available for GO=int or GO=long long with EpetraNode (Serial or OpenMP depending on configuration)"));
271 #else
272  throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
273 
274 #endif
275  } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
276 #ifdef HAVE_XPETRA_TPETRA
277  const Tpetra::CrsMatrix<SC,LO,GO,NO> & tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
278  const Tpetra::CrsMatrix<SC,LO,GO,NO> & tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(B);
279  Tpetra::CrsMatrix<SC,LO,GO,NO> & tpC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(C);
280 
281  // 18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
282  // Previously, Tpetra's matrix matrix multiply did not support fillComplete.
283  Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
284 #else
285  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
286 #endif
287  }
288 
289  if (call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
290  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
291  fillParams->set("Optimize Storage", doOptimizeStorage);
292  C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
293  (transposeA) ? A.getDomainMap() : A.getRangeMap(),
294  fillParams);
295  }
296 
297  // transfer striding information
298  RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
299  RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
300  C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
301  } // end Multiply
302 
325  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA, const Matrix& B, bool transposeB, RCP<Matrix> C_in,
326  Teuchos::FancyOStream& fos,
327  bool doFillComplete = true,
328  bool doOptimizeStorage = true,
329  const std::string & label = std::string(),
330  const RCP<ParameterList>& params = null) {
331 
332  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
333  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
334 
335  // Default case: Xpetra Multiply
336  RCP<Matrix> C = C_in;
337 
338  if (C == Teuchos::null) {
339  double nnzPerRow = Teuchos::as<double>(0);
340 
341  if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
342  // For now, follow what ML and Epetra do.
343  GO numRowsA = A.getGlobalNumRows();
344  GO numRowsB = B.getGlobalNumRows();
345  nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
346  sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
347  nnzPerRow *= nnzPerRow;
348  double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
349  double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
350  if (totalNnz < minNnz)
351  totalNnz = minNnz;
352  nnzPerRow = totalNnz / A.getGlobalNumRows();
353 
354  fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
355  }
356 
357  if (transposeA) C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
358  else C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
359 
360  } else {
361  C->resumeFill(); // why this is not done inside of Tpetra MxM?
362  fos << "Reuse C pattern" << std::endl;
363  }
364 
365  Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
366 
367  return C;
368  }
369 
380  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA, const Matrix& B, bool transposeB, Teuchos::FancyOStream &fos,
381  bool callFillCompleteOnResult = true, bool doOptimizeStorage = true, const std::string& label = std::string(),
382  const RCP<ParameterList>& params = null) {
383  return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
384  }
385 
386 #ifdef HAVE_XPETRA_EPETRAEXT
387  // Michael Gee's MLMultiply
388  static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& epA,
389  const Epetra_CrsMatrix& epB,
390  Teuchos::FancyOStream& fos) {
391  throw(Xpetra::Exceptions::RuntimeError("MLTwoMatrixMultiply only available for GO=int or GO=long long with EpetraNode (Serial or OpenMP depending on configuration)"));
392  return Teuchos::null;
393  }
394 #endif //ifdef HAVE_XPETRA_EPETRAEXT
395 
406  static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
407  const BlockedCrsMatrix& B, bool transposeB,
408  Teuchos::FancyOStream& fos,
409  bool doFillComplete = true,
410  bool doOptimizeStorage = true) {
411  TEUCHOS_TEST_FOR_EXCEPTION(transposeA || transposeB, Exceptions::RuntimeError,
412  "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
413 
414  // Preconditions
415  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
416  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
417 
418  RCP<const MapExtractor> rgmapextractor = A.getRangeMapExtractor();
419  RCP<const MapExtractor> domapextractor = B.getDomainMapExtractor();
420 
421  RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
422 
423  for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
424  for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
425  RCP<Matrix> Cij;
426 
427  for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
428  RCP<Matrix> crmat1 = A.getMatrix(i,l);
429  RCP<Matrix> crmat2 = B.getMatrix(l,j);
430 
431  if (crmat1.is_null() || crmat2.is_null())
432  continue;
433  if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
434  continue;
435 
436  RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
437  RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
438  TEUCHOS_TEST_FOR_EXCEPTION((crop1==Teuchos::null) != (crop2==Teuchos::null), Xpetra::Exceptions::RuntimeError, "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
439 
440  // temporary matrix containing result of local block multiplication
441  RCP<Matrix> temp = Teuchos::null;
442 
443  if(crop1 != Teuchos::null && crop2 != Teuchos::null)
444  temp = Multiply (*crop1, false, *crop2, false, fos);
445  else {
446  RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
447  RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
448  TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null()==true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
449  TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null()==true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
450  TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols()!=bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
451  TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap()))==false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
452 
453  // recursive multiplication call
454  temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
455 
456  RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
457  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows()!=bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
458  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols()!=bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
459  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
460  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
461  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
462  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
463  }
464 
465  TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
466 
467  RCP<Matrix> addRes = null;
468  if (Cij.is_null ())
469  Cij = temp;
470  else {
471  MatrixMatrix::TwoMatrixAdd (*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
472  Cij = addRes;
473  }
474  }
475 
476  if (!Cij.is_null()) {
477  if (Cij->isFillComplete())
478  Cij->resumeFill();
479  Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
480  C->setMatrix(i, j, Cij);
481  } else {
482  C->setMatrix(i, j, Teuchos::null);
483  }
484  }
485  }
486 
487  if (doFillComplete)
488  C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
489 
490  return C;
491  } // TwoMatrixMultiplyBlock
492 
505  static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
506  if (!(A.getRowMap()->isSameAs(*(B.getRowMap()))))
507  throw Exceptions::Incompatible("TwoMatrixAdd: matrix row maps are not the same.");
508 
509  if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
510  throw Exceptions::RuntimeError("TwoMatrixAdd for Epetra matrices needs <double,int,int> for Scalar, LocalOrdinal and GlobalOrdinal.");
511  } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
512 #ifdef HAVE_XPETRA_TPETRA
513  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
514  Tpetra::CrsMatrix<SC,LO,GO,NO>& tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(B);
515 
516  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
517 #else
518  throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
519 #endif
520  }
521  } //MatrixMatrix::TwoMatrixAdd()
522 
523 
536  static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
537  const Matrix& B, bool transposeB, const SC& beta,
538  RCP<Matrix>& C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow = false) {
539 
540  RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
541  RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
542  RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
543  RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
544 
545  // We have to distinguish 4 cases:
546  // both matrices are CrsMatrixWrap based, only one of them is CrsMatrixWrap based, or none.
547 
548  // Both matrices are CrsMatrixWrap
549  if(rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
550 
551  if (!(A.getRowMap()->isSameAs(*(B.getRowMap()))))
552  throw Exceptions::Incompatible("TwoMatrixAdd: matrix row maps are not the same.");
553 
554 
555  if (C == Teuchos::null) {
556  size_t maxNzInA = 0;
557  size_t maxNzInB = 0;
558  size_t numLocalRows = 0;
559  if (A.isFillComplete() && B.isFillComplete()) {
560  maxNzInA = A.getGlobalMaxNumRowEntries();
561  maxNzInB = B.getGlobalMaxNumRowEntries();
562  numLocalRows = A.getNodeNumRows();
563  }
564  if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
565  // first check if either A or B has at most 1 nonzero per row
566  // the case of both having at most 1 nz per row is handled by the ``else''
567  Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
568 
569  if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
570  for (size_t i = 0; i < numLocalRows; ++i)
571  exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
572 
573  } else {
574  for (size_t i = 0; i < numLocalRows; ++i)
575  exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
576  }
577 
578  fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
579  << ", using static profiling" << std::endl;
580  C = rcp(new CrsMatrixWrap(A.getRowMap(), exactNnzPerRow, Xpetra::StaticProfile));
581  } else {
582  // general case
583  double nnzPerRowInA = Teuchos::as<double>(A.getGlobalNumEntries()) / A.getGlobalNumRows();
584  double nnzPerRowInB = Teuchos::as<double>(B.getGlobalNumEntries()) / B.getGlobalNumRows();
585  LO nnzToAllocate = Teuchos::as<LO>( (nnzPerRowInA + nnzPerRowInB) * 1.5) + Teuchos::as<LO>(1);
586 
588  //Use static profiling (more efficient) if the estimate is at least as big as the max
589  //possible nnz's in any single row of the result.
590  Xpetra::ProfileType pft = (maxPossible) > nnzToAllocate ? Xpetra::DynamicProfile : Xpetra::StaticProfile;
591 
592  fos << "nnzPerRowInA = " << nnzPerRowInA << ", nnzPerRowInB = " << nnzPerRowInB << std::endl;
593  fos << "MatrixMatrix::TwoMatrixAdd : space allocated per row = " << nnzToAllocate
594  << ", max possible nnz per row in sum = " << maxPossible
595  << ", using " << (pft == Xpetra::DynamicProfile ? "dynamic" : "static" ) << " profiling"
596  << std::endl;
597  C = rcp(new CrsMatrixWrap(A.getRowMap(), nnzToAllocate, pft));
598  }
599  if (transposeB)
600  fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
601  }
602 
603  if (C->getRowMap()->lib() == Xpetra::UseEpetra) {
604  throw Exceptions::RuntimeError("MatrixMatrix::Add for Epetra only available with Scalar = double, LO = GO = int.");
605  } else if (C->getRowMap()->lib() == Xpetra::UseTpetra) {
606  #ifdef HAVE_XPETRA_TPETRA
607  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
608  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(B);
609  RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > tpC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(C);
610 
611  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, transposeB, beta, tpC);
612  #else
613  throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
614  #endif
615  }
617  if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
618  if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
620  }
621  // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
622  else if(rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
623  RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
624  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
625 
626  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
627  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
628 
629  size_t i = 0;
630  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
631  RCP<Matrix> Cij = Teuchos::null;
632  if(rcpA != Teuchos::null &&
633  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
634  // recursive call
635  TwoMatrixAdd(*rcpA, transposeA, alpha,
636  *(rcpBopB->getMatrix(i,j)), transposeB, beta,
637  Cij, fos, AHasFixedNnzPerRow);
638  } else if (rcpA == Teuchos::null &&
639  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
640  Cij = rcpBopB->getMatrix(i,j);
641  } else if (rcpA != Teuchos::null &&
642  rcpBopB->getMatrix(i,j)==Teuchos::null) {
643  Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
644  } else {
645  Cij = Teuchos::null;
646  }
647 
648  if (!Cij.is_null()) {
649  if (Cij->isFillComplete())
650  Cij->resumeFill();
651  Cij->fillComplete();
652  bC->setMatrix(i, j, Cij);
653  } else {
654  bC->setMatrix(i, j, Teuchos::null);
655  }
656  } // loop over columns j
657  }
658  // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
659  else if(rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
660  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
661  RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
662 
663  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
664  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
665  size_t j = 0;
666  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
667  RCP<Matrix> Cij = Teuchos::null;
668  if(rcpBopA->getMatrix(i,j)!=Teuchos::null &&
669  rcpB!=Teuchos::null) {
670  // recursive call
671  TwoMatrixAdd(*(rcpBopA->getMatrix(i,j)), transposeA, alpha,
672  *rcpB, transposeB, beta,
673  Cij, fos, AHasFixedNnzPerRow);
674  } else if (rcpBopA->getMatrix(i,j) == Teuchos::null &&
675  rcpB!=Teuchos::null) {
676  Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
677  } else if (rcpBopA->getMatrix(i,j) != Teuchos::null &&
678  rcpB==Teuchos::null) {
679  Cij = rcpBopA->getMatrix(i,j);
680  } else {
681  Cij = Teuchos::null;
682  }
683 
684  if (!Cij.is_null()) {
685  if (Cij->isFillComplete())
686  Cij->resumeFill();
687  Cij->fillComplete();
688  bC->setMatrix(i, j, Cij);
689  } else {
690  bC->setMatrix(i, j, Teuchos::null);
691  }
692  } // loop over rows i
693  }
694  else {
695  // This is the version for blocked matrices
696 
697  // check the compatibility of the blocked operators
698  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null()==true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
699  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null()==true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
700  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows()!=rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
701  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows()!=rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
702  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap()))==false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
703  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap()))==false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
704  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
705  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
706 
707  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
708  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
709 
710  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
711  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
712  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
713  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
714 
715  RCP<Matrix> Cij = Teuchos::null;
716  if(rcpBopA->getMatrix(i,j)!=Teuchos::null &&
717  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
718  // recursive call
719  TwoMatrixAdd(*(rcpBopA->getMatrix(i,j)), transposeA, alpha,
720  *(rcpBopB->getMatrix(i,j)), transposeB, beta,
721  Cij, fos, AHasFixedNnzPerRow);
722  } else if (rcpBopA->getMatrix(i,j) == Teuchos::null &&
723  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
724  Cij = rcpBopB->getMatrix(i,j);
725  } else if (rcpBopA->getMatrix(i,j) != Teuchos::null &&
726  rcpBopB->getMatrix(i,j)==Teuchos::null) {
727  Cij = rcpBopA->getMatrix(i,j);
728  } else {
729  Cij = Teuchos::null;
730  }
731 
732  if (!Cij.is_null()) {
733  if (Cij->isFillComplete())
734  Cij->resumeFill();
735  Cij->fillComplete();
736  bC->setMatrix(i, j, Cij);
737  } else {
738  bC->setMatrix(i, j, Teuchos::null);
739  }
740  } // loop over columns j
741  } // loop over rows i
742 
743  } // end blocked recursive algorithm
744  } //MatrixMatrix::TwoMatrixAdd()
745 
746 
747  }; // class MatrixMatrix
748 
749 
750 #ifdef HAVE_XPETRA_EPETRA
751  // specialization MatrixMatrix for SC=double, LO=GO=int
752  template<>
753  class MatrixMatrix<double,int,int,EpetraNode> {
754  typedef double Scalar;
755  typedef int LocalOrdinal;
756  typedef int GlobalOrdinal;
757  typedef EpetraNode Node;
758 #include "Xpetra_UseShortNames.hpp"
759 
760  public:
761 
786  static void Multiply(const Matrix& A, bool transposeA,
787  const Matrix& B, bool transposeB,
788  Matrix& C,
789  bool call_FillComplete_on_result = true,
790  bool doOptimizeStorage = true,
791  const std::string & label = std::string(),
792  const RCP<ParameterList>& params = null) {
793  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
794  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
795  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
796  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
797 
798  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Xpetra::Exceptions::RuntimeError, "A is not fill-completed");
799  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Xpetra::Exceptions::RuntimeError, "B is not fill-completed");
800 
801  bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
802 
803  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
804 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
805  Epetra_CrsMatrix& epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(A);
806  Epetra_CrsMatrix& epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(B);
807  Epetra_CrsMatrix& epC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(C);
808 
809  int i = EpetraExt::MatrixMatrix::Multiply(epA, transposeA, epB, transposeB, epC, haveMultiplyDoFillComplete);
810  if (haveMultiplyDoFillComplete) {
811  // Due to Epetra wrapper intricacies, we need to explicitly call
812  // fillComplete on Xpetra matrix here. Specifically, EpetraCrsMatrix
813  // only keeps an internal variable to check whether we are in resumed
814  // state or not, but never touches the underlying Epetra object. As
815  // such, we need to explicitly update the state of Xpetra matrix to
816  // that of Epetra one afterwords
817  C.fillComplete();
818  }
819 
820  if (i != 0) {
821  std::ostringstream buf;
822  buf << i;
823  std::string msg = "EpetraExt::MatrixMatrix::Multiply return value of " + buf.str();
824  throw(Exceptions::RuntimeError(msg));
825  }
826 
827 #else
828  throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
829 #endif
830  } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
831 #ifdef HAVE_XPETRA_TPETRA
832 # if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
833  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
834  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra <double,int,int> ETI enabled."));
835 # else
836  const Tpetra::CrsMatrix<SC,LO,GO,NO> & tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
837  const Tpetra::CrsMatrix<SC,LO,GO,NO> & tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(B);
838  Tpetra::CrsMatrix<SC,LO,GO,NO> & tpC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(C);
839 
840  // 18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
841  // Previously, Tpetra's matrix matrix multiply did not support fillComplete.
842  Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
843 # endif
844 #else
845  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
846 #endif
847  }
848 
849  if (call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
850  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
851  fillParams->set("Optimize Storage",doOptimizeStorage);
852  C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
853  (transposeA) ? A.getDomainMap() : A.getRangeMap(),
854  fillParams);
855  }
856 
857  // transfer striding information
858  RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
859  RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
860  C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
861  } // end Multiply
862 
885  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
886  const Matrix& B, bool transposeB,
887  RCP<Matrix> C_in,
888  Teuchos::FancyOStream& fos,
889  bool doFillComplete = true,
890  bool doOptimizeStorage = true,
891  const std::string & label = std::string(),
892  const RCP<ParameterList>& params = null) {
893 
894  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
895  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
896 
897  // Optimization using ML Multiply when available and requested
898  // This feature is currently not supported. We would have to introduce the HAVE_XPETRA_ML_MMM flag
899 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT) && defined(HAVE_XPETRA_ML_MMM)
900  if (B.getDomainMap()->lib() == Xpetra::UseEpetra && !transposeA && !transposeB) {
901  RCP<const Epetra_CrsMatrix> epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(rcpFromRef(A));
902  RCP<const Epetra_CrsMatrix> epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(rcpFromRef(B));
903  RCP<Epetra_CrsMatrix> epC = MLTwoMatrixMultiply(*epA, *epB, fos);
904 
905  RCP<Matrix> C = Convert_Epetra_CrsMatrix_ToXpetra_CrsMatrixWrap<SC,LO,GO,NO> (epC);
906  if (doFillComplete) {
907  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
908  fillParams->set("Optimize Storage", doOptimizeStorage);
909  C->fillComplete(B.getDomainMap(), A.getRangeMap(), fillParams);
910  }
911 
912  // Fill strided maps information
913  // This is necessary since the ML matrix matrix multiplication routine has no handling for this
914  // TODO: move this call to MLMultiply...
915  C->CreateView("stridedMaps", rcpFromRef(A), transposeA, rcpFromRef(B), transposeB);
916 
917  return C;
918  }
919 #endif // EPETRA + EPETRAEXT + ML
920 
921  // Default case: Xpetra Multiply
922  RCP<Matrix> C = C_in;
923 
924  if (C == Teuchos::null) {
925  double nnzPerRow = Teuchos::as<double>(0);
926 
927  if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
928  // For now, follow what ML and Epetra do.
929  GO numRowsA = A.getGlobalNumRows();
930  GO numRowsB = B.getGlobalNumRows();
931  nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
932  sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
933  nnzPerRow *= nnzPerRow;
934  double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
935  double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
936  if (totalNnz < minNnz)
937  totalNnz = minNnz;
938  nnzPerRow = totalNnz / A.getGlobalNumRows();
939 
940  fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
941  }
942 
943  if (transposeA) C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
944  else C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
945 
946  } else {
947  C->resumeFill(); // why this is not done inside of Tpetra MxM?
948  fos << "Reuse C pattern" << std::endl;
949  }
950 
951  Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
952 
953  return C;
954  }
955 
966  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
967  const Matrix& B, bool transposeB,
968  Teuchos::FancyOStream &fos,
969  bool callFillCompleteOnResult = true,
970  bool doOptimizeStorage = true,
971  const std::string & label = std::string(),
972  const RCP<ParameterList>& params = null) {
973  return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
974  }
975 
976 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
977  // Michael Gee's MLMultiply
978  static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& epA,
979  const Epetra_CrsMatrix& epB,
980  Teuchos::FancyOStream& fos) {
981 #if defined(HAVE_XPETRA_ML_MMM) // Note: this is currently not supported
982  ML_Comm* comm;
983  ML_Comm_Create(&comm);
984  fos << "****** USING ML's MATRIX MATRIX MULTIPLY ******" << std::endl;
985 #ifdef HAVE_MPI
986  // ML_Comm uses MPI_COMM_WORLD, so try to use the same communicator as epA.
987  const Epetra_MpiComm * Mcomm = dynamic_cast<const Epetra_MpiComm*>(&(epA.Comm()));
988  if (Mcomm)
989  ML_Comm_Set_UsrComm(comm,Mcomm->GetMpiComm());
990 # endif
991  //in order to use ML, there must be no indices missing from the matrix column maps.
992  EpetraExt::CrsMatrix_SolverMap Atransform;
993  EpetraExt::CrsMatrix_SolverMap Btransform;
994  const Epetra_CrsMatrix& A = Atransform(const_cast<Epetra_CrsMatrix&>(epA));
995  const Epetra_CrsMatrix& B = Btransform(const_cast<Epetra_CrsMatrix&>(epB));
996 
997  if (!A.Filled()) throw Exceptions::RuntimeError("A has to be FillCompleted");
998  if (!B.Filled()) throw Exceptions::RuntimeError("B has to be FillCompleted");
999 
1000  // create ML operators from EpetraCrsMatrix
1001  ML_Operator* ml_As = ML_Operator_Create(comm);
1002  ML_Operator* ml_Bs = ML_Operator_Create(comm);
1003  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A),ml_As); // Should we test if the lightweight wrapper is actually used or if WrapEpetraCrsMatrix fall backs to the heavy one?
1004  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&B),ml_Bs);
1005  ML_Operator* ml_AtimesB = ML_Operator_Create(comm);
1006  {
1007  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("ML_2matmult kernel"));
1008  ML_2matmult(ml_As,ml_Bs,ml_AtimesB,ML_CSR_MATRIX); // do NOT use ML_EpetraCRS_MATRIX!!!
1009  }
1010  ML_Operator_Destroy(&ml_As);
1011  ML_Operator_Destroy(&ml_Bs);
1012 
1013  // For ml_AtimesB we have to reconstruct the column map in global indexing,
1014  // The following is going down to the salt-mines of ML ...
1015  // note: we use integers, since ML only works for Epetra...
1016  int N_local = ml_AtimesB->invec_leng;
1017  ML_CommInfoOP* getrow_comm = ml_AtimesB->getrow->pre_comm;
1018  if (!getrow_comm) throw(Exceptions::RuntimeError("ML_Operator does not have a CommInfo"));
1019  ML_Comm* comm_AB = ml_AtimesB->comm; // get comm object
1020  if (N_local != B.DomainMap().NumMyElements())
1021  throw(Exceptions::RuntimeError("Mismatch in local row dimension between ML and Epetra"));
1022  int N_rcvd = 0;
1023  int N_send = 0;
1024  int flag = 0;
1025  for (int i = 0; i < getrow_comm->N_neighbors; i++) {
1026  N_rcvd += (getrow_comm->neighbors)[i].N_rcv;
1027  N_send += (getrow_comm->neighbors)[i].N_send;
1028  if ( ((getrow_comm->neighbors)[i].N_rcv != 0) &&
1029  ((getrow_comm->neighbors)[i].rcv_list != NULL) ) flag = 1;
1030  }
1031  // For some unknown reason, ML likes to have stuff one larger than
1032  // neccessary...
1033  std::vector<double> dtemp(N_local + N_rcvd + 1); // "double" vector for comm function
1034  std::vector<int> cmap (N_local + N_rcvd + 1); // vector for gids
1035  for (int i = 0; i < N_local; ++i) {
1036  cmap[i] = B.DomainMap().GID(i);
1037  dtemp[i] = (double) cmap[i];
1038  }
1039  ML_cheap_exchange_bdry(&dtemp[0],getrow_comm,N_local,N_send,comm_AB); // do communication
1040  if (flag) { // process received data
1041  int count = N_local;
1042  const int neighbors = getrow_comm->N_neighbors;
1043  for (int i = 0; i < neighbors; i++) {
1044  const int nrcv = getrow_comm->neighbors[i].N_rcv;
1045  for (int j = 0; j < nrcv; j++)
1046  cmap[getrow_comm->neighbors[i].rcv_list[j]] = (int) dtemp[count++];
1047  }
1048  } else {
1049  for (int i = 0; i < N_local+N_rcvd; ++i)
1050  cmap[i] = (int)dtemp[i];
1051  }
1052  dtemp.clear(); // free double array
1053 
1054  // we can now determine a matching column map for the result
1055  Epetra_Map gcmap(-1, N_local+N_rcvd, &cmap[0], B.ColMap().IndexBase(), A.Comm());
1056 
1057  int allocated = 0;
1058  int rowlength;
1059  double* val = NULL;
1060  int* bindx = NULL;
1061 
1062  const int myrowlength = A.RowMap().NumMyElements();
1063  const Epetra_Map& rowmap = A.RowMap();
1064 
1065  // Determine the maximum bandwith for the result matrix.
1066  // replaces the old, very(!) memory-consuming guess:
1067  // int guessnpr = A.MaxNumEntries()*B.MaxNumEntries();
1068  int educatedguess = 0;
1069  for (int i = 0; i < myrowlength; ++i) {
1070  // get local row
1071  ML_get_matrix_row(ml_AtimesB, 1, &i, &allocated, &bindx, &val, &rowlength, 0);
1072  if (rowlength>educatedguess)
1073  educatedguess = rowlength;
1074  }
1075 
1076  // allocate our result matrix and fill it
1077  RCP<Epetra_CrsMatrix> result = rcp(new Epetra_CrsMatrix(::Copy, A.RangeMap(), gcmap, educatedguess, false));
1078 
1079  std::vector<int> gcid(educatedguess);
1080  for (int i = 0; i < myrowlength; ++i) {
1081  const int grid = rowmap.GID(i);
1082  // get local row
1083  ML_get_matrix_row(ml_AtimesB, 1, &i, &allocated, &bindx, &val, &rowlength, 0);
1084  if (!rowlength) continue;
1085  if ((int)gcid.size() < rowlength) gcid.resize(rowlength);
1086  for (int j = 0; j < rowlength; ++j) {
1087  gcid[j] = gcmap.GID(bindx[j]);
1088  if (gcid[j] < 0)
1089  throw Exceptions::RuntimeError("Error: cannot find gcid!");
1090  }
1091  int err = result->InsertGlobalValues(grid, rowlength, val, &gcid[0]);
1092  if (err != 0 && err != 1) {
1093  std::ostringstream errStr;
1094  errStr << "Epetra_CrsMatrix::InsertGlobalValues returned err=" << err;
1095  throw Exceptions::RuntimeError(errStr.str());
1096  }
1097  }
1098  // free memory
1099  if (bindx) ML_free(bindx);
1100  if (val) ML_free(val);
1101  ML_Operator_Destroy(&ml_AtimesB);
1102  ML_Comm_Destroy(&comm);
1103 
1104  return result;
1105 #else // no MUELU_ML
1106  TEUCHOS_TEST_FOR_EXCEPTION(true, Xpetra::Exceptions::RuntimeError,
1107  "No ML multiplication available. This feature is currently not supported by Xpetra.");
1108  return Teuchos::null;
1109 #endif
1110  }
1111 #endif //ifdef HAVE_XPETRA_EPETRAEXT
1112 
1123  static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
1124  const BlockedCrsMatrix& B, bool transposeB,
1125  Teuchos::FancyOStream& fos,
1126  bool doFillComplete = true,
1127  bool doOptimizeStorage = true) {
1128  TEUCHOS_TEST_FOR_EXCEPTION(transposeA || transposeB, Exceptions::RuntimeError,
1129  "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
1130 
1131  // Preconditions
1132  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1133  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1134 
1135  RCP<const MapExtractor> rgmapextractor = A.getRangeMapExtractor();
1136  RCP<const MapExtractor> domapextractor = B.getDomainMapExtractor();
1137 
1138  RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1139 
1140  for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
1141  for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
1142  RCP<Matrix> Cij;
1143 
1144  for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
1145  RCP<Matrix> crmat1 = A.getMatrix(i,l);
1146  RCP<Matrix> crmat2 = B.getMatrix(l,j);
1147 
1148  if (crmat1.is_null() || crmat2.is_null())
1149  continue;
1150  if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
1151  continue;
1152 
1153  RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
1154  RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
1155  TEUCHOS_TEST_FOR_EXCEPTION((crop1==Teuchos::null) != (crop2==Teuchos::null), Xpetra::Exceptions::RuntimeError, "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
1156 
1157  // temporary matrix containing result of local block multiplication
1158  RCP<Matrix> temp = Teuchos::null;
1159 
1160  if(crop1 != Teuchos::null && crop2 != Teuchos::null)
1161  temp = Multiply (*crop1, false, *crop2, false, fos);
1162  else {
1163  RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
1164  RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
1165  TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null()==true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1166  TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null()==true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1167  TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols()!=bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1168  TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap()))==false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1169 
1170  // recursive multiplication call
1171  temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
1172 
1173  RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
1174  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows()!=bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
1175  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols()!=bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
1176  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
1177  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
1178  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
1179  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
1180  }
1181 
1182  TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
1183 
1184  RCP<Matrix> addRes = null;
1185  if (Cij.is_null ())
1186  Cij = temp;
1187  else {
1188  MatrixMatrix::TwoMatrixAdd (*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
1189  Cij = addRes;
1190  }
1191  }
1192 
1193  if (!Cij.is_null()) {
1194  if (Cij->isFillComplete())
1195  Cij->resumeFill();
1196  Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
1197  C->setMatrix(i, j, Cij);
1198  } else {
1199  C->setMatrix(i, j, Teuchos::null);
1200  }
1201  }
1202  }
1203 
1204  if (doFillComplete)
1205  C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
1206 
1207  return C;
1208  } // TwoMatrixMultiplyBlock
1209 
1222  static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
1223  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRowMap()->isSameAs(*B.getRowMap())), Exceptions::Incompatible,
1224  "TwoMatrixAdd: matrix row maps are not the same.");
1225 
1226  if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
1227 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1228  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(A);
1229  Epetra_CrsMatrix& epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(B);
1230 
1231  //FIXME is there a bug if beta=0?
1232  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, beta);
1233 
1234  if (rv != 0)
1235  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value " + Teuchos::toString(rv));
1236  std::ostringstream buf;
1237 #else
1238  throw Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
1239 #endif
1240  } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
1241 #ifdef HAVE_XPETRA_TPETRA
1242 # if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
1243  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
1244  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=int enabled."));
1245 # else
1246  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
1247  Tpetra::CrsMatrix<SC,LO,GO,NO>& tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(B);
1248 
1249  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
1250 # endif
1251 #else
1252  throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
1253 #endif
1254  }
1255  } //MatrixMatrix::TwoMatrixAdd()
1256 
1257 
1270  static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
1271  const Matrix& B, bool transposeB, const SC& beta,
1272  RCP<Matrix>& C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow = false) {
1273  RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
1274  RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
1275  RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
1276  RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
1277 
1278  if(rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
1279 
1280  if (!(A.getRowMap()->isSameAs(*(B.getRowMap()))))
1281  throw Exceptions::Incompatible("TwoMatrixAdd: matrix row maps are not the same.");
1282 
1283  if (C == Teuchos::null) {
1284  size_t maxNzInA = 0;
1285  size_t maxNzInB = 0;
1286  size_t numLocalRows = 0;
1287  if (A.isFillComplete() && B.isFillComplete()) {
1288  maxNzInA = A.getGlobalMaxNumRowEntries();
1289  maxNzInB = B.getGlobalMaxNumRowEntries();
1290  numLocalRows = A.getNodeNumRows();
1291  }
1292 
1293  if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
1294  // first check if either A or B has at most 1 nonzero per row
1295  // the case of both having at most 1 nz per row is handled by the ``else''
1296  Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
1297 
1298  if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
1299  for (size_t i = 0; i < numLocalRows; ++i)
1300  exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
1301 
1302  } else {
1303  for (size_t i = 0; i < numLocalRows; ++i)
1304  exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
1305  }
1306 
1307  fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
1308  << ", using static profiling" << std::endl;
1309  C = rcp(new Xpetra::CrsMatrixWrap<SC,LO,GO,NO>(A.getRowMap(), exactNnzPerRow, Xpetra::StaticProfile));
1310 
1311  } else {
1312  // general case
1313  double nnzPerRowInA = Teuchos::as<double>(A.getGlobalNumEntries()) / A.getGlobalNumRows();
1314  double nnzPerRowInB = Teuchos::as<double>(B.getGlobalNumEntries()) / B.getGlobalNumRows();
1315  LO nnzToAllocate = Teuchos::as<LO>( (nnzPerRowInA + nnzPerRowInB) * 1.5) + Teuchos::as<LO>(1);
1316 
1317  LO maxPossible = A.getGlobalMaxNumRowEntries() + B.getGlobalMaxNumRowEntries();
1318  //Use static profiling (more efficient) if the estimate is at least as big as the max
1319  //possible nnz's in any single row of the result.
1320  Xpetra::ProfileType pft = (maxPossible) > nnzToAllocate ? Xpetra::DynamicProfile : Xpetra::StaticProfile;
1321 
1322  fos << "nnzPerRowInA = " << nnzPerRowInA << ", nnzPerRowInB = " << nnzPerRowInB << std::endl;
1323  fos << "MatrixMatrix::TwoMatrixAdd : space allocated per row = " << nnzToAllocate
1324  << ", max possible nnz per row in sum = " << maxPossible
1325  << ", using " << (pft == Xpetra::DynamicProfile ? "dynamic" : "static" ) << " profiling"
1326  << std::endl;
1327  C = rcp(new Xpetra::CrsMatrixWrap<SC,LO,GO,NO>(A.getRowMap(), nnzToAllocate, pft));
1328  }
1329  if (transposeB)
1330  fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
1331  }
1332 
1333  if (C->getRowMap()->lib() == Xpetra::UseEpetra) {
1334  #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1335  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(A);
1336  const Epetra_CrsMatrix& epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(B);
1337  RCP<Epetra_CrsMatrix> epC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(C);
1338  Epetra_CrsMatrix* ref2epC = &*epC; //to avoid a compiler error...
1339 
1340  //FIXME is there a bug if beta=0?
1341  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, transposeB, beta, ref2epC);
1342 
1343  if (rv != 0)
1344  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value of " + Teuchos::toString(rv));
1345  #else
1346  throw Exceptions::RuntimeError("MueLu must be compile with EpetraExt.");
1347  #endif
1348  } else if (C->getRowMap()->lib() == Xpetra::UseTpetra) {
1349  #ifdef HAVE_XPETRA_TPETRA
1350  # if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
1351  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
1352  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=int enabled."));
1353  # else
1354  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
1355  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(B);
1356  RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > tpC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(C);
1357 
1358  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, transposeB, beta, tpC);
1359  # endif
1360  #else
1361  throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
1362  #endif
1363  }
1364 
1366  if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
1367  if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
1369  }
1370  // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
1371  else if(rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
1372  RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
1373  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1374 
1375  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1376  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1377 
1378  size_t i = 0;
1379  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1380  RCP<Matrix> Cij = Teuchos::null;
1381  if(rcpA != Teuchos::null &&
1382  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
1383  // recursive call
1384  TwoMatrixAdd(*rcpA, transposeA, alpha,
1385  *(rcpBopB->getMatrix(i,j)), transposeB, beta,
1386  Cij, fos, AHasFixedNnzPerRow);
1387  } else if (rcpA == Teuchos::null &&
1388  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
1389  Cij = rcpBopB->getMatrix(i,j);
1390  } else if (rcpA != Teuchos::null &&
1391  rcpBopB->getMatrix(i,j)==Teuchos::null) {
1392  Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
1393  } else {
1394  Cij = Teuchos::null;
1395  }
1396 
1397  if (!Cij.is_null()) {
1398  if (Cij->isFillComplete())
1399  Cij->resumeFill();
1400  Cij->fillComplete();
1401  bC->setMatrix(i, j, Cij);
1402  } else {
1403  bC->setMatrix(i, j, Teuchos::null);
1404  }
1405  } // loop over columns j
1406  }
1407  // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
1408  else if(rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
1409  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
1410  RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
1411 
1412  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1413  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1414 
1415  size_t j = 0;
1416  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
1417  RCP<Matrix> Cij = Teuchos::null;
1418  if(rcpBopA->getMatrix(i,j)!=Teuchos::null &&
1419  rcpB!=Teuchos::null) {
1420  // recursive call
1421  TwoMatrixAdd(*(rcpBopA->getMatrix(i,j)), transposeA, alpha,
1422  *rcpB, transposeB, beta,
1423  Cij, fos, AHasFixedNnzPerRow);
1424  } else if (rcpBopA->getMatrix(i,j) == Teuchos::null &&
1425  rcpB!=Teuchos::null) {
1426  Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
1427  } else if (rcpBopA->getMatrix(i,j) != Teuchos::null &&
1428  rcpB==Teuchos::null) {
1429  Cij = rcpBopA->getMatrix(i,j);
1430  } else {
1431  Cij = Teuchos::null;
1432  }
1433 
1434  if (!Cij.is_null()) {
1435  if (Cij->isFillComplete())
1436  Cij->resumeFill();
1437  Cij->fillComplete();
1438  bC->setMatrix(i, j, Cij);
1439  } else {
1440  bC->setMatrix(i, j, Teuchos::null);
1441  }
1442  } // loop over rows i
1443  }
1444  else {
1445  // This is the version for blocked matrices
1446 
1447  // check the compatibility of the blocked operators
1448  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null()==true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
1449  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null()==true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
1450  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows()!=rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
1451  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows()!=rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
1452  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap()))==false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
1453  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap()))==false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
1454  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
1455  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
1456 
1457  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
1458  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1459 
1460  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1461  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1462 
1463  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
1464  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1465 
1466  RCP<Matrix> Cij = Teuchos::null;
1467  if(rcpBopA->getMatrix(i,j)!=Teuchos::null &&
1468  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
1469  // recursive call
1470 
1471  TwoMatrixAdd(*(rcpBopA->getMatrix(i,j)), transposeA, alpha,
1472  *(rcpBopB->getMatrix(i,j)), transposeB, beta,
1473  Cij, fos, AHasFixedNnzPerRow);
1474  } else if (rcpBopA->getMatrix(i,j) == Teuchos::null &&
1475  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
1476  Cij = rcpBopB->getMatrix(i,j);
1477  } else if (rcpBopA->getMatrix(i,j) != Teuchos::null &&
1478  rcpBopB->getMatrix(i,j)==Teuchos::null) {
1479  Cij = rcpBopA->getMatrix(i,j);
1480  } else {
1481  Cij = Teuchos::null;
1482  }
1483 
1484  if (!Cij.is_null()) {
1485  if (Cij->isFillComplete())
1486  Cij->resumeFill();
1487  //Cij->fillComplete(rcpBopA->getDomainMap(j), rcpBopA->getRangeMap(i));
1488  Cij->fillComplete();
1489  bC->setMatrix(i, j, Cij);
1490  } else {
1491  bC->setMatrix(i, j, Teuchos::null);
1492  }
1493  } // loop over columns j
1494  } // loop over rows i
1495 
1496  } // end blocked recursive algorithm
1497  } //MatrixMatrix::TwoMatrixAdd()
1498  }; // end specialization on SC=double, GO=int and NO=EpetraNode
1499 
1500  // specialization MatrixMatrix for SC=double, GO=long long and NO=EptraNode
1501  template<>
1502  class MatrixMatrix<double,int,long long,EpetraNode> {
1503  typedef double Scalar;
1504  typedef int LocalOrdinal;
1505  typedef long long GlobalOrdinal;
1506  typedef EpetraNode Node;
1507 #include "Xpetra_UseShortNames.hpp"
1508 
1509  public:
1510 
1535  static void Multiply(const Matrix& A, bool transposeA,
1536  const Matrix& B, bool transposeB,
1537  Matrix& C,
1538  bool call_FillComplete_on_result = true,
1539  bool doOptimizeStorage = true,
1540  const std::string & label = std::string(),
1541  const RCP<ParameterList>& params = null) {
1542  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
1543  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
1544  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
1545  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
1546 
1547 
1548  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Xpetra::Exceptions::RuntimeError, "A is not fill-completed");
1549  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Xpetra::Exceptions::RuntimeError, "B is not fill-completed");
1550 
1551  bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
1552 
1553  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
1554 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1555  Epetra_CrsMatrix & epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(A);
1556  Epetra_CrsMatrix & epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(B);
1557  Epetra_CrsMatrix & epC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(C);
1558 
1559  int i = EpetraExt::MatrixMatrix::Multiply(epA, transposeA, epB, transposeB, epC, haveMultiplyDoFillComplete);
1560  if (haveMultiplyDoFillComplete) {
1561  // Due to Epetra wrapper intricacies, we need to explicitly call
1562  // fillComplete on Xpetra matrix here. Specifically, EpetraCrsMatrix
1563  // only keeps an internal variable to check whether we are in resumed
1564  // state or not, but never touches the underlying Epetra object. As
1565  // such, we need to explicitly update the state of Xpetra matrix to
1566  // that of Epetra one afterwords
1567  C.fillComplete();
1568  }
1569 
1570  if (i != 0) {
1571  std::ostringstream buf;
1572  buf << i;
1573  std::string msg = "EpetraExt::MatrixMatrix::Multiply return value of " + buf.str();
1574  throw(Exceptions::RuntimeError(msg));
1575  }
1576 
1577 #else
1578  throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
1579 #endif
1580  } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
1581 #ifdef HAVE_XPETRA_TPETRA
1582 # if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1583  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1584  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra <double,int,long long, EpetraNode> ETI enabled."));
1585 # else
1586  const Tpetra::CrsMatrix<SC,LO,GO,NO> & tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
1587  const Tpetra::CrsMatrix<SC,LO,GO,NO> & tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(B);
1588  Tpetra::CrsMatrix<SC,LO,GO,NO> & tpC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(C);
1589 
1590  //18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
1591  //Previously, Tpetra's matrix matrix multiply did not support fillComplete.
1592  Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
1593 # endif
1594 #else
1595  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
1596 #endif
1597  }
1598 
1599  if(call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
1600  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
1601  fillParams->set("Optimize Storage",doOptimizeStorage);
1602  C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
1603  (transposeA) ? A.getDomainMap() : A.getRangeMap(),
1604  fillParams);
1605  }
1606 
1607  // transfer striding information
1608  RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
1609  RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
1610  C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
1611  } // end Multiply
1612 
1635  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
1636  const Matrix& B, bool transposeB,
1637  RCP<Matrix> C_in,
1638  Teuchos::FancyOStream& fos,
1639  bool doFillComplete = true,
1640  bool doOptimizeStorage = true,
1641  const std::string & label = std::string(),
1642  const RCP<ParameterList>& params = null) {
1643 
1644  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1645  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1646 
1647  // Default case: Xpetra Multiply
1648  RCP<Matrix> C = C_in;
1649 
1650  if (C == Teuchos::null) {
1651  double nnzPerRow = Teuchos::as<double>(0);
1652 
1653  if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
1654  // For now, follow what ML and Epetra do.
1655  GO numRowsA = A.getGlobalNumRows();
1656  GO numRowsB = B.getGlobalNumRows();
1657  nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
1658  sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
1659  nnzPerRow *= nnzPerRow;
1660  double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
1661  double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
1662  if (totalNnz < minNnz)
1663  totalNnz = minNnz;
1664  nnzPerRow = totalNnz / A.getGlobalNumRows();
1665 
1666  fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
1667  }
1668 
1669  if (transposeA) C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
1670  else C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
1671 
1672  } else {
1673  C->resumeFill(); // why this is not done inside of Tpetra MxM?
1674  fos << "Reuse C pattern" << std::endl;
1675  }
1676 
1677  Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
1678 
1679  return C;
1680  }
1681 
1692  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
1693  const Matrix& B, bool transposeB,
1694  Teuchos::FancyOStream &fos,
1695  bool callFillCompleteOnResult = true,
1696  bool doOptimizeStorage = true,
1697  const std::string & label = std::string(),
1698  const RCP<ParameterList>& params = null) {
1699  return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
1700  }
1701 
1702 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1703  // Michael Gee's MLMultiply
1704  static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& epA,
1705  const Epetra_CrsMatrix& epB,
1706  Teuchos::FancyOStream& fos) {
1707  TEUCHOS_TEST_FOR_EXCEPTION(true, Xpetra::Exceptions::RuntimeError,
1708  "No ML multiplication available. This feature is currently not supported by Xpetra.");
1709  return Teuchos::null;
1710  }
1711 #endif //ifdef HAVE_XPETRA_EPETRAEXT
1712 
1723  static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
1724  const BlockedCrsMatrix& B, bool transposeB,
1725  Teuchos::FancyOStream& fos,
1726  bool doFillComplete = true,
1727  bool doOptimizeStorage = true) {
1728  TEUCHOS_TEST_FOR_EXCEPTION(transposeA || transposeB, Exceptions::RuntimeError,
1729  "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
1730 
1731  // Preconditions
1732  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1733  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1734 
1735  RCP<const MapExtractor> rgmapextractor = A.getRangeMapExtractor();
1736  RCP<const MapExtractor> domapextractor = B.getDomainMapExtractor();
1737 
1738  RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1739 
1740  for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
1741  for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
1742  RCP<Matrix> Cij;
1743 
1744  for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
1745  RCP<Matrix> crmat1 = A.getMatrix(i,l);
1746  RCP<Matrix> crmat2 = B.getMatrix(l,j);
1747 
1748  if (crmat1.is_null() || crmat2.is_null())
1749  continue;
1750  if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
1751  continue;
1752 
1753  RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
1754  RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
1755  TEUCHOS_TEST_FOR_EXCEPTION((crop1==Teuchos::null) != (crop2==Teuchos::null), Xpetra::Exceptions::RuntimeError, "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
1756 
1757  // temporary matrix containing result of local block multiplication
1758  RCP<Matrix> temp = Teuchos::null;
1759 
1760  if(crop1 != Teuchos::null && crop2 != Teuchos::null)
1761  temp = Multiply (*crop1, false, *crop2, false, fos);
1762  else {
1763  RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
1764  RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
1765  TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null()==true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1766  TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null()==true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1767  TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols()!=bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1768  TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap()))==false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1769 
1770  // recursive multiplication call
1771  temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
1772 
1773  RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
1774  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows()!=bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
1775  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols()!=bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
1776  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
1777  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
1778  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
1779  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
1780  }
1781 
1782  TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
1783 
1784  RCP<Matrix> addRes = null;
1785  if (Cij.is_null ())
1786  Cij = temp;
1787  else {
1788  MatrixMatrix::TwoMatrixAdd (*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
1789  Cij = addRes;
1790  }
1791  }
1792 
1793  if (!Cij.is_null()) {
1794  if (Cij->isFillComplete())
1795  Cij->resumeFill();
1796  Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
1797  C->setMatrix(i, j, Cij);
1798  } else {
1799  C->setMatrix(i, j, Teuchos::null);
1800  }
1801  }
1802  }
1803 
1804  if (doFillComplete)
1805  C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
1806 
1807  return C;
1808  } // TwoMatrixMultiplyBlock
1809 
1822  static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
1823  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRowMap()->isSameAs(*(B.getRowMap()))), Exceptions::Incompatible,
1824  "TwoMatrixAdd: matrix row maps are not the same.");
1825 
1826  if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
1827 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1828  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(A);
1829  Epetra_CrsMatrix& epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(B);
1830 
1831  //FIXME is there a bug if beta=0?
1832  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, beta);
1833 
1834  if (rv != 0)
1835  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value " + Teuchos::toString(rv));
1836  std::ostringstream buf;
1837 #else
1838  throw Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
1839 #endif
1840  } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
1841 #ifdef HAVE_XPETRA_TPETRA
1842 # if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1843  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1844  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=long long enabled."));
1845 # else
1846  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
1847  Tpetra::CrsMatrix<SC,LO,GO,NO>& tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(B);
1848 
1849  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
1850 # endif
1851 #else
1852  throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
1853 #endif
1854  }
1855  } //MatrixMatrix::TwoMatrixAdd()
1856 
1857 
1870  static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
1871  const Matrix& B, bool transposeB, const SC& beta,
1872  RCP<Matrix>& C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow = false) {
1873  RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
1874  RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
1875  RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
1876  RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
1877 
1878  if(rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
1879  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRowMap()->isSameAs(*(B.getRowMap()))), Exceptions::Incompatible,
1880  "TwoMatrixAdd: matrix row maps are not the same.");
1881 
1882  if (C == Teuchos::null) {
1883  size_t maxNzInA = 0;
1884  size_t maxNzInB = 0;
1885  size_t numLocalRows = 0;
1886  if (A.isFillComplete() && B.isFillComplete()) {
1887  maxNzInA = A.getGlobalMaxNumRowEntries();
1888  maxNzInB = B.getGlobalMaxNumRowEntries();
1889  numLocalRows = A.getNodeNumRows();
1890  }
1891 
1892  if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
1893  // first check if either A or B has at most 1 nonzero per row
1894  // the case of both having at most 1 nz per row is handled by the ``else''
1895  Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
1896 
1897  if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
1898  for (size_t i = 0; i < numLocalRows; ++i)
1899  exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
1900 
1901  } else {
1902  for (size_t i = 0; i < numLocalRows; ++i)
1903  exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
1904  }
1905 
1906  fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
1907  << ", using static profiling" << std::endl;
1908  C = rcp(new Xpetra::CrsMatrixWrap<SC,LO,GO,NO>(A.getRowMap(), exactNnzPerRow, Xpetra::StaticProfile));
1909 
1910  } else {
1911  // general case
1912  double nnzPerRowInA = Teuchos::as<double>(A.getGlobalNumEntries()) / A.getGlobalNumRows();
1913  double nnzPerRowInB = Teuchos::as<double>(B.getGlobalNumEntries()) / B.getGlobalNumRows();
1914  LO nnzToAllocate = Teuchos::as<LO>( (nnzPerRowInA + nnzPerRowInB) * 1.5) + Teuchos::as<LO>(1);
1915 
1916  LO maxPossible = A.getGlobalMaxNumRowEntries() + B.getGlobalMaxNumRowEntries();
1917  //Use static profiling (more efficient) if the estimate is at least as big as the max
1918  //possible nnz's in any single row of the result.
1919  Xpetra::ProfileType pft = (maxPossible) > nnzToAllocate ? Xpetra::DynamicProfile : Xpetra::StaticProfile;
1920 
1921  fos << "nnzPerRowInA = " << nnzPerRowInA << ", nnzPerRowInB = " << nnzPerRowInB << std::endl;
1922  fos << "MatrixMatrix::TwoMatrixAdd : space allocated per row = " << nnzToAllocate
1923  << ", max possible nnz per row in sum = " << maxPossible
1924  << ", using " << (pft == Xpetra::DynamicProfile ? "dynamic" : "static" ) << " profiling"
1925  << std::endl;
1926  C = rcp(new Xpetra::CrsMatrixWrap<SC,LO,GO,NO>(A.getRowMap(), nnzToAllocate, pft));
1927  }
1928  if (transposeB)
1929  fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
1930  }
1931 
1932  if (C->getRowMap()->lib() == Xpetra::UseEpetra) {
1933  #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1934  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(A);
1935  const Epetra_CrsMatrix& epB = Xpetra::Helpers<SC,LO,GO,NO>::Op2EpetraCrs(B);
1936  RCP<Epetra_CrsMatrix> epC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstEpetraCrs(C);
1937  Epetra_CrsMatrix* ref2epC = &*epC; //to avoid a compiler error...
1938 
1939  //FIXME is there a bug if beta=0?
1940  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, transposeB, beta, ref2epC);
1941 
1942  if (rv != 0)
1943  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value of " + Teuchos::toString(rv));
1944  #else
1945  throw Exceptions::RuntimeError("MueLu must be compile with EpetraExt.");
1946  #endif
1947  } else if (C->getRowMap()->lib() == Xpetra::UseTpetra) {
1948  #ifdef HAVE_XPETRA_TPETRA
1949  # if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1950  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1951  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=long long enabled."));
1952  # else
1953  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpA = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(A);
1954  const Tpetra::CrsMatrix<SC,LO,GO,NO>& tpB = Xpetra::Helpers<SC,LO,GO,NO>::Op2TpetraCrs(B);
1955  RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > tpC = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(C);
1956 
1957  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, transposeB, beta, tpC);
1958  # endif
1959  #else
1960  throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
1961  #endif
1962  }
1963 
1965  if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
1966  if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
1968  }
1969  // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
1970  else if(rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
1971  RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
1972  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1973 
1974  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1975  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1976 
1977  size_t i = 0;
1978  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1979  RCP<Matrix> Cij = Teuchos::null;
1980  if(rcpA != Teuchos::null &&
1981  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
1982  // recursive call
1983  TwoMatrixAdd(*rcpA, transposeA, alpha,
1984  *(rcpBopB->getMatrix(i,j)), transposeB, beta,
1985  Cij, fos, AHasFixedNnzPerRow);
1986  } else if (rcpA == Teuchos::null &&
1987  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
1988  Cij = rcpBopB->getMatrix(i,j);
1989  } else if (rcpA != Teuchos::null &&
1990  rcpBopB->getMatrix(i,j)==Teuchos::null) {
1991  Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
1992  } else {
1993  Cij = Teuchos::null;
1994  }
1995 
1996  if (!Cij.is_null()) {
1997  if (Cij->isFillComplete())
1998  Cij->resumeFill();
1999  Cij->fillComplete();
2000  bC->setMatrix(i, j, Cij);
2001  } else {
2002  bC->setMatrix(i, j, Teuchos::null);
2003  }
2004  } // loop over columns j
2005  }
2006  // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
2007  else if(rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
2008  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
2009  RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
2010 
2011  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
2012  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
2013 
2014  size_t j = 0;
2015  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
2016  RCP<Matrix> Cij = Teuchos::null;
2017  if(rcpBopA->getMatrix(i,j)!=Teuchos::null &&
2018  rcpB!=Teuchos::null) {
2019  // recursive call
2020  TwoMatrixAdd(*(rcpBopA->getMatrix(i,j)), transposeA, alpha,
2021  *rcpB, transposeB, beta,
2022  Cij, fos, AHasFixedNnzPerRow);
2023  } else if (rcpBopA->getMatrix(i,j) == Teuchos::null &&
2024  rcpB!=Teuchos::null) {
2025  Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
2026  } else if (rcpBopA->getMatrix(i,j) != Teuchos::null &&
2027  rcpB==Teuchos::null) {
2028  Cij = rcpBopA->getMatrix(i,j);
2029  } else {
2030  Cij = Teuchos::null;
2031  }
2032 
2033  if (!Cij.is_null()) {
2034  if (Cij->isFillComplete())
2035  Cij->resumeFill();
2036  Cij->fillComplete();
2037  bC->setMatrix(i, j, Cij);
2038  } else {
2039  bC->setMatrix(i, j, Teuchos::null);
2040  }
2041  } // loop over rows i
2042  }
2043  else {
2044  // This is the version for blocked matrices
2045 
2046  // check the compatibility of the blocked operators
2047  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null()==true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
2048  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null()==true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
2049  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows()!=rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
2050  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows()!=rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
2051  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap()))==false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
2052  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap()))==false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
2053  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
2054  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
2055 
2056  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
2057  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
2058 
2059  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
2060  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
2061 
2062  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
2063  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
2064 
2065  RCP<Matrix> Cij = Teuchos::null;
2066  if(rcpBopA->getMatrix(i,j)!=Teuchos::null &&
2067  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
2068  // recursive call
2069 
2070  TwoMatrixAdd(*(rcpBopA->getMatrix(i,j)), transposeA, alpha,
2071  *(rcpBopB->getMatrix(i,j)), transposeB, beta,
2072  Cij, fos, AHasFixedNnzPerRow);
2073  } else if (rcpBopA->getMatrix(i,j) == Teuchos::null &&
2074  rcpBopB->getMatrix(i,j)!=Teuchos::null) {
2075  Cij = rcpBopB->getMatrix(i,j);
2076  } else if (rcpBopA->getMatrix(i,j) != Teuchos::null &&
2077  rcpBopB->getMatrix(i,j)==Teuchos::null) {
2078  Cij = rcpBopA->getMatrix(i,j);
2079  } else {
2080  Cij = Teuchos::null;
2081  }
2082 
2083  if (!Cij.is_null()) {
2084  if (Cij->isFillComplete())
2085  Cij->resumeFill();
2086  //Cij->fillComplete(rcpBopA->getDomainMap(j), rcpBopA->getRangeMap(i));
2087  Cij->fillComplete();
2088  bC->setMatrix(i, j, Cij);
2089  } else {
2090  bC->setMatrix(i, j, Teuchos::null);
2091  }
2092  } // loop over columns j
2093  } // loop over rows i
2094 
2095  } // end blocked recursive algorithm
2096  } //MatrixMatrix::TwoMatrixAdd()
2097  }; // end specialization on GO=long long and NO=EpetraNode
2098 
2099 #endif // HAVE_XPETRA_EPETRA
2100 
2101 } // end namespace Xpetra
2102 
2103 #define XPETRA_MATRIXMATRIX_SHORT
2104 
2105 #endif /* PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_HPP_ */
virtual size_t getNodeNumRows() const =0
Returns the number of matrix rows owned on the calling node.
std::string toString(Xpetra::UnderlyingLib lib)
Convert a Xpetra::UnderlyingLib to a std::string.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
virtual global_size_t getGlobalNumRows() const =0
Returns the number of global rows in this matrix.
virtual const RCP< const Map > & getRowMap() const
Returns the Map that describes the row distribution in this matrix.
bool isFillComplete() const
Returns true if fillComplete() has been called and the matrix is in compute mode. ...
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
GlobalOrdinal GO
static Epetra_CrsMatrix & Op2NonConstEpetraCrs(const Matrix &Op)
Xpetra utility class containing transformation routines between Xpetra::Matrix and Epetra/Tpetra obje...
Xpetra namespace
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
Exception throws to report errors in the internal logical of the program.
LocalOrdinal LO
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply "in-place".
virtual size_t getGlobalMaxNumRowEntries() const =0
Returns the maximum number of entries across all rows/columns on all nodes.
static const Epetra_CrsMatrix & Op2EpetraCrs(const Matrix &Op)
RCP< const MapExtractor > getRangeMapExtractor() const
Returns map extractor class for range map.
Exception indicating invalid cast attempted.
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
Signal that data entry is complete, specifying domain and range maps.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
virtual bool isFillComplete() const =0
Returns true if fillComplete() has been called and the matrix is in compute mode. ...
void CreateView(viewLabel_t viewLabel, const RCP< const Map > &rowMap, const RCP< const Map > &colMap)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
RCP< CrsMatrix > getCrsMatrix() const
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
static Tpetra::CrsMatrix< SC, LO, GO, NO > & Op2NonConstTpetraCrs(const Matrix &Op)
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
Returns the current number of entries on this node in the specified local row.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply "in-place".
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
static RCP< const Epetra_CrsMatrix > Op2EpetraCrs(RCP< Matrix > Op)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static RCP< Epetra_CrsMatrix > Op2NonConstEpetraCrs(RCP< Matrix > Op)
Exception throws to report incompatible objects (like maps).
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
RCP< const Map > getRangeMap() const
Returns the Map associated with the full range of this operator.
virtual global_size_t getGlobalNumEntries() const =0
Returns the global number of entries in this matrix.
Concrete implementation of Xpetra::Matrix.
Teuchos::RCP< Matrix > getMatrix(size_t r, size_t c) const
return block (r,c)
virtual Teuchos::RCP< const Map > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
bool IsView(const viewLabel_t viewLabel) const
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &epA, const Epetra_CrsMatrix &epB, Teuchos::FancyOStream &fos)
RCP< const MapExtractor > getDomainMapExtractor() const
Returns map extractor for domain map.
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Constructor specifying the number of non-zeros for all rows.
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply "in-place".
virtual size_t Rows() const
number of row blocks
static RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > Op2NonConstTpetraCrs(RCP< Matrix > Op)
virtual size_t Cols() const
number of column blocks
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
RCP< const Map > getDomainMap() const
Returns the Map associated with the full domain of this operator.
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &epA, const Epetra_CrsMatrix &epB, Teuchos::FancyOStream &fos)
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &epA, const Epetra_CrsMatrix &epB, Teuchos::FancyOStream &fos)
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
static const Tpetra::CrsMatrix< SC, LO, GO, NO > & Op2TpetraCrs(const Matrix &Op)
virtual Teuchos::RCP< const Map > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Xpetra-specific matrix class.
static RCP< const Tpetra::CrsMatrix< SC, LO, GO, NO > > Op2TpetraCrs(RCP< Matrix > Op)
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.