Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIX_DEF_HPP
43 #define TPETRA_CRSMATRIX_DEF_HPP
44 
52 
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_Import_Util.hpp"
55 #include "Tpetra_Import_Util2.hpp"
58 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
59 //#include "Tpetra_Details_gathervPrint.hpp" (from above header)
60 //#include "Tpetra_Util.hpp" // comes in from Tpetra_CrsGraph_decl.hpp
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Kokkos_Sparse_getDiagCopy.hpp"
63 #include <typeinfo>
64 
65 namespace Tpetra {
66 //
67 // Users must never rely on anything in the Details namespace.
68 //
69 namespace Details {
70 
80 template<class Scalar>
81 struct AbsMax {
83  Scalar operator() (const Scalar& x, const Scalar& y) {
84  typedef Teuchos::ScalarTraits<Scalar> STS;
85  return std::max (STS::magnitude (x), STS::magnitude (y));
86  }
87 };
88 
89 } // namespace Details
90 } // namespace Tpetra
91 
92 namespace Tpetra {
93 
// Constructor taking a row Map and a single per-row entry count.
//
// Builds a new crs_graph_type from (rowMap, maxNumEntriesPerRow, pftype,
// params), stores it in both myGraph_ and staticGraph_, and then calls
// resumeFill(params).  storageStatus_ is STORAGE_1D_UNPACKED when pftype is
// StaticProfile and STORAGE_2D otherwise; fillComplete_ starts out false.
// A std::exception thrown by the graph constructor is reported through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as std::runtime_error, with the
// original what() text appended.
//
// NOTE(review): frobNorm_ is initialized to -STM::one(); presumably a
// "not yet computed" marker -- confirm against the norm code.
// NOTE(review): the listing numbering skips 126 just before the closing
// brace, so one source line appears to be missing from this view.
94  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
95  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
96  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
97  size_t maxNumEntriesPerRow,
98  ProfileType pftype,
99  const Teuchos::RCP<Teuchos::ParameterList>& params) :
100  dist_object_type (rowMap),
101  storageStatus_ (pftype == StaticProfile ?
102  Details::STORAGE_1D_UNPACKED :
103  Details::STORAGE_2D),
104  fillComplete_ (false),
105  frobNorm_ (-STM::one ())
106  {
// Prefix used by the *_CLASS_FUNC exception macro below.
107  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, size_t, "
108  "ProfileType[, RCP<ParameterList>]): ";
109  Teuchos::RCP<crs_graph_type> graph;
110  try {
111  graph = Teuchos::rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
112  pftype, params));
113  }
114  catch (std::exception& e) {
115  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
116  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
117  "size_t, ProfileType[, RCP<ParameterList>]) threw an exception: "
118  << e.what ());
119  }
120  // myGraph_ not null means that the matrix owns the graph. That's
121  // different than the const CrsGraph constructor, where the matrix
122  // does _not_ own the graph.
123  myGraph_ = graph;
124  staticGraph_ = myGraph_;
125  resumeFill (params);
127  }
128 
// Constructor taking a row Map and a per-row array of entry counts.
//
// Same flow as the (rowMap, size_t) constructor, except that the graph is
// built from the NumEntriesPerRowToAlloc array: create a crs_graph_type,
// store it in both myGraph_ and staticGraph_, then call resumeFill(params).
// A std::exception from the graph constructor is reported as
// std::runtime_error with the original what() text appended.
//
// NOTE(review): listing lines 130 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) and 161 (just before the closing
// brace) are absent from this view.
129  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
131  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
132  const Teuchos::ArrayRCP<const size_t>& NumEntriesPerRowToAlloc,
133  ProfileType pftype,
134  const Teuchos::RCP<Teuchos::ParameterList>& params) :
135  dist_object_type (rowMap),
136  storageStatus_ (pftype == StaticProfile ?
137  Details::STORAGE_1D_UNPACKED :
138  Details::STORAGE_2D),
139  fillComplete_ (false),
140  frobNorm_ (-STM::one ())
141  {
// Prefix used by the *_CLASS_FUNC exception macro below.
142  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
143  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
144  Teuchos::RCP<crs_graph_type> graph;
145  try {
146  graph = Teuchos::rcp (new crs_graph_type (rowMap, NumEntriesPerRowToAlloc,
147  pftype, params));
148  }
149  catch (std::exception &e) {
150  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
151  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
152  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]) threw "
153  "an exception: " << e.what ());
154  }
155  // myGraph_ not null means that the matrix owns the graph. That's
156  // different than the const CrsGraph constructor, where the matrix
157  // does _not_ own the graph.
158  myGraph_ = graph;
159  staticGraph_ = graph;
160  resumeFill (params);
162  }
163 
// Constructor taking row and column Maps and a single per-row entry count.
//
// Builds a crs_graph_type from (rowMap, colMap, maxNumEntriesPerRow, pftype,
// params), stores it in both myGraph_ and staticGraph_, then calls
// resumeFill(params).  In HAVE_TPETRA_DEBUG builds it first verifies that
// both graph pointers are still null on entry.  A std::exception from the
// graph constructor is reported as std::runtime_error with the original
// what() text appended.
//
// NOTE(review): listing lines 165 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) and 211 (just before the closing
// brace) are absent from this view.
164  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
166  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
167  const Teuchos::RCP<const map_type>& colMap,
168  size_t maxNumEntriesPerRow,
169  ProfileType pftype,
170  const Teuchos::RCP<Teuchos::ParameterList>& params) :
171  dist_object_type (rowMap),
172  storageStatus_ (pftype == StaticProfile ?
173  Details::STORAGE_1D_UNPACKED :
174  Details::STORAGE_2D),
175  fillComplete_ (false),
176  frobNorm_ (-STM::one ())
177  {
// Prefix used by the *_CLASS_FUNC exception macro below.
178  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
179  "size_t, ProfileType[, RCP<ParameterList>]): ";
180 
181 #ifdef HAVE_TPETRA_DEBUG
182  // An artifact of debugging something a while back.
183  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
184  (! staticGraph_.is_null (), std::logic_error,
185  "staticGraph_ is not null at the beginning of the constructor. "
186  "Please report this bug to the Tpetra developers.");
187  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
188  (! myGraph_.is_null (), std::logic_error,
189  "myGraph_ is not null at the beginning of the constructor. "
190  "Please report this bug to the Tpetra developers.");
191 #endif // HAVE_TPETRA_DEBUG
192 
193  Teuchos::RCP<crs_graph_type> graph;
194  try {
195  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
196  maxNumEntriesPerRow,
197  pftype, params));
198  }
199  catch (std::exception &e) {
200  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
201  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
202  "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]) threw an "
203  "exception: " << e.what ());
204  }
205  // myGraph_ not null means that the matrix owns the graph. That's
206  // different than the const CrsGraph constructor, where the matrix
207  // does _not_ own the graph.
208  myGraph_ = graph;
209  staticGraph_ = myGraph_;
210  resumeFill (params);
212  }
213 
// Constructor taking row and column Maps and a per-row array of entry counts.
//
// Builds a crs_graph_type from (rowMap, colMap, numEntPerRow, pftype,
// params), stores it in both myGraph_ and staticGraph_, then calls
// resumeFill(params).  A std::exception from the graph constructor is
// reported as std::runtime_error with the original what() text appended.
//
// NOTE(review): listing lines 215 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) and 247 (just before the closing
// brace) are absent from this view.
214  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
216  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
217  const Teuchos::RCP<const map_type>& colMap,
218  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
219  ProfileType pftype,
220  const Teuchos::RCP<Teuchos::ParameterList>& params) :
221  dist_object_type (rowMap),
222  storageStatus_ (pftype == StaticProfile ?
223  Details::STORAGE_1D_UNPACKED :
224  Details::STORAGE_2D),
225  fillComplete_ (false),
226  frobNorm_ (-STM::one ())
227  {
// Prefix used by the *_CLASS_FUNC exception macro below.
228  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
229  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
230  Teuchos::RCP<crs_graph_type> graph;
231  try {
232  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, numEntPerRow,
233  pftype, params));
234  }
235  catch (std::exception &e) {
236  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
237  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
238  "RCP<const Map>, ArrayRCP<const size_t>, ProfileType[, "
239  "RCP<ParameterList>]) threw an exception: " << e.what ());
240  }
241  // myGraph_ not null means that the matrix owns the graph. That's
242  // different than the const CrsGraph constructor, where the matrix
243  // does _not_ own the graph.
244  myGraph_ = graph;
245  staticGraph_ = graph;
246  resumeFill (params);
248  }
249 
// Constructor taking an existing, fill-complete graph.
//
// Only staticGraph_ is set here (myGraph_ is not assigned), and the
// ParameterList argument is unused.  The constructor allocates a values
// array with one slot per entry of the graph's local graph and builds
// lclMatrix_ from it.
// Throws (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) std::runtime_error if
// the graph is null or not fill complete.
//
// NOTE(review): the initializer list evaluates graph->getRowMap() before
// the is_null() check in the body runs, so a null graph is dereferenced
// before that check can fire -- confirm whether RCP's own checks cover this.
// NOTE(review): listing lines 251 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) and 291 (just before the closing
// brace) are absent from this view.
250  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
252  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
253  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
254  dist_object_type (graph->getRowMap ()),
255  staticGraph_ (graph),
256  storageStatus_ (Details::STORAGE_1D_PACKED),
257  fillComplete_ (false),
258  frobNorm_ (-STM::one ())
259  {
260  typedef typename local_matrix_type::values_type values_type;
261  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>[, "
262  "RCP<ParameterList>]): ";
263  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
264  (graph.is_null (), std::runtime_error, "Input graph is null.");
265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
266  (! graph->isFillComplete (), std::runtime_error, "Input graph is not "
267  "fill complete. You must call fillComplete on the graph before using "
268  "it to construct a CrsMatrix. Note that calling resumeFill on the "
269  "graph makes it not fill complete, even if you had previously called "
270  "fillComplete. In that case, you must call fillComplete on the graph "
271  "again.");
272 
273  // The graph is fill complete, so it is locally indexed and has a
274  // fixed structure. This means we can allocate the (1-D) array of
275  // values and build the local matrix right now. Note that the
276  // local matrix's number of columns comes from the column Map, not
277  // the domain Map.
278 
279  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
280  auto lclGraph = graph->getLocalGraph ();
// One value slot per stored column index of the local graph.
281  const size_t numEnt = lclGraph.entries.dimension_0 ();
282  values_type val ("Tpetra::CrsMatrix::val", numEnt);
283 
284  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
285  numCols, val, lclGraph);
286  // FIXME (22 Jun 2016) I would very much like to get rid of
287  // k_values1D_ at some point. I find it confusing to have all
288  // these extra references lying around.
289  this->k_values1D_ = this->lclMatrix_.values;
290 
292  }
293 
// Constructor taking row and column Maps plus the three CSR arrays as
// Kokkos Views (row offsets, column indices, values).
//
// Steps, in order:
//   1. Check that values and columnIndices have the same length
//      (std::invalid_argument otherwise).
//   2. Debug builds only: copy the last row offset to host and check that
//      it matches the lengths of columnIndices and values.
//   3. Build a crs_graph_type from (rowMap, colMap, rowPointers,
//      columnIndices, params); a std::exception from that constructor is
//      reported as std::runtime_error.
//   4. Check that the graph's local graph has the expected dimensions
//      (std::logic_error otherwise).
//   5. Store the graph in myGraph_ and staticGraph_, build lclMatrix_ from
//      `values` and the local graph, and alias k_values1D_ to its values.
//
// NOTE(review): listing lines 295 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) and 399 (just before the closing
// brace) are absent from this view.
294  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
296  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
297  const Teuchos::RCP<const map_type>& colMap,
298  const typename local_matrix_type::row_map_type& rowPointers,
299  const typename local_graph_type::entries_type::non_const_type& columnIndices,
300  const typename local_matrix_type::values_type& values,
301  const Teuchos::RCP<Teuchos::ParameterList>& params) :
302  dist_object_type (rowMap),
303  storageStatus_ (Details::STORAGE_1D_PACKED),
304  fillComplete_ (false),
305  frobNorm_ (-STM::one ())
306  {
307  using Teuchos::RCP;
308  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
309  "RCP<const Map>, ptr, ind, val[, params]): ";
// Appended to the std::logic_error messages below (internal errors).
310  const char suffix[] = ". Please report this bug to the Tpetra developers.";
311 
312  // Check the user's input. Note that this might throw only on
313  // some processes but not others, causing deadlock. We prefer
314  // deadlock due to exceptions to segfaults, because users can
315  // catch exceptions.
316  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
317  (values.dimension_0 () != columnIndices.dimension_0 (),
318  std::invalid_argument, "Input arrays don't have matching dimensions. "
319  "values.dimension_0() = " << values.dimension_0 () << " != "
320  "columnIndices.dimension_0() = " << columnIndices.dimension_0 () << ".");
321 #ifdef HAVE_TPETRA_DEBUG
322  if (rowPointers.dimension_0 () != 0) {
323  using Kokkos::subview;
324  // Don't assume UVM. Use "0-D" mirror views to get the last
325  // entry. Only do this in a debug build because it requires an
326  // extra device-to-host copy.
327  auto ptr_last_d = subview (rowPointers, rowPointers.dimension_0 () - 1);
328  auto ptr_last_h = Kokkos::create_mirror_view (ptr_last_d);
329  Kokkos::deep_copy (ptr_last_h, ptr_last_d);
330  const size_t numEnt = static_cast<size_t> (ptr_last_h ());
331  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
332  (numEnt != static_cast<size_t> (columnIndices.dimension_0 ()) ||
333  numEnt != static_cast<size_t> (values.dimension_0 ()),
334  std::invalid_argument, "Last entry of rowPointers says that the matrix"
335  " has " << numEnt << " entr" << (numEnt != 1 ? "ies" : "y") << ", but "
336  "the dimensions of columnIndices and values don't match this. "
337  "columnIndices.dimension_0() = " << columnIndices.dimension_0 () <<
338  " and values.dimension_0() = " << values.dimension_0 () << ".");
339  }
340 #endif // HAVE_TPETRA_DEBUG
341 
342  RCP<crs_graph_type> graph;
343  try {
344  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, rowPointers,
345  columnIndices, params));
346  }
347  catch (std::exception& e) {
348  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
349  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
350  "RCP<const Map>, ptr, ind[, params]) threw an exception: "
351  << e.what ());
352  }
353  // The newly created CrsGraph _must_ have a local graph at this
354  // point. We don't really care whether CrsGraph's constructor
355  // deep-copies or shallow-copies the input, but the dimensions
356  // have to be right. That's how we tell whether the CrsGraph has
357  // a local graph.
358  auto lclGraph = graph->getLocalGraph ();
359  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
360  (lclGraph.row_map.dimension_0 () != rowPointers.dimension_0 () ||
361  lclGraph.entries.dimension_0 () != columnIndices.dimension_0 (),
362  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
363  "ind[, params]) did not set the local graph correctly." << suffix);
364  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
365  (lclGraph.entries.dimension_0 () != values.dimension_0 (),
366  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
367  "params]) did not set the local graph correctly. "
368  "lclGraph.entries.dimension_0() = " << lclGraph.entries.dimension_0 ()
369  << " != values.dimension_0() = " << values.dimension_0 () << suffix);
370 
371  // myGraph_ not null means that the matrix owns the graph. This
372  // is true because the column indices come in as nonconst,
373  // implying shared ownership.
374  myGraph_ = graph;
375  staticGraph_ = graph;
376 
377  // The graph may not be fill complete yet. However, it is locally
378  // indexed (since we have a column Map) and has a fixed structure
379  // (due to the input arrays). This means we can allocate the
380  // (1-D) array of values and build the local matrix right now.
381  // Note that the local matrix's number of columns comes from the
382  // column Map, not the domain Map.
383 
384  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
385  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
386  numCols, values, lclGraph);
387  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
388  (lclMatrix_.values.dimension_0 () != values.dimension_0 (),
389  std::logic_error, "Local matrix's constructor did not set the values "
390  "correctly. lclMatrix_.values.dimension_0() = " <<
391  lclMatrix_.values.dimension_0 () << " != values.dimension_0() = " <<
392  values.dimension_0 () << suffix);
393 
394  // FIXME (22 Jun 2016) I would very much like to get rid of
395  // k_values1D_ at some point. I find it confusing to have all
396  // these extra references lying around.
397  this->k_values1D_ = this->lclMatrix_.values;
398 
400  }
401 
// Constructor taking row and column Maps plus the three CSR arrays as
// Teuchos::ArrayRCP (row offsets, column indices, values).
//
// Builds a crs_graph_type from (rowMap, colMap, ptr, ind, params), stores
// it in myGraph_ and staticGraph_, checks that the graph's local graph has
// the same dimensions as ptr and ind (std::logic_error otherwise), and then
// builds lclMatrix_ from a Kokkos View produced by getKokkosViewDeepCopy
// from `val`.  A std::exception from the graph constructor is reported as
// std::runtime_error with the original what() text appended.
//
// NOTE(review): unlike the Kokkos::View overload, this one does not compare
// val.size() against ind.size() -- confirm whether that is intentional.
// NOTE(review): listing lines 403 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) and 468 (just before the closing
// brace) are absent from this view.
402  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
404  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
405  const Teuchos::RCP<const map_type>& colMap,
406  const Teuchos::ArrayRCP<size_t>& ptr,
407  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
408  const Teuchos::ArrayRCP<Scalar>& val,
409  const Teuchos::RCP<Teuchos::ParameterList>& params) :
410  dist_object_type (rowMap),
411  storageStatus_ (Details::STORAGE_1D_PACKED),
412  fillComplete_ (false),
413  frobNorm_ (-STM::one ())
414  {
415  using Kokkos::Compat::getKokkosViewDeepCopy;
416  using Teuchos::av_reinterpret_cast;
417  using Teuchos::RCP;
418  typedef typename local_matrix_type::values_type values_type;
419  typedef impl_scalar_type IST;
420  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
421  "RCP<const Map>, ptr, ind, val[, params]): ";
422 
423  RCP<crs_graph_type> graph;
424  try {
425  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, ptr,
426  ind, params));
427  }
428  catch (std::exception& e) {
429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
430  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
431  "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
432  "RCP<ParameterList>]) threw an exception: " << e.what ());
433  }
434  // myGraph_ not null means that the matrix owns the graph. This
435  // is true because the column indices come in as nonconst,
436  // implying shared ownership.
437  myGraph_ = graph;
438  staticGraph_ = graph;
439 
440  // The graph may not be fill complete yet. However, it is locally
441  // indexed (since we have a column Map) and has a fixed structure
442  // (due to the input arrays). This means we can allocate the
443  // (1-D) array of values and build the local matrix right now.
444  // Note that the local matrix's number of columns comes from the
445  // column Map, not the domain Map.
446 
447  // The graph _must_ have a local graph at this point. We don't
448  // really care whether CrsGraph's constructor deep-copies or
449  // shallow-copies the input, but the dimensions have to be right.
450  // That's how we tell whether the CrsGraph has a local graph.
451  auto lclGraph = staticGraph_->getLocalGraph ();
452  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
453  (static_cast<size_t> (lclGraph.row_map.dimension_0 ()) != static_cast<size_t> (ptr.size ()) ||
454  static_cast<size_t> (lclGraph.entries.dimension_0 ()) != static_cast<size_t> (ind.size ()),
455  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
456  "ind[, params]) did not set the local graph correctly. Please "
457  "report this bug to the Tpetra developers.");
458 
459  const size_t numCols = staticGraph_->getColMap ()->getNodeNumElements ();
// View `val` as impl_scalar_type and copy it into a Kokkos View.
460  values_type valIn = getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
461  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
462  numCols, valIn, lclGraph);
463  // FIXME (22 Jun 2016) I would very much like to get rid of
464  // k_values1D_ at some point. I find it confusing to have all
465  // these extra references lying around.
466  this->k_values1D_ = this->lclMatrix_.values;
467 
469  }
470 
// Constructor taking row and column Maps and an already-built local matrix.
//
// lclMatrix_ and k_values1D_ are initialized directly from lclMatrix, and
// fillComplete_ starts out true (the only constructor in this file that
// does so).  The graph is built from (rowMap, colMap, lclMatrix.graph,
// params) and stored in myGraph_ and staticGraph_; it must come back fill
// complete, otherwise std::logic_error is reported.  A std::exception from
// the graph constructor is reported as std::runtime_error.
//
// NOTE(review): the debug-build messages below say "at the end of
// fillComplete()" although this is a constructor; the wording looks copied
// from fillComplete().
// NOTE(review): listing lines 472 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95), 507 (after the graph assignments) and
// 518 (just before the closing brace) are absent from this view.
471  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
473  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
474  const Teuchos::RCP<const map_type>& colMap,
475  const local_matrix_type& lclMatrix,
476  const Teuchos::RCP<Teuchos::ParameterList>& params) :
477  dist_object_type (rowMap),
478  lclMatrix_ (lclMatrix),
479  k_values1D_ (lclMatrix.values),
480  storageStatus_ (Details::STORAGE_1D_PACKED),
481  fillComplete_ (true),
482  frobNorm_ (-STM::one ())
483  {
484  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
485  "RCP<const Map>, local_matrix_type[, RCP<ParameterList>]): ";
486  Teuchos::RCP<crs_graph_type> graph;
487  try {
488  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
489  lclMatrix.graph, params));
490  }
491  catch (std::exception& e) {
492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
493  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
494  "RCP<const Map>, local_graph_type[, RCP<ParameterList>]) threw an "
495  "exception: " << e.what ());
496  }
497  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
498  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
499  "<const Map>, RCP<const Map>, local_graph_type[, RCP<ParameterList>]) "
500  "did not produce a fill-complete graph. Please report this bug to the "
501  "Tpetra developers.");
502  // myGraph_ not null means that the matrix owns the graph. This
503  // is true because the column indices come in as nonconst through
504  // the matrix, implying shared ownership.
505  myGraph_ = graph;
506  staticGraph_ = graph;
508 
509  // Sanity checks at the end.
510 #ifdef HAVE_TPETRA_DEBUG
511  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
512  "We're at the end of fillComplete(), but isFillActive() is true. "
513  "Please report this bug to the Tpetra developers.");
514  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
515  "We're at the end of fillComplete(), but isFillComplete() is false. "
516  "Please report this bug to the Tpetra developers.");
517 #endif // HAVE_TPETRA_DEBUG
519  }
520 
// Member definition with an empty body.
// NOTE(review): listing lines 522-523, which would carry the signature, are
// absent from this view.  Presumably this is the destructor -- confirm
// against the original source.
521  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
524  {}
525 
// Returns the communicator reported by the graph from getCrsGraph().
// NOTE(review): listing line 528 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
526  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
527  Teuchos::RCP<const Teuchos::Comm<int> >
529  getComm () const {
530  return getCrsGraph ()->getComm ();
531  }
532 
// Returns the Node instance reported by the graph from getCrsGraph().
// NOTE(review): listing line 535 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
533  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
534  Teuchos::RCP<Node>
536  getNode () const {
537  return getCrsGraph ()->getNode ();
538  }
539 
// Returns the profile type reported by the graph from getCrsGraph().
// NOTE(review): listing lines 541-542 (presumably the return type and the
// `CrsMatrix<...>::` qualifier) are absent from this view.
540  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
543  getProfileType () const {
544  return getCrsGraph ()->getProfileType ();
545  }
546 
// Returns the matrix's own fillComplete_ flag (not the graph's state).
// NOTE(review): listing line 549 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
547  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
548  bool
550  isFillComplete () const {
551  return fillComplete_;
552  }
553 
// Returns the logical negation of fillComplete_, so it is always the exact
// opposite of isFillComplete().
// NOTE(review): listing line 556 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
554  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
555  bool
557  isFillActive () const {
558  return ! fillComplete_;
559  }
560 
// Forwards to isStorageOptimized() on the graph from getCrsGraph().
// NOTE(review): listing lines 563-564 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// isStorageOptimized -- confirm against the original source.
561  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
562  bool
565  return getCrsGraph()->isStorageOptimized();
566  }
567 
// Forwards to isLocallyIndexed() on the graph from getCrsGraph().
// NOTE(review): listing lines 570-571 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// isLocallyIndexed -- confirm against the original source.
568  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
569  bool
572  return getCrsGraph ()->isLocallyIndexed ();
573  }
574 
// Forwards to isGloballyIndexed() on the graph from getCrsGraph().
// NOTE(review): listing lines 577-578 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// isGloballyIndexed -- confirm against the original source.
575  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
576  bool
579  return getCrsGraph ()->isGloballyIndexed ();
580  }
581 
// Forwards to hasColMap() on the graph from getCrsGraph().
// NOTE(review): listing line 584 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
582  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
583  bool
585  hasColMap () const {
586  return getCrsGraph ()->hasColMap ();
587  }
588 
// Forwards to getGlobalNumEntries() on the graph from getCrsGraph().
// NOTE(review): listing lines 590-592 (return type, qualifier and
// function-name line) are absent from this view; the member is presumably
// named getGlobalNumEntries -- confirm against the original source.
589  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
593  return getCrsGraph ()->getGlobalNumEntries ();
594  }
595 
// Forwards to getNodeNumEntries() on the graph from getCrsGraph().
// NOTE(review): listing lines 598-599 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// getNodeNumEntries -- confirm against the original source.
596  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
597  size_t
600  return getCrsGraph ()->getNodeNumEntries ();
601  }
602 
// Forwards to getGlobalNumRows() on the graph from getCrsGraph().
// NOTE(review): listing lines 604-606 (return type, qualifier and
// function-name line) are absent from this view; the member is presumably
// named getGlobalNumRows -- confirm against the original source.
603  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
607  return getCrsGraph ()->getGlobalNumRows ();
608  }
609 
// Forwards to getGlobalNumCols() on the graph from getCrsGraph().
// NOTE(review): listing lines 611-613 (return type, qualifier and
// function-name line) are absent from this view; the member is presumably
// named getGlobalNumCols -- confirm against the original source.
610  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
614  return getCrsGraph ()->getGlobalNumCols ();
615  }
616 
// Forwards to getNodeNumRows() on the graph from getCrsGraph().
// NOTE(review): listing line 619 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
617  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
618  size_t
620  getNodeNumRows () const {
621  return getCrsGraph ()->getNodeNumRows ();
622  }
623 
// Forwards to getNodeNumCols() on the graph from getCrsGraph().
// NOTE(review): listing line 626 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
624  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
625  size_t
627  getNodeNumCols () const {
628  return getCrsGraph ()->getNodeNumCols ();
629  }
630 
// Forwards to getGlobalNumDiags() on the graph from getCrsGraph().
// NOTE(review): listing lines 632-634 (return type, qualifier and
// function-name line) are absent from this view; the member is presumably
// named getGlobalNumDiags -- confirm against the original source.
631  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
635  return getCrsGraph ()->getGlobalNumDiags ();
636  }
637 
// Forwards to getNodeNumDiags() on the graph from getCrsGraph().
// NOTE(review): listing lines 640-641 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// getNodeNumDiags -- confirm against the original source.
638  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
639  size_t
642  return getCrsGraph ()->getNodeNumDiags ();
643  }
644 
// Forwards globalRow to getNumEntriesInGlobalRow() on the graph from
// getCrsGraph() and returns its result unchanged.
// NOTE(review): listing line 647 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
645  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
646  size_t
648  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
649  return getCrsGraph ()->getNumEntriesInGlobalRow (globalRow);
650  }
651 
// Forwards localRow to getNumEntriesInLocalRow() on the graph from
// getCrsGraph() and returns its result unchanged.
// NOTE(review): listing line 654 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
652  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
653  size_t
655  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
656  return getCrsGraph ()->getNumEntriesInLocalRow (localRow);
657  }
658 
// Forwards to getGlobalMaxNumRowEntries() on the graph from getCrsGraph().
// NOTE(review): listing lines 661-662 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// getGlobalMaxNumRowEntries -- confirm against the original source.
659  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
660  size_t
663  return getCrsGraph ()->getGlobalMaxNumRowEntries ();
664  }
665 
// Forwards to getNodeMaxNumRowEntries() on the graph from getCrsGraph().
// NOTE(review): listing lines 668-669 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// getNodeMaxNumRowEntries -- confirm against the original source.
666  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
667  size_t
670  return getCrsGraph ()->getNodeMaxNumRowEntries ();
671  }
672 
// Returns the index base of the row Map (getRowMap()->getIndexBase()).
// NOTE(review): listing line 675 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
673  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
674  GlobalOrdinal
676  getIndexBase () const {
677  return getRowMap ()->getIndexBase ();
678  }
679 
// Returns the row Map held by the graph from getCrsGraph().
// NOTE(review): listing line 682 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
680  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
681  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
683  getRowMap () const {
684  return getCrsGraph ()->getRowMap ();
685  }
686 
// Returns the column Map held by the graph from getCrsGraph().
// NOTE(review): listing line 689 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
687  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
688  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
690  getColMap () const {
691  return getCrsGraph ()->getColMap ();
692  }
693 
// Returns the domain Map held by the graph from getCrsGraph().
// NOTE(review): listing line 696 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
694  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
695  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
697  getDomainMap () const {
698  return getCrsGraph ()->getDomainMap ();
699  }
700 
// Returns the range Map held by the graph from getCrsGraph().
// NOTE(review): listing line 703 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
701  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
702  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
704  getRangeMap () const {
705  return getCrsGraph()->getRangeMap();
706  }
707 
// Returns the matrix's graph as a RowGraph pointer: staticGraph_ when it is
// non-null, otherwise myGraph_ (which may itself be null).
// NOTE(review): listing line 710 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
708  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
709  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
711  getGraph () const {
712  if (staticGraph_ != Teuchos::null) {
713  return staticGraph_;
714  }
715  return myGraph_;
716  }
717 
// Returns the matrix's graph as a CrsGraph pointer: staticGraph_ when it is
// non-null, otherwise myGraph_ (which may itself be null).  Most accessors
// in this file dereference the result without a null check.
// NOTE(review): listing line 720 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
718  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
719  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic> >
721  getCrsGraph () const {
722  if (staticGraph_ != Teuchos::null) {
723  return staticGraph_;
724  }
725  return myGraph_;
726  }
727 
// Forwards to isLowerTriangular() on the graph from getCrsGraph().
// NOTE(review): listing lines 730-731 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// isLowerTriangular -- confirm against the original source.
728  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
729  bool
732  return getCrsGraph ()->isLowerTriangular ();
733  }
734 
// Forwards to isUpperTriangular() on the graph from getCrsGraph().
// NOTE(review): listing lines 737-738 (qualifier and function-name line)
// are absent from this view; the member is presumably named
// isUpperTriangular -- confirm against the original source.
735  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
736  bool
739  return getCrsGraph ()->isUpperTriangular ();
740  }
741 
// Returns true exactly when myGraph_ is null.  Per the constructor comments
// in this file, a non-null myGraph_ means the matrix owns its graph, so a
// true result corresponds to a graph the matrix does not own.
// NOTE(review): listing line 744 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
742  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
743  bool
745  isStaticGraph () const {
746  return myGraph_.is_null ();
747  }
748 
// Boolean member that unconditionally returns true.
// NOTE(review): listing lines 751-752 (qualifier and function-name line)
// are absent from this view, so the member's name cannot be determined
// from here -- confirm against the original source.
749  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
750  bool
753  return true;
754  }
755 
// Boolean member that unconditionally returns true.
// NOTE(review): listing lines 758-759 (qualifier and function-name line)
// are absent from this view, so the member's name cannot be determined
// from here -- confirm against the original source.
756  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
757  bool
760  return true;
761  }
762 
// Allocates storage for the matrix's values, first allocating the graph's
// indices if the caller says they are not allocated yet.
//
// Parameters:
//   lg  - passed through to myGraph_->allocateIndices() when the graph's
//         indices still need to be allocated.
//   gas - the caller's assertion about whether the graph's indices are
//         already allocated (GraphAlreadyAllocated / GraphNotYetAllocated).
//
// Behavior:
//   - Debug builds check that `gas` agrees with
//     staticGraph_->indicesAreAllocated(), and that an unallocated graph is
//     owned by the matrix (myGraph_ non-null); std::logic_error otherwise.
//   - StaticProfile: k_values1D_ is allocated with as many entries as the
//     last row offset of the graph (std::logic_error if the offsets array
//     does not have lclNumRows+1 entries).
//   - Otherwise: values2D_ is obtained from staticGraph_->allocateValues2D.
//
// NOTE(review): when gas == GraphNotYetAllocated, myGraph_ is dereferenced
// without a null check in non-debug builds.
// NOTE(review): listing line 765 (presumably the `CrsMatrix<...>::`
// qualifier, as on listing line 95) is absent from this view.
763  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
764  void
766  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas)
767  {
768 #ifdef HAVE_TPETRA_DEBUG
769  // If the graph indices are already allocated, then gas should be
770  // GraphAlreadyAllocated. Otherwise, gas should be
771  // GraphNotYetAllocated.
772  if ((gas == GraphAlreadyAllocated) != staticGraph_->indicesAreAllocated()) {
// The three fragments are combined below, with "not " inserted in the
// position that matches which side of the mismatch occurred.
773  const std::string err1 ("allocateValues: The caller has asserted that "
774  "the graph is ");
775  const std::string err2 ("already allocated, but the static graph says "
776  "that its indices are ");
777  const std::string err3 ("already allocated. Please report this bug to "
778  "the Tpetra developers.");
779  TEUCHOS_TEST_FOR_EXCEPTION(gas == GraphAlreadyAllocated && ! staticGraph_->indicesAreAllocated(),
780  std::logic_error, err1 << err2 << "not " << err3);
781  TEUCHOS_TEST_FOR_EXCEPTION(gas != GraphAlreadyAllocated && staticGraph_->indicesAreAllocated(),
782  std::logic_error, err1 << "not " << err2 << err3);
783  }
784 
785  // If the graph is unallocated, then it had better be a
786  // matrix-owned graph. ("Matrix-owned graph" means that the
787  // matrix gets to define the graph structure. If the CrsMatrix
788  // constructor that takes an RCP<const CrsGraph> was used, then
789  // the matrix does _not_ own the graph.)
790  TEUCHOS_TEST_FOR_EXCEPTION(
791  ! staticGraph_->indicesAreAllocated() && myGraph_.is_null(),
792  std::logic_error,
793  "allocateValues: The static graph says that its indices are not "
794  "allocated, but the graph is not owned by the matrix. Please report "
795  "this bug to the Tpetra developers.");
796 #endif // HAVE_TPETRA_DEBUG
797 
798  if (gas == GraphNotYetAllocated) {
799  myGraph_->allocateIndices (lg);
800  }
801 
802  // Allocate matrix values.
803  if (getProfileType () == StaticProfile) {
804  // "Static profile" means that the number of matrix entries in
805  // each row was fixed at the time the CrsMatrix constructor was
806  // called. This lets us use 1-D storage for the matrix's
807  // values. ("1-D storage" means the same as that used by the
808  // three arrays in the classic compressed sparse row format.)
809 
810  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
811  typename Graph::local_graph_type::row_map_type k_ptrs =
812  staticGraph_->k_rowPtrs_;
813  TEUCHOS_TEST_FOR_EXCEPTION(
814  k_ptrs.dimension_0 () != lclNumRows+1, std::logic_error,
815  "Tpetra::CrsMatrix::allocateValues: With StaticProfile, row offsets "
816  "array has length " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
817  << (lclNumRows+1) << ".");
818 
819  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
820  // k_ptrs(lclNumRows), then copy that entry of k_ptrs to host
821  // first. We can do this with "0-D" subviews.
822  auto k_ptrs_ent_d = Kokkos::subview (k_ptrs, lclNumRows);
// NOTE(review): create_mirror_view is unqualified here, whereas the
// Kokkos::View constructor earlier in this file writes
// Kokkos::create_mirror_view; presumably found by argument-dependent lookup.
823  auto k_ptrs_ent_h = create_mirror_view (k_ptrs_ent_d);
824  Kokkos::deep_copy (k_ptrs_ent_h, k_ptrs_ent_d);
825  const size_t lclTotalNumEntries = static_cast<size_t> (k_ptrs_ent_h ());
826 
827  // // FIXME (mfh 08 Aug 2014) This assumes UVM. We could fix this
828  // // either by storing the row offsets in the graph as a DualView,
829  // // or by making a device View of that entry, and copying it back
830  // // to host.
831  // const size_t lclTotalNumEntries = k_ptrs(lclNumRows);
832 
833  // Allocate array of (packed???) matrix values.
834  typedef typename local_matrix_type::values_type values_type;
835  k_values1D_ = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
836  }
837  else {
838  // "Dynamic profile" means the number of matrix entries in each
839  // row is not fixed and may expand. Thus, we store the matrix's
840  // values in "2-D storage," meaning an array of arrays. The
841  // outer array has as many inner arrays as there are rows in the
842  // matrix, and each inner array stores the values in that row.
843  values2D_ = staticGraph_->template allocateValues2D<impl_scalar_type> ();
844  }
845  }
846 
847  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
848  void
850  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
851  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
852  Teuchos::ArrayRCP<const Scalar>& values) const
853  {
854  using Teuchos::RCP;
855  const char tfecfFuncName[] = "getAllValues: ";
856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
857  columnIndices.size () != values.size (), std::runtime_error,
858  "Requires that columnIndices and values are the same size.");
859 
860  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
861  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
862  relevantGraph.is_null (), std::runtime_error,
863  "Requires that getCrsGraph() is not null.");
864  try {
865  rowPointers = relevantGraph->getNodeRowPtrs ();
866  }
867  catch (std::exception &e) {
868  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
869  true, std::runtime_error,
870  "Caught exception while calling graph->getNodeRowPtrs(): "
871  << e.what ());
872  }
873  try {
874  columnIndices = relevantGraph->getNodePackedIndices ();
875  }
876  catch (std::exception &e) {
877  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
878  true, std::runtime_error,
879  "Caught exception while calling graph->getNodePackedIndices(): "
880  << e.what ());
881  }
882  Teuchos::ArrayRCP<const impl_scalar_type> vals =
883  Kokkos::Compat::persistingView (k_values1D_);
884  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
885  }
886 
887  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
888  void
890  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
891  {
893  using Kokkos::create_mirror_view;
894  using Teuchos::arcp_const_cast;
895  using Teuchos::Array;
896  using Teuchos::ArrayRCP;
897  using Teuchos::null;
898  using Teuchos::RCP;
899  using Teuchos::rcp;
900  typedef typename local_matrix_type::row_map_type row_map_type;
901  typedef typename Graph::local_graph_type::entries_type::non_const_type lclinds_1d_type;
902  typedef typename local_matrix_type::values_type values_type;
903 #ifdef HAVE_TPETRA_DEBUG
904  const char tfecfFuncName[] = "fillLocalGraphAndMatrix (called from "
905  "fillComplete or expertStaticFillComplete): ";
906 #endif // HAVE_TPETRA_DEBUG
907 
908 #ifdef HAVE_TPETRA_DEBUG
909  // fillComplete() only calls fillLocalGraphAndMatrix() if the
910  // matrix owns the graph, which means myGraph_ is not null.
911  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
912  (myGraph_.is_null (), std::logic_error, "The nonconst graph (myGraph_) "
913  "is null. This means that the matrix has a const (a.k.a. \"static\") "
914  "graph. fillComplete or expertStaticFillComplete should never call "
915  "fillLocalGraphAndMatrix in that case. "
916  "Please report this bug to the Tpetra developers.");
917 #endif // HAVE_TPETRA_DEBUG
918 
919  const size_t lclNumRows = this->getNodeNumRows ();
920 
921  // This method's goal is to fill in the three arrays (compressed
922  // sparse row format) that define the sparse graph's and matrix's
923  // structure, and the sparse matrix's values.
924  //
925  // Use the nonconst version of row_map_type for k_ptrs,
926  // because row_map_type is const and we need to modify k_ptrs here.
927  typename row_map_type::non_const_type k_ptrs;
928  row_map_type k_ptrs_const;
929  lclinds_1d_type k_inds;
930  values_type k_vals;
931 
932  // Get references to the data in myGraph_, so we can modify them
933  // as well. Note that we only call fillLocalGraphAndMatrix() if
934  // the matrix owns the graph, which means myGraph_ is not null.
935  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
936 
937  typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
938 
939  if (getProfileType () == DynamicProfile) {
940  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
941  //
942  // DynamicProfile means that the matrix's column indices and
943  // values are currently stored in a 2-D "unpacked" format, in
944  // the arrays-of-arrays myGraph_->lclInds2D_ (for column
945  // indices) and values2D_ (for values). We allocate 1-D storage
946  // (k_inds resp. k_vals), and then copy from 2-D storage
947  // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds
948  // resp. k_vals).
949 
950  // We're be packing on host. k_numRowEntries_ lives on host,
951  // and computeOffsetsFromCounts accepts a host View for counts,
952  // even if offsets is a device View. (Furthermore, the "host"
953  // View may very well live in CudaUVMSpace, so doing this has no
954  // penalty, other than requiring synchronization between Cuda
955  // and host. UVM memory gets grumpy if both device and host
956  // attempt to access it at the same time without an intervening
957  // fence.)
958  typename row_entries_type::const_type numRowEnt_h =
959  myGraph_->k_numRowEntries_;
960 #ifdef HAVE_TPETRA_DEBUG
961  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
962  (static_cast<size_t> (numRowEnt_h.dimension_0 ()) != lclNumRows,
963  std::logic_error, "(DynamicProfile branch) numRowEnt_h has the "
964  "wrong length. numRowEnt_h.dimension_0() = "
965  << numRowEnt_h.dimension_0 () << " != getNodeNumRows() = "
966  << lclNumRows << ".");
967 #endif // HAVE_TPETRA_DEBUG
968 
969  // We're packing on host (since we can't read Teuchos data
970  // structures on device), so let's fill the packed row offsets
971  // on host first.
972  k_ptrs = typename row_map_type::non_const_type ("Tpetra::CrsGraph::ptr",
973  lclNumRows+1);
974  typename row_map_type::non_const_type::HostMirror h_ptrs =
975  create_mirror_view (k_ptrs);
976 
977  // Pack the row offsets into k_ptrs, by doing a sum-scan of
978  // the array of valid entry counts per row.
979  //
980  // Return value is the total number of entries in the matrix on
981  // the calling process. It's cheap to compute and useful as a
982  // sanity check.
983  const size_t lclTotalNumEntries =
984  computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
985 #ifdef HAVE_TPETRA_DEBUG
986  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
987  (static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
988  std::logic_error, "(DynamicProfile branch) After packing h_ptrs, "
989  "h_ptrs.dimension_0() = " << h_ptrs.dimension_0 () << " != "
990  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
991  {
992  const size_t h_ptrs_lastEnt = h_ptrs(lclNumRows); // it's a host View
993  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
994  (h_ptrs_lastEnt != lclTotalNumEntries, std::logic_error,
995  "(DynamicProfile branch) After packing h_ptrs, h_ptrs(lclNumRows="
996  << lclNumRows << ") = " << h_ptrs_lastEnt << " != total number "
997  "of entries on the calling process = " << lclTotalNumEntries << ".");
998  }
999 #endif // HAVE_TPETRA_DEBUG
1000 
1001  // Allocate the arrays of packed column indices and values.
1002  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1003  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1004 
1005  // We need host views of the above, since 2-D storage lives on host.
1006  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
1007  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1008 
1009  // Pack the column indices and values on the host.
1010  ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_;
1011  for (size_t row = 0; row < lclNumRows; ++row) {
1012  const size_t numEnt = numRowEnt_h(row);
1013  std::copy (lclInds2D[row].begin(),
1014  lclInds2D[row].begin() + numEnt,
1015  h_inds.ptr_on_device() + h_ptrs(row));
1016  std::copy (values2D_[row].begin(),
1017  values2D_[row].begin() + numEnt,
1018  h_vals.ptr_on_device() + h_ptrs(row));
1019  }
1020 
1021  // Copy the packed column indices and values to the device.
1022  Kokkos::deep_copy (k_inds, h_inds);
1023  Kokkos::deep_copy (k_vals, h_vals);
1024  // Copy the packed row offsets to the device too.
1025  // We didn't actually need them on device before.
1026  Kokkos::deep_copy (k_ptrs, h_ptrs);
1027  k_ptrs_const = k_ptrs; // const version of k_ptrs
1028 
1029 #ifdef HAVE_TPETRA_DEBUG
1030  // Sanity check of packed row offsets.
1031  if (k_ptrs.dimension_0 () != 0) {
1032  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1033  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1034  (numOffsets != lclNumRows + 1, std::logic_error, "(DynamicProfile "
1035  "branch) After copying into k_ptrs, k_ptrs.dimension_0() = " <<
1036  numOffsets << " != (lclNumRows+1) = " << (lclNumRows+1) << ".");
1037 
1038  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1039  // k_ptrs(numOffsets-1), then copy that entry to host first.
1040  // We can do this with "0-D" subviews.
1041  auto k_ptrs_ent_d = Kokkos::subview (k_ptrs, numOffsets-1);
1042  auto k_ptrs_ent_h = create_mirror_view (k_ptrs_ent_d);
1043  Kokkos::deep_copy (k_ptrs_ent_h, k_ptrs_ent_d);
1044  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1045  (static_cast<size_t> (k_ptrs_ent_h ()) != k_vals.dimension_0 (),
1046  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1047  << (numOffsets-1) << ") = " << k_ptrs_ent_h () << " != "
1048  "k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1049  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1050  (static_cast<size_t> (k_ptrs_ent_h ()) != k_inds.dimension_0 (),
1051  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1052  << (numOffsets-1) << ") = " << k_ptrs_ent_h () << " != "
1053  "k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1054  }
1055 #endif // HAVE_TPETRA_DEBUG
1056  }
1057  else if (getProfileType () == StaticProfile) {
1058  // StaticProfile means that the matrix's column indices and
1059  // values are currently stored in a 1-D format, with row offsets
1060  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
1061 
1062  // StaticProfile also means that the graph's array of row
1063  // offsets must already be allocated.
1064  typename Graph::local_graph_type::row_map_type curRowOffsets =
1065  myGraph_->k_rowPtrs_;
1066 
1067 #ifdef HAVE_TPETRA_DEBUG
1068  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1069  (curRowOffsets.dimension_0 () == 0, std::logic_error,
1070  "(StaticProfile branch) curRowOffsets.dimension_0() == 0.");
1071  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1072  (curRowOffsets.dimension_0 () != lclNumRows + 1, std::logic_error,
1073  "(StaticProfile branch) curRowOffsets.dimension_0() = "
1074  << curRowOffsets.dimension_0 () << " != lclNumRows + 1 = "
1075  << (lclNumRows + 1) << ".")
1076  {
1077  const size_t numOffsets = curRowOffsets.dimension_0 ();
1078  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1079  // curRowOffsets(numOffsets-1), then copy that entry of the
1080  // View to host first. We can do this with "0-D" subviews.
1081  auto curRowOffsets_ent_d =
1082  Kokkos::subview (curRowOffsets, numOffsets - 1);
1083  auto curRowOffsets_ent_h = create_mirror_view (curRowOffsets_ent_d);
1084  Kokkos::deep_copy (curRowOffsets_ent_h, curRowOffsets_ent_d);
1085  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1086  (numOffsets != 0 &&
1087  myGraph_->k_lclInds1D_.dimension_0 () != curRowOffsets_ent_h (),
1088  std::logic_error, "(StaticProfile branch) numOffsets = " <<
1089  numOffsets << " != 0 and myGraph_->k_lclInds1D_.dimension_0() = "
1090  << myGraph_->k_lclInds1D_.dimension_0 () << " != curRowOffsets("
1091  << numOffsets << ") = " << curRowOffsets_ent_h () << ".");
1092  }
1093 #endif // HAVE_TPETRA_DEBUG
1094 
1095  if (myGraph_->nodeNumEntries_ != myGraph_->nodeNumAllocated_) {
1096  // The matrix's current 1-D storage is "unpacked." This means
1097  // the row offsets may differ from what the final row offsets
1098  // should be. This could happen, for example, if the user
1099  // specified StaticProfile in the constructor and set an upper
1100  // bound on the number of entries per row, but didn't fill all
1101  // those entries.
1102 #ifdef HAVE_TPETRA_DEBUG
1103  if (curRowOffsets.dimension_0 () != 0) {
1104  const size_t numOffsets =
1105  static_cast<size_t> (curRowOffsets.dimension_0 ());
1106  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1107  // curRowOffsets(numOffsets-1), then copy that entry to host
1108  // first. We can do this with "0-D" subviews.
1109  auto curRowOffsets_ent_d = Kokkos::subview (curRowOffsets, numOffsets-1);
1110  auto curRowOffsets_ent_h = create_mirror_view (curRowOffsets_ent_d);
1111  Kokkos::deep_copy (curRowOffsets_ent_h, curRowOffsets_ent_d);
1112  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1113  (static_cast<size_t> (curRowOffsets_ent_h ()) !=
1114  static_cast<size_t> (k_values1D_.dimension_0 ()),
1115  std::logic_error, "(StaticProfile unpacked branch) Before "
1116  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1117  << curRowOffsets_ent_h () << " != k_values1D_.dimension_0()"
1118  " = " << k_values1D_.dimension_0 () << ".");
1119  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1120  (static_cast<size_t> (curRowOffsets_ent_h ()) !=
1121  static_cast<size_t> (myGraph_->k_lclInds1D_.dimension_0 ()),
1122  std::logic_error, "(StaticProfile unpacked branch) Before "
1123  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1124  << curRowOffsets_ent_h ()
1125  << " != myGraph_->k_lclInds1D_.dimension_0() = "
1126  << myGraph_->k_lclInds1D_.dimension_0 () << ".");
1127  }
1128 #endif // HAVE_TPETRA_DEBUG
1129 
1130  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1131  // the array of valid entry counts per row.
1132 
1133  // Total number of entries in the matrix on the calling
1134  // process. We will compute this in the loop below. It's
1135  // cheap to compute and useful as a sanity check.
1136  size_t lclTotalNumEntries = 0;
1137  // This will be a host view of packed row offsets.
1138  typename row_map_type::non_const_type::HostMirror h_ptrs;
1139  {
1140  // Allocate the packed row offsets array. We use a nonconst
1141  // temporary (packedRowOffsets) here, because k_ptrs is
1142  // const. We will assign packedRowOffsets to k_ptrs below.
1143  typename row_map_type::non_const_type
1144  packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
1145  typename row_entries_type::const_type numRowEnt_h =
1146  myGraph_->k_numRowEntries_;
1147  // We're computing offsets on device. This function can
1148  // handle numRowEnt_h being a host View.
1149  lclTotalNumEntries =
1150  computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1151  // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1152  // to use packedRowOffsets in the loop above and assign here.
1153  k_ptrs = packedRowOffsets;
1154  k_ptrs_const = k_ptrs;
1155  }
1156 
1157 #ifdef HAVE_TPETRA_DEBUG
1158  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1159  (static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1160  std::logic_error,
1161  "(StaticProfile unpacked branch) After packing k_ptrs, "
1162  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
1163  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1164  {
1165  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1166  // k_ptrs(lclNumRows), then copy that entry to host first.
1167  // We can do this with "0-D" subviews.
1168  auto k_ptrs_ent_d = Kokkos::subview (k_ptrs, lclNumRows);
1169  auto k_ptrs_ent_h = create_mirror_view (k_ptrs_ent_d);
1170  Kokkos::deep_copy (k_ptrs_ent_h, k_ptrs_ent_d);
1171  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1172  (k_ptrs_ent_h () != lclTotalNumEntries, std::logic_error,
1173  "(StaticProfile unpacked branch) After filling k_ptrs, "
1174  "k_ptrs(lclNumRows=" << lclNumRows << ") = " << k_ptrs_ent_h ()
1175  << " != total number of entries on the calling process = "
1176  << lclTotalNumEntries << ".");
1177  }
1178 #endif // HAVE_TPETRA_DEBUG
1179 
1180  // Allocate the arrays of packed column indices and values.
1181  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1182  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1183 
1184  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1185  // and k_values1D_ are currently unpacked. Pack them, using
1186  // the packed row offsets array k_ptrs that we created above.
1187  //
1188  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1189  // need to keep around the unpacked row offsets, column
1190  // indices, and values arrays.
1191 
1192  // Pack the column indices from unpacked k_lclInds1D_ into
1193  // packed k_inds. We will replace k_lclInds1D_ below.
1194  typedef pack_functor<typename Graph::local_graph_type::entries_type::non_const_type,
1195  typename Graph::local_graph_type::row_map_type>
1196  inds_packer_type;
1197  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1198  k_ptrs, curRowOffsets);
1199  Kokkos::parallel_for (lclNumRows, indsPacker);
1200 
1201  // Pack the values from unpacked k_values1D_ into packed
1202  // k_vals. We will replace k_values1D_ below.
1203  typedef pack_functor<values_type, row_map_type> vals_packer_type;
1204  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1205  k_ptrs, curRowOffsets);
1206  Kokkos::parallel_for (lclNumRows, valsPacker);
1207 
1208 #ifdef HAVE_TPETRA_DEBUG
1209  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1210  (k_ptrs.dimension_0 () == 0, std::logic_error,
1211  "(StaticProfile \"Optimize Storage\" = "
1212  "true branch) After packing, k_ptrs.dimension_0() = 0. This "
1213  "probably means that k_rowPtrs_ was never allocated.");
1214  if (k_ptrs.dimension_0 () != 0) {
1215  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1216 
1217  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1218  // k_ptrs(numOffsets-1), then copy that entry to host first.
1219  // We can do this with "0-D" subviews.
1220  auto k_ptrs_ent_d = Kokkos::subview (k_ptrs, numOffsets - 1);
1221  auto k_ptrs_ent_h = create_mirror_view (k_ptrs_ent_d);
1222  Kokkos::deep_copy (k_ptrs_ent_h, k_ptrs_ent_d);
1223  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1224  (static_cast<size_t> (k_ptrs_ent_h ()) != k_vals.dimension_0 (),
1225  std::logic_error,
1226  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1227  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs_ent_h () <<
1228  " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1229  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1230  (static_cast<size_t> (k_ptrs_ent_h ()) != k_inds.dimension_0 (),
1231  std::logic_error,
1232  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1233  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs_ent_h () <<
1234  " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1235  }
1236 #endif // HAVE_TPETRA_DEBUG
1237  }
1238  else { // We don't have to pack, so just set the pointers.
1239  k_ptrs_const = myGraph_->k_rowPtrs_;
1240  k_inds = myGraph_->k_lclInds1D_;
1241  k_vals = this->k_values1D_;
1242 
1243 #ifdef HAVE_TPETRA_DEBUG
1244  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1245  (k_ptrs_const.dimension_0 () == 0, std::logic_error,
1246  "(StaticProfile \"Optimize Storage\"=false branch) "
1247  "k_ptrs_const.dimension_0() = 0. This probably means that "
1248  "k_rowPtrs_ was never allocated.");
1249  if (k_ptrs_const.dimension_0 () != 0) {
1250  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1251 
1252  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1253  // k_ptrs_const(numOffsets-1), then copy that entry to host
1254  // first. We can do this with "0-D" subviews.
1255  auto k_ptrs_const_ent_d = Kokkos::subview (k_ptrs_const, numOffsets - 1);
1256  auto k_ptrs_const_ent_h = create_mirror_view (k_ptrs_const_ent_d);
1257  Kokkos::deep_copy (k_ptrs_const_ent_h, k_ptrs_const_ent_d);
1258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1259  (static_cast<size_t> (k_ptrs_const_ent_h ()) != k_vals.dimension_0 (),
1260  std::logic_error,
1261  "(StaticProfile \"Optimize Storage\"=false branch) "
1262  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const_ent_h ()
1263  << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1264  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1265  (static_cast<size_t> (k_ptrs_const_ent_h ()) != k_inds.dimension_0 (),
1266  std::logic_error,
1267  "(StaticProfile \"Optimize Storage\" = false branch) "
1268  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const_ent_h ()
1269  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1270  }
1271 #endif // HAVE_TPETRA_DEBUG
1272  }
1273  }
1274 
1275 #ifdef HAVE_TPETRA_DEBUG
1276  // Extra sanity checks.
1277  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1278  (static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1,
1279  std::logic_error, "After packing, k_ptrs_const.dimension_0() = " <<
1280  k_ptrs_const.dimension_0 () << " != lclNumRows+1 = " << (lclNumRows+1)
1281  << ".");
1282  if (k_ptrs_const.dimension_0 () != 0) {
1283  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1284 
1285  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1286  // k_ptrs_const(numOffsets-1), then copy that entry to host
1287  // first. We can do this with "0-D" subviews.
1288  auto k_ptrs_const_ent_d = Kokkos::subview (k_ptrs_const, numOffsets - 1);
1289  auto k_ptrs_const_ent_h = create_mirror_view (k_ptrs_const_ent_d);
1290  Kokkos::deep_copy (k_ptrs_const_ent_h, k_ptrs_const_ent_d);
1291 
1292  const size_t k_ptrs_const_numOffsetsMinus1 = k_ptrs_const_ent_h ();
1293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1294  (k_ptrs_const_numOffsetsMinus1 != k_vals.dimension_0 (),
1295  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1296  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_vals.dimension_0()"
1297  " = " << k_vals.dimension_0 () << ".");
1298  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1299  (k_ptrs_const_numOffsetsMinus1 != k_inds.dimension_0 (),
1300  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1301  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_inds.dimension_0()"
1302  " = " << k_inds.dimension_0 () << ".");
1303  }
1304 #endif // HAVE_TPETRA_DEBUG
1305 
1306  // May we ditch the old allocations for the packed (and otherwise
1307  // "optimized") allocations, later in this routine? Optimize
1308  // storage if the graph is not static, or if the graph already has
1309  // optimized storage.
1310  const bool defaultOptStorage =
1311  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1312  const bool requestOptimizedStorage =
1313  (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) ||
1314  (params.is_null () && defaultOptStorage);
1315 
1316  // The graph has optimized storage when indices are allocated,
1317  // myGraph_->k_numRowEntries_ is empty, and there are more than
1318  // zero rows on this process. It's impossible for the graph to
1319  // have dynamic profile (getProfileType() == DynamicProfile) and
1320  // be optimized (isStorageOptimized()).
1321  if (requestOptimizedStorage) {
1322  // Free the old, unpacked, unoptimized allocations.
1323  // Change the graph from dynamic to static allocation profile
1324 
1325  // Free graph data structures that are only needed for 2-D or
1326  // unpacked 1-D storage.
1327  myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage
1328  myGraph_->k_numRowEntries_ = row_entries_type ();
1329 
1330  // Free the matrix's 2-D storage.
1331  this->values2D_ = null;
1332 
1333  // Keep the new 1-D packed allocations.
1334  myGraph_->k_rowPtrs_ = k_ptrs_const;
1335  myGraph_->k_lclInds1D_ = k_inds;
1336  this->k_values1D_ = k_vals;
1337 
1338  // Storage is packed now, so the number of allocated entries is
1339  // the same as the actual number of entries.
1340  myGraph_->nodeNumAllocated_ = myGraph_->nodeNumEntries_;
1341  // The graph is definitely StaticProfile now, whether or not it
1342  // was before.
1343  myGraph_->pftype_ = StaticProfile;
1344  myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1345  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1346  }
1347 
1348  // Make the local graph, using the arrays of row offsets and
1349  // column indices that we built above. The local graph should be
1350  // null, but we delete it first so that any memory can be freed
1351  // before we allocate the new one.
1352  //
1353  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1354  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1355  // and k_ptrs, and creates the local graph lclGraph_.
1356  myGraph_->lclGraph_ =
1357  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1358 
1359  // Make the local matrix, using the local graph and vals array.
1360  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1361  getNodeNumCols (), k_vals,
1362  myGraph_->lclGraph_);
1363  }
1364 
1365  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1366  void
1368  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1369  {
1370  using Kokkos::create_mirror_view;
1371  using Teuchos::ArrayRCP;
1372  using Teuchos::Array;
1373  using Teuchos::null;
1374  using Teuchos::RCP;
1375  using Teuchos::rcp;
1376  typedef LocalOrdinal LO;
1377  typedef typename Graph::local_graph_type::row_map_type row_map_type;
1378  typedef typename row_map_type::non_const_type non_const_row_map_type;
1379  typedef typename local_matrix_type::values_type values_type;
1380 #ifdef HAVE_TPETRA_DEBUG
1381  const char tfecfFuncName[] = "fillLocalMatrix (called from fillComplete): ";
1382 #endif // HAVE_TPETRA_DEBUG
1383 
1384  const size_t lclNumRows = getNodeNumRows();
1385  const map_type& rowMap = * (getRowMap ());
1386  RCP<node_type> node = rowMap.getNode ();
1387 
1388  // The goals of this routine are first, to allocate and fill
1389  // packed 1-D storage (see below for an explanation) in the vals
1390  // array, and second, to give vals to the local matrix and
1391  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1392  // row offsets, within the scope of this routine, since we're only
1393  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1394  // fill both the graph and the matrix at the same time).
1395 
1396  // get data from staticGraph_
1397  ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_;
1398  size_t nodeNumEntries = staticGraph_->nodeNumEntries_;
1399  size_t nodeNumAllocated = staticGraph_->nodeNumAllocated_;
1400  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1401 
1402  row_map_type k_ptrs; // "packed" row offsets array
1403  values_type k_vals; // "packed" values array
1404 
1405  // May we ditch the old allocations for the packed (and otherwise
1406  // "optimized") allocations, later in this routine? Request
1407  // optimized storage by default.
1408  bool requestOptimizedStorage = true;
1409  const bool default_OptimizeStorage =
1410  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1411  if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) {
1412  requestOptimizedStorage = false;
1413  }
1414  // If we're not allowed to change a static graph, then we can't
1415  // change the storage of the matrix, either. This means that if
1416  // the graph's storage isn't already optimized, we can't optimize
1417  // the matrix's storage either. Check and give warning, as
1418  // appropriate.
1419  if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) {
1420  TPETRA_ABUSE_WARNING(true, std::runtime_error,
1421  "You requested optimized storage by setting the"
1422  "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue"
1423  "of default behavior. However, the associated CrsGraph was filled separately"
1424  "and requested not to optimize storage. Therefore, the CrsMatrix cannot"
1425  "optimize storage.");
1426  requestOptimizedStorage = false;
1427  }
1428 
1429  typedef decltype (staticGraph_->k_numRowEntries_) row_entries_type;
1430 
1431  if (getProfileType() == DynamicProfile) {
1432  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1433  //
1434  // DynamicProfile means that the matrix's values are currently
1435  // stored in a 2-D "unpacked" format, in the array-of-arrays
1436  // values2D_. We allocate 1-D storage and then copy from 2-D
1437  // storage in values2D_ into 1-D storage in k_vals. Since we're
1438  // only allocating the local matrix here, not the local graph,
1439  // we don't need to keep the row offsets array, but we do need
1440  // it here temporarily in order to convert to 1-D storage. (The
1441  // allocStorage() function needs it.) We'll free ptrs later in
1442  // this method.
1443  //
1444  // FIXME (mfh 08 Aug 2014) If we're in this method, then the
1445  // graph should already have packed 1-D storage. Why can't we
1446  // just use the graph's current row offsets array?
1447 
1448  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1449  // the array of valid entry counts per row.
1450  //
1451  // Total number of entries in the matrix on the calling
1452  // process. We will compute this in the loop below. It's
1453  // cheap to compute and useful as a sanity check.
1454  size_t lclTotalNumEntries = 0;
1455  // This will be a host view of packed row offsets.
1456  typename non_const_row_map_type::HostMirror h_ptrs;
1457 
1458  typename row_entries_type::const_type numRowEnt_h =
1459  staticGraph_->k_numRowEntries_;
1460  {
1461  non_const_row_map_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1462  lclNumRows+1);
1463  // NOTE (mfh 27 Jun 2016) We need h_ptrs on host anyway, so
1464  // let's just compute offsets on host.
1465  h_ptrs = create_mirror_view (packedRowOffsets);
1467  lclTotalNumEntries = computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1468  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1469  k_ptrs = packedRowOffsets;
1470  }
1471 
1472 #ifdef HAVE_TPETRA_DEBUG
1473  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1474  (static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1475  std::logic_error, "In DynamicProfile branch, after packing k_ptrs, "
1476  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
1477  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1478  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1479  (static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
1480  std::logic_error, "In DynamicProfile branch, after packing h_ptrs, "
1481  "h_ptrs.dimension_0() = " << h_ptrs.dimension_0 () << " != "
1482  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1483  {
1484  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1485  // k_ptrs(lclNumRows), then copy that entry to host first. We
1486  // can do this with "0-D" subviews.
1487  auto k_ptrs_ent_d = Kokkos::subview (k_ptrs, lclNumRows);
1488  auto k_ptrs_ent_h = create_mirror_view (k_ptrs_ent_d);
1489  Kokkos::deep_copy (k_ptrs_ent_h, k_ptrs_ent_d);
1490  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1491  (static_cast<size_t> (k_ptrs_ent_h ()) != lclTotalNumEntries,
1492  std::logic_error, "(DynamicProfile branch) After packing k_ptrs, "
1493  "k_ptrs(lclNumRows = " << lclNumRows << ") = " << k_ptrs_ent_h ()
1494  << " != total number of entries on the calling process = "
1495  << lclTotalNumEntries << ".");
1496  }
1497 #endif // HAVE_TPETRA_DEBUG
1498 
1499  // Allocate the array of packed values.
1500  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1501  // We need a host view of the above, since 2-D storage lives on host.
1502  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1503  // Pack the values on the host.
1504  for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1505  const size_t numEnt = numRowEnt_h(lclRow);
1506  std::copy (values2D_[lclRow].begin(),
1507  values2D_[lclRow].begin() + numEnt,
1508  h_vals.ptr_on_device() + h_ptrs(lclRow));
1509  }
1510  // Copy the packed values to the device.
1511  Kokkos::deep_copy (k_vals, h_vals);
1512 
1513 #ifdef HAVE_TPETRA_DEBUG
1514  // Sanity check of packed row offsets.
1515  if (k_ptrs.dimension_0 () != 0) {
1516  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1517 
1518  // mfh 23 Jun 2016: Don't assume UVM. If we want to look at
1519  // k_ptrs(numOffsets-1), then copy that entry to host first.
1520  // We can do this with "0-D" subviews.
1521  auto k_ptrs_ent_d = Kokkos::subview (k_ptrs, numOffsets - 1);
1522  auto k_ptrs_ent_h = create_mirror_view (k_ptrs_ent_d);
1523  Kokkos::deep_copy (k_ptrs_ent_h, k_ptrs_ent_d);
1524  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1525  (static_cast<size_t> (k_ptrs_ent_h ()) != k_vals.dimension_0 (),
1526  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1527  << (numOffsets-1) << ") = " << k_ptrs_ent_h () << " != "
1528  "k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1529  }
1530 #endif // HAVE_TPETRA_DEBUG
1531  }
1532  else if (getProfileType () == StaticProfile) {
1533  // StaticProfile means that the matrix's values are currently
1534  // stored in a 1-D format. However, this format is "unpacked";
1535  // it doesn't necessarily have the same row offsets as indicated
1536  // by the ptrs array returned by allocRowPtrs. This could
1537  // happen, for example, if the user specified StaticProfile in
1538  // the constructor and fixed the number of matrix entries in
1539  // each row, but didn't fill all those entries.
1540  //
1541  // As above, we don't need to keep the "packed" row offsets
1542  // array ptrs here, but we do need it here temporarily, so we
1543  // have to allocate it. We'll free ptrs later in this method.
1544  //
1545  // Note that this routine checks whether storage has already
1546  // been packed. This is a common case for solution of nonlinear
1547  // PDEs using the finite element method, as long as the
1548  // structure of the sparse matrix does not change between linear
1549  // solves.
1550  if (nodeNumEntries != nodeNumAllocated) {
1551  // We have to pack the 1-D storage, since the user didn't fill
1552  // up all requested storage.
1553  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1554  lclNumRows+1);
1555  // Total number of entries in the matrix on the calling
1556  // process. We will compute this in the loop below. It's
1557  // cheap to compute and useful as a sanity check.
1558  size_t lclTotalNumEntries = 0;
1559  k_ptrs = tmpk_ptrs;
1560  {
1561  typename row_entries_type::const_type numRowEnt_d =
1562  staticGraph_->k_numRowEntries_;
1564  // This function can handle the counts being a host View.
1565  lclTotalNumEntries = computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_d);
1566  }
1567 
1568  // Allocate the "packed" values array.
1569  // It has exactly the right number of entries.
1570  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1571 
1572  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1573  typedef pack_functor<values_type, row_map_type> packer_type;
1574  packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1575  Kokkos::parallel_for (lclNumRows, valsPacker);
1576  }
1577  else { // We don't have to pack, so just set the pointer.
1578  k_vals = k_values1D_;
1579  }
1580  }
1581 
1582  // May we ditch the old allocations for the packed one?
1583  if (requestOptimizedStorage) {
1584  // The user requested optimized storage, so we can dump the
1585  // unpacked 2-D and 1-D storage, and keep the packed storage.
1586  values2D_ = null;
1587  k_values1D_ = k_vals;
1588  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1589  }
1590 
1591  // Build the local sparse matrix object. At this point, the local
1592  // matrix certainly has a column Map. Remember that the local
1593  // matrix's number of columns comes from the column Map, not the
1594  // domain Map.
1595  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1596  getColMap ()->getNodeNumElements (),
1597  k_vals,
1598  staticGraph_->getLocalGraph ());
1599  }
1600 
1601  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1602  void
1604  insertLocalValues (const LocalOrdinal localRow,
1605  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1606  const Teuchos::ArrayView<const Scalar>& values)
1607  {
1608  using Teuchos::Array;
1609  using Teuchos::ArrayView;
1610  using Teuchos::av_reinterpret_cast;
1611  using Teuchos::toString;
1612  using std::endl;
1613  const char tfecfFuncName[] = "insertLocalValues";
1614 
1615  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1616  ": Fill is not active. After calling fillComplete, you must call "
1617  "resumeFill before you may insert entries into the matrix again.");
1618  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1619  " cannot insert indices with static graph; use replaceLocalValues() instead.");
1620  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1621  std::runtime_error, ": graph indices are global; use insertGlobalValues().");
1622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasColMap (), std::runtime_error,
1623  " cannot insert local indices without a column map.");
1624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(),
1625  std::runtime_error, ": values.size() must equal indices.size().");
1626  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1627  ! getRowMap()->isNodeLocalElement(localRow), std::runtime_error,
1628  ": Local row index " << localRow << " does not belong to this process.");
1629 
1630  if (! myGraph_->indicesAreAllocated ()) {
1631  try {
1632  allocateValues (LocalIndices, GraphNotYetAllocated);
1633  }
1634  catch (std::exception& e) {
1635  TEUCHOS_TEST_FOR_EXCEPTION(
1636  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1637  "allocateValues(LocalIndices,GraphNotYetAllocated) threw an "
1638  "exception: " << e.what ());
1639  }
1640  }
1641 
1642  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1643 #ifdef HAVE_TPETRA_DEBUG
1644  // In a debug build, if the matrix has a column Map, test whether
1645  // any of the given column indices are not in the column Map.
1646  // Keep track of the invalid column indices so we can tell the
1647  // user about them.
1648  if (hasColMap ()) {
1649  const map_type& colMap = * (getColMap ());
1650  Array<LocalOrdinal> badColInds;
1651  bool allInColMap = true;
1652  for (size_t k = 0; k < numEntriesToAdd; ++k) {
1653  if (! colMap.isNodeLocalElement (indices[k])) {
1654  allInColMap = false;
1655  badColInds.push_back (indices[k]);
1656  }
1657  }
1658  if (! allInColMap) {
1659  std::ostringstream os;
1660  os << "Tpetra::CrsMatrix::insertLocalValues: You attempted to insert "
1661  "entries in owned row " << localRow << ", at the following column "
1662  "indices: " << toString (indices) << "." << endl;
1663  os << "Of those, the following indices are not in the column Map on "
1664  "this process: " << toString (badColInds) << "." << endl << "Since "
1665  "the matrix has a column Map already, it is invalid to insert "
1666  "entries at those locations.";
1667  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
1668  }
1669  }
1670 #endif // HAVE_TPETRA_DEBUG
1671 
1672 #ifdef HAVE_TPETRA_DEBUG
1673  RowInfo rowInfo;
1674  try {
1675  rowInfo = myGraph_->getRowInfo (localRow);
1676  } catch (std::exception& e) {
1677  TEUCHOS_TEST_FOR_EXCEPTION(
1678  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1679  "myGraph_->getRowInfo threw an exception: " << e.what ());
1680  }
1681 #else
1682  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1683 #endif // HAVE_TPETRA_DEBUG
1684 
1685  const size_t curNumEntries = rowInfo.numEntries;
1686  const size_t newNumEntries = curNumEntries + numEntriesToAdd;
1687  if (newNumEntries > rowInfo.allocSize) {
1688  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1689  getProfileType() == StaticProfile, std::runtime_error,
1690  ": new indices exceed statically allocated graph structure.");
1691 
1692  // Make space for the new matrix entries.
1693  try {
1694  rowInfo = myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1695  newNumEntries,
1696  values2D_[localRow]);
1697  } catch (std::exception& e) {
1698  TEUCHOS_TEST_FOR_EXCEPTION(
1699  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1700  "myGraph_->updateGlobalAllocAndValues threw an exception: "
1701  << e.what ());
1702  }
1703  }
1704  typename Graph::SLocalGlobalViews indsView;
1705  indsView.linds = indices;
1706 
1707 #ifdef HAVE_TPETRA_DEBUG
1708  ArrayView<impl_scalar_type> valsView;
1709  try {
1710  valsView = this->getViewNonConst (rowInfo);
1711  } catch (std::exception& e) {
1712  TEUCHOS_TEST_FOR_EXCEPTION(
1713  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1714  "getViewNonConst threw an exception: " << e.what ());
1715  }
1716 #else
1717  ArrayView<impl_scalar_type> valsView = this->getViewNonConst (rowInfo);
1718 #endif // HAVE_TPETRA_DEBUG
1719 
1720  ArrayView<const impl_scalar_type> valsIn =
1721  av_reinterpret_cast<const impl_scalar_type> (values);
1722  try {
1723  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, indsView,
1724  valsView, valsIn,
1725  LocalIndices,
1726  LocalIndices);
1727  } catch (std::exception& e) {
1728  TEUCHOS_TEST_FOR_EXCEPTION(
1729  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1730  "myGraph_->insertIndicesAndValues threw an exception: "
1731  << e.what ());
1732  }
1733 
1734 #ifdef HAVE_TPETRA_DEBUG
1735  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1736  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1737  chkNewNumEntries != newNumEntries, std::logic_error,
1738  ": The row should have " << newNumEntries << " entries after insert, but "
1739  "instead has " << chkNewNumEntries << ". Please report this bug to the "
1740  "Tpetra developers.");
1741  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1742  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1743  "Please report this bug to the Tpetra developers.");
1744 #endif // HAVE_TPETRA_DEBUG
1745  }
1746 
1747  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1748  void
1750  insertLocalValues (const LocalOrdinal localRow,
1751  const LocalOrdinal numEnt,
1752  const Scalar vals[],
1753  const LocalOrdinal cols[])
1754  {
1755  Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1756  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1757  this->insertLocalValues (localRow, colsT, valsT);
1758  }
1759 
1760  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1761  void
1763  insertLocalValuesFiltered (const LocalOrdinal localRow,
1764  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1765  const Teuchos::ArrayView<const Scalar>& values)
1766  {
1767  using Teuchos::Array;
1768  using Teuchos::ArrayView;
1769  using Teuchos::av_reinterpret_cast;
1770  const char tfecfFuncName[] = "insertLocalValues: ";
1771 
1772  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1773  "Requires that fill is active.");
1774  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1775  "Cannot insert indices with static graph; use replaceLocalValues() instead.");
1776  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1777  std::runtime_error, "Graph indices are global; use insertGlobalValues().");
1778  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1779  ! hasColMap (), std::runtime_error, "The matrix has no column Map yet, "
1780  "so you cannot insert local indices. If you created the matrix without "
1781  "a column Map (or without a fill-complete graph), you must call "
1782  "fillComplete to create the column Map, before you may work with local "
1783  "indices.");
1784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1785  values.size () != indices.size (), std::runtime_error, "values.size() = "
1786  << values.size () << " != indices.size() = " << indices.size ()<< ".");
1787  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1788  ! getRowMap()->isNodeLocalElement (localRow), std::runtime_error,
1789  "Local row index " << localRow << " does not belong to this process.");
1790  if (! myGraph_->indicesAreAllocated ()) {
1791  allocateValues (LocalIndices, GraphNotYetAllocated);
1792  }
1793  // Use the graph to filter incoming entries whose column indices
1794  // aren't in the column Map.
1795  Array<LocalOrdinal> f_inds (indices);
1796  ArrayView<const impl_scalar_type> valsIn =
1797  av_reinterpret_cast<const impl_scalar_type> (values);
1798  Array<impl_scalar_type> f_vals (valsIn);
1799  const size_t numFilteredEntries =
1800  myGraph_->template filterLocalIndicesAndValues<impl_scalar_type> (f_inds (),
1801  f_vals ());
1802  if (numFilteredEntries > 0) {
1803  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1804  const size_t curNumEntries = rowInfo.numEntries;
1805  const size_t newNumEntries = curNumEntries + numFilteredEntries;
1806  if (newNumEntries > rowInfo.allocSize) {
1807  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1808  getProfileType () == StaticProfile, std::runtime_error,
1809  ": new indices exceed statically allocated graph structure. "
1810  "newNumEntries (" << newNumEntries << " > rowInfo.allocSize ("
1811  << rowInfo.allocSize << ").");
1812  // Make space for the new matrix entries.
1813  rowInfo =
1814  myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1815  newNumEntries,
1816  values2D_[localRow]);
1817  }
1818  typename Graph::SLocalGlobalViews inds_view;
1819  inds_view.linds = f_inds (0, numFilteredEntries);
1820  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1821  this->getViewNonConst (rowInfo),
1822  f_vals, LocalIndices,
1823  LocalIndices);
1824 #ifdef HAVE_TPETRA_DEBUG
1825  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1826  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1827  std::logic_error, ": Internal logic error. Please contact Tpetra team.");
1828 #endif // HAVE_TPETRA_DEBUG
1829  }
1830 #ifdef HAVE_TPETRA_DEBUG
1831  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1832  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1833  "Please report this bug to the Tpetra developers.");
1834 #endif // HAVE_TPETRA_DEBUG
1835  }
1836 
1837 
// Insert new entries, addressed by global column index, into global row
// globalRow.
//
// If globalRow is not in the row Map on the calling process (the Map's
// getLocalElement returns OTL::invalid()), the data are handed to
// insertNonownedGlobalValues.  Otherwise the row is owned, and this
// method:
//   1. throws std::runtime_error if the matrix has a static graph;
//   2. allocates storage (GlobalIndices) if indices are not yet allocated;
//   3. if the matrix has a column Map, throws std::invalid_argument when
//      any given index is not in that Map;
//   4. grows the row via updateGlobalAllocAndValues when needed, throwing
//      std::runtime_error instead if the profile is StaticProfile;
//   5. inserts through myGraph_->insertIndicesAndValues, storing the
//      indices as global or local depending on isGloballyIndexed().
// Exceptions thrown by the allocation, reallocation, and insertion
// helpers are rewrapped as std::runtime_error with added context.
//
// Parameters:
//   globalRow - global index of the row to modify.
//   indices   - global column indices of the entries to insert.
//   values    - values to insert; values[k] pairs with indices[k].
//
// NOTE(review): the class qualifier before the function name seems to be
// missing from this listing -- confirm against the original file.
1838  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1839  void
1841  insertGlobalValues (const GlobalOrdinal globalRow,
1842  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1843  const Teuchos::ArrayView<const Scalar>& values)
1844  {
1845  using Teuchos::Array;
1846  using Teuchos::ArrayView;
1847  using Teuchos::av_reinterpret_cast;
1848  using Teuchos::toString;
1849  using std::endl;
1850  typedef LocalOrdinal LO;
1851  typedef GlobalOrdinal GO;
1852  typedef typename ArrayView<const GO>::size_type size_type;
1853  const char tfecfFuncName[] = "insertGlobalValues: ";
1854 
// Note: the size-mismatch check below is compiled only in debug builds.
1855 #ifdef HAVE_TPETRA_DEBUG
1856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1857  values.size () != indices.size (), std::runtime_error,
1858  "values.size() = " << values.size() << " != indices.size() = "
1859  << indices.size() << ".");
1860 #endif // HAVE_TPETRA_DEBUG
1861 
1862  const LO localRow = getRowMap ()->getLocalElement (globalRow);
1863 
1864  if (localRow == OTL::invalid ()) { // globalRow _not_ owned by calling process
1865  insertNonownedGlobalValues (globalRow, indices, values);
1866  }
1867  else { // globalRow _is_ owned by calling process
1868  if (this->isStaticGraph ()) {
1869  // Uh oh! Not allowed to insert into owned rows in that case.
1870  std::ostringstream err;
1871  const int myRank = getRowMap ()->getComm ()->getRank ();
1872  const int numProcs = getRowMap ()->getComm ()->getSize ();
1873 
1874  err << "The matrix was constructed with a constant (\"static\") graph, "
1875  "yet the given global row index " << globalRow << " is in the row "
1876  "Map on the calling process (with rank " << myRank << ", of " <<
1877  numProcs << " process(es)). In this case, you may not insert new "
1878  "entries into rows owned by the calling process.";
1879 
1880  if (! getRowMap ()->isNodeGlobalElement (globalRow)) {
1881  err << " Furthermore, GID->LID conversion with the row Map claims that "
1882  "the global row index is owned on the calling process, yet "
1883  "getRowMap()->isNodeGlobalElement(globalRow) returns false. That's"
1884  " weird! This might indicate a Map bug. Please report this to the"
1885  " Tpetra developers.";
1886  }
// The condition is always true inside this branch; the macro is used
// here only to throw with the message assembled above.
1887  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1888  this->isStaticGraph (), std::runtime_error, err.str ());
1889  }
1890 
1891  if (! myGraph_->indicesAreAllocated ()) {
1892  try {
1893  allocateValues (GlobalIndices, GraphNotYetAllocated);
1894  }
1895  catch (std::exception& e) {
1896  TEUCHOS_TEST_FOR_EXCEPTION(
1897  true, std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: "
1898  "allocateValues(GlobalIndices,GraphNotYetAllocated) threw an "
1899  "exception: " << e.what ());
1900  }
1901  }
1902 
1903  const size_type numEntriesToInsert = indices.size ();
1904  // If the matrix has a column Map, check at this point whether
1905  // the column indices belong to the column Map.
1906  //
1907  // FIXME (mfh 16 May 2013) We may want to consider deferring the
1908  // test to the CrsGraph method, since it may have to do this
1909  // anyway.
1910  if (hasColMap ()) {
1911  const map_type& colMap = * (getColMap ());
1912  // In a debug build, keep track of the nonowned ("bad") column
1913  // indices, so that we can display them in the exception
1914  // message. In a release build, just ditch the loop early if
1915  // we encounter a nonowned column index.
1916 #ifdef HAVE_TPETRA_DEBUG
1917  Array<GO> badColInds;
1918 #endif // HAVE_TPETRA_DEBUG
1919  bool allInColMap = true;
1920  for (size_type k = 0; k < numEntriesToInsert; ++k) {
1921  if (! colMap.isNodeGlobalElement (indices[k])) {
1922  allInColMap = false;
1923 #ifdef HAVE_TPETRA_DEBUG
1924  badColInds.push_back (indices[k]);
1925 #else
1926  break;
1927 #endif // HAVE_TPETRA_DEBUG
1928  }
1929  }
1930  if (! allInColMap) {
1931  std::ostringstream os;
1932  os << "You attempted to insert entries in owned row " << globalRow
1933  << ", at the following column indices: " << toString (indices)
1934  << "." << endl;
1935 #ifdef HAVE_TPETRA_DEBUG
1936  os << "Of those, the following indices are not in the column Map on "
1937  "this process: " << toString (badColInds) << "." << endl << "Since "
1938  "the matrix has a column Map already, it is invalid to insert "
1939  "entries at those locations.";
1940 #else
1941  os << "At least one of those indices is not in the column Map on this "
1942  "process." << endl << "It is invalid to insert into columns not in "
1943  "the column Map on the process that owns the row.";
1944 #endif // HAVE_TPETRA_DEBUG
1945  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1946  ! allInColMap, std::invalid_argument, os.str ());
1947  }
1948  }
1949 
1950  typename Graph::SLocalGlobalViews inds_view;
1951  ArrayView<const impl_scalar_type> vals_view;
1952 
1953  inds_view.ginds = indices;
// View the caller's Scalar values as the internal storage type.
1954  vals_view = av_reinterpret_cast<const impl_scalar_type> (values);
1955 
// Debug builds rewrap any exception from getRowInfo with context;
// release builds let it propagate unchanged.
1956 #ifdef HAVE_TPETRA_DEBUG
1957  RowInfo rowInfo;
1958  try {
1959  rowInfo = myGraph_->getRowInfo (localRow);
1960  } catch (std::exception& e) {
1961  TEUCHOS_TEST_FOR_EXCEPTION(
1962  true, std::runtime_error, "myGraph_->getRowInfo(localRow=" << localRow
1963  << ") threw an exception: " << e.what ());
1964  }
1965 #else
1966  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1967 #endif // HAVE_TPETRA_DEBUG
1968 
1969  const size_t curNumEntries = rowInfo.numEntries;
1970  const size_t newNumEntries =
1971  curNumEntries + static_cast<size_t> (numEntriesToInsert);
1972  if (newNumEntries > rowInfo.allocSize) {
// The second clause of this condition repeats the enclosing if's test,
// so the throw depends only on the profile type.
1973  TEUCHOS_TEST_FOR_EXCEPTION(
1974  getProfileType () == StaticProfile && newNumEntries > rowInfo.allocSize,
1975  std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: new "
1976  "indices exceed statically allocated graph structure. curNumEntries"
1977  " (" << curNumEntries << ") + numEntriesToInsert (" <<
1978  numEntriesToInsert << ") > allocSize (" << rowInfo.allocSize << ").");
1979 
1980  // Update allocation only as much as necessary
1981  try {
1982  rowInfo =
1983  myGraph_->template updateGlobalAllocAndValues<impl_scalar_type> (rowInfo,
1984  newNumEntries,
1985  values2D_[localRow]);
1986  } catch (std::exception& e) {
1987  TEUCHOS_TEST_FOR_EXCEPTION(
1988  true, std::runtime_error, "myGraph_->updateGlobalAllocAndValues"
1989  "(...) threw an exception: " << e.what ());
1990  }
1991  }
1992  try {
1993  if (isGloballyIndexed ()) {
1994  // lg=GlobalIndices, I=GlobalIndices means the method calls
1995  // getGlobalViewNonConst() and does direct copying, which
1996  // should be reasonably fast.
1997  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1998  this->getViewNonConst (rowInfo),
1999  vals_view,
2000  GlobalIndices, GlobalIndices);
2001  }
2002  else {
2003  // lg=GlobalIndices, I=LocalIndices means the method calls
2004  // the Map's getLocalElement() method once per entry to
2005  // insert. This may be slow.
2006  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
2007  this->getViewNonConst (rowInfo),
2008  vals_view,
2009  GlobalIndices, LocalIndices);
2010  }
2011  }
2012  catch (std::exception& e) {
2013  TEUCHOS_TEST_FOR_EXCEPTION(
2014  true, std::runtime_error, "myGraph_->insertIndicesAndValues(...) "
2015  "threw an exception: " << e.what ());
2016  }
2017 
// Debug-only sanity check: the graph's entry count for this row must
// match the count computed above.
2018 #ifdef HAVE_TPETRA_DEBUG
2019  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
2020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
2021  std::logic_error, ": There should be a total of " << newNumEntries
2022  << " entries in the row, but the graph now reports " << chkNewNumEntries
2023  << " entries. Please report this bug to the Tpetra developers.");
2024 #endif // HAVE_TPETRA_DEBUG
2025  }
2026  }
2027 
2028 
2029  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2030  void
2032  insertGlobalValues (const GlobalOrdinal globalRow,
2033  const LocalOrdinal numEnt,
2034  const Scalar vals[],
2035  const GlobalOrdinal inds[])
2036  {
2037  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2038  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2039  this->insertGlobalValues (globalRow, indsT, valsT);
2040  }
2041 
2042 
2043  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2044  void
2046  insertGlobalValuesFiltered (const GlobalOrdinal globalRow,
2047  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2048  const Teuchos::ArrayView<const Scalar>& values)
2049  {
2050  using Teuchos::Array;
2051  using Teuchos::ArrayView;
2052  using Teuchos::av_reinterpret_cast;
2053  typedef LocalOrdinal LO;
2054  typedef GlobalOrdinal GO;
2055  typedef impl_scalar_type ST;
2056  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
2057 
2058  // mfh 14 Dec 2012: Defer test for static graph until we know that
2059  // globalRow is in the row Map. If it's not in the row Map, it
2060  // doesn't matter whether or not the graph is static; the data
2061  // just get stashed for later use by globalAssemble().
2062  //
2063  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2064  // isStaticGraph(), std::runtime_error,
2065  // ": matrix was constructed with static graph. Cannot insert new entries.");
2066 #ifdef HAVE_TPETRA_DEBUG
2067  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2068  values.size () != indices.size (), std::runtime_error,
2069  "values.size() = " << values.size() << " != indices.size() = "
2070  << indices.size() << ".");
2071 #endif // HAVE_TPETRA_DEBUG
2072 
2073  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
2074  const LO lrow = getRowMap ()->getLocalElement (globalRow);
2075 
2076  if (lrow != Teuchos::OrdinalTraits<LO>::invalid ()) { // globalRow is in our row Map.
2077  // If the matrix has a static graph, this process is now allowed
2078  // to insert into rows it owns.
2079  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2080  this->isStaticGraph (), std::runtime_error,
2081  "The matrix was constructed with a static graph. In that case, "
2082  "it is forbidden to insert new entries into rows owned by the "
2083  "calling process.");
2084  if (! myGraph_->indicesAreAllocated ()) {
2085  allocateValues (GlobalIndices, GraphNotYetAllocated);
2086  }
2087  typename Graph::SLocalGlobalViews inds_view;
2088  ArrayView<const ST> vals_view;
2089 
2090  // We have to declare these Arrays here rather than in the
2091  // hasColMap() if branch, so that views to them will remain
2092  // valid for the whole scope.
2093  Array<GO> filtered_indices;
2094  Array<ST> filtered_values;
2095  if (hasColMap ()) { // We have a column Map.
2096  // Use column Map to filter the indices and corresponding
2097  // values, so that we only insert entries into columns we own.
2098  filtered_indices.assign (indices.begin (), indices.end ());
2099  filtered_values.assign (valsIn.begin (), valsIn.end ());
2100  const size_t numFilteredEntries =
2101  myGraph_->template filterGlobalIndicesAndValues<ST> (filtered_indices (),
2102  filtered_values ());
2103  inds_view.ginds = filtered_indices (0, numFilteredEntries);
2104  vals_view = filtered_values (0, numFilteredEntries);
2105  }
2106  else { // we don't have a column Map.
2107  inds_view.ginds = indices;
2108  vals_view = valsIn;
2109  }
2110  const size_t numFilteredEntries = vals_view.size ();
2111  // add the new indices and values
2112  if (numFilteredEntries > 0) {
2113  RowInfo rowInfo = myGraph_->getRowInfo (lrow);
2114  const size_t curNumEntries = rowInfo.numEntries;
2115  const size_t newNumEntries = curNumEntries + numFilteredEntries;
2116  if (newNumEntries > rowInfo.allocSize) {
2117  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2118  getProfileType () == StaticProfile, std::runtime_error,
2119  "New indices exceed statically allocated graph structure.");
2120 
2121  // Update allocation only as much as necessary
2122  rowInfo = myGraph_->template updateGlobalAllocAndValues<ST> (rowInfo,
2123  newNumEntries,
2124  values2D_[lrow]);
2125  }
2126  if (isGloballyIndexed ()) {
2127  // lg=GlobalIndices, I=GlobalIndices means the method calls
2128  // getGlobalViewNonConst() and does direct copying, which
2129  // should be reasonably fast.
2130  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
2131  this->getViewNonConst (rowInfo),
2132  vals_view,
2133  GlobalIndices, GlobalIndices);
2134  }
2135  else {
2136  // lg=GlobalIndices, I=LocalIndices means the method calls
2137  // the Map's getLocalElement() method once per entry to
2138  // insert. This may be slow.
2139  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
2140  this->getViewNonConst (rowInfo),
2141  vals_view,
2142  GlobalIndices, LocalIndices);
2143  }
2144 #ifdef HAVE_TPETRA_DEBUG
2145  {
2146  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (lrow);
2147  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
2148  std::logic_error, ": There should be a total of " << newNumEntries
2149  << " entries in the row, but the graph now reports " << chkNewNumEntries
2150  << " entries. Please report this bug to the Tpetra developers.");
2151  }
2152 #endif // HAVE_TPETRA_DEBUG
2153  }
2154  }
2155  else { // The calling process doesn't own the given row.
2156  insertNonownedGlobalValues (globalRow, indices, values);
2157  }
2158  }
2159 
2160 
2161  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2162  LocalOrdinal
2164  replaceLocalValues (const LocalOrdinal localRow,
2165  const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2166  const Teuchos::ArrayView<const Scalar>& vals) const
2167  {
2168  using Kokkos::MemoryUnmanaged;
2169  using Kokkos::View;
2170  typedef impl_scalar_type IST;
2171  typedef LocalOrdinal LO;
2172  typedef device_type DD;
2173  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2174  // inputInds and inputVals come from the user, so they are host data.
2175  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2176  typedef View<const LO*, HD, MemoryUnmanaged> LIVT; // lcl ind view type
2177 
2178  LIVT lclColsIn (lclCols.getRawPtr (), lclCols.size ());
2179  const IST* valsRaw = reinterpret_cast<const IST*> (vals.getRawPtr ());
2180  ISVT valsIn (valsRaw, vals.size ());
2181  return this->template replaceLocalValues<LIVT, ISVT> (localRow,
2182  lclColsIn,
2183  valsIn);
2184  }
2185 
2186  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2187  LocalOrdinal
2189  replaceLocalValues (const LocalOrdinal localRow,
2190  const LocalOrdinal numEnt,
2191  const Scalar inputVals[],
2192  const LocalOrdinal inputCols[]) const
2193  {
2194  using Kokkos::MemoryUnmanaged;
2195  using Kokkos::View;
2196  typedef impl_scalar_type IST;
2197  typedef LocalOrdinal LO;
2198  typedef device_type DD;
2199  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2200  // inputInds and inputVals come from the user, so they are host data.
2201  typedef View<const LO*, HD, MemoryUnmanaged> LIVT; // lcl ind view type
2202  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2203 
2204  LIVT indsK (inputCols, numEnt);
2205  ISVT valsK (reinterpret_cast<const IST*> (inputVals), numEnt);
2206  return this->template replaceLocalValues<LIVT, ISVT> (localRow,
2207  indsK,
2208  valsK);
2209  }
2210 
2211  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2212  LocalOrdinal
2214  replaceGlobalValues (const GlobalOrdinal globalRow,
2215  const Teuchos::ArrayView<const GlobalOrdinal>& inputInds,
2216  const Teuchos::ArrayView<const Scalar>& inputVals) const
2217  {
2218  using Kokkos::MemoryUnmanaged;
2219  using Kokkos::View;
2220  typedef impl_scalar_type IST;
2221  typedef GlobalOrdinal GO;
2222  typedef device_type DD;
2223  typedef typename View<GO*, DD>::HostMirror::device_type HD;
2224  // inputInds and inputVals come from the user, so they are host data.
2225  typedef View<const GO*, HD, MemoryUnmanaged> GIVT; // gbl ind view type
2226  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2227 
2228  const IST* inputValsRaw =
2229  reinterpret_cast<const IST*> (inputVals.getRawPtr ());
2230  GIVT indsK (inputInds.getRawPtr (), inputInds.size ());
2231  ISVT valsK (inputValsRaw, inputVals.size ());
2232  return this->template replaceGlobalValues<GIVT, ISVT> (globalRow,
2233  indsK,
2234  valsK);
2235  }
2236 
2237 
// Raw-array overload of replaceGlobalValues.
//
// Wraps the caller's arrays in unmanaged Kokkos::View objects (no copy is
// made here) and forwards to the View-templated replaceGlobalValues
// overload, returning that overload's result unchanged.
//
// globalRow: row index, passed through as given.
// numEnt:    length used for both wrapped arrays.
// inputVals: numEnt values, reinterpreted as impl_scalar_type.
// inputCols: numEnt global column indices.
2238  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2239  LocalOrdinal
2241  replaceGlobalValues (const GlobalOrdinal globalRow,
2242  const LocalOrdinal numEnt,
2243  const Scalar inputVals[],
2244  const GlobalOrdinal inputCols[]) const
2245  {
2246  using Kokkos::MemoryUnmanaged;
2247  using Kokkos::View;
2248  typedef impl_scalar_type IST;
2249  typedef GlobalOrdinal GO;
2250  typedef device_type DD;
2251  typedef typename View<GO*, DD>::HostMirror::device_type HD;
2252  // inputCols and inputVals come from the user, so they are host data.
2253  typedef View<const GO*, HD, MemoryUnmanaged> GIVT; // gbl ind view type
2254  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2255 
      // Both views alias the caller's memory and share the length numEnt.
2256  GIVT indsK (inputCols, numEnt);
2257  ISVT valsK (reinterpret_cast<const IST*> (inputVals), numEnt);
2258  return this->template replaceGlobalValues<GIVT, ISVT> (globalRow,
2259  indsK,
2260  valsK);
2261  }
2262 
2263 
// Teuchos::ArrayView overload of sumIntoGlobalValues.
//
// globalRow: global index of the target row.
// indices:   global column indices of the contributions.
// values:    the contributions, reinterpreted as impl_scalar_type.
// atomic:    forwarded unchanged to the graph's sumIntoGlobalValues.
//
// Return value, in the order the cases are tested:
//  - OrdinalTraits<LocalOrdinal>::invalid() if fill is not active;
//  - indices.size() if globalRow is not in the row Map; the entries are
//    handed to insertNonownedGlobalValues in that case;
//  - OrdinalTraits<LocalOrdinal>::invalid() if there is no static graph;
//  - otherwise the result of staticGraph_->sumIntoGlobalValues applied to
//    the row's current values.
2264  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2265  LocalOrdinal
2267  sumIntoGlobalValues (const GlobalOrdinal globalRow,
2268  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2269  const Teuchos::ArrayView<const Scalar>& values,
2270  const bool atomic)
2271  {
2272  using Kokkos::MemoryUnmanaged;
2273  using Kokkos::View;
2274  typedef impl_scalar_type ST;
2275  typedef LocalOrdinal LO;
2276  typedef GlobalOrdinal GO;
2277  typedef device_type DD;
2278  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2279 
2280  if (! isFillActive ()) {
2281  // Fill must be active in order to call this method.
2282  return Teuchos::OrdinalTraits<LO>::invalid ();
2283  }
2284 
2285  // mfh 26 Nov 2015: Avoid calling getRowMap() or getCrsGraph(),
2286  // because they touch RCP's reference count, which is not thread
2287  // safe. Dereferencing an RCP or calling op-> does not touch the
2288  // reference count.
2289  const LO lrow = this->staticGraph_.is_null () ?
2290  myGraph_->rowMap_->getLocalElement (globalRow) :
2291  staticGraph_->rowMap_->getLocalElement (globalRow);
2292  //const LO lrow = this->getRowMap ()->getLocalElement (globalRow);
2293 
2294  if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) {
2295  // globalRow is not in the row Map, so stash the given entries
2296  // away in a separate data structure. globalAssemble() (called
2297  // during fillComplete()) will exchange that data and sum it in
2298  // using sumIntoGlobalValues().
2299  this->insertNonownedGlobalValues (globalRow, indices, values);
2300  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2301  // since we won't know whether the given indices were valid
2302  // until globalAssemble (called in fillComplete) is called.
2303  // That's why insertNonownedGlobalValues doesn't return
2304  // anything. Just for consistency, I'll return the number of
2305  // entries that the user gave us.
2306  return static_cast<LO> (indices.size ());
2307  }
2308 
      // The row is in the row Map, but the summation below goes through
      // staticGraph_, so there is nothing to do without it.
2309  if (staticGraph_.is_null ()) {
2310  return Teuchos::OrdinalTraits<LO>::invalid ();
2311  }
2312  const RowInfo rowInfo = this->staticGraph_->getRowInfo (lrow);
2313 
      // Wrap the row's current values and the caller's input arrays in
      // unmanaged views, then let the graph do the lookup and summation.
2314  auto curVals = this->getRowViewNonConst (rowInfo);
2315  const ST* valsRaw = reinterpret_cast<const ST*> (values.getRawPtr ());
2316  View<const ST*, HD, MemoryUnmanaged> valsIn (valsRaw, values.size ());
2317  View<const GO*, HD, MemoryUnmanaged> indsIn (indices.getRawPtr (),
2318  indices.size ());
2319  return staticGraph_->template sumIntoGlobalValues<ST, HD, DD> (rowInfo,
2320  curVals,
2321  indsIn,
2322  valsIn,
2323  atomic);
2324  }
2325 
2326 
// Raw-array overload of sumIntoGlobalValues.
//
// Wraps cols and vals (each of length numEnt) in Teuchos::ArrayView objects
// and forwards to the ArrayView overload, returning its result unchanged.
2327  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2328  LocalOrdinal
2330  sumIntoGlobalValues (const GlobalOrdinal globalRow,
2331  const LocalOrdinal numEnt,
2332  const Scalar vals[],
2333  const GlobalOrdinal cols[],
2334  const bool atomic)
2335  {
2336  Teuchos::ArrayView<const GlobalOrdinal> colsIn (cols, numEnt);
2337  Teuchos::ArrayView<const Scalar> valsIn (vals, numEnt);
2338  return this->sumIntoGlobalValues (globalRow, colsIn, valsIn, atomic);
2339  }
2340 
2341 
// Teuchos::ArrayView overload of sumIntoLocalValues.
//
// Wraps the two input ArrayViews in unmanaged Kokkos::View objects (no copy
// is made here) and forwards to the View-templated sumIntoLocalValues
// overload, returning that overload's result unchanged.
//
// localRow: row index, passed through as given.
// indices:  local column indices of the contributions.
// values:   the contributions, reinterpreted as impl_scalar_type.
// atomic:   forwarded unchanged.
//
// The two views take their lengths from indices.size() and values.size()
// respectively; this wrapper does not compare them.
2342  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2343  LocalOrdinal
2345  sumIntoLocalValues (const LocalOrdinal localRow,
2346  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2347  const Teuchos::ArrayView<const Scalar>& values,
2348  const bool atomic) const
2349  {
2350  using Kokkos::MemoryUnmanaged;
2351  using Kokkos::View;
2352  typedef impl_scalar_type IST;
2353  typedef LocalOrdinal LO;
2354  typedef device_type DD;
2355  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2356  typedef View<const IST*, HD, MemoryUnmanaged> IVT;
2357  typedef View<const LO*, HD, MemoryUnmanaged> IIT;
2358 
2359  const IST* valsRaw = reinterpret_cast<const IST*> (values.getRawPtr ());
2360  IVT valsIn (valsRaw, values.size ());
2361  IIT indsIn (indices.getRawPtr (), indices.size ());
2362  return this->template sumIntoLocalValues<IIT, IVT> (localRow, indsIn,
2363  valsIn, atomic);
2364  }
2365 
// Raw-array overload of sumIntoLocalValues.
//
// Wraps vals and cols (each of length numEnt) in unmanaged Kokkos::View
// objects (no copy is made here) and forwards to the View-templated
// sumIntoLocalValues overload, returning that overload's result unchanged.
//
// localRow: row index, passed through as given.
// numEnt:   length used for both wrapped arrays.
// vals:     numEnt contributions, reinterpreted as impl_scalar_type.
// cols:     numEnt local column indices.
// atomic:   forwarded unchanged.
2366  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2367  LocalOrdinal
2369  sumIntoLocalValues (const LocalOrdinal localRow,
2370  const LocalOrdinal numEnt,
2371  const Scalar vals[],
2372  const LocalOrdinal cols[],
2373  const bool atomic) const
2374  {
2375  using Kokkos::MemoryUnmanaged;
2376  using Kokkos::View;
2377  typedef impl_scalar_type IST;
2378  typedef LocalOrdinal LO;
2379  typedef device_type DD;
2380  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2381  typedef View<const IST*, HD, MemoryUnmanaged> IVT;
2382  typedef View<const LO*, HD, MemoryUnmanaged> IIT;
2383 
2384  const IST* valsRaw = reinterpret_cast<const IST*> (vals);
2385  IVT valsIn (valsRaw, numEnt);
2386  IIT indsIn (cols, numEnt);
2387  return this->template sumIntoLocalValues<IIT, IVT> (localRow, indsIn,
2388  valsIn, atomic);
2389  }
2390 
2391 
// Return a const Teuchos::ArrayView of the value storage of one row.
//
// rowinfo: descriptor of the row (offset1D, allocSize and localRow are read).
//
// Three cases, tested in this order:
//  - 1-D storage (k_values1D_ nonempty and rowinfo.allocSize > 0): the view
//    covers rowinfo.allocSize slots starting at rowinfo.offset1D.
//  - 2-D storage (values2D_ non-null): the view covers the whole of
//    values2D_[rowinfo.localRow].
//  - otherwise: an empty view.
//
// Debug builds (HAVE_TPETRA_DEBUG) throw std::range_error if the 1-D range
// would extend past the end of k_values1D_.
2392  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2393  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2395  getView (RowInfo rowinfo) const
2396  {
2397  using Kokkos::MemoryUnmanaged;
2398  using Kokkos::View;
2399  using Teuchos::ArrayView;
2400  typedef impl_scalar_type ST;
2401  typedef std::pair<size_t, size_t> range_type;
2402 
2403  if (k_values1D_.dimension_0 () != 0 && rowinfo.allocSize > 0) {
2404 #ifdef HAVE_TPETRA_DEBUG
2405  TEUCHOS_TEST_FOR_EXCEPTION(
2406  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.dimension_0 (),
2407  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
2408  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
2409  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
2410  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2411 #endif // HAVE_TPETRA_DEBUG
2412  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
2413  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2414  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2415  // directly, because that first creates a _managed_ subview,
2416  // then returns an unmanaged version of that. That touches the
2417  // reference count, which costs performance in a measurable way.
2418  // Instead, we create a temporary unmanaged view, then create
2419  // the subview from that.
2420  subview_type sv = Kokkos::subview (subview_type (k_values1D_), range);
      // rowinfo.allocSize > 0 already holds in this branch (see the enclosing
      // condition), so the NULL arm of the conditional is not taken here.
2421  const ST* const sv_raw = (rowinfo.allocSize == 0) ? NULL : sv.ptr_on_device ();
2422  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
2423  }
2424  else if (values2D_ != Teuchos::null) {
2425  return values2D_[rowinfo.localRow] ();
2426  }
2427  else {
2428  return ArrayView<impl_scalar_type> ();
2429  }
2430  }
2431 
2432 
// Raw-pointer access to the value storage of one row.
//
// NOTE(review): the lines carrying this function's name and its first
// parameter are not present in this listing. From the body, `vals` is an
// output pointer; presumably this is getViewRawConst -- confirm against
// the class declaration.
//
// vals:    output; pointer to the row's first value slot, or NULL.
// numEnt:  output; number of slots reachable through vals. This is
//          rowinfo.allocSize for 1-D storage and the size of
//          values2D_[rowinfo.localRow] for 2-D storage, else 0.
// rowinfo: descriptor of the row (offset1D, allocSize, localRow are read).
//
// Returns 0 in all non-debug paths. Debug builds (HAVE_TPETRA_DEBUG)
// return Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), with vals = NULL
// and numEnt = 0, when the requested row lies outside the storage.
2433  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2434  LocalOrdinal
2437  LocalOrdinal& numEnt,
2438  const RowInfo& rowinfo) const
2439  {
2440  if (k_values1D_.dimension_0 () != 0 && rowinfo.allocSize > 0) {
2441 #ifdef HAVE_TPETRA_DEBUG
2442  if (rowinfo.offset1D + rowinfo.allocSize > k_values1D_.dimension_0 ()) {
2443  vals = NULL;
2444  numEnt = 0;
2445  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
2446  }
2447 #endif // HAVE_TPETRA_DEBUG
2448  vals = k_values1D_.ptr_on_device () + rowinfo.offset1D;
2449  numEnt = rowinfo.allocSize;
2450  }
2451  else if (! values2D_.is_null ()) {
2452 #ifdef HAVE_TPETRA_DEBUG
2453  if (rowinfo.localRow >= static_cast<size_t> (values2D_.size ())) {
2454  vals = NULL;
2455  numEnt = 0;
2456  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
2457  }
2458 #endif // HAVE_TPETRA_DEBUG
2459  // Use const reference so that we don't update ArrayRCP's
2460  // reference count, which is not thread safe.
2461  const auto& curRow = values2D_[rowinfo.localRow];
2462  vals = curRow.getRawPtr ();
2463  numEnt = curRow.size ();
2464  }
2465  else {
      // Neither 1-D nor 2-D storage applies to this row: report an empty row.
2466  vals = NULL;
2467  numEnt = 0;
2468  }
2469 
2470  return static_cast<LocalOrdinal> (0);
2471  }
2472 
// Non-const counterpart of getViewRawConst.
//
// NOTE(review): the lines carrying this function's name and its first
// parameter are not present in this listing. From the body, `vals` is an
// output pointer to non-const impl_scalar_type; presumably this is
// getViewRaw -- confirm against the class declaration.
//
// Calls getViewRawConst with the same numEnt and rowinfo, strips the
// constness of the pointer it produces, and returns its error code.
2473  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2474  LocalOrdinal
2477  LocalOrdinal& numEnt,
2478  const RowInfo& rowinfo) const
2479  {
2480  const impl_scalar_type* valsConst;
2481  const LocalOrdinal err = this->getViewRawConst (valsConst, numEnt, rowinfo);
2482  vals = const_cast<impl_scalar_type*> (valsConst);
2483  return err;
2484  }
2485 
// Return an unmanaged, const Kokkos::View of the value storage of one row.
//
// rowInfo: descriptor of the row (offset1D, allocSize and localRow are read).
//
// Three cases, tested in this order:
//  - 1-D storage (k_values1D_ nonempty and rowInfo.allocSize > 0): a subview
//    of rowInfo.allocSize slots starting at rowInfo.offset1D.
//  - 2-D storage (values2D_ non-null): a view wrapping the whole of
//    values2D_[rowInfo.localRow].
//  - otherwise: a default-constructed view.
//
// Debug builds (HAVE_TPETRA_DEBUG) throw std::range_error if the 1-D range
// would extend past the end of k_values1D_.
2486  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2487  Kokkos::View<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type*,
2489  Kokkos::MemoryUnmanaged>
2491  getRowView (const RowInfo& rowInfo) const
2492  {
2493  using Kokkos::MemoryUnmanaged;
2494  using Kokkos::View;
2495  typedef impl_scalar_type ST;
2496  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2497  typedef std::pair<size_t, size_t> range_type;
2498 
2499  if (k_values1D_.dimension_0 () != 0 && rowInfo.allocSize > 0) {
2500 #ifdef HAVE_TPETRA_DEBUG
2501  TEUCHOS_TEST_FOR_EXCEPTION(
2502  rowInfo.offset1D + rowInfo.allocSize > k_values1D_.dimension_0 (),
2503  std::range_error, "Tpetra::CrsMatrix::getRowView: Invalid access "
2504  "to 1-D storage of values." << std::endl << "rowInfo.offset1D (" <<
2505  rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize <<
2506  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2507 #endif // HAVE_TPETRA_DEBUG
2508  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
2509  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2510  // directly, because that first creates a _managed_ subview,
2511  // then returns an unmanaged version of that. That touches the
2512  // reference count, which costs performance in a measurable way.
2513  // Instead, we create a temporary unmanaged view, then create
2514  // the subview from that.
2515  return Kokkos::subview (subview_type (k_values1D_), range);
2516  }
2517  else if (values2D_ != Teuchos::null) {
2518  Teuchos::ArrayView<const ST> rowView = values2D_[rowInfo.localRow] ();
2519  return subview_type (rowView.getRawPtr (), rowView.size ());
2520  }
2521  else {
2522  return subview_type ();
2523  }
2524  }
2525 
// Return an unmanaged, non-const Kokkos::View of the value storage of one
// row. The method itself is const, yet the returned view permits writing
// to the matrix's values.
//
// rowInfo: descriptor of the row (offset1D, allocSize and localRow are read).
//
// Three cases, tested in this order:
//  - 1-D storage (k_values1D_ nonempty and rowInfo.allocSize > 0): a subview
//    of rowInfo.allocSize slots starting at rowInfo.offset1D.
//  - 2-D storage (values2D_ non-null): a view wrapping the whole of
//    values2D_[rowInfo.localRow].
//  - otherwise: a default-constructed view.
//
// Debug builds (HAVE_TPETRA_DEBUG) throw std::range_error if the 1-D range
// would extend past the end of k_values1D_.
2526  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2527  Kokkos::View<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type*,
2529  Kokkos::MemoryUnmanaged>
2530  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2531  getRowViewNonConst (const RowInfo& rowInfo) const
2532  {
2533  using Kokkos::MemoryUnmanaged;
2534  using Kokkos::View;
2535  typedef impl_scalar_type ST;
2536  typedef View<ST*, execution_space, MemoryUnmanaged> subview_type;
2537  typedef std::pair<size_t, size_t> range_type;
2538 
2539  if (k_values1D_.dimension_0 () != 0 && rowInfo.allocSize > 0) {
2540 #ifdef HAVE_TPETRA_DEBUG
2541  TEUCHOS_TEST_FOR_EXCEPTION(
2542  rowInfo.offset1D + rowInfo.allocSize > k_values1D_.dimension_0 (),
2543  std::range_error, "Tpetra::CrsMatrix::getRowViewNonConst: Invalid access "
2544  "to 1-D storage of values." << std::endl << "rowInfo.offset1D (" <<
2545  rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize <<
2546  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2547 #endif // HAVE_TPETRA_DEBUG
2548  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
2549  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2550  // directly, because that first creates a _managed_ subview,
2551  // then returns an unmanaged version of that. That touches the
2552  // reference count, which costs performance in a measurable way.
2553  // Instead, we create a temporary unmanaged view, then create
2554  // the subview from that.
2555  return Kokkos::subview (subview_type (k_values1D_), range);
2556  }
2557  else if (values2D_ != Teuchos::null) {
2558  Teuchos::ArrayView<ST> rowView = values2D_[rowInfo.localRow] ();
2559  return subview_type (rowView.getRawPtr (), rowView.size ());
2560  }
2561  else {
2562  return subview_type ();
2563  }
2564  }
2565 
// Non-const counterpart of getView: returns the same ArrayView that
// getView(rowinfo) produces, with the constness of its element type cast
// away via Teuchos::av_const_cast.
2566  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2567  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2569  getViewNonConst (const RowInfo& rowinfo) const
2570  {
2571  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
2572  }
2573 
// Copy the entries of one row, with local column indices, into
// caller-provided arrays.
//
// localRow:   local row index, passed to staticGraph_->getRowInfo.
// indices:    output; receives the row's local column indices.
// values:     output; receives the row's values.
// numEntries: output; set to the row's entry count (rowinfo.numEntries)
//             before any copying takes place.
//
// Throws std::runtime_error if the matrix is globally indexed and has no
// column Map, or if either output array is shorter than the row's entry
// count. Debug builds (HAVE_TPETRA_DEBUG) add further consistency checks
// that throw std::runtime_error or std::logic_error.
//
// Nothing is copied when rowinfo.localRow is invalid. When the graph stores
// global indices, each one is converted with the column Map's
// getLocalElement.
2574  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2575  void
2577  getLocalRowCopy (LocalOrdinal localRow,
2578  const Teuchos::ArrayView<LocalOrdinal>& indices,
2579  const Teuchos::ArrayView<Scalar>& values,
2580  size_t& numEntries) const
2581  {
2582  using Teuchos::ArrayView;
2583  using Teuchos::av_reinterpret_cast;
2584  const char tfecfFuncName[] = "getLocalRowCopy: ";
2585 
2586  TEUCHOS_TEST_FOR_EXCEPTION(
2587  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2588  "Tpetra::CrsMatrix::getLocalRowCopy: The matrix is globally indexed and "
2589  "does not have a column Map yet. That means we don't have local indices "
2590  "for columns yet, so it doesn't make sense to call this method. If the "
2591  "matrix doesn't have a column Map yet, you should call fillComplete on "
2592  "it first.");
2593 #ifdef HAVE_TPETRA_DEBUG
2594  TEUCHOS_TEST_FOR_EXCEPTION(
2595  ! staticGraph_->hasRowInfo (), std::runtime_error,
2596  "Tpetra::CrsMatrix::getLocalRowCopy: The graph's row information was "
2597  "deleted at fillComplete().");
2598 #endif // HAVE_TPETRA_DEBUG
2599 
2600  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2601  const size_t theNumEntries = rowinfo.numEntries;
2602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2603  (static_cast<size_t> (indices.size ()) < theNumEntries ||
2604  static_cast<size_t> (values.size ()) < theNumEntries,
2605  std::runtime_error, "Row with local index " << localRow << " has " <<
2606  theNumEntries << " entry/ies, but indices.size() = " <<
2607  indices.size () << " and values.size() = " << values.size () << ".");
2608  numEntries = theNumEntries; // first side effect
2609 
2610  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2611  if (staticGraph_->isLocallyIndexed ()) {
2612  const LocalOrdinal* curLclInds;
2613  const impl_scalar_type* curVals;
2614  LocalOrdinal numSpots; // includes both current entries and extra space
2615 
2616  // If we got this far, rowinfo should be correct and should
2617  // refer to a valid local row. Thus, these error checks are
2618  // superfluous, but we retain them in a debug build.
2619 #ifdef HAVE_TPETRA_DEBUG
2620  int err =
2621  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2623  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2624  "staticGraph_->getLocalViewRawConst returned nonzero error code "
2625  << err << ".");
2626  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2627  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2628  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2629  << ".");
2630  const LocalOrdinal numSpotsBefore = numSpots;
2631  err = getViewRawConst (curVals, numSpots, rowinfo);
      // NOTE(review): the message below names getViewRaw, but the call above
      // is getViewRawConst.
2632  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2633  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2634  "getViewRaw returned nonzero error code " << err << ".");
2635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2636  (numSpotsBefore != numSpots, std::logic_error,
2637  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2638  << numSpots << ".");
2639 #else
2640  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2641  (void) getViewRawConst (curVals, numSpots, rowinfo);
2642 #endif // HAVE_TPETRA_DEBUG
2643 
      // Copy only the first theNumEntries slots; numSpots may be larger.
2644  for (size_t j = 0; j < theNumEntries; ++j) {
2645  values[j] = curVals[j];
2646  indices[j] = curLclInds[j];
2647  }
2648  }
2649  else if (staticGraph_->isGloballyIndexed ()) {
2650  const map_type& colMap = * (staticGraph_->colMap_);
2651  const GlobalOrdinal* curGblInds;
2652  const impl_scalar_type* curVals;
2653  LocalOrdinal numSpots; // includes both current entries and extra space
2654 
2655  // If we got this far, rowinfo should be correct and should
2656  // refer to a valid local row. Thus, these error checks are
2657  // superfluous, but we retain them in a debug build.
2658 #ifdef HAVE_TPETRA_DEBUG
2659  int err =
2660  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2661  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2662  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2663  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
2664  << err << ".");
2665  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2666  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2667  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2668  << ".");
2669  const LocalOrdinal numSpotsBefore = numSpots;
2670  err = getViewRawConst (curVals, numSpots, rowinfo);
2671  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2672  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2673  "getViewRawConst returned nonzero error code " << err << ".");
2674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2675  (numSpotsBefore != numSpots, std::logic_error,
2676  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2677  << numSpots << ".");
2678 #else
2679  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2680  (void) getViewRawConst (curVals, numSpots, rowinfo);
2681 #endif //HAVE_TPETRA_DEBUG
2682 
      // Stored indices are global here; convert each one through the column Map.
2683  for (size_t j = 0; j < theNumEntries; ++j) {
2684  values[j] = curVals[j];
2685  indices[j] = colMap.getLocalElement (curGblInds[j]);
2686  }
2687  }
2688  }
2689  }
2690 
// Copy the entries of one row, with global column indices, into
// caller-provided arrays.
//
// globalRow:  global row index, passed to
//             staticGraph_->getRowInfoFromGlobalRowIndex.
// indices:    output; receives the row's global column indices.
// values:     output; receives the row's values.
// numEntries: output; set to the row's entry count (rowinfo.numEntries)
//             before any copying takes place.
//
// Throws std::runtime_error if either output array is shorter than the
// row's entry count. Debug builds (HAVE_TPETRA_DEBUG) add consistency
// checks that throw std::logic_error.
//
// Nothing is copied when rowinfo.localRow is invalid. When the graph stores
// local indices, each one is converted with the column Map's
// getGlobalElement.
2691  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2692  void
2694  getGlobalRowCopy (GlobalOrdinal globalRow,
2695  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2696  const Teuchos::ArrayView<Scalar>& values,
2697  size_t& numEntries) const
2698  {
2699  using Teuchos::ArrayView;
2700  using Teuchos::av_reinterpret_cast;
2701  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2702 
2703  const RowInfo rowinfo =
2704  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
2705  const size_t theNumEntries = rowinfo.numEntries;
2706  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2707  static_cast<size_t> (indices.size ()) < theNumEntries ||
2708  static_cast<size_t> (values.size ()) < theNumEntries,
2709  std::runtime_error, "Row with global index " << globalRow << " has "
2710  << theNumEntries << " entry/ies, but indices.size() = " <<
2711  indices.size () << " and values.size() = " << values.size () << ".");
2712  numEntries = theNumEntries; // first side effect
2713 
2714  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2715  if (staticGraph_->isLocallyIndexed ()) {
2716  const map_type& colMap = * (staticGraph_->colMap_);
2717  const LocalOrdinal* curLclInds;
2718  const impl_scalar_type* curVals;
2719  LocalOrdinal numSpots; // includes both current entries and extra space
2720 
2721  // If we got this far, rowinfo should be correct and should
2722  // refer to a valid local row. Thus, these error checks are
2723  // superfluous, but we retain them in a debug build.
2724 #ifdef HAVE_TPETRA_DEBUG
2725  int err =
2726  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2728  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2729  "staticGraph_->getLocalViewRawConst returned nonzero error code "
2730  << err << ".");
2731  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2732  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2733  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2734  << ".");
2735  const LocalOrdinal numSpotsBefore = numSpots;
2736  err = getViewRawConst (curVals, numSpots, rowinfo);
      // NOTE(review): the message below names getViewRaw, but the call above
      // is getViewRawConst.
2737  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2738  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2739  "getViewRaw returned nonzero error code " << err << ".");
2740  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2741  (numSpotsBefore != numSpots, std::logic_error,
2742  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2743  << numSpots << ".");
2744 #else
2745  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2746  (void) getViewRawConst (curVals, numSpots, rowinfo);
2747 #endif //HAVE_TPETRA_DEBUG
2748 
      // Stored indices are local here; convert each one through the column Map.
2749  for (size_t j = 0; j < theNumEntries; ++j) {
2750  values[j] = curVals[j];
2751  indices[j] = colMap.getGlobalElement (curLclInds[j]);
2752  }
2753  }
2754  else if (staticGraph_->isGloballyIndexed ()) {
2755  const GlobalOrdinal* curGblInds;
2756  const impl_scalar_type* curVals;
2757  LocalOrdinal numSpots; // includes both current entries and extra space
2758 
2759  // If we got this far, rowinfo should be correct and should
2760  // refer to a valid local row. Thus, these error checks are
2761  // superfluous, but we retain them in a debug build.
2762 #ifdef HAVE_TPETRA_DEBUG
2763  int err =
2764  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2765  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2766  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2767  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
2768  << err << ".");
2769  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2770  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2771  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2772  << ".");
2773  const LocalOrdinal numSpotsBefore = numSpots;
2774  err = getViewRawConst (curVals, numSpots, rowinfo);
2775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2776  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2777  "getViewRawConst returned nonzero error code " << err << ".");
2778  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2779  (numSpotsBefore != numSpots, std::logic_error,
2780  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2781  << numSpots << ".");
2782 #else
2783  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2784  (void) getViewRawConst (curVals, numSpots, rowinfo);
2785 #endif //HAVE_TPETRA_DEBUG
2786 
      // Copy only the first theNumEntries slots; numSpots may be larger.
2787  for (size_t j = 0; j < theNumEntries; ++j) {
2788  values[j] = curVals[j];
2789  indices[j] = curGblInds[j];
2790  }
2791  }
2792  }
2793  }
2794 
// Get a view (no copy) of one row's entries, with local column indices.
//
// localRow: local row index, passed to staticGraph_->getRowInfo.
// indices:  output; view of the row's local column indices.
// values:   output; view of the row's values.
//
// Both outputs are first reset to null views. They are filled in only if
// the row is valid and has at least one entry, and are then trimmed to
// exactly rowinfo.numEntries elements.
//
// Throws std::runtime_error if the matrix is globally indexed. Debug builds
// (HAVE_TPETRA_DEBUG) re-check the result sizes and throw std::logic_error
// on a mismatch.
2795  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2796  void
2798  getLocalRowView (LocalOrdinal localRow,
2799  Teuchos::ArrayView<const LocalOrdinal>& indices,
2800  Teuchos::ArrayView<const Scalar>& values) const
2801  {
2802  using Teuchos::ArrayView;
2803  using Teuchos::av_reinterpret_cast;
2804  typedef LocalOrdinal LO;
2805  const char tfecfFuncName[] = "getLocalRowView: ";
2806 
2807  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2808  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
2809  "its indices as global indices, so you cannot get a view with local "
2810  "column indices. If the matrix has a column Map, you may call "
2811  "getLocalRowCopy() to get local column indices; otherwise, you may get "
2812  "a view with global column indices by calling getGlobalRowCopy().");
2813  indices = Teuchos::null;
2814  values = Teuchos::null;
2815  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2816  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2817  rowinfo.numEntries > 0) {
2818  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
2819  ArrayView<const Scalar> valTmp =
2820  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
      // Trim both views to the row's entry count.
2821  indices = indTmp (0, rowinfo.numEntries);
2822  values = valTmp (0, rowinfo.numEntries);
2823  }
2824 
2825 #ifdef HAVE_TPETRA_DEBUG
2826  const char suffix[] = ". This should never happen. Please report this "
2827  "bug to the Tpetra developers.";
2828  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2829  (static_cast<size_t> (indices.size ()) !=
2830  static_cast<size_t> (values.size ()), std::logic_error,
2831  "At the end of this method, for local row " << localRow << ", "
2832  "indices.size() = " << indices.size () << " != values.size () = "
2833  << values.size () << suffix);
2834  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2835  (static_cast<size_t> (indices.size ()) !=
2836  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
2837  "At the end of this method, for local row " << localRow << ", "
2838  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
2839  << rowinfo.numEntries << suffix);
2840  const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
2841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2842  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
2843  "of this method, for local row " << localRow << ", rowinfo.numEntries = "
2844  << rowinfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
2845  expectedNumEntries << suffix);
2846 #endif // HAVE_TPETRA_DEBUG
2847  }
2848 
2849  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2850  LocalOrdinal
2852  getLocalRowView (const LocalOrdinal lclRow,
2853  LocalOrdinal& numEnt,
2854  const impl_scalar_type*& val,
2855  const LocalOrdinal*& ind) const
2856  {
2857  typedef LocalOrdinal LO;
2858 
2859  // Don't call getCrsGraph(), because that modfies an RCP reference
2860  // count, which is not thread safe. Checking whether an RCP is
2861  // null does NOT modify its reference count, and is therefore
2862  // thread safe. Note that isGloballyIndexed() calls
2863  // getCrsGraph(), so we have to go to the graph directly.
2864  if (staticGraph_.is_null () || staticGraph_->isGloballyIndexed ()) {
2866  }
2867  else {
2868  const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
2869  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
2870  numEnt = 0; // no valid entries in this row on the calling process
2871  val = NULL;
2872  ind = NULL;
2873  // First argument (lclRow) invalid, so make 1 the error code.
2874  return static_cast<LO> (1);
2875  }
2876  else {
2877  numEnt = static_cast<LO> (rowInfo.numEntries);
2878  auto lclColInds = staticGraph_->getLocalKokkosRowView (rowInfo);
2879  ind = lclColInds.ptr_on_device (); // FIXME (mfh 18 Jul 2016) UVM
2880  const LO err = this->getViewRawConst (val, numEnt, rowInfo);
2881  return err;
2882  }
2883  }
2884  }
2885 
// Variant of the raw-pointer getLocalRowView that hands back the values as
// Scalar rather than impl_scalar_type.
//
// Forwards lclRow, numEnt and lclColInds to getLocalRowView, reinterprets
// the value pointer it produces as const Scalar*, and returns its error
// code unchanged. Note the argument order: indices come before values
// here, the reverse of getLocalRowView.
2886  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2887  LocalOrdinal
2889  getLocalRowViewRaw (const LocalOrdinal lclRow,
2890  LocalOrdinal& numEnt,
2891  const LocalOrdinal*& lclColInds,
2892  const Scalar*& vals) const
2893  {
2894  const impl_scalar_type* vals_ist = NULL;
2895  const LocalOrdinal errCode =
2896  this->getLocalRowView (lclRow, numEnt, vals_ist, lclColInds);
2897  vals = reinterpret_cast<const Scalar*> (vals_ist);
2898  return errCode;
2899  }
2900 
// Get a view (no copy) of one row's entries, with global column indices.
//
// globalRow: global row index, passed to
//            staticGraph_->getRowInfoFromGlobalRowIndex.
// indices:   output; view of the row's global column indices.
// values:    output; view of the row's values.
//
// Both outputs are first reset to null views. They are filled in only if
// the row is valid and has at least one entry, and are then trimmed to
// exactly rowinfo.numEntries elements.
//
// Throws std::runtime_error if the matrix is locally indexed. Debug builds
// (HAVE_TPETRA_DEBUG) re-check the result sizes and throw std::logic_error
// on a mismatch.
2901  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2902  void
2904  getGlobalRowView (GlobalOrdinal globalRow,
2905  Teuchos::ArrayView<const GlobalOrdinal>& indices,
2906  Teuchos::ArrayView<const Scalar>& values) const
2907  {
2908  using Teuchos::ArrayView;
2909  using Teuchos::av_reinterpret_cast;
2910  typedef GlobalOrdinal GO;
2911  const char tfecfFuncName[] = "getGlobalRowView: ";
2912 
2913  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2914  isLocallyIndexed (), std::runtime_error,
2915  "The matrix is locally indexed, so we cannot return a view of the row "
2916  "with global column indices. Use getGlobalRowCopy() instead.");
2917  indices = Teuchos::null;
2918  values = Teuchos::null;
2919  const RowInfo rowinfo =
2920  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
2921  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2922  rowinfo.numEntries > 0) {
2923  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
2924  ArrayView<const Scalar> valTmp =
2925  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
      // Trim both views to the row's entry count.
2926  indices = indTmp (0, rowinfo.numEntries);
2927  values = valTmp (0, rowinfo.numEntries);
2928  }
2929 
2930 #ifdef HAVE_TPETRA_DEBUG
2931  const char suffix[] = ". This should never happen. Please report this "
2932  "bug to the Tpetra developers.";
2933  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2934  (static_cast<size_t> (indices.size ()) !=
2935  static_cast<size_t> (values.size ()), std::logic_error,
2936  "At the end of this method, for global row " << globalRow << ", "
2937  "indices.size() = " << indices.size () << " != values.size () = "
2938  << values.size () << suffix);
2939  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2940  (static_cast<size_t> (indices.size ()) !=
2941  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
2942  "At the end of this method, for global row " << globalRow << ", "
2943  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
2944  << rowinfo.numEntries << suffix);
2945  const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
2946  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2947  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
2948  "of this method, for global row " << globalRow << ", rowinfo.numEntries "
2949  "= " << rowinfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
2950  " " << expectedNumEntries << suffix);
2951 #endif // HAVE_TPETRA_DEBUG
2952  }
2953 
// Multiply the matrix's stored values by alpha, in place.
//
// alpha: the scaling factor; converted once to impl_scalar_type.
//
// Throws std::runtime_error if fill is not active.
//
// Does nothing if the graph's indices are not allocated, or if the local
// row count, allocation size, or entry count is zero. Otherwise:
//  - StaticProfile: scales every value of every row of lclMatrix_.
//  - DynamicProfile: for each local row, scales the first
//    getNumEntriesInLocalRow(row) values of values2D_[row].
2954  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2955  void
2957  scale (const Scalar& alpha)
2958  {
2959  typedef LocalOrdinal LO;
2960  typedef typename Teuchos::Array<Scalar>::size_type size_type;
2961  const char tfecfFuncName[] = "scale: ";
2962  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2963 
2964  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2965  ! isFillActive (), std::runtime_error,
2966  "Fill must be active before you may call this method. "
2967  "Please call resumeFill() to make fill active.");
2968 
2969  const size_t nlrs = staticGraph_->getNodeNumRows ();
2970  const size_t numAlloc = staticGraph_->getNodeAllocationSize ();
2971  const size_t numEntries = staticGraph_->getNodeNumEntries ();
2972  if (! staticGraph_->indicesAreAllocated () || nlrs == 0 ||
2973  numAlloc == 0 || numEntries == 0) {
2974  // do nothing
2975  }
2976  else {
2977  if (staticGraph_->getProfileType () == StaticProfile) {
2978  const LO lclNumRows = lclMatrix_.numRows ();
2979  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
2980  auto row_i = lclMatrix_.row (lclRow);
2981  for (LO k = 0; k < row_i.length; ++k) {
2982  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
2983  row_i.value (k) *= theAlpha;
2984  }
2985  }
2986  }
2987  else if (staticGraph_->getProfileType () == DynamicProfile) {
2988  for (size_t row = 0; row < nlrs; ++row) {
      // Only the row's current entries are scaled, not any slots beyond them.
2989  const size_type numEnt = getNumEntriesInLocalRow (row);
2990  Teuchos::ArrayView<impl_scalar_type> rowVals = values2D_[row] ();
2991  for (size_type k = 0; k < numEnt; ++k) {
2992  rowVals[k] *= theAlpha;
2993  }
2994  }
2995  }
2996  }
2997  }
2998 
// Overwrite the matrix's stored values with alpha.
//
// Throws std::runtime_error (through the exception macro) if fill is
// not active.  Does nothing if the graph's indices are not allocated,
// or if the graph reports zero allocated storage or zero entries.
2999  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3000  void
3002  setAllToScalar (const Scalar& alpha)
3003  {
3004  const char tfecfFuncName[] = "setAllToScalar: ";
3005  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3007  ! isFillActive (), std::runtime_error,
3008  "Fill must be active before you may call this method. "
3009  "Please call resumeFill() to make fill active.");
3010 
3011  // replace all values in the matrix
3012  // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries
3013  // however, if there are no valid entries, we can short-circuit
3014  // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far)
3015  const size_t nlrs = staticGraph_->getNodeNumRows(),
3016  numAlloc = staticGraph_->getNodeAllocationSize(),
3017  numEntries = staticGraph_->getNodeNumEntries();
3018  if (! staticGraph_->indicesAreAllocated () || numAlloc == 0 || numEntries == 0) {
3019  // do nothing
3020  }
3021  else {
3022  const ProfileType profType = staticGraph_->getProfileType ();
      // StaticProfile: fill the whole 1-D values array in one call.
3023  if (profType == StaticProfile) {
3024  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
3025  // semantics, this would be the place to mark memory as
3026  // modified.
3027  Kokkos::deep_copy (k_values1D_, theAlpha);
3028  }
      // DynamicProfile: fill each row's array from begin() to end().
3029  else if (profType == DynamicProfile) {
3030  for (size_t row = 0; row < nlrs; ++row) {
3031  std::fill (values2D_[row].begin (), values2D_[row].end (), theAlpha);
3032  }
3033  }
3034  }
3035  }
3036 
// Install caller-supplied CRS arrays (row offsets, column indices,
// values) as the matrix's local storage.
//
// Throws std::invalid_argument if columnIndices and values differ in
// size; std::runtime_error if myGraph_ is null or if
// myGraph_->setAllIndices() throws (the original message is wrapped);
// std::logic_error if the graph's local graph does not have the
// expected dimensions afterwards.
3037  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3038  void
3040  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
3041  const typename local_graph_type::entries_type::non_const_type& columnIndices,
3042  const typename local_matrix_type::values_type& values)
3043  {
3044  const char tfecfFuncName[] = "setAllValues: ";
3045  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3046  (columnIndices.size () != values.size (), std::invalid_argument,
3047  "columnIndices.size() = " << columnIndices.size () << " != values.size()"
3048  " = " << values.size () << ".");
3049  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3050  (myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
3051 
      // Hand the structure to the graph first.  Any std::exception it
      // throws is rethrown as std::runtime_error with this method's
      // name prefixed.
3052  try {
3053  myGraph_->setAllIndices (rowPointers, columnIndices);
3054  }
3055  catch (std::exception &e) {
3056  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3057  (true, std::runtime_error, "myGraph_->setAllIndices() threw an "
3058  "exception: " << e.what ());
3059  }
3060  // Make sure that myGraph_ now has a local graph. It may not be
3061  // fillComplete yet, so it's important to check. We don't care
3062  // whether setAllIndices() did a shallow copy or a deep copy, so a
3063  // good way to check is to compare dimensions.
3064  auto lclGraph = myGraph_->getLocalGraph ();
3065  const size_t numEnt = lclGraph.entries.dimension_0 ();
3066  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3067  (lclGraph.row_map.dimension_0 () != rowPointers.dimension_0 () ||
3068  numEnt != static_cast<size_t> (columnIndices.dimension_0 ()),
3069  std::logic_error, "myGraph_->setAllIndices() did not correctly create "
3070  "local graph. Please report this bug to the Tpetra developers.");
3071 
      // Build the local matrix from the caller's values and the graph's
      // local graph; the column count comes from the graph's column Map.
3072  const size_t numCols = myGraph_->getColMap ()->getNodeNumElements ();
3073  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
3074  numCols, values, lclGraph);
3075  // FIXME (22 Jun 2016) I would very much like to get rid of
3076  // k_values1D_ at some point. I find it confusing to have all
3077  // these extra references lying around.
3078  this->k_values1D_ = this->lclMatrix_.values;
3079 
3080  checkInternalState ();
3081  }
3082 
// Teuchos::ArrayRCP overload of setAllValues.
//
// Copies ptr into a view of the native row-offset type, deep-copies
// ind and val into Kokkos views on the matrix's device type, and
// forwards the three views to the Kokkos-view overload.
3083  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3084  void
3086  setAllValues (const Teuchos::ArrayRCP<size_t>& ptr,
3087  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3088  const Teuchos::ArrayRCP<Scalar>& val)
3089  {
3090  using Kokkos::Compat::getKokkosViewDeepCopy;
3091  using Teuchos::ArrayRCP;
3092  using Teuchos::av_reinterpret_cast;
3093  typedef device_type DT;
3094  typedef impl_scalar_type IST;
3095  typedef typename local_matrix_type::row_map_type row_map_type;
3096  //typedef typename row_map_type::non_const_value_type row_offset_type;
3097  const char tfecfFuncName[] = "setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3098 
3099  // The row offset type may depend on the execution space. It may
3100  // not necessarily be size_t. If it's not, we need to make a deep
3101  // copy. We need to make a deep copy anyway so that Kokkos can
3102  // own the memory. Regardless, ptrIn gets the copy.
3103  typename row_map_type::non_const_type ptrNative ("ptr", ptr.size ());
      // Unmanaged host view that aliases the caller's ptr array; it is
      // only the source of the copy below.
3104  Kokkos::View<const size_t*,
3105  typename row_map_type::array_layout,
3106  Kokkos::HostSpace,
3107  Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3108  ::Tpetra::Details::copyOffsets (ptrNative, ptrSizeT);
3109 
      // Sanity check; both views were created with length ptr.size().
3110  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3111  (ptrNative.dimension_0 () != ptrSizeT.dimension_0 (),
3112  std::logic_error, "ptrNative.dimension_0() = " <<
3113  ptrNative.dimension_0 () << " != ptrSizeT.dimension_0() = "
3114  << ptrSizeT.dimension_0 () << ". Please report this bug to the "
3115  "Tpetra developers.");
3116 
      // val is reinterpreted from Scalar to impl_scalar_type before the
      // deep copy.
3117  auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3118  auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3119  this->setAllValues (ptrNative, indIn, valIn);
3120  }
3121 
// Fill offsets with the graph's local diagonal offsets, one per local
// row.
//
// offsets is grown to the local row count if it is shorter; it is
// never shrunk.  Throws std::runtime_error if the matrix has no graph.
3122  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3123  void
3125  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
3126  {
3127  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
3128  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3129  (staticGraph_.is_null (), std::runtime_error, "The matrix has no graph.");
3130 
3131  // mfh 11 May 2016: We plan to deprecate the ArrayRCP version of
3132  // this method in CrsGraph too, so don't call it (otherwise build
3133  // warnings will show up and annoy users). Instead, copy results
3134  // in and out, if the memory space requires it.
3135 
3136  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
3137  if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3138  offsets.resize (lclNumRows);
3139  }
3140 
3141  // The input ArrayRCP must always be a host pointer. Thus, if
3142  // device_type::memory_space is Kokkos::HostSpace, it's OK for us
3143  // to write to that allocation directly as a Kokkos::View.
3144  typedef typename device_type::memory_space memory_space;
3145  if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3146  // It is always syntactically correct to assign a raw host
3147  // pointer to a device View, so this code will compile correctly
3148  // even if this branch never runs.
3149  typedef Kokkos::View<size_t*, device_type,
3150  Kokkos::MemoryUnmanaged> output_type;
3151  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3152  staticGraph_->getLocalDiagOffsets (offsetsOut);
3153  }
3154  else {
      // Other memory spaces: let the graph write into a temporary
      // device view, then deep-copy into an unmanaged host view that
      // aliases the caller's array.
3155  Kokkos::View<size_t*, device_type> offsetsTmp ("diagOffsets", lclNumRows);
3156  staticGraph_->getLocalDiagOffsets (offsetsTmp);
3157  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
3158  Kokkos::MemoryUnmanaged> output_type;
3159  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3160  Kokkos::deep_copy (offsetsOut, offsetsTmp);
3161  }
3162  }
3163 
// Copy the matrix's local diagonal into the Vector `diag`, without
// using precomputed diagonal offsets.
//
// NOTE(review): this listing omits the lines carrying the method name
// and parameter list (embedded lines 3166-3167).  Per tfecfFuncName
// this is the one-argument getLocalDiagCopy; the body uses a parameter
// named `diag`.  Confirm against the header.
//
// Throws std::runtime_error if the matrix has no graph or no column
// Map.  Returns without doing anything on a process whose row Map, or
// the row Map's communicator, is null.  In debug builds the per-process
// error counts are summed over the communicator, and std::logic_error
// is thrown if the sum is nonzero.
3164  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3165  void
3168  {
3169  using Teuchos::ArrayRCP;
3170  using Teuchos::ArrayView;
3171  using Teuchos::av_reinterpret_cast;
3172  const char tfecfFuncName[] = "getLocalDiagCopy (1-arg): ";
3173  typedef local_ordinal_type LO;
3174 
3175 
3176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3177  staticGraph_.is_null (), std::runtime_error,
3178  "This method requires that the matrix have a graph.");
3179  auto rowMapPtr = this->getRowMap ();
3180  if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3181  // Processes on which the row Map or its communicator is null
3182  // don't participate. Users shouldn't even call this method on
3183  // those processes.
3184  return;
3185  }
3186  auto colMapPtr = this->getColMap ();
3187  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3188  (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3189  "This method requires that the matrix have a column Map.");
3190  const map_type& rowMap = * rowMapPtr;
3191  const map_type& colMap = * colMapPtr;
3192  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3193 
3194 #ifdef HAVE_TPETRA_DEBUG
3195  // isCompatible() requires an all-reduce, and thus this check
3196  // should only be done in debug mode.
3197  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3198  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3199  "The input Vector's Map must be compatible with the CrsMatrix's row "
3200  "Map. You may check this by using Map's isCompatible method: "
3201  "diag.getMap ()->isCompatible (A.getRowMap ());");
3202 #endif // HAVE_TPETRA_DEBUG
3203 
3204 #ifdef HAVE_TPETRA_DEBUG
3205  // Keep a count of the local number of errors.
3206  LO lclNumErrs = 0;
3207 #endif // HAVE_TPETRA_DEBUG
      // Fill-complete path: work on diag's device view with the local
      // Maps and the local matrix.
3208  if (this->isFillComplete ()) {
3209  diag.template modify<device_type> ();
3210  const auto D_lcl = diag.template getLocalView<device_type> ();
3211  // 1-D subview of the first (and only) column of D_lcl.
3212  const auto D_lcl_1d =
3213  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3214 
3215  const auto lclRowMap = rowMap.getLocalMap ();
3216  const auto lclColMap = colMap.getLocalMap ();
3217  const auto lclMatrix = this->lclMatrix_;
      // The helper's return value is only kept in debug builds, where
      // it is reduced across processes below.
3219 #ifdef HAVE_TPETRA_DEBUG
3220  lclNumErrs = getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
3221  lclColMap, lclMatrix);
3222 #else
3223  (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
3224  lclColMap, lclMatrix);
3225 #endif // HAVE_TPETRA_DEBUG
3226  }
3227  else {
      // Not fill complete: delegate to the helper that takes the
      // Vector and the whole matrix.
3229 #ifdef HAVE_TPETRA_DEBUG
3230  lclNumErrs = getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
3231 #else
3232  (void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
3233 #endif // HAVE_TPETRA_DEBUG
3234  }
3235 
3236 #ifdef HAVE_TPETRA_DEBUG
3237  if (! this->getComm ().is_null ()) {
3238  using Teuchos::outArg;
3239  using Teuchos::REDUCE_SUM;
3240  using Teuchos::reduceAll;
3241  typedef global_ordinal_type GO;
3242 
3243  GO gblNumErrs = 0;
3244  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
      // This inner null test repeats the enclosing one.
3245  if (! comm.is_null ()) {
3246  reduceAll<int, GO> (*comm, REDUCE_SUM, static_cast<GO> (lclNumErrs),
3247  outArg (gblNumErrs));
3248  }
3249  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3250  (gblNumErrs != 0, std::logic_error, "Something went wrong on "
3251  << gblNumErrs << " out of " << this->getComm ()->getSize ()
3252  << " process(es).");
3253  }
3254 #endif // HAVE_TPETRA_DEBUG
3255  }
3256 
// Copy the matrix's local diagonal into `diag`, using precomputed
// per-row diagonal offsets supplied as an unmanaged Kokkos::View on the
// matrix's device type.
//
// NOTE(review): this listing omits the lines carrying the method name
// and the first parameter (embedded lines 3259-3260).  The body uses a
// Vector-like parameter named `diag`; confirm against the header.
//
// In debug builds, throws std::runtime_error if diag's Map is not
// compatible with the matrix's row Map.
3257  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3258  void
3261  const Kokkos::View<const size_t*, device_type,
3262  Kokkos::MemoryUnmanaged>& offsets) const
3263  {
3264  typedef LocalOrdinal LO;
3265 
3266 #ifdef HAVE_TPETRA_DEBUG
3267  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3268  const map_type& rowMap = * (this->getRowMap ());
3269  // isCompatible() requires an all-reduce, and thus this check
3270  // should only be done in debug mode.
3271  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3272  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3273  "The input Vector's Map must be compatible with (in the sense of Map::"
3274  "isCompatible) the CrsMatrix's row Map.");
3275 #endif // HAVE_TPETRA_DEBUG
3276 
3277  // For now, we fill the Vector on the host and sync to device.
3278  // Later, we may write a parallel kernel that works entirely on
3279  // device.
3280  //
3281  // NOTE (mfh 21 Jan 2016): The host kernel here assumes UVM. Once
3282  // we write a device kernel, it will not need to assume UVM.
      //
      // NOTE(review): the code below marks diag's device_type view as
      // modified and passes a device_type subview to
      // KokkosSparse::getDiagCopy.  No host fill and no sync call are
      // visible in this method, so the comment above may be stale --
      // confirm.
3283 
3284  diag.template modify<device_type> ();
3285  auto D_lcl = diag.template getLocalView<device_type> ();
3286  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3287  // Get 1-D subview of the first (and only) column of D_lcl.
3288  auto D_lcl_1d =
3289  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3290 
3291  KokkosSparse::getDiagCopy (D_lcl_1d, offsets, this->lclMatrix_);
3292  }
3293 
// Copy the matrix's local diagonal into `diag`, using precomputed
// per-row diagonal offsets supplied as a host Teuchos::ArrayView.
//
// NOTE(review): this listing omits several lines of this definition
// (embedded lines 3296-3297, 3302 and 3331).  The method name, the
// first parameter `diag`, and the names vec_type, STS and INV used
// below are presumably declared on those lines -- confirm against the
// real source.
//
// In debug builds, throws std::runtime_error if diag's Map is not
// compatible with the matrix's row Map.
3294  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3295  void
3298  const Teuchos::ArrayView<const size_t>& offsets) const
3299  {
3300  typedef LocalOrdinal LO;
3301  typedef impl_scalar_type IST;
3303  typedef typename vec_type::dual_view_type dual_view_type;
3304  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
3305 
3306 #ifdef HAVE_TPETRA_DEBUG
3307  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3308  const map_type& rowMap = * (this->getRowMap ());
3309  // isCompatible() requires an all-reduce, and thus this check
3310  // should only be done in debug mode.
3311  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3312  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3313  "The input Vector's Map must be compatible with (in the sense of Map::"
3314  "isCompatible) the CrsMatrix's row Map.");
3315 #endif // HAVE_TPETRA_DEBUG
3316 
3317  // For now, we fill the Vector on the host and sync to device.
3318  // Later, we may write a parallel kernel that works entirely on
3319  // device.
3320  diag.template modify<host_execution_space> ();
3321  auto lclVecHost = diag.template getLocalView<host_execution_space> ();
3322  // 1-D subview of the first (and only) column of lclVecHost.
3323  auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
3324 
      // Unmanaged host view that aliases the caller's offsets array.
3325  Kokkos::View<const size_t*, Kokkos::HostSpace,
3326  Kokkos::MemoryTraits<Kokkos::Unmanaged> >
3327  h_offsets (offsets.getRawPtr (), offsets.size ());
3328  // Find the diagonal entries and put them in lclVecHost1d.
3329  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3330  typedef Kokkos::RangePolicy<host_execution_space, LO> policy_type;
3332 
      // NOTE(review): h_offsets is read for every lclRow in
      // [0, myNumRows).  No check that offsets.size() >= myNumRows is
      // visible here -- presumably the caller guarantees it.
3333  Kokkos::parallel_for (policy_type (0, myNumRows), [&] (const LO& lclRow) {
3334  lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
3335  if (h_offsets[lclRow] != INV) {
3336  auto curRow = lclMatrix_.rowConst (lclRow);
3337  lclVecHost1d(lclRow) = static_cast<IST> (curRow.value(h_offsets[lclRow]));
3338  }
3339  });
3340  diag.template sync<execution_space> (); // sync changes back to device
3341  }
3342 
3343 
// Scale the matrix on the left: every entry in local row i is
// multiplied by the i-th local entry of the scaling vector x.
//
// NOTE(review): this listing omits the lines carrying the method name
// and parameter list (embedded lines 3346-3347) and a line before
// tfecfFuncName (3355, presumably the vec_type typedef).  The name is
// taken from tfecfFuncName; the body uses a parameter named `x`.
//
// x's Map must be the same as the matrix's range Map or its row Map;
// otherwise std::invalid_argument is thrown.  Throws
// std::runtime_error if the matrix is not fill complete.
3344  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3345  void
3348  {
3349  using Teuchos::ArrayRCP;
3350  using Teuchos::ArrayView;
3351  using Teuchos::null;
3352  using Teuchos::RCP;
3353  using Teuchos::rcp;
3354  using Teuchos::rcpFromRef;
      // No trailing ": " here; the messages below start with ": "
      // themselves.
3356  const char tfecfFuncName[] = "leftScale";
3357 
3358  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
3359  // should only be modified when it is not fill complete.
3360  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3361  ! isFillComplete (), std::runtime_error,
3362  ": matrix must be fill complete.");
      // xp ends up pointing at the vector whose local entries are
      // applied row by row: either x itself (non-owning) or a temporary
      // created on the row Map.
3363  RCP<const vec_type> xp;
3364 
3365  if (getRangeMap ()->isSameAs (* (x.getMap ()))){
3366  // Take from Epetra: If we have a non-trivial exporter, we must
3367  // import elements that are permuted or are on other processors.
3368  // (We will use the exporter to perform the import ("reverse
3369  // mode").)
3370  if (getCrsGraph ()->getExporter () != Teuchos::null) {
3371  RCP<vec_type> tempVec = rcp (new vec_type (getRowMap ()));
3372  tempVec->doImport (x, * (getCrsGraph ()->getExporter ()), INSERT);
3373  xp = tempVec;
3374  }
3375  else {
3376  xp = rcpFromRef (x);
3377  }
3378  }
3379  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
3380  xp = rcpFromRef (x);
3381  }
3382  else {
3383  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, ": The "
3384  "input scaling vector x's Map must be the same as either the row Map or "
3385  "the range Map of the CrsMatrix.");
3386  }
3387  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3388  ArrayView<impl_scalar_type> rowValues = null;
3389 
      // Multiply the first rowinfo.numEntries values of each local row
      // by that row's scaling factor.
3390  const LocalOrdinal lclNumRows =
3391  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3392  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
3393  const RowInfo rowinfo = staticGraph_->getRowInfo (i);
3394  rowValues = this->getViewNonConst (rowinfo);
3395  const impl_scalar_type scaleValue = static_cast<impl_scalar_type> (vectorVals[i]);
3396  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3397  rowValues[j] *= scaleValue;
3398  }
3399  }
3400  }
3401 
// Scale the matrix on the right: every entry is multiplied by the
// entry of the scaling vector x that corresponds to the entry's local
// column index.
//
// NOTE(review): this listing omits the lines carrying the method name
// and parameter list (embedded lines 3404-3405) and a line before
// tfecfFuncName (3413, presumably the vec_type typedef).  The name is
// taken from tfecfFuncName; the body uses a parameter named `x`.
//
// x's Map must be the same as the matrix's domain Map or its column
// Map; otherwise std::runtime_error is thrown.  Throws
// std::runtime_error if the matrix is not fill complete.
//
// Fix: the fallback branch used to test the row Map.  The scaling loop
// indexes the vector's data by local column index (colInds[j]), so a
// vector that is not handled by the domain-Map branch must live on the
// column Map.  The error message named the wrong Maps for the same
// reason.
3402  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3403  void
3406  {
3407  using Teuchos::ArrayRCP;
3408  using Teuchos::ArrayView;
3409  using Teuchos::null;
3410  using Teuchos::RCP;
3411  using Teuchos::rcp;
3412  using Teuchos::rcpFromRef;
3414  const char tfecfFuncName[] = "rightScale: ";
3415 
3416  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
3417  // should only be modified when it is not fill complete.
3418  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3419  ! isFillComplete (), std::runtime_error, "Matrix must be fill complete.");
      // xp ends up pointing at the vector that is indexed by local
      // column index: either x itself (non-owning) or a temporary
      // created on the column Map.
3420  RCP<const vec_type> xp;
3421  if (getDomainMap ()->isSameAs (* (x.getMap ()))) {
3422  // Take from Epetra: If we have a non-trivial importer, we must
3423  // import elements that are permuted or are on other processors.
3424  // (We will use the importer to perform the import.)
3425  if (getCrsGraph ()->getImporter () != Teuchos::null) {
3426  RCP<vec_type> tempVec = rcp (new vec_type (getColMap ()));
3427  tempVec->doImport (x, * (getCrsGraph ()->getImporter ()), INSERT);
3428  xp = tempVec;
3429  }
3430  else {
3431  xp = rcpFromRef (x);
3432  }
3433  }
3434  else if (getColMap ()->isSameAs (* (x.getMap ()))) {
3435  xp = rcpFromRef (x);
3436  } else {
3437  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3438  true, std::runtime_error, "The vector x must have the same Map as "
3439  "either the domain Map or the column Map.");
3440  }
3441 
3442  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3443  ArrayView<impl_scalar_type> rowValues = null;
3444 
      // For each local row, multiply its first rowinfo.numEntries
      // values by the vector entry at the matching local column index.
3445  const LocalOrdinal lclNumRows =
3446  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3447  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
3448  const RowInfo rowinfo = staticGraph_->getRowInfo (i);
3449  rowValues = this->getViewNonConst (rowinfo);
3450  ArrayView<const LocalOrdinal> colInds;
3451  getCrsGraph ()->getLocalRowView (i, colInds);
3452  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3453  rowValues[j] *= static_cast<impl_scalar_type> (vectorVals[colInds[j]]);
3454  }
3455  }
3456  }
3457 
3458  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3462  {
3463  using Teuchos::ArrayView;
3464  using Teuchos::outArg;
3465  using Teuchos::REDUCE_SUM;
3466  using Teuchos::reduceAll;
3467  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
3468 
3469  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
3470  // local part of this computation. It could make sense to put
3471  // this operation in the Kokkos::CrsMatrix.
3472 
3473  // check the cache first
3474  mag_type frobNorm = frobNorm_;
3475  if (frobNorm == -STM::one ()) {
3476  mag_type mySum = STM::zero ();
3477  if (getNodeNumEntries() > 0) {
3478  if (isStorageOptimized ()) {
3479  // "Optimized" storage is packed storage. That means we can
3480  // iterate in one pass through the 1-D values array.
3481  const size_type numEntries =
3482  static_cast<size_type> (getNodeNumEntries ());
3483  for (size_type k = 0; k < numEntries; ++k) {
3484  // FIXME (mfh 05 Aug 2014) This assumes UVM.
3485  const impl_scalar_type val = k_values1D_(k);
3486  // Note (etp 06 Jan 2015) We need abs() here for composite types
3487  // (in general, if mag_type is on the left-hand-side, we need
3488  // abs() on the right-hand-side)
3489  const mag_type val_abs = STS::abs (val);
3490  mySum += val_abs * val_abs;
3491  }
3492  }
3493  else {
3494  const LocalOrdinal numRows =
3495  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3496  for (LocalOrdinal r = 0; r < numRows; ++r) {
3497  const RowInfo rowInfo = myGraph_->getRowInfo (r);
3498  const size_type numEntries =
3499  static_cast<size_type> (rowInfo.numEntries);
3500  ArrayView<const impl_scalar_type> A_r =
3501  this->getView (rowInfo).view (0, numEntries);
3502  for (size_type k = 0; k < numEntries; ++k) {
3503  const impl_scalar_type val = A_r[k];
3504  const mag_type val_abs = STS::abs (val);
3505  mySum += val_abs * val_abs;
3506  }
3507  }
3508  }
3509  }
3510  mag_type totalSum = STM::zero ();
3511  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3512  mySum, outArg (totalSum));
3513  frobNorm = STM::sqrt (totalSum);
3514  }
3515  if (isFillComplete ()) {
3516  // Only cache the result if the matrix is fill complete.
3517  // Otherwise, the values might still change. resumeFill clears
3518  // the cache.
3519  frobNorm_ = frobNorm;
3520  }
3521  return frobNorm;
3522  }
3523 
// Replace the matrix's column Map by forwarding newColMap to the graph
// the matrix owns (myGraph_).
//
// Throws std::runtime_error if myGraph_ is null.
3524  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3525  void
3527  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3528  {
3529  const char tfecfFuncName[] = "replaceColMap: ";
3530  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
3531  // Then replacing the column Map might mean that we need to
3532  // reindex the column indices.
3533  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3534  myGraph_.is_null (), std::runtime_error,
3535  "This method does not work if the matrix has a const graph. The whole "
3536  "idea of a const graph is that you are not allowed to change it, but "
3537  "this method necessarily must modify the graph, since the graph owns "
3538  "the matrix's column Map.");
3539  myGraph_->replaceColMap (newColMap);
3540  }
3541 
// Reindex the graph's columns against newColMap / newImport and, if
// requested, sort each row's column indices together with the matrix
// values.
//
// NOTE(review): this listing omits the lines carrying the method name
// and the first parameter (embedded lines 3544-3545).  The name is
// taken from tfecfFuncName; the body uses a pointer parameter named
// `graph`, which may be NULL to mean "use the matrix's own graph".
//
// Throws std::invalid_argument if graph is NULL and the matrix does not
// own its graph (myGraph_ is null).
3542  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3543  void
3546  const Teuchos::RCP<const map_type>& newColMap,
3547  const Teuchos::RCP<const import_type>& newImport,
3548  const bool sortEachRow)
3549  {
3550  const char tfecfFuncName[] = "reindexColumns: ";
3551  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3552  graph == NULL && myGraph_.is_null (), std::invalid_argument,
3553  "The input graph is NULL, but the matrix does not own its graph.");
3554 
      // Work on the caller's graph when one is given, otherwise on the
      // matrix's own graph.
3555  crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph;
3556  const bool sortGraph = false; // we'll sort graph & matrix together below
3557  theGraph.reindexColumns (newColMap, newImport, sortGraph);
3558  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3559  // We can't just call sortEntries() here, because that fails if
3560  // the matrix has a const graph. We want to use the given graph
3561  // in that case.
3562  const LocalOrdinal lclNumRows =
3563  static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
3564  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3565  const RowInfo rowInfo = theGraph.getRowInfo (row);
3566  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3567  theGraph.template sortRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3568  }
      // Record on the graph that its rows are now sorted.
3569  theGraph.indicesAreSorted_ = true;
3570  }
3571  }
3572 
// Replace the matrix's domain Map and Import object by forwarding both
// arguments to the graph the matrix owns (myGraph_).
//
// Throws std::runtime_error if myGraph_ is null.
3573  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3574  void
3576  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
3577  Teuchos::RCP<const import_type>& newImporter)
3578  {
3579  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
3580  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3581  myGraph_.is_null (), std::runtime_error,
3582  "This method does not work if the matrix has a const graph. The whole "
3583  "idea of a const graph is that you are not allowed to change it, but this"
3584  " method necessarily must modify the graph, since the graph owns the "
3585  "matrix's domain Map and Import objects.");
3586  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3587  }
3588 
// Append (column index, value) pairs for global row globalRow to the
// nonlocals_ container.
//
// Entries are appended as given: nothing is sorted or merged here, and
// the same column index may appear more than once.
//
// NOTE(review): values[k] is read for every k < indices.size().  No
// check that values.size() >= indices.size() is visible here --
// presumably the caller guarantees it.
3589  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3590  void
3592  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
3593  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
3594  const Teuchos::ArrayView<const Scalar>& values)
3595  {
3596  using Teuchos::Array;
3597  typedef GlobalOrdinal GO;
3598  typedef typename Array<GO>::size_type size_type;
3599 
3600  const size_type numToInsert = indices.size ();
3601  // Add the new data to the list of nonlocals.
3602  // This creates the arrays if they don't exist yet.
3603  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
3604  Array<GO>& curRowInds = curRow.first;
3605  Array<Scalar>& curRowVals = curRow.second;
      // Reserve space for the existing plus the new entries before
      // appending.
3606  const size_type newCapacity = curRowInds.size () + numToInsert;
3607  curRowInds.reserve (newCapacity);
3608  curRowVals.reserve (newCapacity);
3609  for (size_type k = 0; k < numToInsert; ++k) {
3610  curRowInds.push_back (indices[k]);
3611  curRowVals.push_back (values[k]);
3612  }
3613  }
3614 
3615  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3616  void
3619  {
3620  using Teuchos::Comm;
3621  using Teuchos::outArg;
3622  using Teuchos::RCP;
3623  using Teuchos::rcp;
3624  using Teuchos::REDUCE_MAX;
3625  using Teuchos::REDUCE_MIN;
3626  using Teuchos::reduceAll;
3628  //typedef LocalOrdinal LO;
3629  typedef GlobalOrdinal GO;
3630  typedef typename Teuchos::Array<GO>::size_type size_type;
3631  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3632 
3633  RCP<const Comm<int> > comm = getComm ();
3634 
3635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3636  (! isFillActive (), std::runtime_error, "Fill must be active before "
3637  "you may call this method.");
3638 
3639  const size_t myNumNonlocalRows = nonlocals_.size ();
3640 
3641  // If no processes have nonlocal rows, then we don't have to do
3642  // anything. Checking this is probably cheaper than constructing
3643  // the Map of nonlocal rows (see below) and noticing that it has
3644  // zero global entries.
3645  {
3646  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3647  int someoneHasNonlocalRows = 0;
3648  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3649  outArg (someoneHasNonlocalRows));
3650  if (someoneHasNonlocalRows == 0) {
3651  return; // no process has nonlocal rows, so nothing to do
3652  }
3653  }
3654 
3655  // 1. Create a list of the "nonlocal" rows on each process. this
3656  // requires iterating over nonlocals_, so while we do this,
3657  // deduplicate the entries and get a count for each nonlocal
3658  // row on this process.
3659  // 2. Construct a new row Map corresponding to those rows. This
3660  // Map is likely overlapping. We know that the Map is not
3661  // empty on all processes, because the above all-reduce and
3662  // return exclude that case.
3663 
3664  RCP<const map_type> nonlocalRowMap;
3665  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3666  Teuchos::ArrayRCP<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3667  {
3668  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3669  size_type curPos = 0;
3670  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3671  ++mapIter, ++curPos) {
3672  myNonlocalGblRows[curPos] = mapIter->first;
3673  // Get the values and column indices by reference, since we
3674  // intend to change them in place (that's what "erase" does).
3675  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
3676  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
3677 
3678  // Sort both arrays jointly, using the column indices as keys,
3679  // then merge them jointly. "Merge" here adds values
3680  // corresponding to the same column indices. The first 2 args
3681  // of merge2 are output arguments that work just like the
3682  // return value of std::unique.
3683  sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
3684  typename Teuchos::Array<GO>::iterator gblCols_newEnd;
3685  typename Teuchos::Array<Scalar>::iterator vals_newEnd;
3686  merge2 (gblCols_newEnd, vals_newEnd,
3687  gblCols.begin (), gblCols.end (),
3688  vals.begin (), vals.end ());
3689  gblCols.erase (gblCols_newEnd, gblCols.end ());
3690  vals.erase (vals_newEnd, vals.end ());
3691  numEntPerNonlocalRow[curPos] = gblCols.size ();
3692  }
3693 
3694  // Currently, Map requires that its indexBase be the global min
3695  // of all its global indices. Map won't compute this for us, so
3696  // we must do it. If our process has no nonlocal rows, set the
3697  // "min" to the max possible GO value. This ensures that if
3698  // some process has at least one nonlocal row, then it will pick
3699  // that up as the min. We know that at least one process has a
3700  // nonlocal row, since the all-reduce and return at the top of
3701  // this method excluded that case.
3702  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3703  {
3704  auto iter = std::min_element (myNonlocalGblRows.begin (),
3705  myNonlocalGblRows.end ());
3706  if (iter != myNonlocalGblRows.end ()) {
3707  myMinNonlocalGblRow = *iter;
3708  }
3709  }
3710  GO gblMinNonlocalGblRow = 0;
3711  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3712  outArg (gblMinNonlocalGblRow));
3713  const GO indexBase = gblMinNonlocalGblRow;
3714  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3715  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3716  }
3717 
3718  // 3. Use the values and column indices for each nonlocal row, as
3719  // stored in nonlocals_, to construct a CrsMatrix corresponding
3720  // to nonlocal rows. We may use StaticProfile, since we have
3721  // exact counts of the number of entries in each nonlocal row.
3722 
3723  RCP<crs_matrix_type> nonlocalMatrix =
3724  rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow,
3725  StaticProfile));
3726  {
3727  size_type curPos = 0;
3728  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3729  ++mapIter, ++curPos) {
3730  const GO gblRow = mapIter->first;
3731  // Get values & column indices by ref, just to avoid copy.
3732  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
3733  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
3734  //const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3735  nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
3736  }
3737  }
3738  // There's no need to fill-complete the nonlocals matrix.
3739  // We just use it as a temporary container for the Export.
3740 
3741  // 4. If the original row Map is one to one, then we can Export
3742  // directly from nonlocalMatrix into this. Otherwise, we have
3743  // to create a temporary matrix with a one-to-one row Map,
3744  // Export into that, then Import from the temporary matrix into
3745  // *this.
3746 
3747  auto origRowMap = this->getRowMap ();
3748  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3749 
3750  int isLocallyComplete = 1; // true by default
3751 
3752  if (origRowMapIsOneToOne) {
3753  export_type exportToOrig (nonlocalRowMap, origRowMap);
3754  if (! exportToOrig.isLocallyComplete ()) {
3755  isLocallyComplete = 0;
3756  }
3757  this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD);
3758  // We're done at this point!
3759  }
3760  else {
3761  // If you ask a Map whether it is one to one, it does some
3762  // communication and stashes intermediate results for later use
3763  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3764  // much more than the original cost of calling isOneToOne.
3765  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3766  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3767  if (! exportToOneToOne.isLocallyComplete ()) {
3768  isLocallyComplete = 0;
3769  }
3770 
3771  // Create a temporary matrix with the one-to-one row Map.
3772  //
3773  // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in
3774  // each row, to avoid reallocation during the Export operation.
3775  crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
3776  // Export from matrix of nonlocals into the temp one-to-one matrix.
3777  oneToOneMatrix.doExport (*nonlocalMatrix, exportToOneToOne, Tpetra::ADD);
3778 
3779  // We don't need the matrix of nonlocals anymore, so get rid of
3780  // it, to keep the memory high-water mark down.
3781  nonlocalMatrix = Teuchos::null;
3782 
3783  // Import from the one-to-one matrix to the original matrix.
3784  import_type importToOrig (oneToOneRowMap, origRowMap);
3785  this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD);
3786  }
3787 
3788  // It's safe now to clear out nonlocals_, since we've already
3789  // committed side effects to *this. The standard idiom for
3790  // clearing a Container like std::map, is to swap it with an empty
3791  // Container and let the swapped Container fall out of scope.
3792  decltype (nonlocals_) newNonlocals;
3793  std::swap (nonlocals_, newNonlocals);
3794 
3795  // FIXME (mfh 12 Sep 2016) I don't like this all-reduce, and I
3796  // don't like throwing an exception here. A local return value
3797  // would likely be more useful to users. However, if users find
3798  // themselves exercising nonlocal inserts often, then they are
3799  // probably novice users who need the help. See GitHub Issues
3800  // #603 and #601 (esp. the latter) for discussion.
3801 
3802  int isGloballyComplete = 0; // output argument of reduceAll
3803  reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
3804  outArg (isGloballyComplete));
3805  TEUCHOS_TEST_FOR_EXCEPTION
3806  (isGloballyComplete != 1, std::runtime_error, "On at least one process, "
3807  "you called insertGlobalValues with a global row index which is not in "
3808  "the matrix's row Map on any process in its communicator.");
3809  }
3810 
// resumeFill: reopen the matrix for modification. If the graph is not
// static, the graph's own resumeFill is called with the same parameter
// list. Global constants are then cleared and the matrix is marked as not
// fill complete.
//
// params: forwarded unchanged to the graph's resumeFill; not read here.
//
// NOTE(review): the class-qualifier line of this definition is absent from
// this listing (presumably "CrsMatrix<...>::") -- confirm against the
// original file.
3811  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3812  void
3814  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3815  {
3816  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
3817  myGraph_->resumeFill (params);
3818  }
3819  clearGlobalConstants ();
3820  fillComplete_ = false;
3821  }
3822 
// Intentionally empty method.
//
// NOTE(review): the declarator lines (class qualifier and method name) are
// absent from this listing; judging by the comment in the body this is
// presumably computeGlobalConstants() -- confirm against the original file.
3823  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3824  void
3827  {
3828  // This method doesn't do anything. The analogous method in
3829  // CrsGraph does actually compute something.
3830  //
3831  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
3832  // setting it to -1), but computeGlobalConstants() does _not_
3833  // compute the Frobenius norm; this is done on demand in
3834  // getFrobeniusNorm(), and the result is cached there.
3835  }
3836 
// Resets the cached Frobenius norm (frobNorm_) to -1, the sentinel that
// the comment below describes as "needs to be recomputed".
//
// NOTE(review): the declarator lines (class qualifier, method name, and
// opening brace) are absent from this listing; judging by the comment in
// the body this is presumably clearGlobalConstants() -- confirm against the
// original file.
3837  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3838  void
3841  // We use -1 to indicate that the Frobenius norm needs to be
3842  // recomputed, since the values might change between now and the
3843  // next fillComplete call.
3844  //
3845  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
3846  // computeGlobalConstants() does _not_ compute the Frobenius norm;
3847  // this is done on demand in getFrobeniusNorm(), and the result is
3848  // cached there.
3849  frobNorm_ = -STM::one ();
3850  }
3851 
// fillComplete (ParameterList-only overload): choose the domain and range
// Maps, then forward to the three-argument fillComplete overload.
//
// params: passed through unchanged to the three-argument overload.
//
// Throws std::logic_error if getCrsGraph() returns null.
3852  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3853  void
3855  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3856  {
3857  TEUCHOS_TEST_FOR_EXCEPTION(
3858  getCrsGraph ().is_null (), std::logic_error, "Tpetra::CrsMatrix::"
3859  "fillComplete(params): getCrsGraph() returns null. "
3860  "This should not happen at this point. "
3861  "Please report this bug to the Tpetra developers.");
3862 
      // If the graph is static and already fill complete, reuse the graph's
      // own domain and range Maps (the three-argument overload requires the
      // Maps to match those of a static graph). Otherwise, use the row Map
      // as both the domain Map and the range Map.
3863  if (isStaticGraph () && getCrsGraph ()->isFillComplete ()) {
3864  fillComplete (getCrsGraph ()->getDomainMap (),
3865  getCrsGraph ()->getRangeMap (), params);
3866  } else {
3867  fillComplete (getRowMap (), getRowMap (), params);
3868  }
3869  }
3870 
// fillComplete (domain Map, range Map, ParameterList): end the fill phase.
//
// Steps, in order:
//   1. Read options from params.
//   2. Allocate values if the graph's indices are not yet allocated.
//   3. Call globalAssemble() if needed (more than one process and the
//      caller did not assert "No Nonlocal Changes").
//   4. Static graph: check that the given Maps match the graph's Maps.
//      Owned graph: set the Maps, make the column Map, make indices local,
//      sort and merge entries, make the Import/Export, and mark the graph
//      fill complete.
//   5. Fill the local matrix (and the local graph, if owned).
//   6. Mark the matrix fill complete and check internal state.
//
// Parameters read from params (only if params is non-null):
//   "No Nonlocal Changes" (bool, default false)
//   "sort column map ghost gids", or if that is absent
//   "Sort column Map ghost GIDs" (bool, default true)
//
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete; if there is exactly one process and nonlocals_ is not
// empty; or if the graph is static and the given domain or range Map does
// not match the graph's.
3871  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3872  void
3874  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3875  const Teuchos::RCP<const map_type>& rangeMap,
3876  const Teuchos::RCP<Teuchos::ParameterList>& params)
3877  {
3878  using Teuchos::ArrayRCP;
3879  using Teuchos::RCP;
3880  using Teuchos::rcp;
3881  const char tfecfFuncName[] = "fillComplete";
3882 
3883  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3884  ! isFillActive () || isFillComplete (),
3885  std::runtime_error, ": Matrix fill state must be active (isFillActive() "
3886  "must be true) before you may call fillComplete().");
3887  const int numProcs = getComm ()->getSize ();
3888 
3889  //
3890  // Read parameters from the input ParameterList.
3891  //
3892 
3893  // If true, the caller promises that no process did nonlocal
3894  // changes since the last call to fillComplete.
3895  bool assertNoNonlocalInserts = false;
3896  // If true, makeColMap sorts remote GIDs (within each remote
3897  // process' group).
3898  bool sortGhosts = true;
3899 
3900  if (! params.is_null ()) {
3901  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
3902  assertNoNonlocalInserts);
      // Two spellings of the same option are accepted; the all-lowercase
      // spelling takes precedence if both are present.
3903  if (params->isParameter ("sort column map ghost gids")) {
3904  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
3905  }
3906  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3907  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
3908  }
3909  }
3910  // We also don't need to do global assembly if there is only one
3911  // process in the communicator.
3912  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3913  // This parameter only matters if this matrix owns its graph.
3914  if (! myGraph_.is_null ()) {
3915  myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
3916  }
3917 
3918  if (! getCrsGraph()->indicesAreAllocated()) {
3919  if (hasColMap ()) {
3920  // We have a column Map, so use local indices.
3921  allocateValues (LocalIndices, GraphNotYetAllocated);
3922  } else {
3923  // We don't have a column Map, so use global indices.
3924  allocateValues (GlobalIndices, GraphNotYetAllocated);
3925  }
3926  }
3927  // Global assemble, if we need to. This call only costs a single
3928  // all-reduce if we didn't need global assembly after all.
3929  if (needGlobalAssemble) {
3930  globalAssemble ();
3931  }
3932  else {
3933  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3934  numProcs == 1 && nonlocals_.size() > 0,
3935  std::runtime_error, ": cannot have nonlocal entries on a serial run. "
3936  "An invalid entry (i.e., with row index not in the row Map) must have "
3937  "been submitted to the CrsMatrix.");
3938  }
3939 
3940  if (isStaticGraph ()) {
3941  // FIXME (mfh 18 Jun 2014) This check for correctness of the
3942  // input Maps incurs a penalty of two all-reduces for the
3943  // otherwise optimal const graph case.
3944  //
3945  // We could turn these (max) 2 all-reduces into (max) 1, by
3946  // fusing them. We could do this by adding a "locallySameAs"
3947  // method to Map, which would return one of four states:
3948  //
3949  // a. Certainly globally the same
3950  // b. Certainly globally not the same
3951  // c. Locally the same
3952  // d. Locally not the same
3953  //
3954  // The first two states don't require further communication.
3955  // The latter two states require an all-reduce to communicate
3956  // globally, but we only need one all-reduce, since we only need
3957  // to check whether at least one of the Maps is wrong.
3958  const bool domainMapsMatch = staticGraph_->getDomainMap ()->isSameAs (*domainMap);
3959  const bool rangeMapsMatch = staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
3960 
3961  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3962  ! domainMapsMatch, std::runtime_error,
3963  ": The CrsMatrix's domain Map does not match the graph's domain Map. "
3964  "The graph cannot be changed because it was given to the CrsMatrix "
3965  "constructor as const. You can fix this by passing in the graph's "
3966  "domain Map and range Map to the matrix's fillComplete call.");
3967 
3968  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3969  ! rangeMapsMatch, std::runtime_error,
3970  ": The CrsMatrix's range Map does not match the graph's range Map. "
3971  "The graph cannot be changed because it was given to the CrsMatrix "
3972  "constructor as const. You can fix this by passing in the graph's "
3973  "domain Map and range Map to the matrix's fillComplete call.");
3974  }
3975  else {
3976  // Set the graph's domain and range Maps. This will clear the
3977  // Import if the domain Map has changed (is a different
3978  // pointer), and the Export if the range Map has changed (is a
3979  // different pointer).
3980  myGraph_->setDomainRangeMaps (domainMap, rangeMap);
3981 
3982  // Make the graph's column Map, if necessary.
3983  if (! myGraph_->hasColMap ()) {
3984  myGraph_->makeColMap ();
3985  }
3986 
3987  // Make indices local, if necessary. The method won't do
3988  // anything if the graph is already locally indexed.
3989  myGraph_->makeIndicesLocal ();
3990 
      // Sorting and merging go through the matrix's own methods rather
      // than the graph's, so that values are handled together with indices.
3991  if (! myGraph_->isSorted ()) {
3992  sortEntries ();
3993  }
3994  if (! myGraph_->isMerged ()) {
3995  mergeRedundantEntries ();
3996  }
3997  // Make the Import and Export, if they haven't been made already.
3998  myGraph_->makeImportExport ();
3999  myGraph_->computeGlobalConstants ();
4000  myGraph_->fillComplete_ = true;
4001  myGraph_->checkInternalState ();
4002  }
4003  computeGlobalConstants ();
4004  // fill local objects; will fill and finalize local graph if appropriate
4005  if (myGraph_.is_null ()) {
4006  // The matrix does _not_ own the graph, and the graph's
4007  // structure is already fixed, so just fill the local matrix.
4008  fillLocalMatrix (params);
4009  } else {
4010  // The matrix _does_ own the graph, so fill the local graph at
4011  // the same time as the local matrix.
4012  fillLocalGraphAndMatrix (params);
4013  }
4014 
4015  // Once we've initialized the sparse kernels, we're done with the
4016  // local objects. We may now release them and their memory, since
4017  // they will persist in the local sparse ops if necessary. We
4018  // keep the local graph if the parameters tell us to do so.
4019 
4020  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4021 
4022  fillComplete_ = true; // Now we're fill complete!
4023  checkInternalState ();
4024  }
4025 
4026  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4027  void
4029  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
4030  const Teuchos::RCP<const map_type> & rangeMap,
4031  const Teuchos::RCP<const import_type>& importer,
4032  const Teuchos::RCP<const export_type>& exporter,
4033  const Teuchos::RCP<Teuchos::ParameterList> &params)
4034  {
4035 #ifdef HAVE_TPETRA_MMM_TIMINGS
4036  std::string label;
4037  if(!params.is_null())
4038  label = params->get("Timer Label",label);
4039  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4040  using Teuchos::TimeMonitor;
4041  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-Graph"))));
4042 #endif
4043 
4044  const char tfecfFuncName[] = "expertStaticFillComplete: ";
4045  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4046  std::runtime_error, "Matrix fill state must be active (isFillActive() "
4047  "must be true) before calling fillComplete().");
4048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4049  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
4050 
4051 
4052  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
4053  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4054 
4055 #ifdef HAVE_TPETRA_MMM_TIMINGS
4056  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cGC"))));
4057 #endif
4058 
4059  computeGlobalConstants ();
4060 
4061 #ifdef HAVE_TPETRA_MMM_TIMINGS
4062  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-fLGAM"))));
4063 #endif
4064 
4065  // Fill the local graph and matrix
4066  fillLocalGraphAndMatrix (params);
4067 
4068  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4069 
4070  // Now we're fill complete!
4071  fillComplete_ = true;
4072 
4073  // Sanity checks at the end.
4074 #ifdef HAVE_TPETRA_DEBUG
4075  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4076  ": We're at the end of fillComplete(), but isFillActive() is true. "
4077  "Please report this bug to the Tpetra developers.");
4078  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4079  ": We're at the end of fillComplete(), but isFillActive() is true. "
4080  "Please report this bug to the Tpetra developers.");
4081 #endif // HAVE_TPETRA_DEBUG
4082 
4083 #ifdef HAVE_TPETRA_MMM_TIMINGS
4084  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS"))));
4085 #endif
4086 
4087  checkInternalState();
4088  }
4089 
// sortEntries: for every row index in [0, getNodeNumRows()), ask the owned
// graph to sort that row's indices together with the matching values, then
// flag the graph as sorted. Does nothing if the graph already reports
// isSorted().
//
// Throws std::runtime_error if the graph is static.
//
// NOTE(review): the declarator lines (class qualifier and method name) are
// absent from this listing; the name is taken from the error message below.
4090  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4091  void
4094  {
4095  TEUCHOS_TEST_FOR_EXCEPTION(
4096  isStaticGraph (), std::runtime_error, "Tpetra::CrsMatrix::sortEntries: "
4097  "Cannot sort with static graph.");
4098  if (! myGraph_->isSorted ()) {
4099  const LocalOrdinal lclNumRows =
4100  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4101  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
4102  const RowInfo rowInfo = myGraph_->getRowInfo (row);
      // rv is a mutable view of this row's values; it is handed to the
      // graph together with the row's RowInfo.
4103  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
4104  myGraph_->template sortRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
4105  }
4106  // we just sorted every row
4107  myGraph_->indicesAreSorted_ = true;
4108  }
4109  }
4110 
// mergeRedundantEntries: for every row index in [0, getNodeNumRows()), ask
// the owned graph to merge that row's indices together with the matching
// values, then flag the graph as having no redundancies. Does nothing if
// the graph already reports isMerged().
//
// Throws std::runtime_error if the graph is static.
//
// NOTE(review): the declarator lines (class qualifier and method name) are
// absent from this listing; the name is taken from the error message below.
4111  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4112  void
4115  {
4116  TEUCHOS_TEST_FOR_EXCEPTION(
4117  isStaticGraph (), std::runtime_error, "Tpetra::CrsMatrix::"
4118  "mergeRedundantEntries: Cannot merge with static graph.");
4119  if (! myGraph_->isMerged ()) {
4120  const LocalOrdinal lclNumRows =
4121  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4122  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
4123  const RowInfo rowInfo = myGraph_->getRowInfo (row);
      // rv is a mutable view of this row's values; it is handed to the
      // graph together with the row's RowInfo.
4124  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
4125  myGraph_->template mergeRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
4126  }
4127  myGraph_->noRedundancies_ = true; // we just merged every row
4128  }
4129  }
4130 
// Non-transpose sparse matrix times multivector; per the comments in the
// body, the result is Y_in = beta*Y_in + alpha*A*X_in.
//
// Outline: shortcut for alpha == 0; Import X_in into a column Map
// multivector if the graph has an Importer (or copy it if it does not have
// constant stride); local multiply; Export with ADD if the graph has an
// Exporter; finally reduce Y_in across processes if it is not distributed.
//
// alpha and beta are taken by value; beta is overwritten locally with zero
// on ranks > 0 when Y_in is not distributed.
//
// NOTE(review): the declarator lines (class qualifier, method name, and the
// X_in / Y_in parameters) are absent from this listing. The name is
// presumably applyNonTranspose -- confirm against the original file.
4131  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4132  void
4136  Scalar alpha,
4137  Scalar beta) const
4138  {
4139  using Teuchos::null;
4140  using Teuchos::RCP;
4141  using Teuchos::rcp;
4142  using Teuchos::rcp_const_cast;
4143  using Teuchos::rcpFromRef;
4144  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4145  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4146 
4147  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
4148  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
4149  // failing only for the Kokkos refactor version of Tpetra. It's a
4150  // good idea regardless to have the bypass.
4151  if (alpha == ZERO) {
4152  if (beta == ZERO) {
4153  Y_in.putScalar (ZERO);
4154  } else if (beta != ONE) {
4155  Y_in.scale (beta);
4156  }
4157  return;
4158  }
4159 
4160  // It's possible that X is a view of Y or vice versa. We don't
4161  // allow this (apply() requires that X and Y not alias one
4162  // another), but it's helpful to detect and work around this case.
4163  // We don't try to detect the more subtle cases (e.g., one is a
4164  // subview of the other, but their initial pointers differ). We
4165  // only need to do this if this matrix's Import is trivial;
4166  // otherwise, we don't actually apply the operator from X into Y.
4167 
4168  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4169  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4170 
4171  // If beta == 0, then the output MV will be overwritten; none of
4172  // its entries should be read. (Sparse BLAS semantics say that we
4173  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
4174  // This matters if we need to do an Export operation; see below.
4175  const bool Y_is_overwritten = (beta == ZERO);
4176 
4177  // We treat the case of a replicated MV output specially.
4178  const bool Y_is_replicated = ! Y_in.isDistributed ();
4179 
4180  // This is part of the special case for replicated MV output.
4181  // We'll let each process do its thing, but do an all-reduce at
4182  // the end to sum up the results. Setting beta=0 on all processes
4183  // but Proc 0 makes the math work out for the all-reduce. (This
4184  // assumes that the replicated data is correctly replicated, so
4185  // that the data are the same on all processes.)
4186  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4187  beta = ZERO;
4188  }
4189 
4190  // Temporary MV for Import operation. After the block of code
4191  // below, this will be an (Imported if necessary) column Map MV
4192  // ready to give to localMultiply().
4193  RCP<const MV> X_colMap;
4194  if (importer.is_null ()) {
4195  if (! X_in.isConstantStride ()) {
4196  // Not all sparse mat-vec kernels can handle an input MV with
4197  // nonconstant stride correctly, so we have to copy it in that
4198  // case into a constant stride MV. To make a constant stride
4199  // copy of X_in, we force creation of the column (== domain)
4200  // Map MV (if it hasn't already been created, else fetch the
4201  // cached copy). This avoids creating a new MV each time.
4202  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
4203  Tpetra::deep_copy (*X_colMapNonConst, X_in);
4204  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4205  }
4206  else {
4207  // The domain and column Maps are the same, so do the local
4208  // multiply using the domain Map input MV X_in.
4209  X_colMap = rcpFromRef (X_in);
4210  }
4211  }
4212  else {
4213  // We're doing an Import anyway, which will copy the relevant
4214  // elements of the domain Map MV X_in into a separate column Map
4215  // MV. Thus, we don't have to worry whether X_in is constant
4216  // stride.
4217  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4218 
4219  // Import from the domain Map MV to the column Map MV.
4220  X_colMapNonConst->doImport (X_in, *importer, INSERT);
4221  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4222  }
4223 
4224  // Temporary MV for Export operation, or for copying a nonconstant
4225  // stride output MV into a constant stride MV.
4226  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4227 
4228  // If we have a nontrivial Export object, we must perform an
4229  // Export. In that case, the local multiply result will go into
4230  // the row Map multivector. We don't have to make a
4231  // constant-stride version of Y_in in this case, because we had to
4232  // make a constant stride Y_rowMap MV and do an Export anyway.
4233  if (! exporter.is_null ()) {
4234  this->template localMultiply<Scalar, Scalar> (*X_colMap, *Y_rowMap,
4235  Teuchos::NO_TRANS,
4236  alpha, ZERO);
4237  // If we're overwriting the output MV Y_in completely (beta ==
4238  // 0), then make sure that it is filled with zeros before we do
4239  // the Export. Otherwise, the ADD combine mode will use data in
4240  // Y_in, which is supposed to be zero.
4241  if (Y_is_overwritten) {
4242  Y_in.putScalar (ZERO);
4243  }
4244  else {
4245  // Scale the output MV by beta, so that the Export sums in the
4246  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
4247  Y_in.scale (beta);
4248  }
4249  // Do the Export operation.
4250  Y_in.doExport (*Y_rowMap, *exporter, ADD);
4251  }
4252  else { // Don't do an Export: row Map and range Map are the same.
4253  //
4254  // If Y_in does not have constant stride, or if the column Map
4255  // MV aliases Y_in, then we can't let the kernel write directly
4256  // to Y_in. Instead, we have to use the cached row (== range)
4257  // Map MV as temporary storage.
4258  //
4259  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4260  // the user passed in the same MultiVector for both X and Y. It
4261  // won't detect whether one MultiVector views the other. We
4262  // should also check the MultiVectors' raw data pointers.
4263  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
4264  // Force creating the MV if it hasn't been created already.
4265  // This will reuse a previously created cached MV.
4266  Y_rowMap = getRowMapMultiVector (Y_in, true);
4267 
4268  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
4269  // since we're overwriting it anyway.
4270  if (beta != ZERO) {
4271  Tpetra::deep_copy (*Y_rowMap, Y_in);
4272  }
4273  this->template localMultiply<Scalar, Scalar> (*X_colMap,
4274  *Y_rowMap,
4275  Teuchos::NO_TRANS,
4276  alpha, beta);
4277  Tpetra::deep_copy (Y_in, *Y_rowMap);
4278  }
4279  else {
4280  this->template localMultiply<Scalar, Scalar> (*X_colMap, Y_in,
4281  Teuchos::NO_TRANS,
4282  alpha, beta);
4283  }
4284  }
4285 
4286  // If the range Map is a locally replicated Map, sum up
4287  // contributions from each process. We set beta = 0 on all
4288  // processes but Proc 0 initially, so this will handle the scaling
4289  // factor beta correctly.
4290  if (Y_is_replicated) {
4291  Y_in.reduce ();
4292  }
4293  }
4294 
// Transpose-mode sparse matrix times multivector. The given mode is passed
// straight through to localMultiply.
//
// Outline: shortcut for alpha == 0; (re)allocate the cached importMV_ /
// exportMV_ if the graph has an Importer / Exporter and the number of
// vectors changed; in this mode the roles are swapped, so the Exporter is
// used to doImport X_in and the Importer is used to doExport the result
// into Y_in with ADD; finally reduce Y_in across processes if it is not
// distributed.
//
// alpha and beta are taken by value; beta is overwritten locally with zero
// on ranks > 0 when Y_in is not distributed.
//
// NOTE(review): the declarator lines (class qualifier, method name, and the
// X_in / Y_in parameters) are absent from this listing. The name is
// presumably applyTranspose -- confirm against the original file.
4295  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4296  void
4300  const Teuchos::ETransp mode,
4301  Scalar alpha,
4302  Scalar beta) const
4303  {
4304  using Teuchos::null;
4305  using Teuchos::RCP;
4306  using Teuchos::rcp;
4307  using Teuchos::rcp_const_cast;
4308  using Teuchos::rcpFromRef;
4309  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4310 
4311  // Take shortcuts for alpha == 0.
4312  if (alpha == ZERO) {
4313  // Follow the Sparse BLAS convention by ignoring both the matrix
4314  // and X_in, in this case.
4315  if (beta == ZERO) {
4316  // Follow the Sparse BLAS convention by overwriting any Inf or
4317  // NaN values in Y_in, in this case.
4318  Y_in.putScalar (ZERO);
4319  }
4320  else {
4321  Y_in.scale (beta);
4322  }
4323  return;
4324  }
4325 
4326  const size_t numVectors = X_in.getNumVectors ();
4327 
4328  // We don't allow X_in and Y_in to alias one another. It's hard
4329  // to check this, because advanced users could create views from
4330  // raw pointers. However, if X_in and Y_in reference the same
4331  // object, we will do the user a favor by copying X into new
4332  // storage (with a warning). We only need to do this if we have
4333  // trivial importers; otherwise, we don't actually apply the
4334  // operator from X into Y.
4335  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4336  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4337  // access X indirectly, in case we need to create temporary storage
4338  RCP<const MV> X;
4339 
4340  // some parameters for below
4341  const bool Y_is_replicated = ! Y_in.isDistributed ();
4342  const bool Y_is_overwritten = (beta == ZERO);
4343  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4344  beta = ZERO;
4345  }
4346 
4347  // The kernels do not allow input or output with nonconstant stride.
4348  if (! X_in.isConstantStride () && importer.is_null ()) {
4349  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
4350  } else {
4351  X = rcpFromRef (X_in); // Reference to X_in
4352  }
4353 
4354  // Set up temporary multivectors for Import and/or Export.
4355  if (importer != Teuchos::null) {
4356  if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4357  importMV_ = null;
4358  }
4359  if (importMV_ == null) {
4360  importMV_ = rcp (new MV (this->getColMap (), numVectors));
4361  }
4362  }
4363  if (exporter != Teuchos::null) {
4364  if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4365  exportMV_ = null;
4366  }
4367  if (exportMV_ == null) {
4368  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
4369  }
4370  }
4371 
4372  // If we have a non-trivial exporter, we must import elements that
4373  // are permuted or are on other processors.
4374  if (! exporter.is_null ()) {
4375  exportMV_->doImport (X_in, *exporter, INSERT);
4376  X = exportMV_; // multiply out of exportMV_
4377  }
4378 
4379  // If we have a non-trivial importer, we must export elements that
4380  // are permuted or belong to other processors. We will compute
4381  // solution into the to-be-exported MV; get a view.
4382  if (importer != Teuchos::null) {
4383  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
4384  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
4385  // back and figure out why this helps. importMV_ SHOULD be
4386  // completely overwritten in the localMultiply() call below,
4387  // because beta == ZERO there.
4388  importMV_->putScalar (ZERO);
4389  // Do the local computation.
4390  this->template localMultiply<Scalar, Scalar> (*X, *importMV_, mode,
4391  alpha, ZERO);
4392  if (Y_is_overwritten) {
4393  Y_in.putScalar (ZERO);
4394  } else {
4395  Y_in.scale (beta);
4396  }
4397  Y_in.doExport (*importMV_, *importer, ADD);
4398  }
4399  // otherwise, multiply into Y
4400  else {
4401  // can't multiply in-situ; can't multiply into non-strided multivector
4402  //
4403  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4404  // the user passed in the same MultiVector for both X and Y. It
4405  // won't detect whether one MultiVector views the other. We
4406  // should also check the MultiVectors' raw data pointers.
4407  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
4408  // Make a deep copy of Y_in, into which to write the multiply result.
4409  MV Y (Y_in, Teuchos::Copy);
4410  this->template localMultiply<Scalar, Scalar> (*X, Y, mode, alpha, beta);
4411  Tpetra::deep_copy (Y_in, Y);
4412  } else {
4413  this->template localMultiply<Scalar, Scalar> (*X, Y_in, mode, alpha, beta);
4414  }
4415  }
4416 
4417  // If the range Map is a locally replicated map, sum the
4418  // contributions from each process. (That's why we set beta=0
4419  // above for all processes but Proc 0.)
4420  if (Y_is_replicated) {
4421  Y_in.reduce ();
4422  }
4423  }
4424 
// apply: dispatch on mode. Teuchos::NO_TRANS goes to applyNonTranspose;
// every other mode goes to applyTranspose, after first zeroing Y when
// beta == 0.
//
// Throws std::runtime_error if the matrix is not fill complete.
//
// NOTE(review): the declarator lines (class qualifier, method name, and the
// X / Y parameters) are absent from this listing; the name is taken from
// the error message below.
4425  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4426  void
4430  Teuchos::ETransp mode,
4431  Scalar alpha,
4432  Scalar beta) const
4433  {
4434  TEUCHOS_TEST_FOR_EXCEPTION(
4435  ! isFillComplete (), std::runtime_error,
4436  "Tpetra::CrsMatrix::apply(): Cannot call apply() until fillComplete() "
4437  "has been called.");
4438 
4439  if (mode == Teuchos::NO_TRANS) {
4440  applyNonTranspose (X, Y, alpha, beta);
4441  } else {
4442  // Thyra was implicitly assuming that Y gets set to zero / or is overwritten
4443  // when beta==0. This was not the case with transpose in a multithreaded
4444  // environment where a multiplication with subsequent atomic_adds is used
4445  // since 0 is effectively not special cased. Doing the explicit set to zero here.
4446  // This catches cases where Y is nan or inf.
4447  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4448  if(beta == ZERO)
4449  Y.putScalar (ZERO);
4450  applyTranspose (X, Y, mode, alpha, beta);
4451  }
4452  }
4453 
// Thin wrapper: forwards B, X, D, dampingFactor, direction, and numSweeps
// to reorderedGaussSeidel, passing Teuchos::null as the fourth argument
// (presumably the row-index ordering -- confirm against that method's
// declaration).
//
// NOTE(review): the declarator lines (class qualifier, method name, and the
// B / X / D parameters) are absent from this listing. The name is
// presumably gaussSeidel -- confirm against the original file.
4454  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4455  void
4460  const Scalar& dampingFactor,
4461  const ESweepDirection direction,
4462  const int numSweeps) const
4463  {
4464  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
4465  }
4466 
4467  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4468  void
4473  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4474  const Scalar& dampingFactor,
4475  const ESweepDirection direction,
4476  const int numSweeps) const
4477  {
4478  using Teuchos::null;
4479  using Teuchos::RCP;
4480  using Teuchos::rcp;
4481  using Teuchos::rcp_const_cast;
4482  using Teuchos::rcpFromRef;
4483  typedef Scalar ST;
4484 
4485  TEUCHOS_TEST_FOR_EXCEPTION(
4486  isFillComplete() == false, std::runtime_error,
4487  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
4488  "fillComplete() has been called.");
4489  TEUCHOS_TEST_FOR_EXCEPTION(
4490  numSweeps < 0,
4491  std::invalid_argument,
4492  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be , "
4493  "nonnegative but you provided numSweeps = " << numSweeps << " < 0.");
4494 
4495  // Translate from global to local sweep direction.
4496  // While doing this, validate the input.
4497  KokkosClassic::ESweepDirection localDirection;
4498  if (direction == Forward) {
4499  localDirection = KokkosClassic::Forward;
4500  }
4501  else if (direction == Backward) {
4502  localDirection = KokkosClassic::Backward;
4503  }
4504  else if (direction == Symmetric) {
4505  // We'll control local sweep direction manually.
4506  localDirection = KokkosClassic::Forward;
4507  }
4508  else {
4509  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
4510  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
4511  "any of its valid values: Forward, Backward, or Symmetric.");
4512  }
4513 
4514  if (numSweeps == 0) {
4515  return; // Nothing to do.
4516  }
4517 
4518  // We don't need the Export object because this method assumes
4519  // that the row, domain, and range Maps are the same. We do need
4520  // the Import object, if there is one, though.
4521  RCP<const import_type> importer = this->getGraph()->getImporter();
4522  RCP<const export_type> exporter = this->getGraph()->getExporter();
4523  TEUCHOS_TEST_FOR_EXCEPTION(
4524  ! exporter.is_null (), std::runtime_error,
4525  "Tpetra's gaussSeidel implementation requires that the row, domain, "
4526  "and range Maps be the same. This cannot be the case, because the "
4527  "matrix has a nontrivial Export object.");
4528 
4529  RCP<const map_type> domainMap = this->getDomainMap ();
4530  RCP<const map_type> rangeMap = this->getRangeMap ();
4531  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4532  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4533 
4534 #ifdef HAVE_TEUCHOS_DEBUG
4535  {
4536  // The relation 'isSameAs' is transitive. It's also a
4537  // collective, so we don't have to do a "shared" test for
4538  // exception (i.e., a global reduction on the test value).
4539  TEUCHOS_TEST_FOR_EXCEPTION(
4540  ! X.getMap ()->isSameAs (*domainMap),
4541  std::runtime_error,
4542  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4543  "multivector X be in the domain Map of the matrix.");
4544  TEUCHOS_TEST_FOR_EXCEPTION(
4545  ! B.getMap ()->isSameAs (*rangeMap),
4546  std::runtime_error,
4547  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4548  "B be in the range Map of the matrix.");
4549  TEUCHOS_TEST_FOR_EXCEPTION(
4550  ! D.getMap ()->isSameAs (*rowMap),
4551  std::runtime_error,
4552  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4553  "D be in the row Map of the matrix.");
4554  TEUCHOS_TEST_FOR_EXCEPTION(
4555  ! rowMap->isSameAs (*rangeMap),
4556  std::runtime_error,
4557  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
4558  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4559  TEUCHOS_TEST_FOR_EXCEPTION(
4560  ! domainMap->isSameAs (*rangeMap),
4561  std::runtime_error,
4562  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
4563  "the range Map of the matrix be the same.");
4564  }
4565 #else
4566  // Forestall any compiler warnings for unused variables.
4567  (void) rangeMap;
4568  (void) rowMap;
4569 #endif // HAVE_TEUCHOS_DEBUG
4570 
4571  // If B is not constant stride, copy it into a constant stride
4572  // multivector. We'll handle the right-hand side B first and deal
4573  // with X right before the sweeps, to improve locality of the
4574  // first sweep. (If the problem is small enough, then that will
4575  // hopefully keep more of the entries of X in cache. This
4576  // optimizes for the typical case of a small number of sweeps.)
4577  RCP<const MV> B_in;
4578  if (B.isConstantStride()) {
4579  B_in = rcpFromRef (B);
4580  }
4581  else {
4582  // The range Map and row Map are the same in this case, so we
4583  // can use the (possibly cached) row Map multivector to store a
4584  // constant stride copy of B. We don't have to copy back, since
4585  // Gauss-Seidel won't modify B.
4586  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4587  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
4588  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4589 
4591  ! B.isConstantStride (),
4592  std::runtime_error,
4593  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
4594  "requires that X and B both have constant stride. Since B does not "
4595  "have constant stride, we had to make a copy. This is a limitation of "
4596  "the current implementation and not your fault, but we still report it "
4597  "as an efficiency warning for your information.");
4598  }
4599 
4600  // If X is not constant stride, copy it into a constant stride
4601  // multivector. Also, make the column Map multivector X_colMap,
4602  // and its domain Map view X_domainMap. (X actually must be a
4603  // domain Map view of a column Map multivector; exploit this, if X
4604  // has constant stride.)
4605 
4606  RCP<MV> X_domainMap;
4607  RCP<MV> X_colMap;
4608  bool copiedInput = false;
4609 
4610  if (importer.is_null ()) { // Domain and column Maps are the same.
4611  if (X.isConstantStride ()) {
4612  X_domainMap = rcpFromRef (X);
4613  X_colMap = X_domainMap;
4614  copiedInput = false;
4615  }
4616  else {
4617  // Get a temporary column Map multivector, make a domain Map
4618  // view of it, and copy X into the domain Map view. We have
4619  // to copy here because we won't be doing Import operations.
4620  X_colMap = getColumnMapMultiVector (X, true);
4621  X_domainMap = X_colMap; // Domain and column Maps are the same.
4622  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
4623  copiedInput = true;
4625  ! X.isConstantStride (), std::runtime_error,
4626  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4627  "Gauss-Seidel kernel requires that X and B both have constant "
4628  "stride. Since X does not have constant stride, we had to make a "
4629  "copy. This is a limitation of the current implementation and not "
4630  "your fault, but we still report it as an efficiency warning for "
4631  "your information.");
4632  }
4633  }
4634  else { // We will be doing Import operations in the sweeps.
4635  if (X.isConstantStride ()) {
4636  X_domainMap = rcpFromRef (X);
4637  // This kernel assumes that X is a domain Map view of a column
4638  // Map multivector. We will only check if this is valid if
4639  // the CMake configure Teuchos_ENABLE_DEBUG is ON.
4640  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
4641 
4642  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
4643  // entries of X_colMap with zeros? Do we need to fill all of
4644  // X_domainMap initially with zeros? Ifpack
4645  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
4646  // new MultiVector each time.
4647 
4648  // Do the first Import for the first sweep. This simplifies
4649  // the logic in the sweeps.
4650  X_colMap->doImport (X, *importer, INSERT);
4651  copiedInput = false;
4652  }
4653  else {
4654  // Get a temporary column Map multivector X_colMap, and make a
4655  // domain Map view X_domainMap of it. Instead of copying, we
4656  // do an Import from X into X_domainMap. This saves us a
4657  // copy, since the Import has to copy the data anyway.
4658  X_colMap = getColumnMapMultiVector (X, true);
4659  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4660  X_colMap->doImport (X, *importer, INSERT);
4661  copiedInput = true;
4663  ! X.isConstantStride (), std::runtime_error,
4664  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4665  "Gauss-Seidel kernel requires that X and B both have constant stride. "
4666  "Since X does not have constant stride, we had to make a copy. "
4667  "This is a limitation of the current implementation and not your fault, "
4668  "but we still report it as an efficiency warning for your information.");
4669  }
4670  }
4671 
4672  for (int sweep = 0; sweep < numSweeps; ++sweep) {
4673  if (! importer.is_null () && sweep > 0) {
4674  // We already did the first Import for the zeroth sweep.
4675  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4676  }
4677 
4678  // Do local Gauss-Seidel.
4679  if (direction != Symmetric) {
4680  if (rowIndices.is_null ()) {
4681  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4682  dampingFactor,
4683  localDirection);
4684  }
4685  else {
4686  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4687  D, rowIndices,
4688  dampingFactor,
4689  localDirection);
4690  }
4691  }
4692  else { // direction == Symmetric
4693  const bool doImportBetweenDirections = false;
4694  if (rowIndices.is_null ()) {
4695  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4696  dampingFactor,
4697  KokkosClassic::Forward);
4698  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
4699  // Gauss-Seidel" does _not_ do an Import between the forward
4700  // and backward sweeps. This makes sense, because Aztec
4701  // considers "symmetric Gauss-Seidel" a subdomain solver.
4702  if (doImportBetweenDirections) {
4703  // Communicate again before the Backward sweep.
4704  if (! importer.is_null ()) {
4705  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4706  }
4707  }
4708  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4709  dampingFactor,
4710  KokkosClassic::Backward);
4711  }
4712  else {
4713  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4714  D, rowIndices,
4715  dampingFactor,
4716  KokkosClassic::Forward);
4717  if (doImportBetweenDirections) {
4718  // Communicate again before the Backward sweep.
4719  if (! importer.is_null ()) {
4720  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4721  }
4722  }
4723  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4724  D, rowIndices,
4725  dampingFactor,
4726  KokkosClassic::Backward);
4727  }
4728  }
4729  }
4730 
4731  if (copiedInput) {
4732  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
4733  }
4734  }
4735 
// Convenience overload: forwards every argument to reorderedGaussSeidelCopy,
// passing Teuchos::null for the row-index argument.
//
// NOTE(review): listing lines 4738-4741 (the qualified function name and the
// leading parameters, which the body refers to as X, B and D) are absent from
// this extract.
4736  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4737  void
4742  const Scalar& dampingFactor,
4743  const ESweepDirection direction,
4744  const int numSweeps,
4745  const bool zeroInitialGuess) const
4746  {
// Teuchos::null takes the place of the rowIndices argument.
4747  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
4748  numSweeps, zeroInitialGuess);
4749  }
4750 
// Gauss-Seidel / SOR sweeps performed on a constant-stride working vector
// (X itself, or a temporary that is copied back into X at the end).
//
// NOTE(review): listing lines 4753-4756 (the qualified function name and the
// leading parameters, which the body refers to as X, B and D) are absent from
// this extract; the description below is based on the body only.
//
// Parameters visible here:
//   rowIndices       - if null, each sweep calls localGaussSeidel; otherwise
//                      reorderedLocalGaussSeidel is called with these indices.
//   dampingFactor    - forwarded unchanged to the local kernel.
//   direction        - Forward, Backward or Symmetric; Symmetric runs a local
//                      Forward sweep followed by a local Backward sweep.
//   numSweeps        - number of sweeps; 0 returns immediately.
//   zeroInitialGuess - if true, X's current values are not used as the
//                      starting iterate (the working vector is zero-filled
//                      with putScalar, or left as returned by
//                      getColumnMapMultiVector).
//
// Throws std::runtime_error if the matrix is not fill complete or its graph
// has an Export object, and std::invalid_argument if numSweeps < 0 or
// direction is not one of the three values above.
4751  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4752  void
4757  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4758  const Scalar& dampingFactor,
4759  const ESweepDirection direction,
4760  const int numSweeps,
4761  const bool zeroInitialGuess) const
4762  {
4763  using Teuchos::null;
4764  using Teuchos::RCP;
4765  using Teuchos::rcp;
4766  using Teuchos::rcpFromRef;
4767  using Teuchos::rcp_const_cast;
4768  typedef Scalar ST;
4769  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
4770  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4771 
// Argument / state validation.
4772  TEUCHOS_TEST_FOR_EXCEPTION(
4773  ! isFillComplete (), std::runtime_error,
4774  prefix << "The matrix is not fill complete.");
4775  TEUCHOS_TEST_FOR_EXCEPTION(
4776  numSweeps < 0, std::invalid_argument,
4777  prefix << "The number of sweeps must be nonnegative, "
4778  "but you provided numSweeps = " << numSweeps << " < 0.");
4779 
4780  // Translate from global to local sweep direction.
4781  // While doing this, validate the input.
4782  KokkosClassic::ESweepDirection localDirection;
4783  if (direction == Forward) {
4784  localDirection = KokkosClassic::Forward;
4785  }
4786  else if (direction == Backward) {
4787  localDirection = KokkosClassic::Backward;
4788  }
4789  else if (direction == Symmetric) {
4790  // We'll control local sweep direction manually.
4791  localDirection = KokkosClassic::Forward;
4792  }
4793  else {
4794  TEUCHOS_TEST_FOR_EXCEPTION(
4795  true, std::invalid_argument,
4796  prefix << "The 'direction' enum does not have any of its valid "
4797  "values: Forward, Backward, or Symmetric.");
4798  }
4799 
4800  if (numSweeps == 0) {
4801  return;
4802  }
4803 
// A non-null Export object is treated as proof that the row, domain and
// range Maps differ, which this method does not support.
4804  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4805  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4806  TEUCHOS_TEST_FOR_EXCEPTION(
4807  ! exporter.is_null (), std::runtime_error,
4808  "This method's implementation currently requires that the matrix's row, "
4809  "domain, and range Maps be the same. This cannot be the case, because "
4810  "the matrix has a nontrivial Export object.");
4811 
4812  RCP<const map_type> domainMap = this->getDomainMap ();
4813  RCP<const map_type> rangeMap = this->getRangeMap ();
4814  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4815  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4816 
4817 #ifdef HAVE_TEUCHOS_DEBUG
4818  {
4819  // The relation 'isSameAs' is transitive. It's also a
4820  // collective, so we don't have to do a "shared" test for
4821  // exception (i.e., a global reduction on the test value).
4822  TEUCHOS_TEST_FOR_EXCEPTION(
4823  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
4824  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4825  "multivector X be in the domain Map of the matrix.");
4826  TEUCHOS_TEST_FOR_EXCEPTION(
4827  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
4828  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4829  "B be in the range Map of the matrix.");
4830  TEUCHOS_TEST_FOR_EXCEPTION(
4831  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
4832  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4833  "D be in the row Map of the matrix.");
4834  TEUCHOS_TEST_FOR_EXCEPTION(
4835  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
4836  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
4837  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4838  TEUCHOS_TEST_FOR_EXCEPTION(
4839  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
4840  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
4841  "the range Map of the matrix be the same.");
4842  }
4843 #else
4844  // Forestall any compiler warnings for unused variables.
4845  (void) rangeMap;
4846  (void) rowMap;
4847 #endif // HAVE_TEUCHOS_DEBUG
4848 
4849  // Fetch a (possibly cached) temporary column Map multivector
4850  // X_colMap, and a domain Map view X_domainMap of it. Both have
4851  // constant stride by construction. We know that the domain Map
4852  // must include the column Map, because our Gauss-Seidel kernel
4853  // requires that the row Map, domain Map, and range Map are all
4854  // the same, and that each process owns all of its own diagonal
4855  // entries of the matrix.
4856 
4857  RCP<MV> X_colMap;
4858  RCP<MV> X_domainMap;
// Set to true whenever the sweeps operate on a temporary rather than on X
// itself, so that the result is copied into X after the sweeps.
4859  bool copyBackOutput = false;
4860  if (importer.is_null ()) {
4861  if (X.isConstantStride ()) {
4862  X_colMap = rcpFromRef (X);
4863  X_domainMap = rcpFromRef (X);
4864  // Column Map and domain Map are the same, so there are no
4865  // remote entries. Thus, if we are not setting the initial
4866  // guess to zero, we don't have to worry about setting remote
4867  // entries to zero, even though we are not doing an Import in
4868  // this case.
4869  if (zeroInitialGuess) {
4870  X_colMap->putScalar (ZERO);
4871  }
4872  // No need to copy back to X at end.
4873  }
4874  else { // We must copy X into a constant stride multivector.
4875  // Just use the cached column Map multivector for that.
4876  // force=true means fill with zeros, so no need to fill
4877  // remote entries (not in domain Map) with zeros.
4878  X_colMap = getColumnMapMultiVector (X, true);
4879  // X_domainMap is always a domain Map view of the column Map
4880  // multivector. In this case, the domain and column Maps are
4881  // the same, so X_domainMap _is_ X_colMap.
4882  X_domainMap = X_colMap;
4883  if (! zeroInitialGuess) { // Don't copy if zero initial guess
4884  try {
4885  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
4886  } catch (std::exception& e) {
4887  std::ostringstream os;
4888  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
4889  "deep_copy(*X_domainMap, X) threw an exception: "
4890  << e.what () << ".";
// NOTE(review): `os` is assembled above but never read; the message passed
// to the macro below is e.what() alone, so the added context is discarded.
4891  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
4892  }
4893  }
4894  copyBackOutput = true; // Don't forget to copy back at end.
// NOTE(review): listing line 4895 is absent from this extract; the lines
// below read as the arguments of a macro call whose opening line is missing
// (the message text describes an efficiency warning) -- confirm against the
// real source.
4896  ! X.isConstantStride (),
4897  std::runtime_error,
4898  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
4899  "kernel requires that X and B both have constant stride. Since X "
4900  "does not have constant stride, we had to make a copy. This is a "
4901  "limitation of the current implementation and not your fault, but we "
4902  "still report it as an efficiency warning for your information.");
4903  }
4904  }
4905  else { // Column Map and domain Map are _not_ the same.
4906  X_colMap = getColumnMapMultiVector (X);
4907  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4908 
// Debug-only sanity checks that X_domainMap really is a view of X_colMap
// starting at offset 0 with a matching number of columns.
4909 #ifdef HAVE_TPETRA_DEBUG
4910  auto X_colMap_host_view =
4911  X_colMap->template getLocalView<Kokkos::HostSpace> ();
4912  auto X_domainMap_host_view =
4913  X_domainMap->template getLocalView<Kokkos::HostSpace> ();
4914 
4915  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
4916  TEUCHOS_TEST_FOR_EXCEPTION
4917  (X_colMap_host_view.ptr_on_device () != X_domainMap_host_view.ptr_on_device (),
4918  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: Pointer to "
4919  "start of column Map view of X is not equal to pointer to start of "
4920  "(domain Map view of) X. This may mean that Tpetra::MultiVector::"
4921  "offsetViewNonConst is broken. "
4922  "Please report this bug to the Tpetra developers.");
4923  }
4924 
4925  TEUCHOS_TEST_FOR_EXCEPTION(
4926  X_colMap_host_view.dimension_0 () < X_domainMap_host_view.dimension_0 () ||
4927  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
4928  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4929  "X_colMap has fewer local rows than X_domainMap. "
4930  "X_colMap_host_view.dimension_0() = " << X_colMap_host_view.dimension_0 ()
4931  << ", X_domainMap_host_view.dimension_0() = "
4932  << X_domainMap_host_view.dimension_0 ()
4933  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
4934  << ", and X_domainMap->getLocalLength() = "
4935  << X_domainMap->getLocalLength ()
4936  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
4937  "is broken. Please report this bug to the Tpetra developers.");
4938 
4939  TEUCHOS_TEST_FOR_EXCEPTION(
4940  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
4941  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4942  "X_colMap has a different number of columns than X_domainMap. "
4943  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
4944  << " != X_domainMap->getNumVectors() = "
4945  << X_domainMap->getNumVectors ()
4946  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
4947  "is broken. Please report this bug to the Tpetra developers.");
4948 #endif // HAVE_TPETRA_DEBUG
4949 
4950  if (zeroInitialGuess) {
4951  // No need for an Import, since we're filling with zeros.
4952  X_colMap->putScalar (ZERO);
4953  } else {
4954  // We could just copy X into X_domainMap. However, that
4955  // wastes a copy, because the Import also does a copy (plus
4956  // communication). Since the typical use case for
4957  // Gauss-Seidel is a small number of sweeps (2 is typical), we
4958  // don't want to waste that copy. Thus, we do the Import
4959  // here, and skip the first Import in the first sweep.
4960  // Importing directly from X effects the copy into X_domainMap
4961  // (which is a view of X_colMap).
4962  X_colMap->doImport (X, *importer, INSERT);
4963  }
4964  copyBackOutput = true; // Don't forget to copy back at end.
4965  } // if column and domain Maps are (not) the same
4966 
4967  // The Gauss-Seidel / SOR kernel expects multivectors of constant
4968  // stride. X_colMap is by construction, but B might not be. If
4969  // it's not, we have to make a copy.
4970  RCP<const MV> B_in;
4971  if (B.isConstantStride ()) {
4972  B_in = rcpFromRef (B);
4973  }
4974  else {
4975  // Range Map and row Map are the same in this case, so we can
4976  // use the cached row Map multivector to store a constant stride
4977  // copy of B.
4978  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4979  try {
4980  deep_copy (*B_in_nonconst, B);
4981  } catch (std::exception& e) {
4982  std::ostringstream os;
4983  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
4984  "deep_copy(*B_in_nonconst, B) threw an exception: "
4985  << e.what () << ".";
// NOTE(review): as in the X branch, `os` is built but unused; only e.what()
// reaches the rethrown exception's message.
4986  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
4987  }
4988  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4989 
// NOTE(review): listing line 4990 is absent from this extract; the lines
// below read as the arguments of a macro call whose opening line is missing
// -- confirm against the real source.
4991  ! B.isConstantStride (),
4992  std::runtime_error,
4993  "gaussSeidelCopy: The current implementation requires that B have "
4994  "constant stride. Since B does not have constant stride, we had to "
4995  "copy it into a separate constant-stride multivector. This is a "
4996  "limitation of the current implementation and not your fault, but we "
4997  "still report it as an efficiency warning for your information.");
4998  }
4999 
// Main loop: one Import (when an importer exists, and except before the
// first sweep) followed by the local kernel(s) for the requested direction.
5000  for (int sweep = 0; sweep < numSweeps; ++sweep) {
5001  if (! importer.is_null () && sweep > 0) {
5002  // We already did the first Import for the zeroth sweep above,
5003  // if it was necessary.
5004  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5005  }
5006 
5007  // Do local Gauss-Seidel.
5008  if (direction != Symmetric) {
5009  if (rowIndices.is_null ()) {
5010  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5011  dampingFactor,
5012  localDirection);
5013  }
5014  else {
5015  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5016  D, rowIndices,
5017  dampingFactor,
5018  localDirection);
5019  }
5020  }
5021  else { // direction == Symmetric
5022  if (rowIndices.is_null ()) {
5023  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5024  dampingFactor,
5025  KokkosClassic::Forward);
5026  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
5027  // Gauss-Seidel" does _not_ do an Import between the forward
5028  // and backward sweeps. This makes symmetric Gauss-Seidel a
5029  // symmetric preconditioner if the matrix A is symmetric. We
5030  // imitate Aztec's behavior here.
5031  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5032  dampingFactor,
5033  KokkosClassic::Backward);
5034  }
5035  else {
// Reordered variant of the symmetric sweep: Forward then Backward, again
// with no Import in between.
5036  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5037  D, rowIndices,
5038  dampingFactor,
5039  KokkosClassic::Forward);
5040  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5041  D, rowIndices,
5042  dampingFactor,
5043  KokkosClassic::Backward);
5044 
5045  }
5046  }
5047  }
5048 
5049  if (copyBackOutput) {
5050  try {
5051  deep_copy (X , *X_domainMap); // Copy result back into X.
5052  } catch (std::exception& e) {
5053  TEUCHOS_TEST_FOR_EXCEPTION(
5054  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
5055  "threw an exception: " << e.what ());
5056  }
5057  }
5058  }
5059 
// Return a new matrix whose values are this matrix's values converted from
// Scalar to T with static_cast.
//
// Requires this matrix to be fill complete (throws std::runtime_error
// otherwise). If this matrix has a static graph, the new matrix is built on
// the same graph and completed with fillComplete; otherwise the row offsets,
// column indices and converted values are copied into a new StaticProfile
// matrix, which is completed with expertStaticFillComplete using this
// matrix's domain/range Maps and its graph's Import/Export objects.
//
// NOTE(review): listing lines 5063 and 5069 are absent from this extract;
// presumably they hold the qualified function name and the out_mat_type
// typedef used below -- confirm against the real source.
5060  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5061  template<class T>
5062  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node, classic> >
5064  convert () const
5065  {
5066  using Teuchos::ArrayRCP;
5067  using Teuchos::RCP;
5068  using Teuchos::rcp;
5070  typedef typename out_mat_type::local_matrix_type out_lcl_mat_type;
5071  typedef typename out_lcl_mat_type::values_type out_vals_type;
5072  typedef ArrayRCP<size_t>::size_type size_type;
5073  const char tfecfFuncName[] = "convert";
5074 
5075  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5076  ! isFillComplete (), std::runtime_error, "This matrix (the source of "
5077  "the conversion) is not fill complete. You must first call "
5078  "fillComplete() (possibly with the domain and range Map) without an "
5079  "intervening call to resumeFill(), before you may call this method.");
5080 
5081  // mfh 27 Feb 2014: It seems reasonable that if this matrix has a
5082  // const graph, then the returned matrix should also. However, if
5083  // this matrix does not have a const graph, then neither should
5084  // the returned matrix. The code below implements this strategy.
5085 
5086  RCP<out_mat_type> newmat; // the matrix to return
5087 
5088  if (this->isStaticGraph ()) {
5089  // This matrix has a const graph, so the returned matrix should too.
5090  newmat = rcp (new out_mat_type (this->getCrsGraph ()));
5091 
5092  // Convert the values from Scalar to T, and stuff them directly
5093  // into the matrix to return.
5094  const size_type numVals =
5095  static_cast<size_type> (this->lclMatrix_.values.dimension_0 ());
5096 
5097  // FIXME (mfh 05 Aug 2014) Write a copy kernel (impl_scalar_type and
5098  // T differ, so we can't use Kokkos::deep_copy).
5099  //
5100  // FIXME (mfh 05 Aug 2014) This assumes UVM.
5101  out_vals_type newVals1D ("Tpetra::CrsMatrix::val", numVals);
5102  for (size_type k = 0; k < numVals; ++k) {
5103  newVals1D(k) = static_cast<T> (this->k_values1D_(k));
5104  }
// Build the new local matrix from the converted values and this matrix's
// local graph, and keep k_values1D_ pointing at the same values.
5105  newmat->lclMatrix_ =
5106  out_lcl_mat_type ("Tpetra::CrsMatrix::lclMatrix_",
5107  this->lclMatrix_.numCols (), newVals1D,
5108  this->lclMatrix_.graph);
5109  newmat->k_values1D_ = newVals1D;
5110  // Since newmat has a static (const) graph, the graph already
5111  // has a column Map, and Import and Export objects already exist
5112  // (if applicable). Thus, calling fillComplete is cheap.
5113  newmat->fillComplete (this->getDomainMap (), this->getRangeMap ());
5114  }
5115  else {
5116  // This matrix has a nonconst graph, so the returned matrix
5117  // should also have a nonconst graph. However, it's fine for
5118  // the returned matrix to have static profile. This will
5119  // certainly speed up its fillComplete.
5120 
5121  //
5122  // FIXME (mfh 05 Aug 2014) Instead of the slow stuff below, we
5123  // should copy the values and existing graph into a new local
5124  // matrix (lclMatrix), and then use the Tpetra::CrsMatrix
5125  // constructor that takes (rowMap, colMap, lclMatrix, params).
5126  //
5127 
5128  // Get this matrix's local data.
5129  ArrayRCP<const size_t> ptr;
5130  ArrayRCP<const LocalOrdinal> ind;
5131  ArrayRCP<const Scalar> oldVal;
5132  this->getAllValues (ptr, ind, oldVal);
5133 
5134  RCP<const map_type> rowMap = this->getRowMap ();
5135  RCP<const map_type> colMap = this->getColMap ();
5136 
5137  // Get an array of the number of entries in each (locally owned)
5138  // row, so that we can make the new matrix with static profile.
5139  const size_type numLocalRows =
5140  static_cast<size_type> (rowMap->getNodeNumElements ());
5141  ArrayRCP<size_t> numEntriesPerRow (numLocalRows);
5142  for (size_type localRow = 0; localRow < numLocalRows; ++localRow) {
5143  numEntriesPerRow[localRow] =
5144  static_cast<size_type> (getNumEntriesInLocalRow (localRow));
5145  }
5146 
5147  newmat = rcp (new out_mat_type (rowMap, colMap, numEntriesPerRow,
5148  StaticProfile));
5149 
5150  // Convert this matrix's values from Scalar to T.
5151  const size_type numVals = this->lclMatrix_.values.dimension_0 ();
5152  ArrayRCP<T> newVals1D (numVals);
5153  // FIXME (mfh 05 Aug 2014) This assumes UVM.
5154  for (size_type k = 0; k < numVals; ++k) {
5155  newVals1D[k] = static_cast<T> (this->k_values1D_(k));
5156  }
5157 
5158  // Give this matrix all of its local data. We can call this
5159  // method because newmat was _not_ created with a const graph.
5160  // The data must be passed in as nonconst, so we have to copy it
5161  // first.
5162  ArrayRCP<size_t> newPtr (ptr.size ());
5163  std::copy (ptr.begin (), ptr.end (), newPtr.begin ());
5164  ArrayRCP<LocalOrdinal> newInd (ind.size ());
5165  std::copy (ind.begin (), ind.end (), newInd.begin ());
5166  newmat->setAllValues (newPtr, newInd, newVals1D);
5167 
5168  // We already have the Import and Export (if applicable) objects
5169  // from the graph, so we can save a lot of time by passing them
5170  // in to expertStaticFillComplete.
5171  RCP<const map_type> domainMap = this->getDomainMap ();
5172  RCP<const map_type> rangeMap = this->getRangeMap ();
5173  RCP<const import_type> importer = this->getCrsGraph ()->getImporter ();
5174  RCP<const export_type> exporter = this->getCrsGraph ()->getExporter ();
5175  newmat->expertStaticFillComplete (domainMap, rangeMap, importer, exporter);
5176  }
5177 
5178  return newmat;
5179  }
5180 
5181 
// Debug-only consistency check of the matrix's internal storage state.
//
// When HAVE_TPETRA_DEBUG is defined, each test below reports a violated
// invariant as std::logic_error; when it is not defined, the body is empty.
//
// NOTE(review): listing lines 5184-5185 (the qualified function name and
// parameter list) are absent from this extract; the name is taken from
// tfecfFuncName below.
5182  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5183  void
5186  {
5187 #ifdef HAVE_TPETRA_DEBUG
5188  const char tfecfFuncName[] = "checkInternalState: ";
5189  const char err[] = "Internal state is not consistent. "
5190  "Please report this bug to the Tpetra developers.";
5191 
5192  // This version of the graph (RCP<const crs_graph_type>) must
5193  // always be nonnull.
5194  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5195  staticGraph_.is_null (),
5196  std::logic_error, err);
5197  // myGraph == null means that the matrix has a const ("static")
5198  // graph. Otherwise, the matrix has a dynamic graph (it owns its
5199  // graph).
// If myGraph_ is set, it must refer to the same graph as staticGraph_.
5200  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5201  ! myGraph_.is_null () && myGraph_ != staticGraph_,
5202  std::logic_error, err);
5203  // if matrix is fill complete, then graph must be fill complete
5204  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5205  isFillComplete () && ! staticGraph_->isFillComplete (),
5206  std::logic_error, err << " Specifically, the matrix is fill complete, "
5207  "but its graph is NOT fill complete.");
5208  // if matrix is storage optimized, it should have a 1D allocation
// (checked here as: no 2D allocation may be present)
5209  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5210  isStorageOptimized () && ! values2D_.is_null (),
5211  std::logic_error, err);
5212  // if matrix/graph are static profile, then 2D allocation should not be present
5213  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5214  getProfileType() == StaticProfile && values2D_ != Teuchos::null,
5215  std::logic_error, err);
5216  // if matrix/graph are dynamic profile, then 1D allocation should not be present
5217  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5218  getProfileType() == DynamicProfile && k_values1D_.dimension_0 () > 0,
5219  std::logic_error, err);
5220  // if values are allocated and they are non-zero in number, then
5221  // one of the allocations should be present
5222  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5223  staticGraph_->indicesAreAllocated () &&
5224  staticGraph_->getNodeAllocationSize() > 0 &&
5225  staticGraph_->getNodeNumRows() > 0
5226  && values2D_.is_null () &&
5227  k_values1D_.dimension_0 () == 0,
5228  std::logic_error, err);
5229  // we cannot have both a 1D and 2D allocation
5230  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5231  k_values1D_.dimension_0 () > 0 && values2D_ != Teuchos::null,
5232  std::logic_error, err << " Specifically, k_values1D_ is allocated (has "
5233  "size " << k_values1D_.dimension_0 () << " > 0) and values2D_ is also "
5234  "allocated. CrsMatrix is not suppose to have both a 1-D and a 2-D "
5235  "allocation at the same time.");
5236 #endif
5237  }
5238 
// Build and return a one-line, brace-delimited summary of this matrix:
// optional object label, fill-complete flag, global dimensions and, when
// fill complete, the global number of entries.
//
// NOTE(review): listing lines 5241-5242 (the qualified function name and
// parameter list) are absent from this extract; presumably this is the
// description() method -- confirm against the real source.
5239  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5240  std::string
5243  {
5244  std::ostringstream os;
5245 
5246  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is printed only when it is non-empty.
5247  if (this->getObjectLabel () != "") {
5248  os << "Label: \"" << this->getObjectLabel () << "\", ";
5249  }
5250  if (isFillComplete ()) {
5251  os << "isFillComplete: true"
5252  << ", global dimensions: [" << getGlobalNumRows () << ", "
5253  << getGlobalNumCols () << "]"
5254  << ", global number of entries: " << getGlobalNumEntries ()
5255  << "}";
5256  }
5257  else {
// The entry count is omitted when the matrix is not fill complete.
5258  os << "isFillComplete: false"
5259  << ", global dimensions: [" << getGlobalNumRows () << ", "
5260  << getGlobalNumCols () << "]}";
5261  }
5262  return os.str ();
5263  }
5264 
// Print a description of this matrix to the given output stream.
//
// out: stream to which to write.
// verbLevel: how much to print.  VERB_DEFAULT is treated as VERB_LOW,
//   and VERB_NONE prints nothing at all.  At VERB_LOW, Process 0
//   prints the label, template parameters, and global summary.  At
//   VERB_MEDIUM and above, the row, column, domain, and range Maps
//   are described and each process prints its local counts.  At
//   VERB_HIGH, each process prints the number of entries in each of
//   its rows.  At VERB_EXTREME, each row's (global column index,
//   value) pairs are printed as well.
//
// At VERB_MEDIUM and above, this method calls comm->barrier() inside
// loops over all ranks, so presumably every process in the matrix's
// communicator must call it together.
//
// NOTE(review): listing line 5267 (presumably the class qualifier
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::")
// is missing from this copy of the file -- restore it from the
// original source before compiling.
5265  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5266  void
5268  describe (Teuchos::FancyOStream &out,
5269  const Teuchos::EVerbosityLevel verbLevel) const
5270  {
5271  using std::endl;
5272  using std::setw;
5273  using Teuchos::ArrayView;
5274  using Teuchos::Comm;
5275  using Teuchos::RCP;
5276  using Teuchos::TypeNameTraits;
5277  using Teuchos::VERB_DEFAULT;
5278  using Teuchos::VERB_NONE;
5279  using Teuchos::VERB_LOW;
5280  using Teuchos::VERB_MEDIUM;
5281  using Teuchos::VERB_HIGH;
5282  using Teuchos::VERB_EXTREME;
5283 
      // The default verbosity level is mapped to VERB_LOW.
5284  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5285 
5286  if (vl == VERB_NONE) {
5287  return; // Don't print anything at all
5288  }
5289  // By convention, describe() always begins with a tab.
5290  Teuchos::OSTab tab0 (out);
5291 
5292  RCP<const Comm<int> > comm = this->getComm();
5293  const int myRank = comm->getRank();
5294  const int numProcs = comm->getSize();
      // Column width for the per-row table printed at VERB_HIGH and
      // above: the number of decimal digits in the global row count,
      // but at least 11 (presumably the length of the "Num Entries"
      // column header), plus 2.
5295  size_t width = 1;
5296  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5297  ++width;
5298  }
5299  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5300 
5301  // none: print nothing
5302  // low: print O(1) info from node 0
5303  // medium: print O(P) info, num entries per process
5304  // high: print O(N) info, num entries per row
5305  // extreme: print O(NNZ) info: print indices and values
5306  //
5307  // for medium and higher, print constituent objects at specified verbLevel
5308  if (myRank == 0) {
5309  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5310  }
5311  Teuchos::OSTab tab1 (out);
5312 
      // Global summary; only Process 0 prints it.
5313  if (myRank == 0) {
      // NOTE(review): the label is not followed by a newline, so
      // "Template parameters:" continues on the same output line.
5314  if (this->getObjectLabel () != "") {
5315  out << "Label: \"" << this->getObjectLabel () << "\", ";
5316  }
5317  {
5318  out << "Template parameters:" << endl;
5319  Teuchos::OSTab tab2 (out);
5320  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
5321  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5322  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5323  << "Node: " << TypeNameTraits<Node>::name () << endl;
5324  }
5325  if (isFillComplete()) {
5326  out << "isFillComplete: true" << endl
5327  << "Global dimensions: [" << getGlobalNumRows () << ", "
5328  << getGlobalNumCols () << "]" << endl
5329  << "Global number of entries: " << getGlobalNumEntries () << endl
5330  << "Global number of diagonal entries: " << getGlobalNumDiags ()
5331  << endl << "Global max number of entries in a row: "
5332  << getGlobalMaxNumRowEntries () << endl;
5333  }
5334  else {
5335  out << "isFillComplete: false" << endl
5336  << "Global dimensions: [" << getGlobalNumRows () << ", "
5337  << getGlobalNumCols () << "]" << endl;
5338  }
5339  }
5340 
5341  if (vl < VERB_MEDIUM) {
5342  return; // all done!
5343  }
5344 
      // For each Map below, Process 0 prints the heading, and the
      // Map's own describe() is called by every process that reaches
      // this point.
5345  // Describe the row Map.
5346  if (myRank == 0) {
5347  out << endl << "Row Map:" << endl;
5348  }
5349  if (getRowMap ().is_null ()) {
5350  if (myRank == 0) {
5351  out << "null" << endl;
5352  }
5353  }
5354  else {
5355  if (myRank == 0) {
5356  out << endl;
5357  }
5358  getRowMap ()->describe (out, vl);
5359  }
5360 
5361  // Describe the column Map.
5362  if (myRank == 0) {
5363  out << "Column Map: ";
5364  }
5365  if (getColMap ().is_null ()) {
5366  if (myRank == 0) {
5367  out << "null" << endl;
5368  }
5369  } else if (getColMap () == getRowMap ()) {
5370  if (myRank == 0) {
5371  out << "same as row Map" << endl;
5372  }
5373  } else {
5374  if (myRank == 0) {
5375  out << endl;
5376  }
5377  getColMap ()->describe (out, vl);
5378  }
5379 
5380  // Describe the domain Map.
5381  if (myRank == 0) {
5382  out << "Domain Map: ";
5383  }
5384  if (getDomainMap ().is_null ()) {
5385  if (myRank == 0) {
5386  out << "null" << endl;
5387  }
5388  } else if (getDomainMap () == getRowMap ()) {
5389  if (myRank == 0) {
5390  out << "same as row Map" << endl;
5391  }
5392  } else if (getDomainMap () == getColMap ()) {
5393  if (myRank == 0) {
5394  out << "same as column Map" << endl;
5395  }
5396  } else {
5397  if (myRank == 0) {
5398  out << endl;
5399  }
5400  getDomainMap ()->describe (out, vl);
5401  }
5402 
5403  // Describe the range Map.
5404  if (myRank == 0) {
5405  out << "Range Map: ";
5406  }
5407  if (getRangeMap ().is_null ()) {
5408  if (myRank == 0) {
5409  out << "null" << endl;
5410  }
5411  } else if (getRangeMap () == getDomainMap ()) {
5412  if (myRank == 0) {
5413  out << "same as domain Map" << endl;
5414  }
5415  } else if (getRangeMap () == getRowMap ()) {
5416  if (myRank == 0) {
5417  out << "same as row Map" << endl;
5418  }
5419  } else {
5420  if (myRank == 0) {
5421  out << endl;
5422  }
5423  getRangeMap ()->describe (out, vl);
5424  }
5425 
      // Processes take turns printing in rank order; the barriers
      // sit outside the "myRank == curRank" test, so every process
      // executes them on every iteration.
5426  // O(P) data
5427  for (int curRank = 0; curRank < numProcs; ++curRank) {
5428  if (myRank == curRank) {
5429  out << "Process rank: " << curRank << endl;
5430  Teuchos::OSTab tab2 (out);
5431  if (! staticGraph_->indicesAreAllocated ()) {
5432  out << "Graph indices not allocated" << endl;
5433  }
5434  else {
5435  out << "Number of allocated entries: "
5436  << staticGraph_->getNodeAllocationSize () << endl;
5437  }
5438  out << "Number of entries: " << getNodeNumEntries () << endl;
5439  if (isFillComplete ()) {
5440  out << "Number of diagonal entries: " << getNodeNumDiags () << endl;
5441  }
5442  out << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
5443  << endl;
5444  }
5445  // Give output time to complete by executing some barriers.
5446  comm->barrier ();
5447  comm->barrier ();
5448  comm->barrier ();
5449  }
5450 
5451  if (vl < VERB_HIGH) {
5452  return; // all done!
5453  }
5454 
5455  // O(N) and O(NNZ) data
5456  for (int curRank = 0; curRank < numProcs; ++curRank) {
5457  if (myRank == curRank) {
      // Table header; the (Index,Value) column appears only at
      // VERB_EXTREME.
5458  out << std::setw(width) << "Proc Rank"
5459  << std::setw(width) << "Global Row"
5460  << std::setw(width) << "Num Entries";
5461  if (vl == VERB_EXTREME) {
5462  out << std::setw(width) << "(Index,Value)";
5463  }
5464  out << endl;
5465  for (size_t r = 0; r < getNodeNumRows (); ++r) {
5466  const size_t nE = getNumEntriesInLocalRow(r);
5467  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5468  out << std::setw(width) << myRank
5469  << std::setw(width) << gid
5470  << std::setw(width) << nE;
5471  if (vl == VERB_EXTREME) {
      // Column indices are always printed as global indices: a
      // locally indexed matrix converts them through its column
      // Map.  If the matrix is neither globally nor locally
      // indexed, no entries are printed for the row.
5472  if (isGloballyIndexed()) {
5473  ArrayView<const GlobalOrdinal> rowinds;
5474  ArrayView<const Scalar> rowvals;
5475  getGlobalRowView (gid, rowinds, rowvals);
5476  for (size_t j = 0; j < nE; ++j) {
5477  out << " (" << rowinds[j]
5478  << ", " << rowvals[j]
5479  << ") ";
5480  }
5481  }
5482  else if (isLocallyIndexed()) {
5483  ArrayView<const LocalOrdinal> rowinds;
5484  ArrayView<const Scalar> rowvals;
5485  getLocalRowView (r, rowinds, rowvals);
5486  for (size_t j=0; j < nE; ++j) {
5487  out << " (" << getColMap()->getGlobalElement(rowinds[j])
5488  << ", " << rowvals[j]
5489  << ") ";
5490  }
5491  } // globally or locally indexed
5492  } // vl == VERB_EXTREME
5493  out << endl;
5494  } // for each row r on this process
5495  } // if (myRank == curRank)
5496 
5497  // Give output time to complete
5498  comm->barrier ();
5499  comm->barrier ();
5500  comm->barrier ();
5501  } // for each process p
5502  }
5503 
// Report whether the given source object is an acceptable source for
// a data redistribution into this matrix.
//
// The only check performed is a dynamic_cast of the source object to
// row_matrix_type; the result is true if and only if that cast
// succeeds.  No sizes are compared.
//
// NOTE(review): listing lines 5506-5507 (presumably the class
// qualifier and the function name with its "source" parameter; a
// comment in this file refers to this method as checkSizes()) and
// listing line 5516 (presumably the row_matrix_type typedef) are
// missing from this copy of the file -- restore them from the
// original source before compiling.
5504  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5505  bool
5508  {
5509  // It's not clear what kind of compatibility checks on sizes can
5510  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5511  // compatibility.
5512 
5513  // Currently, the source object must be a RowMatrix with the same
5514  // four template parameters as the target CrsMatrix. We might
5515  // relax this requirement later.
5517  const row_matrix_type* srcRowMat =
5518  dynamic_cast<const row_matrix_type*> (&source);
5519  return (srcRowMat != NULL);
5520  }
5521 
// Copy rows of the source matrix into this (the target) matrix.
//
// The first numSameIDs local rows of the source are combined into
// the rows of this matrix that have the same global index.  Then,
// for each p, the source row with local index permuteFromLIDs[p] is
// combined into the target row with local index permuteToLIDs[p].
//
// If this matrix has a static graph, rows are combined with REPLACE;
// otherwise they are combined with INSERT.
//
// Throws std::invalid_argument if permuteToLIDs and permuteFromLIDs
// have different lengths.  The dynamic_cast of the source object to
// a reference throws if the source is not a row_matrix_type.
//
// NOTE(review): listing lines 5524-5525 (presumably the class
// qualifier and "copyAndPermute (<source parameter>,") are missing
// from this copy of the file -- restore them from the original
// source before compiling.
5522  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5523  void
5526  size_t numSameIDs,
5527  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
5528  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
5529  {
5530  using Teuchos::Array;
5531  using Teuchos::ArrayView;
5532  typedef LocalOrdinal LO;
5533  typedef GlobalOrdinal GO;
5534  typedef node_type NT;
5535  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
5536  const char tfecfFuncName[] = "copyAndPermute: ";
5537 
      // NOTE(review): the message below has no space before "!=", so
      // the first size runs straight into the operator in the output.
5538  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5539  permuteToLIDs.size() != permuteFromLIDs.size(),
5540  std::invalid_argument, "permuteToLIDs.size() = " << permuteToLIDs.size()
5541  << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
5542 
5543  // This dynamic cast should succeed, because we've already tested
5544  // it in checkSizes().
5545  typedef RowMatrix<Scalar, LO, GO, NT> row_matrix_type;
5546  const row_matrix_type& srcMat = dynamic_cast<const row_matrix_type&> (source);
5547 
5548  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5549  //
5550  // Copy the first numSame row from source to target (this matrix).
5551  // This involves copying rows corresponding to LIDs [0, numSame-1].
5552  //
5553  const map_type& srcRowMap = * (srcMat.getRowMap ());
      // Scratch storage for row copies.  It only ever grows, and it
      // is shared by both loops below.
5554  Array<GO> rowInds;
5555  Array<Scalar> rowVals;
5556  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
5557  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5558  // Global ID for the current row index in the source matrix.
5559  // The first numSameIDs GIDs in the two input lists are the
5560  // same, so sourceGID == targetGID in this case.
5561  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5562  const GO targetGID = sourceGID;
5563 
5564  // Input views for the combineGlobalValues() call below.
5565  ArrayView<const GO> rowIndsConstView;
5566  ArrayView<const Scalar> rowValsConstView;
5567 
5568  if (sourceIsLocallyIndexed) {
5569  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5570  if (rowLength > static_cast<size_t> (rowInds.size())) {
5571  rowInds.resize (rowLength);
5572  rowVals.resize (rowLength);
5573  }
5574  // Resizing invalidates an Array's views, so we must make new
5575  // ones, even if rowLength hasn't changed.
5576  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5577  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5578 
5579  // The source matrix is locally indexed, so we have to get a
5580  // copy. Really it's the GIDs that have to be copied (because
5581  // they have to be converted from LIDs).
5582  size_t checkRowLength = 0;
5583  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5584 
5585 #ifdef HAVE_TPETRA_DEBUG
5586  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5587  std::logic_error, "For global row index " << sourceGID << ", the source"
5588  " matrix's getNumEntriesInGlobalRow() method returns a row length of "
5589  << rowLength << ", but the getGlobalRowCopy() method reports that "
5590  "the row length is " << checkRowLength << ". Please report this bug "
5591  "to the Tpetra developers.");
5592 #endif // HAVE_TPETRA_DEBUG
5593 
5594  rowIndsConstView = rowIndsView.view (0, rowLength);
5595  rowValsConstView = rowValsView.view (0, rowLength);
5596  }
5597  else { // source matrix is globally indexed.
5598  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5599  }
5600 
5601  // Combine the data into the target matrix.
5602  if (isStaticGraph()) {
5603  // Applying a permutation to a matrix with a static graph
5604  // means REPLACE-ing entries.
5605  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE);
5606  }
5607  else {
5608  // Applying a permutation to a matrix with a dynamic graph
5609  // means INSERT-ing entries. This has the same effect as
5610  // ADD, if the target graph already has an entry there.
5611  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT);
5612  }
5613  } // For each of the consecutive source and target IDs that are the same
5614 
5615  //
5616  // Permute the remaining rows.
5617  //
      // This loop repeats the per-row logic of the loop above; the
      // only difference is that the source and target global row
      // indices come from the two permutation lists and so may differ.
5618  const map_type& tgtRowMap = * (this->getRowMap ());
5619  const size_t numPermuteToLIDs = static_cast<size_t> (permuteToLIDs.size ());
5620  for (size_t p = 0; p < numPermuteToLIDs; ++p) {
5621  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5622  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5623 
5624  // Input views for the combineGlobalValues() call below.
5625  ArrayView<const GO> rowIndsConstView;
5626  ArrayView<const Scalar> rowValsConstView;
5627 
5628  if (sourceIsLocallyIndexed) {
5629  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5630  if (rowLength > static_cast<size_t> (rowInds.size ())) {
5631  rowInds.resize (rowLength);
5632  rowVals.resize (rowLength);
5633  }
5634  // Resizing invalidates an Array's views, so we must make new
5635  // ones, even if rowLength hasn't changed.
5636  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5637  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5638 
5639  // The source matrix is locally indexed, so we have to get a
5640  // copy. Really it's the GIDs that have to be copied (because
5641  // they have to be converted from LIDs).
5642  size_t checkRowLength = 0;
5643  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5644 
5645 #ifdef HAVE_TPETRA_DEBUG
5646  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5647  std::logic_error, "For the source matrix's global row index "
5648  << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() "
5649  "method returns a row length of " << rowLength << ", but the "
5650  "getGlobalRowCopy() method reports that the row length is "
5651  << checkRowLength << ". Please report this bug to the Tpetra "
5652  "developers.");
5653 #endif // HAVE_TPETRA_DEBUG
5654 
5655  rowIndsConstView = rowIndsView.view (0, rowLength);
5656  rowValsConstView = rowValsView.view (0, rowLength);
5657  }
5658  else {
5659  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5660  }
5661 
5662  // Combine the data into the target matrix.
5663  if (isStaticGraph()) {
5664  this->combineGlobalValues (targetGID, rowIndsConstView,
5665  rowValsConstView, REPLACE);
5666  }
5667  else {
5668  this->combineGlobalValues (targetGID, rowIndsConstView,
5669  rowValsConstView, INSERT);
5670  }
5671  } // For each ID to permute
5672  }
5673 
5674  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5675  void
5677  packAndPrepare (const SrcDistObject& source,
5678  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5679  Teuchos::Array<char>& exports,
5680  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5681  size_t& constantNumPackets,
5682  Distributor& distor)
5683  {
5684  using Teuchos::Array;
5685  using Teuchos::ArrayView;
5686  using Teuchos::av_reinterpret_cast;
5687  typedef LocalOrdinal LO;
5688  typedef GlobalOrdinal GO;
5689  const char tfecfFuncName[] = "packAndPrepare: ";
5690 
5691  // Attempt to cast the source object to RowMatrix. If the cast
5692  // succeeds, use the source object's pack method to pack its data
5693  // for communication. If the source object is really a CrsMatrix,
5694  // this will pick up the CrsMatrix's more efficient override. If
5695  // the RowMatrix cast fails, then the source object doesn't have
5696  // the right type.
5697  //
5698  // FIXME (mfh 30 Jun 2013) We don't even need the RowMatrix to
5699  // have the same Node type. Unfortunately, we don't have a way to
5700  // ask if the RowMatrix is "a RowMatrix with any Node type," since
5701  // RowMatrix doesn't have a base class. A hypothetical
5702  // RowMatrixBase<Scalar, LO, GO> class, which does not currently
5703  // exist, would satisfy this requirement.
5704  //
5705  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
5706  // type doesn't technically need to match the target object's
5707  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
5708  // and GO need not be the same, as long as there is no overflow of
5709  // the indices. However, checking for index overflow is global
5710  // and therefore undesirable.
5711  typedef RowMatrix<Scalar, LO, GO, Node> row_matrix_type;
5712  const row_matrix_type* srcRowMat =
5713  dynamic_cast<const row_matrix_type*> (&source);
5714  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5715  srcRowMat == NULL, std::invalid_argument,
5716  "The source object of the Import or Export operation is neither a "
5717  "CrsMatrix (with the same template parameters as the target object), "
5718  "nor a RowMatrix (with the same first four template parameters as the "
5719  "target object).");
5720 #ifdef HAVE_TPETRA_DEBUG
5721  {
5722  using Teuchos::reduceAll;
5723  std::ostringstream msg;
5724  int lclBad = 0;
5725  try {
5726  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5727  constantNumPackets, distor);
5728  } catch (std::exception& e) {
5729  lclBad = 1;
5730  msg << e.what ();
5731  }
5732  int gblBad = 0;
5733  const Teuchos::Comm<int>& comm = * (this->getComm ());
5734  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
5735  lclBad, Teuchos::outArg (gblBad));
5736  if (gblBad != 0) {
5737  const int myRank = comm.getRank ();
5738  const int numProcs = comm.getSize ();
5739  for (int r = 0; r < numProcs; ++r) {
5740  if (r == myRank && lclBad != 0) {
5741  std::ostringstream os;
5742  os << "Proc " << myRank << ": " << msg.str () << std::endl;
5743  std::cerr << os.str ();
5744  }
5745  comm.barrier ();
5746  comm.barrier ();
5747  comm.barrier ();
5748  }
5749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5750  true, std::logic_error, "pack() threw an exception on one or "
5751  "more participating processes.");
5752  }
5753  }
5754 #else
5755  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5756  constantNumPackets, distor);
5757 #endif // HAVE_TPETRA_DEBUG
5758  }
5759 
5760  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5761  bool
5762  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5763  packRow (char* const numEntOut,
5764  char* const valOut,
5765  char* const indOut,
5766  const size_t numEnt,
5767  const LocalOrdinal lclRow) const
5768  {
5769  using Teuchos::ArrayView;
5770  typedef LocalOrdinal LO;
5771  typedef GlobalOrdinal GO;
5772 
5773  const LO numEntLO = static_cast<LO> (numEnt);
5774  memcpy (numEntOut, &numEntLO, sizeof (LO));
5775  if (this->isLocallyIndexed ()) {
5776  // If the matrix is locally indexed on the calling process, we
5777  // have to use its column Map (which it _must_ have in this
5778  // case) to convert to global indices.
5779  ArrayView<const LO> indIn;
5780  ArrayView<const Scalar> valIn;
5781  this->getLocalRowView (lclRow, indIn, valIn);
5782  const map_type& colMap = * (this->getColMap ());
5783  // Copy column indices one at a time, so that we don't need
5784  // temporary storage.
5785  for (size_t k = 0; k < numEnt; ++k) {
5786  const GO gblIndIn = colMap.getGlobalElement (indIn[k]);
5787  memcpy (indOut + k * sizeof (GO), &gblIndIn, sizeof (GO));
5788  }
5789  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5790  }
5791  else if (this->isGloballyIndexed ()) {
5792  // If the matrix is globally indexed on the calling process,
5793  // then we can use the column indices directly. However, we
5794  // have to get the global row index. The calling process must
5795  // have a row Map, since otherwise it shouldn't be participating
5796  // in packing operations.
5797  ArrayView<const GO> indIn;
5798  ArrayView<const Scalar> valIn;
5799  const map_type& rowMap = * (this->getRowMap ());
5800  const GO gblRow = rowMap.getGlobalElement (lclRow);
5801  this->getGlobalRowView (gblRow, indIn, valIn);
5802  memcpy (indOut, indIn.getRawPtr (), numEnt * sizeof (GO));
5803  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5804  }
5805  else {
5806  if (numEnt != 0) {
5807  return false;
5808  }
5809  }
5810  return true;
5811  }
5812 
5813 
5814  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5815  bool
5816  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5817  unpackRow (Scalar* const valInTmp,
5818  GlobalOrdinal* const indInTmp,
5819  const size_t tmpSize,
5820  const char* const valIn,
5821  const char* const indIn,
5822  const size_t numEnt,
5823  const LocalOrdinal lclRow,
5824  const Tpetra::CombineMode combineMode)
5825  {
5826  if (tmpSize < numEnt || (numEnt != 0 && (valInTmp == NULL || indInTmp == NULL))) {
5827  return false;
5828  }
5829  memcpy (valInTmp, valIn, numEnt * sizeof (Scalar));
5830  memcpy (indInTmp, indIn, numEnt * sizeof (GlobalOrdinal));
5831  const GlobalOrdinal gblRow = this->getRowMap ()->getGlobalElement (lclRow);
5832  Teuchos::ArrayView<Scalar> val ((numEnt == 0) ? NULL : valInTmp, numEnt);
5833  Teuchos::ArrayView<GlobalOrdinal> ind ((numEnt == 0) ? NULL : indInTmp, numEnt);
5834  this->combineGlobalValues (gblRow, ind, val, combineMode);
5835  return true;
5836  }
5837 
5838 
5839  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5840  void
5841  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5842  allocatePackSpace (Teuchos::Array<char>& exports,
5843  size_t& totalNumEntries,
5844  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs) const
5845  {
5846  typedef LocalOrdinal LO;
5847  typedef GlobalOrdinal GO;
5848  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
5849  //const char tfecfFuncName[] = "allocatePackSpace: ";
5850  const size_type numExportLIDs = exportLIDs.size ();
5851 
5852  // Count the total number of entries to send.
5853  totalNumEntries = 0;
5854  for (size_type i = 0; i < numExportLIDs; ++i) {
5855  const LO lclRow = exportLIDs[i];
5856  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
5857  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
5858  // indices as an error. Just consider them nonowned for now.
5859  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
5860  curNumEntries = 0;
5861  }
5862  totalNumEntries += curNumEntries;
5863  }
5864 
5865  // FIXME (mfh 24 Feb 2013) This code is only correct if
5866  // sizeof(Scalar) is a meaningful representation of the amount of
5867  // data in a Scalar instance. (LO and GO are always built-in
5868  // integer types.)
5869  //
5870  // Allocate the exports array. It does NOT need padding for
5871  // alignment, since we use memcpy to write to / read from send /
5872  // receive buffers.
5873  const size_t allocSize =
5874  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
5875  totalNumEntries * (sizeof (Scalar) + sizeof (GO));
5876  if (static_cast<size_t> (exports.size ()) < allocSize) {
5877  exports.resize (allocSize);
5878  }
5879  }
5880 
5881  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5882  void
5884  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5885  Teuchos::Array<char>& exports,
5886  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5887  size_t& constantNumPackets,
5888  Distributor& distor) const
5889  {
5890  using Teuchos::Array;
5891  using Teuchos::ArrayView;
5892  using Teuchos::av_reinterpret_cast;
5893  using Teuchos::RCP;
5894  typedef LocalOrdinal LO;
5895  typedef GlobalOrdinal GO;
5896  typedef typename ArrayView<const LO>::size_type size_type;
5897  const char tfecfFuncName[] = "pack: ";
5898 
5899  const size_type numExportLIDs = exportLIDs.size ();
5900  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5901  numExportLIDs != numPacketsPerLID.size (), std::invalid_argument,
5902  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5903  " = " << numPacketsPerLID.size () << ".");
5904 
5905  // Setting this to zero tells the caller to expect a possibly
5906  // different ("nonconstant") number of packets per local index
5907  // (i.e., a possibly different number of entries per row).
5908  constantNumPackets = 0;
5909 
5910  // The pack buffer 'exports' enters this method possibly
5911  // unallocated. Do the first two parts of "Count, allocate, fill,
5912  // compute."
5913  size_t totalNumEntries = 0;
5914  allocatePackSpace (exports, totalNumEntries, exportLIDs);
5915  const size_t bufSize = static_cast<size_t> (exports.size ());
5916 
5917  // Compute the number of "packets" (in this case, bytes) per
5918  // export LID (in this case, local index of the row to send), and
5919  // actually pack the data.
5920  //
5921  // FIXME (mfh 24 Feb 2013, 25 Jan 2015) This code is only correct
5922  // if sizeof(Scalar) is a meaningful representation of the amount
5923  // of data in a Scalar instance. (LO and GO are always built-in
5924  // integer types.)
5925 
5926  // Variables for error reporting in the loop.
5927  size_type firstBadIndex = 0; // only valid if outOfBounds == true.
5928  size_t firstBadOffset = 0; // only valid if outOfBounds == true.
5929  size_t firstBadNumBytes = 0; // only valid if outOfBounds == true.
5930  bool outOfBounds = false;
5931  bool packErr = false;
5932 
5933  char* const exportsRawPtr = exports.getRawPtr ();
5934  size_t offset = 0; // current index into 'exports' array.
5935  for (size_type i = 0; i < numExportLIDs; ++i) {
5936  const LO lclRow = exportLIDs[i];
5937  const size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
5938 
5939  // Only pad this row if it has a nonzero number of entries.
5940  if (numEnt == 0) {
5941  numPacketsPerLID[i] = 0;
5942  }
5943  else {
5944  char* const numEntBeg = exportsRawPtr + offset;
5945  char* const numEntEnd = numEntBeg + sizeof (LO);
5946  char* const valBeg = numEntEnd;
5947  char* const valEnd = valBeg + numEnt * sizeof (Scalar);
5948  char* const indBeg = valEnd;
5949  const size_t numBytes = sizeof (LO) +
5950  numEnt * (sizeof (Scalar) + sizeof (GO));
5951  if (offset > bufSize || offset + numBytes > bufSize) {
5952  firstBadIndex = i;
5953  firstBadOffset = offset;
5954  firstBadNumBytes = numBytes;
5955  outOfBounds = true;
5956  break;
5957  }
5958  packErr = ! packRow (numEntBeg, valBeg, indBeg, numEnt, lclRow);
5959  if (packErr) {
5960  firstBadIndex = i;
5961  firstBadOffset = offset;
5962  firstBadNumBytes = numBytes;
5963  break;
5964  }
5965  // numPacketsPerLID[i] is the number of "packets" in the
5966  // current local row i. Packet=char (really "byte") so use
5967  // the number of bytes of the packed data for that row.
5968  numPacketsPerLID[i] = numBytes;
5969  offset += numBytes;
5970  }
5971  }
5972 
5973  TEUCHOS_TEST_FOR_EXCEPTION(
5974  outOfBounds, std::logic_error, "First invalid offset into 'exports' "
5975  "pack buffer at index i = " << firstBadIndex << ". exportLIDs[i]: "
5976  << exportLIDs[firstBadIndex] << ", bufSize: " << bufSize << ", offset: "
5977  << firstBadOffset << ", numBytes: " << firstBadNumBytes << ".");
5978  TEUCHOS_TEST_FOR_EXCEPTION(
5979  packErr, std::logic_error, "First error in packRow() at index i = "
5980  << firstBadIndex << ". exportLIDs[i]: " << exportLIDs[firstBadIndex]
5981  << ", bufSize: " << bufSize << ", offset: " << firstBadOffset
5982  << ", numBytes: " << firstBadNumBytes << ".");
5983  }
5984 
5985  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5986  void
5988  combineGlobalValues (const GlobalOrdinal globalRowIndex,
5989  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
5990  const Teuchos::ArrayView<const Scalar>& values,
5991  const Tpetra::CombineMode combineMode)
5992  {
5993  const char tfecfFuncName[] = "combineGlobalValues: ";
5994 
5995  if (isStaticGraph ()) {
5996  // INSERT doesn't make sense for a static graph, since you
5997  // aren't allowed to change the structure of the graph.
5998  // However, all the other combine modes work.
5999  if (combineMode == ADD) {
6000  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6001  }
6002  else if (combineMode == REPLACE) {
6003  replaceGlobalValues (globalRowIndex, columnIndices, values);
6004  }
6005  else if (combineMode == ABSMAX) {
6006  using Details::AbsMax;
6007  AbsMax<Scalar> f;
6008  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6009  columnIndices,
6010  values, f);
6011  }
6012  else if (combineMode == INSERT) {
6013  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6014  isStaticGraph () && combineMode == INSERT, std::invalid_argument,
6015  "INSERT combine mode is not allowed if the matrix has a static graph "
6016  "(i.e., was constructed with the CrsMatrix constructor that takes a "
6017  "const CrsGraph pointer).");
6018  }
6019  else {
6020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6021  true, std::logic_error, "Invalid combine mode; should never get "
6022  "here! Please report this bug to the Tpetra developers.");
6023  }
6024  }
6025  else { // The matrix has a dynamic graph.
6026  if (combineMode == ADD || combineMode == INSERT) {
6027  // For a dynamic graph, all incoming column indices are
6028  // inserted into the target graph. Duplicate indices will
6029  // have their values summed. In this context, ADD and INSERT
6030  // are equivalent. We need to call insertGlobalValues()
6031  // anyway if the column indices don't yet exist in this row,
6032  // so we just call insertGlobalValues() for both cases.
6033  insertGlobalValuesFiltered (globalRowIndex, columnIndices, values);
6034  }
6035  // FIXME (mfh 14 Mar 2012):
6036  //
6037  // Implementing ABSMAX or REPLACE for a dynamic graph would
6038  // require modifying assembly to attach a possibly different
6039  // combine mode to each inserted (i, j, A_ij) entry. For
6040  // example, consider two different Export operations to the same
6041  // target CrsMatrix, the first with ABSMAX combine mode and the
6042  // second with REPLACE. This isn't a common use case, so we
6043  // won't mess with it for now.
6044  else if (combineMode == ABSMAX) {
6045  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6046  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
6047  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6048  "implemented.");
6049  }
6050  else if (combineMode == REPLACE) {
6051  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6052  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
6053  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6054  "implemented.");
6055  }
6056  else {
6057  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6058  true, std::logic_error, "Should never get here! Please report this "
6059  "bug to the Tpetra developers.");
6060  }
6061  }
6062  }
6063 
6064 
6065  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6066  void
6068  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
6069  const Teuchos::ArrayView<const char>& imports,
6070  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6071  size_t constantNumPackets,
6072  Distributor& distor,
6073  CombineMode combineMode)
6074  {
6075 #ifdef HAVE_TPETRA_DEBUG
6076  const char tfecfFuncName[] = "unpackAndCombine: ";
6077  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
6078  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
6079  const int numValidModes = 4;
6080 
6081  if (std::find (validModes, validModes+numValidModes, combineMode) ==
6082  validModes+numValidModes) {
6083  std::ostringstream os;
6084  os << "Invalid combine mode. Valid modes are {";
6085  for (int k = 0; k < numValidModes; ++k) {
6086  os << validModeNames[k];
6087  if (k < numValidModes - 1) {
6088  os << ", ";
6089  }
6090  }
6091  os << "}.";
6092  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6093  true, std::invalid_argument, os.str ());
6094  }
6095 
6096  {
6097  using Teuchos::reduceAll;
6098  std::ostringstream msg;
6099  int lclBad = 0;
6100  try {
6101  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
6102  constantNumPackets, distor, combineMode);
6103  } catch (std::exception& e) {
6104  lclBad = 1;
6105  msg << e.what ();
6106  }
6107  int gblBad = 0;
6108  const Teuchos::Comm<int>& comm = * (this->getComm ());
6109  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6110  lclBad, Teuchos::outArg (gblBad));
6111  if (gblBad != 0) {
6112  const int myRank = comm.getRank ();
6113  const int numProcs = comm.getSize ();
6114  for (int r = 0; r < numProcs; ++r) {
6115  if (r == myRank && lclBad != 0) {
6116  std::ostringstream os;
6117  os << "Proc " << myRank << ": " << msg.str () << std::endl;
6118  std::cerr << os.str ();
6119  }
6120  comm.barrier ();
6121  comm.barrier ();
6122  comm.barrier ();
6123  }
6124  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6125  true, std::logic_error, "unpackAndCombineImpl() threw an "
6126  "exception on one or more participating processes.");
6127  }
6128  }
6129 #else
6130  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
6131  constantNumPackets, distor, combineMode);
6132 #endif // HAVE_TPETRA_DEBUG
6133  }
6134 
6135  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6136  void
6138  unpackAndCombineImpl (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
6139  const Teuchos::ArrayView<const char>& imports,
6140  const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
6141  size_t constantNumPackets,
6142  Distributor & /* distor */,
6143  CombineMode combineMode)
6144  {
6145  using Teuchos::Array;
6146  typedef LocalOrdinal LO;
6147  typedef GlobalOrdinal GO;
6148  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
6149  const char tfecfFuncName[] = "unpackAndCombine: ";
6150 
6151 #ifdef HAVE_TPETRA_DEBUG
6152  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
6153  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
6154  const int numValidModes = 4;
6155 
6156  if (std::find (validModes, validModes+numValidModes, combineMode) ==
6157  validModes+numValidModes) {
6158  std::ostringstream os;
6159  os << "Invalid combine mode. Valid modes are {";
6160  for (int k = 0; k < numValidModes; ++k) {
6161  os << validModeNames[k];
6162  if (k < numValidModes - 1) {
6163  os << ", ";
6164  }
6165  }
6166  os << "}.";
6167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6168  true, std::invalid_argument, os.str ());
6169  }
6170 #endif // HAVE_TPETRA_DEBUG
6171 
6172  const size_type numImportLIDs = importLIDs.size ();
6173  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6174  numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
6175  "importLIDs.size() = " << numImportLIDs << " != numPacketsPerLID.size()"
6176  << " = " << numPacketsPerLID.size () << ".");
6177 
6178  // If a sanity check fails, keep track of some state at the
6179  // "first" place where it fails. After the first failure, "run
6180  // through the motions" until the end of this method, then raise
6181  // an error with an informative message.
6182  size_type firstBadIndex = 0;
6183  size_t firstBadOffset = 0;
6184  size_t firstBadExpectedNumBytes = 0;
6185  size_t firstBadNumBytes = 0;
6186  LO firstBadNumEnt = 0;
6187  // We have sanity checks for three kinds of errors:
6188  //
6189  // 1. Offset into array of all the incoming data (for all rows)
6190  // is out of bounds
6191  // 2. Too few bytes of incoming data for a row, given the
6192  // reported number of entries in those incoming data
6193  // 3. Error in unpacking the row's incoming data
6194  //
6195  bool outOfBounds = false;
6196  bool wrongNumBytes = false;
6197  bool unpackErr = false;
6198 
6199  const size_t bufSize = static_cast<size_t> (imports.size ());
6200  const char* const importsRawPtr = imports.getRawPtr ();
6201  size_t offset = 0;
6202 
6203  // Temporary storage for incoming values and indices. We need
6204  // this because the receive buffer does not align storage; it's
6205  // just contiguous bytes. In order to avoid violating ANSI
6206  // aliasing rules, we memcpy each incoming row's data into these
6207  // temporary arrays. We double their size every time we run out
6208  // of storage.
6209  Array<Scalar> valInTmp;
6210  Array<GO> indInTmp;
6211  for (size_type i = 0; i < numImportLIDs; ++i) {
6212  const LO lclRow = importLIDs[i];
6213  const size_t numBytes = numPacketsPerLID[i];
6214 
6215  if (numBytes > 0) { // there is actually something in the row
6216  const char* const numEntBeg = importsRawPtr + offset;
6217  const char* const numEntEnd = numEntBeg + sizeof (LO);
6218 
6219  // Now we know how many entries to expect in the received data
6220  // for this row.
6221  LO numEnt = 0;
6222  memcpy (&numEnt, numEntBeg, sizeof (LO));
6223 
6224  const char* const valBeg = numEntEnd;
6225  const char* const valEnd =
6226  valBeg + static_cast<size_t> (numEnt) * sizeof (Scalar);
6227  const char* const indBeg = valEnd;
6228  const size_t expectedNumBytes = sizeof (LO) +
6229  static_cast<size_t> (numEnt) * (sizeof (Scalar) + sizeof (GO));
6230 
6231  if (expectedNumBytes > numBytes) {
6232  firstBadIndex = i;
6233  firstBadOffset = offset;
6234  firstBadExpectedNumBytes = expectedNumBytes;
6235  firstBadNumBytes = numBytes;
6236  firstBadNumEnt = numEnt;
6237  wrongNumBytes = true;
6238  break;
6239  }
6240  if (offset > bufSize || offset + numBytes > bufSize) {
6241  firstBadIndex = i;
6242  firstBadOffset = offset;
6243  firstBadExpectedNumBytes = expectedNumBytes;
6244  firstBadNumBytes = numBytes;
6245  firstBadNumEnt = numEnt;
6246  outOfBounds = true;
6247  break;
6248  }
6249  size_t tmpNumEnt = static_cast<size_t> (valInTmp.size ());
6250  if (tmpNumEnt < static_cast<size_t> (numEnt) ||
6251  static_cast<size_t> (indInTmp.size ()) < static_cast<size_t> (numEnt)) {
6252  // Double the size of the temporary arrays for incoming data.
6253  tmpNumEnt = std::max (static_cast<size_t> (numEnt), tmpNumEnt * 2);
6254  valInTmp.resize (tmpNumEnt);
6255  indInTmp.resize (tmpNumEnt);
6256  }
6257  unpackErr =
6258  ! unpackRow (valInTmp.getRawPtr (), indInTmp.getRawPtr (), tmpNumEnt,
6259  valBeg, indBeg, numEnt, lclRow, combineMode);
6260  if (unpackErr) {
6261  firstBadIndex = i;
6262  firstBadOffset = offset;
6263  firstBadExpectedNumBytes = expectedNumBytes;
6264  firstBadNumBytes = numBytes;
6265  firstBadNumEnt = numEnt;
6266  break;
6267  }
6268  offset += numBytes;
6269  }
6270  }
6271 
6272  if (wrongNumBytes || outOfBounds || unpackErr) {
6273  std::ostringstream os;
6274  os << " importLIDs[i]: " << importLIDs[firstBadIndex]
6275  << ", bufSize: " << bufSize
6276  << ", offset: " << firstBadOffset
6277  << ", numBytes: " << firstBadNumBytes
6278  << ", expectedNumBytes: " << firstBadExpectedNumBytes
6279  << ", numEnt: " << firstBadNumEnt;
6280  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6281  wrongNumBytes, std::logic_error, "At index i = " << firstBadIndex
6282  << ", expectedNumBytes > numBytes." << os.str ());
6283  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6284  outOfBounds, std::logic_error, "First invalid offset into 'imports' "
6285  "unpack buffer at index i = " << firstBadIndex << "." << os.str ());
6286  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6287  unpackErr, std::logic_error, "First error in unpackRow() at index i = "
6288  << firstBadIndex << "." << os.str ());
6289  }
6290  }
6291 
6292  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6293  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
6295  getColumnMapMultiVector (const MV& X_domainMap,
6296  const bool force) const
6297  {
6298  using Teuchos::null;
6299  using Teuchos::RCP;
6300  using Teuchos::rcp;
6301 
6302  TEUCHOS_TEST_FOR_EXCEPTION(
6303  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
6304  "MapMultiVector: You may only call this method if the matrix has a "
6305  "column Map. If the matrix does not yet have a column Map, you should "
6306  "first call fillComplete (with domain and range Map if necessary).");
6307 
6308  // If the graph is not fill complete, then the Import object (if
6309  // one should exist) hasn't been constructed yet.
6310  TEUCHOS_TEST_FOR_EXCEPTION(
6311  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6312  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
6313  "this matrix's graph is fill complete.");
6314 
6315  const size_t numVecs = X_domainMap.getNumVectors ();
6316  RCP<const import_type> importer = this->getGraph ()->getImporter ();
6317  RCP<const map_type> colMap = this->getColMap ();
6318 
6319  RCP<MV> X_colMap; // null by default
6320 
6321  // If the Import object is trivial (null), then we don't need a
6322  // separate column Map multivector. Just return null in that
6323  // case. The caller is responsible for knowing not to use the
6324  // returned null pointer.
6325  //
6326  // If the Import is nontrivial, then we do need a separate
6327  // column Map multivector for the Import operation. Check in
6328  // that case if we have to (re)create the column Map
6329  // multivector.
6330  if (! importer.is_null () || force) {
6331  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
6332  X_colMap = rcp (new MV (colMap, numVecs));
6333 
6334  // Cache the newly created multivector for later reuse.
6335  importMV_ = X_colMap;
6336  }
6337  else { // Yay, we can reuse the cached multivector!
6338  X_colMap = importMV_;
6339  // mfh 09 Jan 2013: We don't have to fill with zeros first,
6340  // because the Import uses INSERT combine mode, which overwrites
6341  // existing entries.
6342  //
6343  //X_colMap->putScalar (ZERO);
6344  }
6345  }
6346  return X_colMap;
6347  }
6348 
  /// \brief Return the row-Map multivector to use with the graph's
  ///   Export, creating and caching it (in exportMV_) if necessary.
  ///
  /// Only the number of vectors of \c Y_rangeMap is read.
  ///
  /// \param force [in] If true, return a multivector even when the
  ///   graph's Export object is null.
  ///
  /// \return Null if the graph's Export is null and \c force is false;
  ///   otherwise the cached multivector, which is re-created over the
  ///   row Map whenever it is null or its number of vectors differs
  ///   from that of \c Y_rangeMap.
  ///
  /// \throw std::runtime_error if the matrix's graph is not fill
  ///   complete.
  ///
  /// NOTE(review): the lines that name this method and declare
  /// \c Y_rangeMap do not appear in this listing; confirm the exact
  /// signature against the class declaration.
6349  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6350  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
6353  const bool force) const
6354  {
6355  using Teuchos::null;
6356  using Teuchos::RCP;
6357  using Teuchos::rcp;
6358 
6359  // If the graph is not fill complete, then the Export object (if
6360  // one should exist) hasn't been constructed yet.
6361  TEUCHOS_TEST_FOR_EXCEPTION(
6362  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6363  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
6364  "matrix's graph is fill complete.");
6365 
6366  const size_t numVecs = Y_rangeMap.getNumVectors ();
6367  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
6368  // Every version of the constructor takes either a row Map, or a
6369  // graph (all of whose constructors take a row Map). Thus, the
6370  // matrix always has a row Map.
6371  RCP<const map_type> rowMap = this->getRowMap ();
6372 
6373  RCP<MV> Y_rowMap; // null by default
6374 
6375  // If the Export object is trivial (null), then we don't need a
6376  // separate row Map multivector. Just return null in that case.
6377  // The caller is responsible for knowing not to use the returned
6378  // null pointer.
6379  //
6380  // If the Export is nontrivial, then we do need a separate row
6381  // Map multivector for the Export operation. Check in that case
6382  // if we have to (re)create the row Map multivector.
6383  if (! exporter.is_null () || force) {
  // The cache is keyed only on the number of vectors.
6384  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
6385  Y_rowMap = rcp (new MV (rowMap, numVecs));
6386  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
6387  }
6388  else { // Yay, we can reuse the cached multivector!
6389  Y_rowMap = exportMV_;
6390  }
6391  }
6392  return Y_rowMap;
6393  }
6394 
  /// \brief Forward \c newMap to the graph's
  ///   removeEmptyProcessesInPlace(), then resynchronize this matrix's
  ///   own Map and graph pointer with the modified graph.
  ///
  /// \param newMap [in] Passed unchanged to
  ///   myGraph_->removeEmptyProcessesInPlace().
  ///
  /// \throw std::logic_error if \c myGraph_ is null, that is, if the
  ///   matrix was created with a constant graph that it is not allowed
  ///   to modify.
6395  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6396  void
6398  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6399  {
6400  TEUCHOS_TEST_FOR_EXCEPTION(
6401  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
6402  "removeEmptyProcessesInPlace: This method does not work when the matrix "
6403  "was created with a constant graph (that is, when it was created using "
6404  "the version of its constructor that takes an RCP<const CrsGraph>). "
6405  "This is because the matrix is not allowed to modify the graph in that "
6406  "case, but removing empty processes requires modifying the graph.");
  // The graph must be modified first; the two assignments below read
  // their values from the graph after it has been changed.
6407  myGraph_->removeEmptyProcessesInPlace (newMap);
6408  // Even though CrsMatrix's row Map (as returned by getRowMap())
6409  // comes from its CrsGraph, CrsMatrix still implements DistObject,
6410  // so we also have to change the DistObject's Map.
6411  this->map_ = this->getRowMap ();
6412  // In the nonconst graph case, staticGraph_ is just a const
6413  // pointer to myGraph_. This assignment is probably redundant,
6414  // but it doesn't hurt.
6415  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
6416  }
6417 
  /// \brief Compute and return C = alpha*A + beta*B, where B is
  ///   \c *this, as a new matrix.
  ///
  /// The result's row Map is B's row Map.  If A and B have the same
  /// row Map, the result is allocated with StaticProfile, using the
  /// sum of the two matrices' per-row entry counts as the bound for
  /// each row (a matrix whose coefficient is zero is not counted).
  /// Otherwise the result uses DynamicProfile.  Each row of A, then
  /// each row of B, is copied out by global index, scaled by its
  /// coefficient unless the coefficient is one, and inserted into the
  /// result with insertGlobalValues().  A matrix whose coefficient is
  /// zero is not read at all.
  ///
  /// \param alpha [in] Coefficient of A.
  /// \param beta [in] Coefficient of B (\c *this).
  /// \param domainMap [in] Domain Map of the result.  If null, B's
  ///   domain Map is used if B has one, else A's.
  /// \param rangeMap [in] Range Map of the result.  If null, B's range
  ///   Map is used if B has one, else A's.
  /// \param params [in/out] If nonnull, the Boolean entry
  ///   "Call fillComplete" (default true) controls whether the result
  ///   is fill complete on return; the sublists "Constructor
  ///   parameters" and "fillComplete parameters" are passed to the
  ///   result's constructor and fillComplete() call, respectively.
  ///
  /// \return The result matrix, as a pointer to its RowMatrix base.
  ///
  /// \throw std::invalid_argument if no domain (or range) Map was
  ///   supplied and neither A nor B has one.  Debug builds also check
  ///   that the Maps of A, B, and the arguments agree.
  ///
  /// NOTE(review): the declarations of the parameter \c A and of the
  /// typedefs \c row_matrix_type and \c crs_matrix_type used below do
  /// not appear in this listing; confirm them against the original
  /// source file.
6418  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6419  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
6421  add (const Scalar& alpha,
6423  const Scalar& beta,
6424  const Teuchos::RCP<const map_type>& domainMap,
6425  const Teuchos::RCP<const map_type>& rangeMap,
6426  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6427  {
6428  using Teuchos::Array;
6429  using Teuchos::ArrayRCP;
6430  using Teuchos::ArrayView;
6431  using Teuchos::ParameterList;
6432  using Teuchos::RCP;
6433  using Teuchos::rcp;
6434  using Teuchos::rcp_implicit_cast;
6435  using Teuchos::sublist;
6436  typedef LocalOrdinal LO;
6437  typedef GlobalOrdinal GO;
6440 
6441  const crs_matrix_type& B = *this; // a convenient abbreviation
6442  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
6443  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
6444 
6445  // If the user didn't supply a domain or range Map, then try to
6446  // get one from B first (if it has them), then from A (if it has
6447  // them). If we don't have any domain or range Maps, scold the
6448  // user.
6449  RCP<const map_type> A_domainMap = A.getDomainMap ();
6450  RCP<const map_type> A_rangeMap = A.getRangeMap ();
6451  RCP<const map_type> B_domainMap = B.getDomainMap ();
6452  RCP<const map_type> B_rangeMap = B.getRangeMap ();
6453 
6454  RCP<const map_type> theDomainMap = domainMap;
6455  RCP<const map_type> theRangeMap = rangeMap;
6456 
6457  if (domainMap.is_null ()) {
6458  if (B_domainMap.is_null ()) {
6459  TEUCHOS_TEST_FOR_EXCEPTION(
6460  A_domainMap.is_null (), std::invalid_argument,
6461  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
6462  "then you must supply a nonnull domain Map to this method.");
6463  theDomainMap = A_domainMap;
6464  } else {
6465  theDomainMap = B_domainMap;
6466  }
6467  }
6468  if (rangeMap.is_null ()) {
6469  if (B_rangeMap.is_null ()) {
6470  TEUCHOS_TEST_FOR_EXCEPTION(
6471  A_rangeMap.is_null (), std::invalid_argument,
6472  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
6473  "then you must supply a nonnull range Map to this method.");
6474  theRangeMap = A_rangeMap;
6475  } else {
6476  theRangeMap = B_rangeMap;
6477  }
6478  }
6479 
6480 #ifdef HAVE_TPETRA_DEBUG
6481  // In a debug build, check that A and B have matching domain and
6482  // range Maps, if they have domain and range Maps at all. (If
6483  // they aren't fill complete, then they may not yet have them.)
6484  if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
6485  if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6486  TEUCHOS_TEST_FOR_EXCEPTION(
6487  ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
6488  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map "
6489  "which is the same as (isSameAs) this RowMatrix's domain Map.");
6490  TEUCHOS_TEST_FOR_EXCEPTION(
6491  ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
6492  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map "
6493  "which is the same as (isSameAs) this RowMatrix's range Map.");
6494  TEUCHOS_TEST_FOR_EXCEPTION(
6495  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6496  std::invalid_argument,
6497  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6498  "(isSameAs) this RowMatrix's domain Map.");
6499  TEUCHOS_TEST_FOR_EXCEPTION(
6500  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6501  std::invalid_argument,
6502  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6503  "(isSameAs) this RowMatrix's range Map.");
6504  }
6505  }
6506  else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6507  TEUCHOS_TEST_FOR_EXCEPTION(
6508  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6509  std::invalid_argument,
6510  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6511  "(isSameAs) this RowMatrix's domain Map.");
6512  TEUCHOS_TEST_FOR_EXCEPTION(
6513  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6514  std::invalid_argument,
6515  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6516  "(isSameAs) this RowMatrix's range Map.");
6517  }
6518  else {
6519  TEUCHOS_TEST_FOR_EXCEPTION(
6520  domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
6521  "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range "
6522  "Map, then you must supply a nonnull domain and range Map to this "
6523  "method.");
6524  }
6525 #endif // HAVE_TPETRA_DEBUG
6526 
6527  // What parameters do we pass to C's constructor? Do we call
6528  // fillComplete on C after filling it? And if so, what parameters
6529  // do we pass to C's fillComplete call?
6530  bool callFillComplete = true;
6531  RCP<ParameterList> constructorSublist;
6532  RCP<ParameterList> fillCompleteSublist;
6533  if (! params.is_null ()) {
6534  callFillComplete = params->get ("Call fillComplete", callFillComplete);
6535  constructorSublist = sublist (params, "Constructor parameters");
6536  fillCompleteSublist = sublist (params, "fillComplete parameters");
6537  }
6538 
6539  RCP<const map_type> A_rowMap = A.getRowMap ();
6540  RCP<const map_type> B_rowMap = B.getRowMap ();
6541  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
6542  RCP<crs_matrix_type> C; // The result matrix.
6543 
6544  // If A and B's row Maps are the same, we can compute an upper
6545  // bound on the number of entries in each row of C, before
6546  // actually computing the sum. A reasonable upper bound is the
6547  // sum of the two entry counts in each row. If we choose this as
6548  // the actual per-row upper bound, we can use static profile.
6549  if (A_rowMap->isSameAs (*B_rowMap)) {
6550  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6551  ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
6552 
6553  // Get the number of entries in each row of A.
6554  if (alpha != ZERO) {
6555  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6556  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6557  C_maxNumEntriesPerRow[localRow] += A_numEntries;
6558  }
6559  }
6560  // Get the number of entries in each row of B.
6561  if (beta != ZERO) {
6562  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6563  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6564  C_maxNumEntriesPerRow[localRow] += B_numEntries;
6565  }
6566  }
6567  // Construct the result matrix C.
6568  if (constructorSublist.is_null ()) {
6569  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6570  StaticProfile));
6571  } else {
6572  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6573  StaticProfile, constructorSublist));
6574  }
6575  // Since A and B have the same row Maps, we could add them
6576  // together all at once and merge values before we call
6577  // insertGlobalValues. However, we don't really need to, since
6578  // we've already allocated enough space in each row of C for C
6579  // to do the merge itself.
6580  }
6581  else { // the row Maps of A and B are not the same
6582  // Construct the result matrix C.
6583  if (constructorSublist.is_null ()) {
6584  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
6585  } else {
6586  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
6587  constructorSublist));
6588  }
6589  }
6590 
6591 #ifdef HAVE_TPETRA_DEBUG
6592  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
6593  "Tpetra::RowMatrix::add: C should not be null at this point. "
6594  "Please report this bug to the Tpetra developers.");
6595 #endif // HAVE_TPETRA_DEBUG
6596  //
6597  // Compute C = alpha*A + beta*B.
6598  //
  // Scratch arrays shared by both passes below; they only ever grow.
6599  Array<GO> ind;
6600  Array<Scalar> val;
6601 
6602  if (alpha != ZERO) {
6603  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6604  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
6605  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6606  const GO globalRow = A_rowMap->getGlobalElement (localRow);
6607  if (A_numEntries > static_cast<size_t> (ind.size ())) {
6608  ind.resize (A_numEntries);
6609  val.resize (A_numEntries);
6610  }
6611  ArrayView<GO> indView = ind (0, A_numEntries);
6612  ArrayView<Scalar> valView = val (0, A_numEntries);
6613  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
6614 
6615  if (alpha != ONE) {
6616  for (size_t k = 0; k < A_numEntries; ++k) {
6617  valView[k] *= alpha;
6618  }
6619  }
6620  C->insertGlobalValues (globalRow, indView, valView);
6621  }
6622  }
6623 
6624  if (beta != ZERO) {
6625  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
6626  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
6627  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6628  const GO globalRow = B_rowMap->getGlobalElement (localRow);
6629  if (B_numEntries > static_cast<size_t> (ind.size ())) {
6630  ind.resize (B_numEntries);
6631  val.resize (B_numEntries);
6632  }
6633  ArrayView<GO> indView = ind (0, B_numEntries);
6634  ArrayView<Scalar> valView = val (0, B_numEntries);
6635  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
6636 
6637  if (beta != ONE) {
6638  for (size_t k = 0; k < B_numEntries; ++k) {
6639  valView[k] *= beta;
6640  }
6641  }
6642  C->insertGlobalValues (globalRow, indView, valView);
6643  }
6644  }
6645 
6646  if (callFillComplete) {
6647  if (fillCompleteSublist.is_null ()) {
6648  C->fillComplete (theDomainMap, theRangeMap);
6649  } else {
6650  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
6651  }
6652  }
6653  return rcp_implicit_cast<row_matrix_type> (C);
6654  }
6655 
6656  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6657  void
6660  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6661  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6662  const Teuchos::RCP<const map_type>& domainMap,
6663  const Teuchos::RCP<const map_type>& rangeMap,
6664  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6665  {
6667  using Teuchos::ArrayRCP;
6668  using Teuchos::ArrayView;
6669  using Teuchos::Comm;
6670  using Teuchos::ParameterList;
6671  using Teuchos::RCP;
6672  typedef LocalOrdinal LO;
6673  typedef GlobalOrdinal GO;
6674  typedef node_type NT;
6675  typedef CrsMatrix<Scalar, LO, GO, NT> this_type;
6676  typedef Vector<int, LO, GO, NT> IntVectorType;
6677 
6678 #ifdef HAVE_TPETRA_MMM_TIMINGS
6679  std::string label;
6680  if(!params.is_null())
6681  label = params->get("Timer Label",label);
6682  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
6683  using Teuchos::TimeMonitor;
6684  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-1"))));
6685 #endif
6686 
6687  // Make sure that the input argument rowTransfer is either an
6688  // Import or an Export. Import and Export are the only two
6689  // subclasses of Transfer that we defined, but users might
6690  // (unwisely, for now at least) decide to implement their own
6691  // subclasses. Exclude this possibility.
6692  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
6693  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
6694  TEUCHOS_TEST_FOR_EXCEPTION(
6695  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
6696  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
6697  "argument must be either an Import or an Export, and its template "
6698  "parameters must match the corresponding template parameters of the "
6699  "CrsMatrix.");
6700 
6701  // Make sure that the input argument domainTransfer is either an
6702  // Import or an Export. Import and Export are the only two
6703  // subclasses of Transfer that we defined, but users might
6704  // (unwisely, for now at least) decide to implement their own
6705  // subclasses. Exclude this possibility.
6706  Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
6707  Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
6708 
6709  if(! domainTransfer.is_null()) {
6710  TEUCHOS_TEST_FOR_EXCEPTION(
6711  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6712  "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
6713  "argument must be either an Import or an Export, and its template "
6714  "parameters must match the corresponding template parameters of the "
6715  "CrsMatrix.");
6716 
6717  TEUCHOS_TEST_FOR_EXCEPTION(
6718  ( xferAsImport != NULL || ! xferDomainAsImport.is_null() ) &&
6719  (( xferAsImport != NULL && xferDomainAsImport.is_null() ) ||
6720  ( xferAsImport == NULL && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6721  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
6722  "arguments must be of the same type (either Import or Export).");
6723 
6724  TEUCHOS_TEST_FOR_EXCEPTION(
6725  ( xferAsExport != NULL || ! xferDomainAsExport.is_null() ) &&
6726  (( xferAsExport != NULL && xferDomainAsExport.is_null() ) ||
6727  ( xferAsExport == NULL && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6728  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
6729  "arguments must be of the same type (either Import or Export).");
6730  } // domainTransfer != null
6731 
6732 
6733  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6734  // if the source Map is not distributed but the target Map is?
6735  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
6736 
6737  //
6738  // Get the caller's parameters
6739  //
6740 
6741  bool reverseMode = false; // Are we in reverse mode?
6742  bool restrictComm = false; // Do we need to restrict the communicator?
6743  RCP<ParameterList> matrixparams; // parameters for the destination matrix
6744  if (! params.is_null ()) {
6745  reverseMode = params->get ("Reverse Mode", reverseMode);
6746  restrictComm = params->get ("Restrict Communicator", restrictComm);
6747  matrixparams = sublist (params, "CrsMatrix");
6748  }
6749 
6750  // Get the new domain and range Maps. We need some of them for
6751  // error checking, now that we have the reverseMode parameter.
6752  RCP<const map_type> MyRowMap = reverseMode ?
6753  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
6754  RCP<const map_type> MyColMap; // create this below
6755  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
6756  domainMap : getDomainMap ();
6757  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
6758  rangeMap : getRangeMap ();
6759  RCP<const map_type> BaseRowMap = MyRowMap;
6760  RCP<const map_type> BaseDomainMap = MyDomainMap;
6761 
6762  // If the user gave us a nonnull destMat, then check whether it's
6763  // "pristine." That means that it has no entries.
6764  //
6765  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6766  // then this exception test may hang. It would be better to
6767  // forward an error flag to the next communication phase.
6768  if (! destMat.is_null ()) {
6769  // FIXME (mfh 15 May 2014): The classic Petra idiom for checking
6770  // whether a graph or matrix has no entries on the calling
6771  // process, is that it is neither locally nor globally indexed.
6772  // This may change eventually with the Kokkos refactor version
6773  // of Tpetra, so it would be better just to check the quantity
6774  // of interest directly. Note that with the Kokkos refactor
6775  // version of Tpetra, asking for the total number of entries in
6776  // a graph or matrix that is not fill complete might require
6777  // computation (kernel launch), since it is not thread scalable
6778  // to update a count every time an entry is inserted.
6779  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
6780  ! destMat->getGraph ()->isGloballyIndexed ();
6781  TEUCHOS_TEST_FOR_EXCEPTION(
6782  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
6783  "transferAndFillComplete: The input argument 'destMat' is only allowed "
6784  "to be nonnull, if its graph is empty (neither locally nor globally "
6785  "indexed).");
6786  // FIXME (mfh 15 May 2014) At some point, we want to change
6787  // graphs and matrices so that their DistObject Map
6788  // (this->getMap()) may differ from their row Map. This will
6789  // make redistribution for 2-D distributions more efficient. I
6790  // hesitate to change this check, because I'm not sure how much
6791  // the code here depends on getMap() and getRowMap() being the
6792  // same.
6793  TEUCHOS_TEST_FOR_EXCEPTION(
6794  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
6795  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
6796  "input argument 'destMat' is not the same as the (row) Map specified "
6797  "by the input argument 'rowTransfer'.");
6798  TEUCHOS_TEST_FOR_EXCEPTION(
6799  ! destMat->checkSizes (*this), std::invalid_argument,
6800  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
6801  "destination matrix, but checkSizes() indicates that it is not a legal "
6802  "legal target for redistribution from the source matrix (*this). This "
6803  "may mean that they do not have the same dimensions.");
6804  }
6805 
6806  // If forward mode (the default), then *this's (row) Map must be
6807  // the same as the source Map of the Transfer. If reverse mode,
6808  // then *this's (row) Map must be the same as the target Map of
6809  // the Transfer.
6810  //
6811  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6812  // and matrices so that their DistObject Map (this->getMap()) may
6813  // differ from their row Map. This will make redistribution for
6814  // 2-D distributions more efficient. I hesitate to change this
6815  // check, because I'm not sure how much the code here depends on
6816  // getMap() and getRowMap() being the same.
6817  TEUCHOS_TEST_FOR_EXCEPTION(
6818  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
6819  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
6820  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6821  TEUCHOS_TEST_FOR_EXCEPTION(
6822  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
6823  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
6824  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6825 
6826  // checks for domainTransfer
6827  TEUCHOS_TEST_FOR_EXCEPTION(
6828  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6829  std::invalid_argument,
6830  "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
6831  "argument must be the same as the rebalanced domain map 'domainMap'");
6832 
6833  TEUCHOS_TEST_FOR_EXCEPTION(
6834  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6835  std::invalid_argument,
6836  "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
6837  "argument must be the same as the rebalanced domain map 'domainMap'");
6838 
6839  // The basic algorithm here is:
6840  //
6841  // 1. Call the moral equivalent of "distor.do" to handle the import.
6842  // 2. Copy all the Imported and Copy/Permuted data into the raw
6843  // CrsMatrix / CrsGraphData pointers, still using GIDs.
6844  // 3. Call an optimized version of MakeColMap that avoids the
6845  // Directory lookups (since the importer knows who owns all the
6846  // GIDs) AND reindexes to LIDs.
6847  // 4. Call expertStaticFillComplete()
6848 
6849  // Get information from the Importer
6850  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6851  ArrayView<const LO> ExportLIDs = reverseMode ?
6852  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
6853  ArrayView<const LO> RemoteLIDs = reverseMode ?
6854  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
6855  ArrayView<const LO> PermuteToLIDs = reverseMode ?
6856  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
6857  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6858  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
6859  Distributor& Distor = rowTransfer.getDistributor ();
6860 
6861  // Owning PIDs
6862  Teuchos::Array<int> SourcePids;
6863  Teuchos::Array<int> TargetPids;
6864  int MyPID = getComm ()->getRank ();
6865 
6866  // Temp variables for sub-communicators
6867  RCP<const map_type> ReducedRowMap, ReducedColMap,
6868  ReducedDomainMap, ReducedRangeMap;
6869  RCP<const Comm<int> > ReducedComm;
6870 
6871  // If the user gave us a null destMat, then construct the new
6872  // destination matrix. We will replace its column Map later.
6873  if (destMat.is_null ()) {
6874  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
6875  }
6876 
6877  /***************************************************/
6878  /***** 1) First communicator restriction phase ****/
6879  /***************************************************/
6880  if (restrictComm) {
6881  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
6882  ReducedComm = ReducedRowMap.is_null () ?
6883  Teuchos::null :
6884  ReducedRowMap->getComm ();
6885  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
6886 
6887  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
6888  ReducedRowMap :
6889  MyDomainMap->replaceCommWithSubset (ReducedComm);
6890  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
6891  ReducedRowMap :
6892  MyRangeMap->replaceCommWithSubset (ReducedComm);
6893 
6894  // Reset the "my" maps
6895  MyRowMap = ReducedRowMap;
6896  MyDomainMap = ReducedDomainMap;
6897  MyRangeMap = ReducedRangeMap;
6898 
6899  // Update my PID, if we've restricted the communicator
6900  if (! ReducedComm.is_null ()) {
6901  MyPID = ReducedComm->getRank ();
6902  }
6903  else {
6904  MyPID = -2; // For debugging
6905  }
6906  }
6907  else {
6908  ReducedComm = MyRowMap->getComm ();
6909  }
6910 
6911  /***************************************************/
6912  /***** 2) From Tpetra::DistObject::doTransfer() ****/
6913  /***************************************************/
6914 #ifdef HAVE_TPETRA_MMM_TIMINGS
6915  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ImportSetup"))));
6916 #endif
6917  // Get the owning PIDs
6918  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
6919 
6920  // check whether the domain map of the source matrix and the base domain map are the same
6921  bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
6922 
6923  if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
6924  // Same domain map as source matrix
6925  //
6926  // NOTE: This won't work for restrictComm (because the Import
6927  // doesn't know the restricted PIDs), though writing an
6928  // optimized version for that case would be easy (Import an
6929  // IntVector of the new PIDs). Might want to add this later.
6930  Import_Util::getPids (*MyImporter, SourcePids, false);
6931  }
6932  else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
6933  // Same domain map as source matrix (restricted communicator)
6934  // We need one import from the domain to the column map
6935  IntVectorType SourceDomain_pids(getDomainMap (),true);
6936  IntVectorType SourceCol_pids(getColMap());
6937  // SourceDomain_pids contains the restricted pids
6938  SourceDomain_pids.putScalar(MyPID);
6939 
6940  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
6941  SourcePids.resize (getColMap ()->getNodeNumElements ());
6942  SourceCol_pids.get1dCopy (SourcePids ());
6943  }
6944  else if (MyImporter.is_null () && bSameDomainMap) {
6945  // Matrix has no off-process entries
6946  SourcePids.resize (getColMap ()->getNodeNumElements ());
6947  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
6948  }
6949  else if ( ! MyImporter.is_null () &&
6950  ! domainTransfer.is_null () ) {
6951  // general implementation for rectangular matrices with
6952  // domain map different than SourceMatrix domain map.
6953  // User has to provide a DomainTransfer object. We need
6954  // two communications (import/export)
6955 
6956  // TargetDomain_pids lives on the rebalanced new domain map
6957  IntVectorType TargetDomain_pids (domainMap);
6958  TargetDomain_pids.putScalar (MyPID);
6959 
6960  // SourceDomain_pids lives on the non-rebalanced old domain map
6961  IntVectorType SourceDomain_pids (getDomainMap ());
6962 
6963  // SourceCol_pids lives on the non-rebalanced old column map
6964  IntVectorType SourceCol_pids (getColMap ());
6965 
6966  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6967  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
6968  }
6969  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6970  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
6971  }
6972  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6973  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
6974  }
6975  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6976  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
6977  }
6978  else {
6979  TEUCHOS_TEST_FOR_EXCEPTION(
6980  true, std::logic_error, "Tpetra::CrsMatrix::"
6981  "transferAndFillComplete: Should never get here! "
6982  "Please report this bug to a Tpetra developer.");
6983  }
6984  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
6985  SourcePids.resize (getColMap ()->getNodeNumElements ());
6986  SourceCol_pids.get1dCopy (SourcePids ());
6987  }
6988  else if (BaseDomainMap->isSameAs (*BaseRowMap) &&
6989  getDomainMap ()->isSameAs (*getRowMap ())) {
6990  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
6991  IntVectorType TargetRow_pids (domainMap);
6992  IntVectorType SourceRow_pids (getRowMap ());
6993  IntVectorType SourceCol_pids (getColMap ());
6994 
6995  TargetRow_pids.putScalar (MyPID);
6996  if (! reverseMode && xferAsImport != NULL) {
6997  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
6998  }
6999  else if (reverseMode && xferAsExport != NULL) {
7000  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
7001  }
7002  else if (! reverseMode && xferAsExport != NULL) {
7003  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
7004  }
7005  else if (reverseMode && xferAsImport != NULL) {
7006  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
7007  }
7008  else {
7009  TEUCHOS_TEST_FOR_EXCEPTION(
7010  true, std::logic_error, "Tpetra::CrsMatrix::"
7011  "transferAndFillComplete: Should never get here! "
7012  "Please report this bug to a Tpetra developer.");
7013  }
7014  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
7015  SourcePids.resize (getColMap ()->getNodeNumElements ());
7016  SourceCol_pids.get1dCopy (SourcePids ());
7017  }
7018  else {
7019  TEUCHOS_TEST_FOR_EXCEPTION(
7020  true, std::invalid_argument, "Tpetra::CrsMatrix::"
7021  "transferAndFillComplete: This method only allows either domainMap == "
7022  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
7023  "getDomainMap () == getRowMap ()).");
7024  }
7025 #ifdef HAVE_TPETRA_MMM_TIMINGS
7026  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-2"))));
7027 #endif
7028 
7029  // Tpetra-specific stuff
7030  size_t constantNumPackets = destMat->constantNumberOfPackets ();
7031  if (constantNumPackets == 0) {
7032  // FIXME (mfh 25 Apr 2016) Once we've finished fixing #227, we
7033  // may be able to remove these fences that protect allocations.
7034  execution_space::fence ();
7035  destMat->numExportPacketsPerLID_ =
7036  decltype (destMat->numExportPacketsPerLID_) ("numExportPacketsPerLID",
7037  ExportLIDs.size ());
7038  execution_space::fence ();
7039  destMat->numImportPacketsPerLID_ =
7040  decltype (destMat->numImportPacketsPerLID_) ("numImportPacketsPerLID",
7041  RemoteLIDs.size ());
7042  execution_space::fence ();
7043  }
7044  else {
7045  // There are a constant number of packets per element. We
7046  // already know (from the number of "remote" (incoming)
7047  // elements) how many incoming elements we expect, so we can
7048  // resize the buffer accordingly.
7049  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7050  destMat->reallocImportsIfNeeded (rbufLen);
7051  }
7052 
7053  // Pack & Prepare w/ owning PIDs
7054 #ifdef HAVE_TPETRA_DEBUG
7055  {
7056  using Teuchos::outArg;
7057  using Teuchos::REDUCE_MAX;
7058  using Teuchos::reduceAll;
7059  using std::cerr;
7060  using std::endl;
7061  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
7062  const int myRank = comm->getRank ();
7063  const int numProcs = comm->getSize ();
7064 
7065  std::ostringstream os;
7066  int lclErr = 0;
7067  try {
7068  // packAndPrepare* methods modify numExportPacketsPerLID_.
7069  destMat->numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
7070  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7071  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7072  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
7073  destMat->exports_,
7074  numExportPacketsPerLID,
7075  constantNumPackets, Distor,
7076  SourcePids);
7077  }
7078  catch (std::exception& e) {
7079  os << "Proc " << myRank << ": " << e.what ();
7080  lclErr = 1;
7081  }
7082  int gblErr = 0;
7083  if (! comm.is_null ()) {
7084  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
7085  }
7086  if (gblErr != 0) {
7087  if (myRank == 0) {
7088  cerr << "packAndPrepareWithOwningPIDs threw an exception: " << endl;
7089  }
7090  std::ostringstream err;
7091  for (int r = 0; r < numProcs; ++r) {
7092  if (r == myRank && lclErr != 0) {
7093  cerr << os.str () << endl;
7094  }
7095  comm->barrier ();
7096  comm->barrier ();
7097  comm->barrier ();
7098  }
7099 
7100  TEUCHOS_TEST_FOR_EXCEPTION(
7101  true, std::logic_error, "packAndPrepareWithOwningPIDs threw an "
7102  "exception.");
7103  }
7104  }
7105 
7106 #else
7107  {
7108  // packAndPrepare* methods modify numExportPacketsPerLID_.
7109  destMat->numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
7110  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7111  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7112  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
7113  destMat->exports_,
7114  numExportPacketsPerLID,
7115  constantNumPackets, Distor,
7116  SourcePids);
7117  }
7118 #endif // HAVE_TPETRA_DEBUG
7119 
7120  // Do the exchange of remote data.
7121 #ifdef HAVE_TPETRA_MMM_TIMINGS
7122  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Transfer"))));
7123 #endif
7124 
7125  if (communication_needed) {
7126  if (reverseMode) {
7127  if (constantNumPackets == 0) { // variable number of packets per LID
7128  // Make sure that host has the latest version, since we're
7129  // using the version on host. If host has the latest
7130  // version, syncing to host does nothing.
7131  destMat->numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7132  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7133  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7134  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7135  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7136  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
7137  Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
7138  numImportPacketsPerLID);
7139  size_t totalImportPackets = 0;
7140  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
7141  totalImportPackets += numImportPacketsPerLID[i];
7142  }
7143 
7144  // Reallocation MUST go before setting the modified flag,
7145  // because it may clear out the flags.
7146  destMat->reallocImportsIfNeeded (totalImportPackets);
7147  destMat->imports_.template modify<Kokkos::HostSpace> ();
7148  Teuchos::ArrayView<char> hostImports =
7149  getArrayViewFromDualView (destMat->imports_);
7150  // This is a legacy host pack/unpack path, so use the host
7151  // version of exports_.
7152  destMat->exports_.template sync<Kokkos::HostSpace> ();
7153  Teuchos::ArrayView<const char> hostExports =
7154  getArrayViewFromDualView (destMat->exports_);
7155  Distor.doReversePostsAndWaits (hostExports,
7156  numExportPacketsPerLID,
7157  hostImports,
7158  numImportPacketsPerLID);
7159  }
7160  else { // constant number of packets per LI
7161  destMat->imports_.template modify<Kokkos::HostSpace> ();
7162  Teuchos::ArrayView<char> hostImports =
7163  getArrayViewFromDualView (destMat->imports_);
7164  // This is a legacy host pack/unpack path, so use the host
7165  // version of exports_.
7166  destMat->exports_.template sync<Kokkos::HostSpace> ();
7167  Teuchos::ArrayView<const char> hostExports =
7168  getArrayViewFromDualView (destMat->exports_);
7169  Distor.doReversePostsAndWaits (hostExports,
7170  constantNumPackets,
7171  hostImports);
7172  }
7173  }
7174  else { // forward mode (the default)
7175  if (constantNumPackets == 0) { // variable number of packets per LID
7176  // Make sure that host has the latest version, since we're
7177  // using the version on host. If host has the latest
7178  // version, syncing to host does nothing.
7179  destMat->numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7180  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7181  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7182  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7183  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7184  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
7185  Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
7186  numImportPacketsPerLID);
7187  size_t totalImportPackets = 0;
7188  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
7189  totalImportPackets += numImportPacketsPerLID[i];
7190  }
7191 
7192  // Reallocation MUST go before setting the modified flag,
7193  // because it may clear out the flags.
7194  destMat->reallocImportsIfNeeded (totalImportPackets);
7195  destMat->imports_.template modify<Kokkos::HostSpace> ();
7196  Teuchos::ArrayView<char> hostImports =
7197  getArrayViewFromDualView (destMat->imports_);
7198  // This is a legacy host pack/unpack path, so use the host
7199  // version of exports_.
7200  destMat->exports_.template sync<Kokkos::HostSpace> ();
7201  Teuchos::ArrayView<const char> hostExports =
7202  getArrayViewFromDualView (destMat->exports_);
7203  Distor.doPostsAndWaits (hostExports,
7204  numExportPacketsPerLID,
7205  hostImports,
7206  numImportPacketsPerLID);
7207  }
7208  else { // constant number of packets per LID
7209  destMat->imports_.template modify<Kokkos::HostSpace> ();
7210  Teuchos::ArrayView<char> hostImports =
7211  getArrayViewFromDualView (destMat->imports_);
7212  // This is a legacy host pack/unpack path, so use the host
7213  // version of exports_.
7214  destMat->exports_.template sync<Kokkos::HostSpace> ();
7215  Teuchos::ArrayView<const char> hostExports =
7216  getArrayViewFromDualView (destMat->exports_);
7217  Distor.doPostsAndWaits (hostExports,
7218  constantNumPackets,
7219  hostImports);
7220  }
7221  }
7222  }
7223 
7224  /*********************************************************************/
7225  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7226  /*********************************************************************/
7227 
7228 #ifdef HAVE_TPETRA_MMM_TIMINGS
7229  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-1"))));
7230 #endif
7231 
7232  // Backwards compatibility measure. We'll use this again below.
7233  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7234  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7235  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
7236  destMat->imports_.template sync<Kokkos::HostSpace> ();
7237  Teuchos::ArrayView<const char> hostImports =
7238  getArrayViewFromDualView (destMat->imports_);
7239  size_t mynnz =
7240  Import_Util::unpackAndCombineWithOwningPIDsCount (*this, RemoteLIDs,
7241  hostImports,
7242  numImportPacketsPerLID,
7243  constantNumPackets,
7244  Distor, INSERT,
7245  NumSameIDs,
7246  PermuteToLIDs,
7247  PermuteFromLIDs);
7248  size_t N = BaseRowMap->getNodeNumElements ();
7249 
7250  // Allocations
7251  ArrayRCP<size_t> CSR_rowptr(N+1);
7252  ArrayRCP<GO> CSR_colind_GID;
7253  ArrayRCP<LO> CSR_colind_LID;
7254  ArrayRCP<Scalar> CSR_vals;
7255  CSR_colind_GID.resize (mynnz);
7256  CSR_vals.resize (mynnz);
7257 
7258  // If LO and GO are the same, we can reuse memory when
7259  // converting the column indices from global to local indices.
7260  if (typeid (LO) == typeid (GO)) {
7261  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
7262  }
7263  else {
7264  CSR_colind_LID.resize (mynnz);
7265  }
7266 
7267  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7268  // unpackAndCombine method on a "CrsArrays" object? This passing
7269  // in a huge list of arrays is icky. Can't we have a bit of an
7270  // abstraction? Implementing a concrete DistObject subclass only
7271  // takes five methods.
7272  Import_Util::unpackAndCombineIntoCrsArrays (*this, RemoteLIDs, hostImports,
7273  numImportPacketsPerLID,
7274  constantNumPackets, Distor, INSERT, NumSameIDs,
7275  PermuteToLIDs, PermuteFromLIDs, N, mynnz, MyPID,
7276  CSR_rowptr (), CSR_colind_GID (), CSR_vals (),
7277  SourcePids (), TargetPids);
7278 
7279  /**************************************************************/
7280  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7281  /**************************************************************/
7282 #ifdef HAVE_TPETRA_MMM_TIMINGS
7283  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-2"))));
7284 #endif
7285  // Call an optimized version of makeColMap that avoids the
7286  // Directory lookups (since the Import object knows who owns all
7287  // the GIDs).
7288  Teuchos::Array<int> RemotePids;
7289  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
7290  CSR_colind_LID (),
7291  CSR_colind_GID (),
7292  BaseDomainMap,
7293  TargetPids, RemotePids,
7294  MyColMap);
7295 
7296  /*******************************************************/
7297  /**** 4) Second communicator restriction phase ****/
7298  /*******************************************************/
7299  if (restrictComm) {
7300  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
7301  ReducedRowMap :
7302  MyColMap->replaceCommWithSubset (ReducedComm);
7303  MyColMap = ReducedColMap; // Reset the "my" maps
7304  }
7305 
7306  // Replace the col map
7307  destMat->replaceColMap (MyColMap);
7308 
7309  // Short circuit if the processor is no longer in the communicator
7310  //
7311  // NOTE: Epetra modifies all "removed" processes so they
7312  // have a dummy (serial) Map that doesn't touch the original
7313  // communicator. Duplicating that here might be a good idea.
7314  if (ReducedComm.is_null ()) {
7315  return;
7316  }
7317 
7318  /***************************************************/
7319  /**** 5) Sort ****/
7320  /***************************************************/
7321 #ifdef HAVE_TPETRA_MMM_TIMINGS
7322  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-3"))));
7323 #endif
7324  Import_Util::sortCrsEntries (CSR_rowptr (),
7325  CSR_colind_LID (),
7326  CSR_vals ());
7327  if ((! reverseMode && xferAsImport != NULL) ||
7328  (reverseMode && xferAsExport != NULL)) {
7329  Import_Util::sortCrsEntries (CSR_rowptr (),
7330  CSR_colind_LID (),
7331  CSR_vals ());
7332  }
7333  else if ((! reverseMode && xferAsExport != NULL) ||
7334  (reverseMode && xferAsImport != NULL)) {
7335  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
7336  CSR_colind_LID (),
7337  CSR_vals ());
7338  if (CSR_rowptr[N] != mynnz) {
7339  CSR_colind_LID.resize (CSR_rowptr[N]);
7340  CSR_vals.resize (CSR_rowptr[N]);
7341  }
7342  }
7343  else {
7344  TEUCHOS_TEST_FOR_EXCEPTION(
7345  true, std::logic_error, "Tpetra::CrsMatrix::"
7346  "transferAndFillComplete: Should never get here! "
7347  "Please report this bug to a Tpetra developer.");
7348  }
7349  /***************************************************/
7350  /**** 6) Reset the colmap and the arrays ****/
7351  /***************************************************/
7352 
7353  // Call constructor for the new matrix (restricted as needed)
7354  //
7355  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
7356  // refactor version of CrsMatrix, though it reserves the right to
7357  // make a deep copy of the arrays.
7358  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
7359 
7360  /***************************************************/
7361  /**** 7) Build Importer & Call ESFC ****/
7362  /***************************************************/
7363  // Pre-build the importer using the existing PIDs
7364  Teuchos::ParameterList esfc_params;
7365 #ifdef HAVE_TPETRA_MMM_TIMINGS
7366  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC CreateImporter"))));
7367 #endif
7368  RCP<import_type> MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids));
7369 #ifdef HAVE_TPETRA_MMM_TIMINGS
7370  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ESFC"))));
7371 
7372  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7373 #endif
7374 
7375  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(&esfc_params,false));
7376  }
7377 
// Convenience wrapper for redistributing via a single (row) Import.
// NOTE(review): listing lines 7380-7381 (the qualified method name and the
// destMatrix parameter) are missing from this extract; presumably this is the
// one-Import importAndFillComplete overload -- confirm against the original.
// Forwards every argument to transferAndFillComplete and passes
// Teuchos::null in the domain-transfer slot, i.e. no separate domain
// transfer object is supplied.
7378  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7379  void
7382  const import_type& importer,
7383  const Teuchos::RCP<const map_type>& domainMap,
7384  const Teuchos::RCP<const map_type>& rangeMap,
7385  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7386  {
7387  transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
7388  }
7389 
// Convenience wrapper for redistributing via a row Import plus a separate
// domain Import.
// NOTE(review): listing lines 7392-7393 (the qualified method name and the
// destMatrix parameter) are missing from this extract; presumably this is the
// two-Import importAndFillComplete overload -- confirm against the original.
// Forwards to transferAndFillComplete; domainImporter is handed over wrapped
// by Teuchos::rcpFromRef, since the callee takes the domain transfer as an RCP.
7390  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7391  void
7394  const import_type& rowImporter,
7395  const import_type& domainImporter,
7396  const Teuchos::RCP<const map_type>& domainMap,
7397  const Teuchos::RCP<const map_type>& rangeMap,
7398  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7399  {
7400  transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7401  }
7402 
// Convenience wrapper for redistributing via a single (row) Export.
// NOTE(review): listing lines 7405-7406 (the qualified method name and the
// destMatrix parameter) are missing from this extract; presumably this is the
// one-Export exportAndFillComplete overload -- confirm against the original.
// Forwards every argument to transferAndFillComplete and passes
// Teuchos::null in the domain-transfer slot, i.e. no separate domain
// transfer object is supplied.
7403  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7404  void
7407  const export_type& exporter,
7408  const Teuchos::RCP<const map_type>& domainMap,
7409  const Teuchos::RCP<const map_type>& rangeMap,
7410  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7411  {
7412  transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
7413  }
7414 
// Convenience wrapper for redistributing via a row Export plus a separate
// domain Export.
// NOTE(review): listing lines 7417-7418 (the qualified method name and the
// destMatrix parameter) are missing from this extract; presumably this is the
// two-Export exportAndFillComplete overload -- confirm against the original.
// Forwards to transferAndFillComplete; domainExporter is handed over wrapped
// by Teuchos::rcpFromRef, since the callee takes the domain transfer as an RCP.
7415  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7416  void
7419  const export_type& rowExporter,
7420  const export_type& domainExporter,
7421  const Teuchos::RCP<const map_type>& domainMap,
7422  const Teuchos::RCP<const map_type>& rangeMap,
7423  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7424  {
7425  transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7426  }
7427 
7428 } // namespace Tpetra
7429 
7430 //
7431 // Explicit instantiation macro
7432 //
7433 // Must be expanded from within the Tpetra namespace!
7434 //
7435 
// Explicitly instantiates the class CrsMatrix<SCALAR, LO, GO, NODE> and its
// member template convert<SCALAR>() (same-Scalar conversion) for one
// combination of template arguments.
7436 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
7437  \
7438  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
7439  template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
7440  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
7441 
// Explicitly instantiates the member template convert<SO>() of
// CrsMatrix<SI, LO, GO, NODE>, i.e. conversion from input Scalar type SI to
// output Scalar type SO (the return type is an RCP of CrsMatrix<SO, ...>).
7442 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
7443  \
7444  template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
7445  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
7446 
// Expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>: the
// overload taking the source matrix, one Import (`importer`), the domain and
// range Maps, and a ParameterList. The Import and Map types are spelled via
// the matrix's local_ordinal_type / global_ordinal_type / node_type typedefs.
7447 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7448  template<> \
7449  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7450  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7451  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7452  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7453  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
7454  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7455  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7456  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7457  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7458  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7459  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7460  const Teuchos::RCP<Teuchos::ParameterList>& params);
7461 
// Expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>: the
// overload taking the source matrix, two Imports (`rowImporter` and
// `domainImporter`), the domain and range Maps, and a ParameterList.
7462 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
7463  template<> \
7464  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7465  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7466  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7467  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7468  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
7469  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7470  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7471  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
7472  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7473  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7474  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7475  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7476  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7477  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7478  const Teuchos::RCP<Teuchos::ParameterList>& params);
7479 
7480 
// Expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>: the
// overload taking the source matrix, one Export (`exporter`), the domain and
// range Maps, and a ParameterList.
7481 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7482  template<> \
7483  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7484  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7485  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7486  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7487  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
7488  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7489  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7490  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7491  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7492  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7493  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7494  const Teuchos::RCP<Teuchos::ParameterList>& params);
7495 
// Expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>: the
// overload taking the source matrix, two Exports (`rowExporter` and
// `domainExporter`), the domain and range Maps, and a ParameterList.
7496 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
7497  template<> \
7498  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7499  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7500  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7501  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7502  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
7503  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7504  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7505  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
7506  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7507  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7508  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7509  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7510  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7511  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7512  const Teuchos::RCP<Teuchos::ParameterList>& params);
7513 
7514 
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination, expands the
// class instantiation macro followed by the four import/export
// fill-complete macros (one- and two-transfer variants).
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion.
7515 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
7516  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
7517  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7518  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7519  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
7520  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
7521 
7522 #endif // TPETRA_CRSMATRIX_DEF_HPP
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
LocalOrdinal replaceGlobalValues(const GlobalOrdinal globalRow, const typename UnmanagedView< GlobalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals) const
Replace one or more entries' values, using global indices.
void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
Kokkos::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Functor for the ABSMAX CombineMode of Import and Export operations.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
virtual bool isLocallyIndexed() const =0
Whether matrix indices are locally indexed.
std::string description() const
A one-line description of this object.
mag_type getFrobeniusNorm() const
Compute and return the Frobenius norm of the matrix.
LocalOrdinal local_ordinal_type
This class' second template parameter; the type of local indices.
size_t getNodeNumEntries() const
The local number of entries in this matrix.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
LocalOrdinal sumIntoLocalValues(const LocalOrdinal localRow, const typename UnmanagedView< LocalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals, const bool atomic=useAtomicUpdatesByDefault) const
Sum into one or more sparse matrix entries, using local row and column indices.
LocalOrdinal replaceLocalValues(const LocalOrdinal localRow, const typename UnmanagedView< LocalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals) const
Replace one or more entries' values, using local row and column indices.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix's graph, as a RowGraph.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
bool isFillActive() const
Whether the matrix is not fill complete.
void sortEntries()
Sort the entries of each row by their column indices.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node, classic > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
virtual void copyAndPermute(const SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
Teuchos::ArrayView< const impl_scalar_type > getView(RowInfo rowinfo) const
Constant view of all entries (including extra space) in the given row.
global_size_t getGlobalNumEntries() const
The global number of entries in this matrix.
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &x)
bool isLocallyComplete() const
Do all source Map indices on the calling process exist on at least one process (not necessarily this ...
Teuchos::ArrayView< impl_scalar_type > getViewNonConst(const RowInfo &rowinfo) const
Nonconst view of all entries (including extra space) in the given row.
size_t getNodeNumCols() const
The number of columns connected to the locally owned rows of this matrix.
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
bool hasColMap() const
Indicates whether the matrix has a well-defined column map.
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.
Teuchos::RCP< node_type > getNode() const
The Kokkos Node instance.
GlobalOrdinal global_ordinal_type
This class' third template parameter; the type of global indices.
void mergeRedundantEntries()
Merge entries in each row with the same column indices.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to a FancyOStream object.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute a sparse matrix-MultiVector multiply.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual bool checkSizes(const SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Node node_type
This class' fourth template parameter; the Kokkos device type.
bool isLowerTriangular() const
Indicates whether the matrix is lower triangular.
Node::device_type device_type
The Kokkos device type.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
virtual bool supportsRowViews() const
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &diag) const
Get a copy of the diagonal entries of the matrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Maps and their communicator.
device_type::execution_space execution_space
The Kokkos execution space.
Implementation details of Tpetra.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void reduce()
Sum values of a locally replicated multivector across all processes.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
size_t global_size_t
Global size_t object.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2 valEnd)
Merge values in place, additively, with the same index.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Traits class for "invalid" (flag) values of integer types that Tpetra uses as local ordinals or globa...
Insert new values that don't currently exist.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID is owned by only one process...
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
global_size_t getGlobalNumCols() const
The number of global columns in the matrix.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
bool isFillComplete() const
Whether the matrix is fill complete.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas)
Allocate values (and optionally indices) using the Node.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
void unpackAndCombine(const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
Sets up and executes a communication plan for a Tpetra DistObject.
bool isStorageOptimized() const
Returns true if storage has been optimized.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
LocalOrdinal getLocalRowViewRaw(const LocalOrdinal lclRow, LocalOrdinal &numEnt, const LocalOrdinal *&lclColInds, const Scalar *&vals) const
Get a constant, nonpersisting, locally indexed view of the given row of the matrix, using "raw" pointers instead of Teuchos::ArrayView.
bool isUpperTriangular() const
Indicates whether the matrix is upper triangular.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Utility functions for packing and unpacking sparse matrix entries.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
virtual ~CrsMatrix()
Destructor.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
Replace old value with maximum of magnitudes of old and new values.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Abstract base class for objects that can be the source of an Import or Export operation.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
LocalOrdinal getViewRaw(impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Nonconst pointer to all entries (including extra space) in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void computeGlobalConstants()
Compute matrix properties that require collectives.
bool hasTransposeApply() const
Whether apply() allows applying the transpose or conjugate transpose.
Replace old values with zero.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void reorderedGaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Reordered "Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = AX$.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void getLocalRowCopy(LocalOrdinal localRow, const Teuchos::ArrayView< LocalOrdinal > &colInds, const Teuchos::ArrayView< Scalar > &vals, size_t &numEntries) const
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &x)
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
Describes a parallel distribution of objects over processes.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
size_t getNumVectors() const
Number of columns in the multivector.
Teuchos::RCP< Node > getNode() const
Get this Map's Node object.
A read-only, row-oriented interface to a sparse matrix.
size_t getGlobalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on all nodes.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
A distributed dense vector.
bool isGloballyIndexed() const
Whether the matrix is globally indexed on the calling process.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
"Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = AX$.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using local column indices.
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
void globalAssemble()
Communicate nonlocal contributions to other processes.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const
Implementation of RowMatrix::add: return alpha*A + beta*this.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
void clearGlobalConstants()
Clear matrix properties that require collectives.
LocalOrdinal getViewRawConst(const impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Const pointer to all entries (including extra space) in the given row.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
virtual void pack(const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const
Pack this object's data for an Import or Export.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
local_matrix_type lclMatrix_
The local sparse matrix.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
GlobalOrdinal getIndexBase() const
The index base for global indices for this matrix.