Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSGRAPH_DEF_HPP
43 #define TPETRA_CRSGRAPH_DEF_HPP
44 
52 
55 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
56 #include "Tpetra_Distributor.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include <algorithm>
59 #include <limits>
60 #include <string>
61 #include <utility>
62 
63 namespace Tpetra {
64  namespace Details {
65  namespace Impl {
66 
67  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
68  class ConvertColumnIndicesFromGlobalToLocal {
69  public:
70  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
71  const ::Kokkos::View<const GO*, DT>& gblColInds,
72  const ::Kokkos::View<const OffsetType*, DT>& ptr,
73  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
74  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
75  lclColInds_ (lclColInds),
76  gblColInds_ (gblColInds),
77  ptr_ (ptr),
78  lclColMap_ (lclColMap),
79  numRowEnt_ (numRowEnt)
80  {}
81 
82  KOKKOS_FUNCTION void
83  operator () (const LO& lclRow, OffsetType& curNumBad) const
84  {
85  const OffsetType offset = ptr_(lclRow);
86  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
87  // of entries in a row to LO, as long as the row doesn't have
88  // too many duplicate entries.
89  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
90  for (LO j = 0; j < numEnt; ++j) {
91  const GO gid = gblColInds_(offset + j);
92  const LO lid = lclColMap_.getLocalElement (gid);
93  lclColInds_(offset + j) = lid;
95  ++curNumBad;
96  }
97  }
98  }
99 
100  static OffsetType
101  run (const ::Kokkos::View<LO*, DT>& lclColInds,
102  const ::Kokkos::View<const GO*, DT>& gblColInds,
103  const ::Kokkos::View<const OffsetType*, DT>& ptr,
104  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
105  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
106  {
107  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
108  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
109 
110  const LO lclNumRows = ptr.dimension_0 () == 0 ?
111  static_cast<LO> (0) : static_cast<LO> (ptr.dimension_0 () - 1);
112  OffsetType numBad = 0;
113  // Count of "bad" column indices is a reduction over rows.
114  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
115  functor_type (lclColInds, gblColInds, ptr,
116  lclColMap, numRowEnt),
117  numBad);
118  return numBad;
119  }
120 
121  private:
122  ::Kokkos::View<LO*, DT> lclColInds_;
123  ::Kokkos::View<const GO*, DT> gblColInds_;
124  ::Kokkos::View<const OffsetType*, DT> ptr_;
126  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
127  };
128 
129  } // namespace Impl
130 
145  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
146  OffsetType
147  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
148  const Kokkos::View<const GO*, DT>& gblColInds,
149  const Kokkos::View<const OffsetType*, DT>& ptr,
150  const LocalMap<LO, GO, DT>& lclColMap,
151  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
152  {
153  using Impl::ConvertColumnIndicesFromGlobalToLocal;
154  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
155  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
156  }
157 
158  } // namespace Details
159 
160  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
161  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
162  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
163  size_t maxNumEntriesPerRow,
164  ProfileType pftype,
165  const Teuchos::RCP<Teuchos::ParameterList>& params) :
166  dist_object_type (rowMap)
167  , rowMap_ (rowMap)
168  , nodeNumEntries_ (0)
169  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
170  , pftype_ (pftype)
171  , numAllocForAllRows_ (maxNumEntriesPerRow)
172  , storageStatus_ (pftype == StaticProfile ?
173  Details::STORAGE_1D_UNPACKED :
174  Details::STORAGE_2D)
175  , indicesAreAllocated_ (false)
176  , indicesAreLocal_ (false)
177  , indicesAreGlobal_ (false)
178  , fillComplete_ (false)
179  , indicesAreSorted_ (true)
180  , noRedundancies_ (true)
181  , haveLocalConstants_ (false)
182  , haveGlobalConstants_ (false)
183  , sortGhostsAssociatedWithEachProcessor_ (true)
184  {
185  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
186  "pftype,params): ";
187  staticAssertions ();
188  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
189  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
190  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
191  "a valid size_t value, which in this case means it must not be "
192  "Teuchos::OrdinalTraits<size_t>::invalid().");
193  resumeFill (params);
194  checkInternalState ();
195  }
196 
197  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
199  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
200  const Teuchos::RCP<const map_type>& colMap,
201  const size_t maxNumEntriesPerRow,
202  const ProfileType pftype,
203  const Teuchos::RCP<Teuchos::ParameterList>& params) :
204  dist_object_type (rowMap)
205  , rowMap_ (rowMap)
206  , colMap_ (colMap)
207  , nodeNumEntries_ (0)
208  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
209  , pftype_ (pftype)
210  , numAllocForAllRows_ (maxNumEntriesPerRow)
211  , storageStatus_ (pftype == StaticProfile ?
212  Details::STORAGE_1D_UNPACKED :
213  Details::STORAGE_2D)
214  , indicesAreAllocated_ (false)
215  , indicesAreLocal_ (false)
216  , indicesAreGlobal_ (false)
217  , fillComplete_ (false)
218  , indicesAreSorted_ (true)
219  , noRedundancies_ (true)
220  , haveLocalConstants_ (false)
221  , haveGlobalConstants_ (false)
222  , sortGhostsAssociatedWithEachProcessor_ (true)
223  {
224  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
225  "pftype,params): ";
226  staticAssertions ();
227  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
228  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
229  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
230  "a valid size_t value, which in this case means it must not be "
231  "Teuchos::OrdinalTraits<size_t>::invalid().");
232  resumeFill (params);
233  checkInternalState ();
234  }
235 
236  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
238  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
239  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
240  const ProfileType pftype,
241  const Teuchos::RCP<Teuchos::ParameterList>& params) :
242  dist_object_type (rowMap)
243  , rowMap_ (rowMap)
244  , nodeNumEntries_ (0)
245  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
246  , pftype_ (pftype)
247  , numAllocForAllRows_ (0)
248  , storageStatus_ (pftype == StaticProfile ?
249  Details::STORAGE_1D_UNPACKED :
250  Details::STORAGE_2D)
251  , indicesAreAllocated_ (false)
252  , indicesAreLocal_ (false)
253  , indicesAreGlobal_ (false)
254  , fillComplete_ (false)
255  , indicesAreSorted_ (true)
256  , noRedundancies_ (true)
257  , haveLocalConstants_ (false)
258  , haveGlobalConstants_ (false)
259  , sortGhostsAssociatedWithEachProcessor_ (true)
260  {
261  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
262  staticAssertions ();
263 
264  const size_t lclNumRows = rowMap.is_null () ?
265  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
266  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
267  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
268  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
269  << " != the local number of rows " << lclNumRows << " as specified by "
270  "the input row Map.");
271 
272 #ifdef HAVE_TPETRA_DEBUG
273  for (size_t r = 0; r < lclNumRows; ++r) {
274  const size_t curRowCount = numEntPerRow[r];
275  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
276  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
277  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
278  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
279  }
280 #endif // HAVE_TPETRA_DEBUG
281 
282  // Deep-copy the input (ArrayRCP, therefore host accessible) into
283  // k_numAllocPerRow_. The latter is a const View, so we have to
284  // copy into a nonconst View first, then assign.
285  typedef decltype (k_numAllocPerRow_) out_view_type;
286  typedef typename out_view_type::non_const_type nc_view_type;
287  typedef Kokkos::View<const size_t*,
288  typename nc_view_type::array_layout,
289  Kokkos::HostSpace,
290  Kokkos::MemoryUnmanaged> in_view_type;
291  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
292  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
293  lclNumRows);
294  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
295  k_numAllocPerRow_ = numAllocPerRowOut;
296 
297  resumeFill (params);
298  checkInternalState ();
299  }
300 
301  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
303  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
304  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
305  const ProfileType pftype,
306  const Teuchos::RCP<Teuchos::ParameterList>& params) :
307  dist_object_type (rowMap)
308  , rowMap_ (rowMap)
309  , nodeNumEntries_ (0)
310  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
311  , pftype_ (pftype)
312  , k_numAllocPerRow_ (numEntPerRow.h_view)
313  , numAllocForAllRows_ (0)
314  , storageStatus_ (pftype == StaticProfile ?
315  Details::STORAGE_1D_UNPACKED :
316  Details::STORAGE_2D)
317  , indicesAreAllocated_ (false)
318  , indicesAreLocal_ (false)
319  , indicesAreGlobal_ (false)
320  , fillComplete_ (false)
321  , indicesAreSorted_ (true)
322  , noRedundancies_ (true)
323  , haveLocalConstants_ (false)
324  , haveGlobalConstants_ (false)
325  , sortGhostsAssociatedWithEachProcessor_ (true)
326  {
327  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
328  staticAssertions ();
329 
330  const size_t lclNumRows = rowMap.is_null () ?
331  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
332  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
333  static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows,
334  std::invalid_argument, "numEntPerRow has length " <<
335  numEntPerRow.dimension_0 () << " != the local number of rows " <<
336  lclNumRows << " as specified by " "the input row Map.");
337 
338 #ifdef HAVE_TPETRA_DEBUG
339  for (size_t r = 0; r < lclNumRows; ++r) {
340  const size_t curRowCount = numEntPerRow.h_view(r);
341  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
342  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
343  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
344  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
345  }
346 #endif // HAVE_TPETRA_DEBUG
347 
348  resumeFill (params);
349  checkInternalState ();
350  }
351 
352 
353  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
355  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
356  const Teuchos::RCP<const map_type>& colMap,
357  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
358  const ProfileType pftype,
359  const Teuchos::RCP<Teuchos::ParameterList>& params) :
360  dist_object_type (rowMap)
361  , rowMap_ (rowMap)
362  , colMap_ (colMap)
363  , nodeNumEntries_ (0)
364  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
365  , pftype_ (pftype)
366  , k_numAllocPerRow_ (numEntPerRow.h_view)
367  , numAllocForAllRows_ (0)
368  , storageStatus_ (pftype == StaticProfile ?
369  Details::STORAGE_1D_UNPACKED :
370  Details::STORAGE_2D)
371  , indicesAreAllocated_ (false)
372  , indicesAreLocal_ (false)
373  , indicesAreGlobal_ (false)
374  , fillComplete_ (false)
375  , indicesAreSorted_ (true)
376  , noRedundancies_ (true)
377  , haveLocalConstants_ (false)
378  , haveGlobalConstants_ (false)
379  , sortGhostsAssociatedWithEachProcessor_ (true)
380  {
381  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
382  staticAssertions ();
383 
384  const size_t lclNumRows = rowMap.is_null () ?
385  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
386  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
387  static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows,
388  std::invalid_argument, "numEntPerRow has length " <<
389  numEntPerRow.dimension_0 () << " != the local number of rows " <<
390  lclNumRows << " as specified by " "the input row Map.");
391 
392 #ifdef HAVE_TPETRA_DEBUG
393  for (size_t r = 0; r < lclNumRows; ++r) {
394  const size_t curRowCount = numEntPerRow.h_view(r);
395  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
396  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
397  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
398  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
399  }
400 #endif // HAVE_TPETRA_DEBUG
401 
402  resumeFill (params);
403  checkInternalState ();
404  }
405 
406 
407  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
409  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
410  const Teuchos::RCP<const map_type>& colMap,
411  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
412  ProfileType pftype,
413  const Teuchos::RCP<Teuchos::ParameterList>& params) :
414  dist_object_type (rowMap)
415  , rowMap_ (rowMap)
416  , colMap_ (colMap)
417  , nodeNumEntries_ (0)
418  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
419  , pftype_ (pftype)
420  , numAllocForAllRows_ (0)
421  , storageStatus_ (pftype == StaticProfile ?
422  Details::STORAGE_1D_UNPACKED :
423  Details::STORAGE_2D)
424  , indicesAreAllocated_ (false)
425  , indicesAreLocal_ (false)
426  , indicesAreGlobal_ (false)
427  , fillComplete_ (false)
428  , indicesAreSorted_ (true)
429  , noRedundancies_ (true)
430  , haveLocalConstants_ (false)
431  , haveGlobalConstants_ (false)
432  , sortGhostsAssociatedWithEachProcessor_ (true)
433  {
434  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
435  "params): ";
436  staticAssertions ();
437 
438  const size_t lclNumRows = rowMap.is_null () ?
439  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
440  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
441  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
442  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
443  << " != the local number of rows " << lclNumRows << " as specified by "
444  "the input row Map.");
445 
446 #ifdef HAVE_TPETRA_DEBUG
447  for (size_t r = 0; r < lclNumRows; ++r) {
448  const size_t curRowCount = numEntPerRow[r];
449  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
450  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
451  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
452  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
453  }
454 #endif // HAVE_TPETRA_DEBUG
455 
456  // Deep-copy the input (ArrayRCP, therefore host accessible) into
457  // k_numAllocPerRow_. The latter is a const View, so we have to
458  // copy into a nonconst View first, then assign.
459  typedef decltype (k_numAllocPerRow_) out_view_type;
460  typedef typename out_view_type::non_const_type nc_view_type;
461  typedef Kokkos::View<const size_t*,
462  typename nc_view_type::array_layout,
463  Kokkos::HostSpace,
464  Kokkos::MemoryUnmanaged> in_view_type;
465  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
466  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
467  lclNumRows);
468  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
469  k_numAllocPerRow_ = numAllocPerRowOut;
470 
471  resumeFill (params);
472  checkInternalState ();
473  }
474 
475 
476  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
478  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
479  const Teuchos::RCP<const map_type>& colMap,
480  const typename local_graph_type::row_map_type& rowPointers,
481  const typename local_graph_type::entries_type::non_const_type& columnIndices,
482  const Teuchos::RCP<Teuchos::ParameterList>& params) :
483  dist_object_type (rowMap)
484  , rowMap_(rowMap)
485  , colMap_(colMap)
486  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
487  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
488  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
489  , nodeNumEntries_(0)
490  , nodeNumAllocated_(Teuchos::OrdinalTraits<size_t>::invalid())
491  , pftype_(StaticProfile)
492  , numAllocForAllRows_(0)
493  , storageStatus_ (Details::STORAGE_1D_PACKED)
494  , indicesAreAllocated_(true)
495  , indicesAreLocal_(true)
496  , indicesAreGlobal_(false)
497  , fillComplete_(false)
498  , indicesAreSorted_(true)
499  , noRedundancies_(true)
500  , haveLocalConstants_ (false)
501  , haveGlobalConstants_ (false)
502  , sortGhostsAssociatedWithEachProcessor_(true)
503  {
504  staticAssertions ();
505  setAllIndices (rowPointers, columnIndices);
506  checkInternalState ();
507  }
508 
509 
510  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
512  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
513  const Teuchos::RCP<const map_type>& colMap,
514  const Teuchos::ArrayRCP<size_t>& rowPointers,
515  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
516  const Teuchos::RCP<Teuchos::ParameterList>& params) :
517  dist_object_type (rowMap)
518  , rowMap_ (rowMap)
519  , colMap_ (colMap)
520  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
521  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
522  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
523  , nodeNumEntries_ (0)
524  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
525  , pftype_ (StaticProfile)
526  , numAllocForAllRows_ (0)
527  , storageStatus_ (Details::STORAGE_1D_PACKED)
528  , indicesAreAllocated_ (true)
529  , indicesAreLocal_ (true)
530  , indicesAreGlobal_ (false)
531  , fillComplete_ (false)
532  , indicesAreSorted_ (true)
533  , noRedundancies_ (true)
534  , haveLocalConstants_ (false)
535  , haveGlobalConstants_ (false)
536  , sortGhostsAssociatedWithEachProcessor_ (true)
537  {
538  staticAssertions ();
539  setAllIndices (rowPointers, columnIndices);
540  checkInternalState ();
541  }
542 
543 
544  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
546  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
547  const Teuchos::RCP<const map_type>& colMap,
548  const local_graph_type& k_local_graph_,
549  const Teuchos::RCP<Teuchos::ParameterList>& params)
550  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
551  , rowMap_ (rowMap)
552  , colMap_ (colMap)
553  , lclGraph_ (k_local_graph_)
554  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
555  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
556  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
557  , nodeNumEntries_ (0) // FIXME (mfh 17 Mar 2014) should get from lclGraph_ right now
558  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
559  , pftype_ (StaticProfile)
560  , numAllocForAllRows_ (0)
561  , storageStatus_ (Details::STORAGE_1D_PACKED)
562  , indicesAreAllocated_ (true)
563  , indicesAreLocal_ (true)
564  , indicesAreGlobal_ (false)
565  , fillComplete_ (false)
566  , indicesAreSorted_ (true)
567  , noRedundancies_ (true)
568  , haveLocalConstants_ (false)
569  , haveGlobalConstants_ (false)
570  , sortGhostsAssociatedWithEachProcessor_(true)
571  {
572  using Teuchos::arcp;
573  using Teuchos::ArrayRCP;
574  using Teuchos::ParameterList;
575  using Teuchos::parameterList;
576  using Teuchos::rcp;
577  typedef GlobalOrdinal GO;
578  typedef LocalOrdinal LO;
579 
580  staticAssertions();
581  const char tfecfFuncName[] = "CrsGraph(Map,Map,Kokkos::LocalStaticCrsGraph)";
582 
583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
584  colMap.is_null (), std::runtime_error,
585  ": The input column Map must be nonnull.");
586  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
587  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
588  std::runtime_error,
589  ": The input row Map and the input local graph need to have the same "
590  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
591  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
592  << " row(s).");
593  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
594  // rowMap_->getNodeNumElements(), but it doesn't have to.
595  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
596  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
597  // ": The input row Map and the input local graph need to have the same "
598  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
599  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
601  k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0, std::logic_error,
602  ": cannot have 1D data structures allocated.");
603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
604  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
605  ": cannot have 2D data structures allocated.");
606 
607  nodeNumAllocated_ = k_local_graph_.row_map (getNodeNumRows ());
608  nodeNumEntries_ = k_local_graph_.row_map (getNodeNumRows ());
609 
610  // NOTE (mfh 17 Mar 2014) We also need a version of this CrsGraph
611  // constructor that takes a domain and range Map, as well as a row
612  // and column Map. In that case, we must pass the domain and
613  // range Map into the following method.
614  setDomainRangeMaps (rowMap_, rowMap_);
615  makeImportExport ();
616 
617  k_lclInds1D_ = lclGraph_.entries;
618  k_rowPtrs_ = lclGraph_.row_map;
619 
620  typename local_graph_type::row_map_type d_ptrs = lclGraph_.row_map;
621  typename local_graph_type::entries_type d_inds = lclGraph_.entries;
622 
623  // Reset local properties
624  upperTriangular_ = true;
625  lowerTriangular_ = true;
626  nodeMaxNumRowEntries_ = 0;
627  nodeNumDiags_ = 0;
628 
629  // Compute triangular properties
630  const size_t numLocalRows = getNodeNumRows ();
631  for (size_t localRow = 0; localRow < numLocalRows; ++localRow) {
632  const GO globalRow = rowMap_->getGlobalElement (localRow);
633  const LO rlcid = colMap_->getLocalElement (globalRow);
634 
635  // It's entirely possible that the local matrix has no entries
636  // in the column corresponding to the current row. In that
637  // case, the column Map may not necessarily contain that GID.
638  // This is why we check whether rlcid is "invalid" (which means
639  // that globalRow is not a GID in the column Map).
640  if (rlcid != Teuchos::OrdinalTraits<LO>::invalid ()) {
641  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
642  rlcid + 1 >= static_cast<LO> (d_ptrs.dimension_0 ()),
643  std::runtime_error, ": The given row Map and/or column Map is/are "
644  "not compatible with the provided local graph.");
645  if (d_ptrs(rlcid) != d_ptrs(rlcid + 1)) {
646  const size_t smallestCol =
647  static_cast<size_t> (d_inds(d_ptrs(rlcid)));
648  const size_t largestCol =
649  static_cast<size_t> (d_inds(d_ptrs(rlcid + 1)-1));
650  if (smallestCol < localRow) {
651  upperTriangular_ = false;
652  }
653  if (localRow < largestCol) {
654  lowerTriangular_ = false;
655  }
656  for (size_t i = d_ptrs(rlcid); i < d_ptrs(rlcid + 1); ++i) {
657  if (d_inds(i) == rlcid) {
658  ++nodeNumDiags_;
659  }
660  }
661  }
662  nodeMaxNumRowEntries_ =
663  std::max (static_cast<size_t> (d_ptrs(rlcid + 1) - d_ptrs(rlcid)),
664  nodeMaxNumRowEntries_);
665  }
666  }
667 
668  haveLocalConstants_ = true;
669  computeGlobalConstants ();
670 
671  fillComplete_ = true;
672  checkInternalState ();
673  }
674 
675 
676  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
679  {}
680 
681 
682  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
683  Teuchos::RCP<const Teuchos::ParameterList>
686  {
687  using Teuchos::RCP;
688  using Teuchos::ParameterList;
689  using Teuchos::parameterList;
690 
691  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
692 
693  // Make a sublist for the Import.
694  RCP<ParameterList> importSublist = parameterList ("Import");
695 
696  // FIXME (mfh 02 Apr 2012) We should really have the Import and
697  // Export objects fill in these lists. However, we don't want to
698  // create an Import or Export unless we need them. For now, we
699  // know that the Import and Export just pass the list directly to
700  // their Distributor, so we can create a Distributor here
701  // (Distributor's constructor is a lightweight operation) and have
702  // it fill in the list.
703 
704  // Fill in Distributor default parameters by creating a
705  // Distributor and asking it to do the work.
706  Distributor distributor (rowMap_->getComm (), importSublist);
707  params->set ("Import", *importSublist, "How the Import performs communication.");
708 
709  // Make a sublist for the Export. For now, it's a clone of the
710  // Import sublist. It's not a shallow copy, though, since we
711  // might like the Import to do communication differently than the
712  // Export.
713  params->set ("Export", *importSublist, "How the Export performs communication.");
714 
715  return params;
716  }
717 
718 
719  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
720  void
722  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
723  {
724  Teuchos::RCP<const Teuchos::ParameterList> validParams =
725  getValidParameters ();
726  params->validateParametersAndSetDefaults (*validParams);
727  this->setMyParamList (params);
728  }
729 
730 
731  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
735  {
736  return rowMap_->getGlobalNumElements ();
737  }
738 
739 
740  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
744  {
745  const char tfecfFuncName[] = "getGlobalNumCols: ";
746  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
747  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
748  "The graph does not have a domain Map. You may not call this method in "
749  "that case.");
750  return getDomainMap ()->getGlobalNumElements ();
751  }
752 
753 
754  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
755  size_t
758  {
759  return rowMap_.is_null () ? static_cast<size_t> (0) :
760  rowMap_->getNodeNumElements ();
761  }
762 
763 
764  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
765  size_t
768  {
769  const char tfecfFuncName[] = "getNodeNumCols: ";
770  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
771  ! hasColMap (), std::runtime_error,
772  "The graph does not have a column Map. You may not call this method "
773  "unless the graph has a column Map. This requires either that a custom "
774  "column Map was given to the constructor, or that fillComplete() has "
775  "been called.");
776  return colMap_.is_null () ? static_cast<size_t> (0) :
777  colMap_->getNodeNumElements ();
778  }
779 
780 
781  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
782  size_t
785  {
786  return nodeNumDiags_;
787  }
788 
789 
790  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
794  {
795  return globalNumDiags_;
796  }
797 
798 
799  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
800  Teuchos::RCP<Node>
802  getNode () const
803  {
804  return rowMap_.is_null () ? Teuchos::null : rowMap_->getNode ();
805  }
806 
807 
808  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
809  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
811  getRowMap () const
812  {
813  return rowMap_;
814  }
815 
816 
817  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
818  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
820  getColMap () const
821  {
822  return colMap_;
823  }
824 
825 
826  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
827  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
830  {
831  return domainMap_;
832  }
833 
834 
835  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
836  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
838  getRangeMap () const
839  {
840  return rangeMap_;
841  }
842 
843  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
844  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> >
846  getImporter () const
847  {
848  return importer_;
849  }
850 
851 
852  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
853  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> >
855  getExporter () const
856  {
857  return exporter_;
858  }
859 
860 
861  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
862  bool
864  hasColMap () const
865  {
866  return ! colMap_.is_null ();
867  }
868 
869 
870  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
871  bool
874  {
875  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
876  // getNodeNumRows() is zero?
877 
878  const bool isOpt = indicesAreAllocated_ &&
879  k_numRowEntries_.dimension_0 () == 0 &&
880  getNodeNumRows () > 0;
881 
882 #ifdef HAVE_TPETRA_DEBUG
883  const char tfecfFuncName[] = "isStorageOptimized";
884  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
885  isOpt && getProfileType () == DynamicProfile, std::logic_error,
886  ": The matrix claims to have optimized storage, but getProfileType() "
887  "returns DynamicProfile. This should never happen. Please report this "
888  "bug to the Tpetra developers.");
889 #endif // HAVE_TPETRA_DEBUG
890 
891  return isOpt;
892  }
893 
894 
895  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
899  {
900  return pftype_;
901  }
902 
903 
904  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
908  {
909  return globalNumEntries_;
910  }
911 
912 
913  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
914  size_t
917  {
918  return nodeNumEntries_;
919  }
920 
921 
922  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
926  {
927  return globalMaxNumRowEntries_;
928  }
929 
930 
931  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
932  size_t
935  {
936  return nodeMaxNumRowEntries_;
937  }
938 
939 
940  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
941  bool
944  {
945  return fillComplete_;
946  }
947 
948 
949  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
950  bool
953  {
954  return ! fillComplete_;
955  }
956 
957 
958  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
959  bool
962  {
963  return upperTriangular_;
964  }
965 
966 
967  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
968  bool
971  {
972  return lowerTriangular_;
973  }
974 
975 
976  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
977  bool
980  {
981  return indicesAreLocal_;
982  }
983 
984 
985  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
986  bool
989  {
990  return indicesAreGlobal_;
991  }
992 
993 
994  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
995  size_t
998  {
999  return nodeNumAllocated_;
1000  }
1001 
1002 
1003  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1004  Teuchos::RCP<const Teuchos::Comm<int> >
1006  getComm () const
1007  {
1008  return rowMap_.is_null () ? Teuchos::null : rowMap_->getComm ();
1009  }
1010 
1011 
1012  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1013  GlobalOrdinal
1016  {
1017  return rowMap_->getIndexBase ();
1018  }
1019 
1020 
1021  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1022  bool
1024  indicesAreAllocated () const
1025  {
1026  return indicesAreAllocated_;
1027  }
1028 
1029 
1030  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1031  bool
1032  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
1033  isSorted () const
1034  {
1035  return indicesAreSorted_;
1036  }
1037 
1038 
1039  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1040  bool
1042  isMerged () const
1043  {
1044  return noRedundancies_;
1045  }
1046 
1047 
1048  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1049  void
1052  {
1053  // FIXME (mfh 07 May 2013) How do we know that the change
1054  // introduced a redundancy, or even that it invalidated the sorted
1055  // order of indices? CrsGraph has always made this conservative
1056  // guess. It could be a bit costly to check at insertion time,
1057  // though.
1058  indicesAreSorted_ = false;
1059  noRedundancies_ = false;
1060 
1061  // We've modified the graph, so we'll have to recompute local
1062  // constants like the number of diagonal entries on this process.
1063  haveLocalConstants_ = false;
1064  }
1065 
1066 
1067  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1068  void
1070  allocateIndices (const ELocalGlobal lg)
1071  {
1072  using Teuchos::arcp;
1073  using Teuchos::Array;
1074  using Teuchos::ArrayRCP;
1075  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1076  typedef typename local_graph_type::row_map_type::non_const_type
1077  non_const_row_map_type;
1078  typedef typename local_graph_type::entries_type::non_const_type
1079  lcl_col_inds_type;
1080  typedef Kokkos::View<GlobalOrdinal*,
1081  typename lcl_col_inds_type::array_layout,
1082  device_type> gbl_col_inds_type;
1083  const char tfecfFuncName[] = "allocateIndices: ";
1084 
1085  // This is a protected function, only callable by us. If it was
1086  // called incorrectly, it is our fault. That's why the tests
1087  // below throw std::logic_error instead of std::invalid_argument.
1088  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1089  isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1090  "The graph is locally indexed, but Tpetra code is calling this method "
1091  "with lg=GlobalIndices. Please report this bug to the Tpetra developers.");
1092  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1093  isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1094  "The graph is globally indexed, but Tpetra code is calling this method "
1095  "with lg=LocalIndices. Please report this bug to the Tpetra developers.");
1096  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1097  indicesAreAllocated (), std::logic_error, "The graph's indices are "
1098  "already allocated, but Tpetra code is calling allocateIndices) again. "
1099  "Please report this bug to the Tpetra developers.");
1100 
1101  const size_t numRows = getNodeNumRows ();
1102 
1103  if (getProfileType () == StaticProfile) {
1104  //
1105  // STATIC ALLOCATION PROFILE
1106  //
1107  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1108 
1109  if (k_numAllocPerRow_.dimension_0 () != 0) {
1110  // It's OK to throw std::invalid_argument here, because we
1111  // haven't incurred any side effects yet. Throwing that
1112  // exception (and not, say, std::logic_error) implies that the
1113  // instance can recover.
1114  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1115  k_numAllocPerRow_.dimension_0 () != numRows, std::invalid_argument,
1116  "k_numAllocPerRow_ is allocated (has length != 0), but its length = "
1117  << k_numAllocPerRow_.dimension_0 () << " != numRows = " << numRows
1118  << ".");
1119 
1120  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1121  // we want to compute here) lives on device. That's OK;
1122  // computeOffsetsFromCounts can handle this case.
1124 
1125  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1126  // doesn't attempt to check its input for "invalid" flag
1127  // values. For now, we omit that feature of the sequential
1128  // code disabled below.
1129  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1130  }
1131  else {
1132  // It's OK to throw std::invalid_argument here, because we
1133  // haven't incurred any side effects yet. Throwing that
1134  // exception (and not, say, std::logic_error) implies that the
1135  // instance can recover.
1136  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1137  (numAllocForAllRows_ == Teuchos::OrdinalTraits<size_t>::invalid (),
1138  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1139  "namely Teuchos::OrdinalTraits<size_t>::invalid() = " <<
1140  Teuchos::OrdinalTraits<size_t>::invalid () << ".");
1141 
1143  computeOffsetsFromConstantCount (k_rowPtrs, numAllocForAllRows_);
1144  }
1145 
1146  // "Commit" the resulting row offsets.
1147  k_rowPtrs_ = k_rowPtrs;
1148 
1149  // FIXME (mfh 05,11 Aug 2014) This assumes UVM, since k_rowPtrs_
1150  // is currently a device View. Should instead use a DualView.
1151  const size_type numInds = static_cast<size_type> (k_rowPtrs_(numRows));
1152  if (lg == LocalIndices) {
1153  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1154  }
1155  else {
1156  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1157  }
1158  nodeNumAllocated_ = numInds;
1159  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1160  }
1161  else {
1162  //
1163  // DYNAMIC ALLOCATION PROFILE
1164  //
1165 
1166  const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0);
1167 
1168  if (lg == LocalIndices) {
1169  lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1170  nodeNumAllocated_ = 0;
1171  for (size_t i = 0; i < numRows; ++i) {
1172  const size_t howMany = useNumAllocPerRow ?
1173  k_numAllocPerRow_(i) : numAllocForAllRows_;
1174  nodeNumAllocated_ += howMany;
1175  if (howMany > 0) {
1176  lclInds2D_[i].resize (howMany);
1177  }
1178  }
1179  }
1180  else { // allocate global indices
1181  gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1182  nodeNumAllocated_ = 0;
1183  for (size_t i = 0; i < numRows; ++i) {
1184  const size_t howMany = useNumAllocPerRow ?
1185  k_numAllocPerRow_(i) : numAllocForAllRows_;
1186  nodeNumAllocated_ += howMany;
1187  if (howMany > 0) {
1188  gblInds2D_[i].resize (howMany);
1189  }
1190  }
1191  }
1192  storageStatus_ = Details::STORAGE_2D;
1193  }
1194 
1195  indicesAreLocal_ = (lg == LocalIndices);
1196  indicesAreGlobal_ = (lg == GlobalIndices);
1197 
1198  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1199  using Kokkos::ViewAllocateWithoutInitializing;
1200  typedef decltype (k_numRowEntries_) row_ent_type;
1201  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1202 
1203  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1204  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1205  k_numRowEntries_ = numRowEnt; // "commit" our allocation
1206  }
1207 
1208  // done with these
1209  numAllocForAllRows_ = 0;
1210  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1211  indicesAreAllocated_ = true;
1212  checkInternalState ();
1213  }
1214 
1215 
1216  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1217  Teuchos::ArrayView<const LocalOrdinal>
1218  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
1219  getLocalView (const RowInfo rowinfo) const
1220  {
1221  using Kokkos::subview;
1222  typedef LocalOrdinal LO;
1223  typedef Kokkos::View<const LO*, execution_space,
1224  Kokkos::MemoryUnmanaged> row_view_type;
1225 
1226  if (rowinfo.allocSize == 0) {
1227  return Teuchos::ArrayView<const LO> ();
1228  }
1229  else { // nothing in the row to view
1230  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1231  const size_t start = rowinfo.offset1D;
1232  const size_t len = rowinfo.allocSize;
1233  const std::pair<size_t, size_t> rng (start, start + len);
1234  // mfh 23 Nov 2015: Don't just create a subview of
1235  // k_lclInds1D_ directly, because that first creates a
1236  // _managed_ subview, then returns an unmanaged version of
1237  // that. That touches the reference count, which costs
1238  // performance in a measurable way.
1239  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1240  const LO* const rowViewRaw = (len == 0) ? NULL : rowView.ptr_on_device ();
1241  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1242  }
1243  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1244  return lclInds2D_[rowinfo.localRow] ();
1245  }
1246  else {
1247  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1248  }
1249  }
1250  }
1251 
1252  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1253  LocalOrdinal
1255  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1256  LocalOrdinal& numEnt,
1257  const RowInfo& rowinfo) const
1258  {
1259  lclInds = NULL;
1260  numEnt = 0;
1261 
1262  if (rowinfo.allocSize != 0) {
1263  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1264 #ifdef HAVE_TPETRA_DEBUG
1265  if (rowinfo.offset1D + rowinfo.allocSize >
1266  static_cast<size_t> (k_lclInds1D_.dimension_0 ())) {
1267  return static_cast<LocalOrdinal> (-1);
1268  }
1269 #endif // HAVE_TPETRA_DEBUG
1270  lclInds = &k_lclInds1D_[rowinfo.offset1D];
1271  numEnt = rowinfo.allocSize;
1272  }
1273  else { // 2-D storage
1274 #ifdef HAVE_TPETRA_DEBUG
1275  if (rowinfo.localRow >= static_cast<size_t> (lclInds2D_.size ())) {
1276  return static_cast<LocalOrdinal> (-1);
1277  }
1278 #endif // HAVE_TPETRA_DEBUG
1279  // Use a const reference so we don't touch the ArrayRCP's ref
1280  // count, since ArrayRCP's ref count is not thread safe.
1281  const auto& curRow = lclInds2D_[rowinfo.localRow];
1282  if (! curRow.empty ()) {
1283  lclInds = curRow.getRawPtr ();
1284  numEnt = curRow.size ();
1285  }
1286  }
1287  }
1288  return static_cast<LocalOrdinal> (0);
1289  }
1290 
1291  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1292  Teuchos::ArrayView<LocalOrdinal>
1295  {
1296  using Kokkos::subview;
1297  typedef LocalOrdinal LO;
1298  typedef Kokkos::View<LO*, execution_space,
1299  Kokkos::MemoryUnmanaged> row_view_type;
1300 
1301  if (rowinfo.allocSize == 0) { // nothing in the row to view
1302  return Teuchos::ArrayView<LO> ();
1303  }
1304  else {
1305  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1306  const size_t start = rowinfo.offset1D;
1307  const size_t len = rowinfo.allocSize;
1308  const std::pair<size_t, size_t> rng (start, start + len);
1309  // mfh 23 Nov 2015: Don't just create a subview of
1310  // k_lclInds1D_ directly, because that first creates a
1311  // _managed_ subview, then returns an unmanaged version of
1312  // that. That touches the reference count, which costs
1313  // performance in a measurable way.
1314  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1315  LO* const rowViewRaw = (len == 0) ? NULL : rowView.ptr_on_device ();
1316  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1317  }
1318  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1319  return lclInds2D_[rowinfo.localRow] ();
1320  }
1321  else {
1322  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1323  }
1324  }
1325  }
1326 
1327 
1328  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1329  Kokkos::View<const LocalOrdinal*,
1331  Kokkos::MemoryUnmanaged>
1333  getLocalKokkosRowView (const RowInfo& rowinfo) const
1334  {
1335  typedef LocalOrdinal LO;
1336  typedef Kokkos::View<const LO*, execution_space,
1337  Kokkos::MemoryUnmanaged> row_view_type;
1338 
1339  if (rowinfo.allocSize == 0) {
1340  return row_view_type ();
1341  }
1342  else { // nothing in the row to view
1343  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1344  const size_t start = rowinfo.offset1D;
1345  const size_t len = rowinfo.allocSize;
1346  const std::pair<size_t, size_t> rng (start, start + len);
1347  // mfh 23 Nov 2015: Don't just create a subview of
1348  // k_lclInds1D_ directly, because that first creates a
1349  // _managed_ subview, then returns an unmanaged version of
1350  // that. That touches the reference count, which costs
1351  // performance in a measurable way.
1352  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1353  }
1354  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1355  // Use a reference, to avoid doing the array lookup twice.
1356  //
1357  // NOTE (mfh 18 Jul 2016) Don't create a Teuchos::ArrayView,
1358  // because this is not thread safe in a debug build.
1359  Teuchos::Array<LocalOrdinal>& lclInds = lclInds2D_[rowinfo.localRow];
1360  const LO* rowPtr = lclInds.getRawPtr ();
1361  const auto rowSize = lclInds.size ();
1362  return row_view_type (rowPtr, rowSize);
1363  }
1364  else {
1365  return row_view_type (); // nothing in the row to view
1366  }
1367  }
1368  }
1369 
1370 
1371  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1372  Kokkos::View<LocalOrdinal*,
1373  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
1374  Kokkos::MemoryUnmanaged>
1375  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
1376  getLocalKokkosRowViewNonConst (const RowInfo& rowinfo)
1377  {
1378  typedef LocalOrdinal LO;
1379  typedef Kokkos::View<LO*, execution_space,
1380  Kokkos::MemoryUnmanaged> row_view_type;
1381 
1382  if (rowinfo.allocSize == 0) {
1383  return row_view_type ();
1384  }
1385  else { // nothing in the row to view
1386  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1387  const size_t start = rowinfo.offset1D;
1388  const size_t len = rowinfo.allocSize;
1389  const std::pair<size_t, size_t> rng (start, start + len);
1390  // mfh 23 Nov 2015: Don't just create a subview of
1391  // k_lclInds1D_ directly, because that first creates a
1392  // _managed_ subview, then returns an unmanaged version of
1393  // that. That touches the reference count, which costs
1394  // performance in a measurable way.
1395  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1396  }
1397  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1398  Teuchos::ArrayView<LO> rowAv = lclInds2D_[rowinfo.localRow] ();
1399  return row_view_type (rowAv.getRawPtr (), rowAv.size ());
1400  }
1401  else {
1402  return row_view_type (); // nothing in the row to view
1403  }
1404  }
1405  }
1406 
1407 
1408  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1409  Kokkos::View<const GlobalOrdinal*,
1410  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
1411  Kokkos::MemoryUnmanaged>
1412  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
1413  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1414  {
1415  typedef GlobalOrdinal GO;
1416  typedef Kokkos::View<const GO*, execution_space,
1417  Kokkos::MemoryUnmanaged> row_view_type;
1418 
1419  if (rowinfo.allocSize == 0) {
1420  return row_view_type ();
1421  }
1422  else { // nothing in the row to view
1423  if (this->k_gblInds1D_.dimension_0 () != 0) { // 1-D storage
1424  const size_t start = rowinfo.offset1D;
1425  const size_t len = rowinfo.allocSize;
1426  const std::pair<size_t, size_t> rng (start, start + len);
1427  // mfh 23 Nov 2015: Don't just create a subview of
1428  // k_gblInds1D_ directly, because that first creates a
1429  // _managed_ subview, then returns an unmanaged version of
1430  // that. That touches the reference count, which costs
1431  // performance in a measurable way.
1432  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1433  }
1434  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1435  Teuchos::ArrayView<const GO> rowAv = this->gblInds2D_[rowinfo.localRow] ();
1436  // FIXME (mfh 26 Nov 2015) This assumes UVM, because it
1437  // assumes that host code can access device memory through
1438  // Teuchos::ArrayView.
1439  return row_view_type (rowAv.getRawPtr (), rowAv.size ());
1440  }
1441  else {
1442  return row_view_type (); // nothing in the row to view
1443  }
1444  }
1445  }
1446 
1447 
1448  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1449  Teuchos::ArrayView<const GlobalOrdinal>
1450  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
1451  getGlobalView (const RowInfo rowinfo) const
1452  {
1453  Teuchos::ArrayView<const GlobalOrdinal> view;
1454  if (rowinfo.allocSize > 0) {
1455  if (k_gblInds1D_.dimension_0 () != 0) {
1456  auto rng = std::make_pair (rowinfo.offset1D,
1457  rowinfo.offset1D + rowinfo.allocSize);
1458  // mfh 23 Nov 2015: Don't just create a subview of
1459  // k_gblInds1D_ directly, because that first creates a
1460  // _managed_ subview, then returns an unmanaged version of
1461  // that. That touches the reference count, which costs
1462  // performance in a measurable way.
1463  Kokkos::View<const GlobalOrdinal*, execution_space,
1464  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1465  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1466  }
1467  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1468  view = gblInds2D_[rowinfo.localRow] ();
1469  }
1470  }
1471  return view;
1472  }
1473 
1474 
1475  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1476  LocalOrdinal
1478  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1479  LocalOrdinal& numEnt,
1480  const RowInfo& rowinfo) const
1481  {
1482  gblInds = NULL;
1483  numEnt = 0;
1484 
1485  if (rowinfo.allocSize != 0) {
1486  if (k_gblInds1D_.dimension_0 () != 0) { // 1-D storage
1487 #ifdef HAVE_TPETRA_DEBUG
1488  if (rowinfo.offset1D + rowinfo.allocSize >
1489  static_cast<size_t> (k_gblInds1D_.dimension_0 ())) {
1490  return static_cast<LocalOrdinal> (-1);
1491  }
1492 #endif // HAVE_TPETRA_DEBUG
1493  gblInds = &k_gblInds1D_[rowinfo.offset1D];
1494  numEnt = rowinfo.allocSize;
1495  }
1496  else {
1497 #ifdef HAVE_TPETRA_DEBUG
1498  if (rowinfo.localRow >= static_cast<size_t> (gblInds2D_.size ())) {
1499  return static_cast<LocalOrdinal> (-1);
1500  }
1501 #endif // HAVE_TPETRA_DEBUG
1502  const auto& curRow = gblInds2D_[rowinfo.localRow];
1503  if (! curRow.empty ()) {
1504  gblInds = curRow.getRawPtr ();
1505  numEnt = curRow.size ();
1506  }
1507  }
1508  }
1509  return static_cast<LocalOrdinal> (0);
1510  }
1511 
1512 
1513  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1514  Teuchos::ArrayView<GlobalOrdinal>
1517  {
1518  Teuchos::ArrayView<GlobalOrdinal> view;
1519  if (rowinfo.allocSize > 0) {
1520  if (k_gblInds1D_.dimension_0 () != 0) {
1521  auto rng = std::make_pair (rowinfo.offset1D,
1522  rowinfo.offset1D + rowinfo.allocSize);
1523  // mfh 23 Nov 2015: Don't just create a subview of
1524  // k_gblInds1D_ directly, because that first creates a
1525  // _managed_ subview, then returns an unmanaged version of
1526  // that. That touches the reference count, which costs
1527  // performance in a measurable way.
1528  Kokkos::View<GlobalOrdinal*, execution_space,
1529  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1530  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1531  }
1532  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1533  view = gblInds2D_[rowinfo.localRow] ();
1534  }
1535  }
1536  return view;
1537  }
1538 
1539 
1540  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1541  RowInfo
1543  getRowInfo (const LocalOrdinal myRow) const
1544  {
1545 #ifdef HAVE_TPETRA_DEBUG
1546  const char tfecfFuncName[] = "getRowInfo: ";
1547  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1548  ! hasRowInfo (), std::logic_error,
1549  "Late catch! Graph does not have row info anymore. "
1550  "Error should have been caught earlier. "
1551  "Please report this bug to the Tpetra developers.");
1552 #endif // HAVE_TPETRA_DEBUG
1553 
1554  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1555  RowInfo ret;
1556  if (! hasRowInfo () || rowMap_.is_null () ||
1557  ! rowMap_->isNodeLocalElement (myRow)) {
1558  ret.localRow = STINV;
1559  ret.allocSize = 0;
1560  ret.numEntries = 0;
1561  ret.offset1D = STINV;
1562  return ret;
1563  }
1564 
1565  ret.localRow = static_cast<size_t> (myRow);
1566  if (nodeNumAllocated_ != 0 && nodeNumAllocated_ != STINV) {
1567  // graph data structures have the info that we need
1568  //
1569  // if static graph, offsets tell us the allocation size
1570  if (getProfileType() == StaticProfile) {
1571  ret.offset1D = k_rowPtrs_(myRow);
1572  ret.allocSize = k_rowPtrs_(myRow+1) - k_rowPtrs_(myRow);
1573  if (k_numRowEntries_.dimension_0 () == 0) {
1574  ret.numEntries = ret.allocSize;
1575  } else {
1576  ret.numEntries = k_numRowEntries_(myRow);
1577  }
1578  }
1579  else {
1580  ret.offset1D = STINV;
1581  if (isLocallyIndexed ()) {
1582  ret.allocSize = lclInds2D_[myRow].size ();
1583  }
1584  else {
1585  ret.allocSize = gblInds2D_[myRow].size ();
1586  }
1587  ret.numEntries = k_numRowEntries_(myRow);
1588  }
1589  }
1590  else if (nodeNumAllocated_ == 0) {
1591  // have performed allocation, but the graph has no allocation or entries
1592  ret.allocSize = 0;
1593  ret.numEntries = 0;
1594  ret.offset1D = STINV;
1595  }
1596  else if (! indicesAreAllocated ()) {
1597  // haven't performed allocation yet; probably won't hit this code
1598  //
1599  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1600  // allocate, rather than doing lazy allocation at first insert.
1601  // This will make k_numAllocPerRow_ obsolete.
1602  const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0);
1603  if (useNumAllocPerRow) {
1604  ret.allocSize = k_numAllocPerRow_(myRow); // this is a host View
1605  } else {
1606  ret.allocSize = numAllocForAllRows_;
1607  }
1608  ret.numEntries = 0;
1609  ret.offset1D = STINV;
1610  }
1611  else {
1612  // don't know how we ended up here...
1613  TEUCHOS_TEST_FOR_EXCEPT(true);
1614  }
1615  return ret;
1616  }
1617 
1618 
1619  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1620  RowInfo
1622  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1623  {
1624  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1625  RowInfo ret;
1626  if (! this->hasRowInfo () || this->rowMap_.is_null ()) {
1627  ret.localRow = STINV;
1628  ret.allocSize = 0;
1629  ret.numEntries = 0;
1630  ret.offset1D = STINV;
1631  return ret;
1632  }
1633  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1634  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1635  ret.localRow = STINV;
1636  ret.allocSize = 0;
1637  ret.numEntries = 0;
1638  ret.offset1D = STINV;
1639  return ret;
1640  }
1641 
1642  ret.localRow = static_cast<size_t> (myRow);
1643  if (nodeNumAllocated_ != 0 && nodeNumAllocated_ != STINV) {
1644  // graph data structures have the info that we need
1645  //
1646  // if static graph, offsets tell us the allocation size
1647  if (getProfileType() == StaticProfile) {
1648  ret.offset1D = k_rowPtrs_(myRow);
1649  ret.allocSize = k_rowPtrs_(myRow+1) - k_rowPtrs_(myRow);
1650  if (k_numRowEntries_.dimension_0 () == 0) {
1651  ret.numEntries = ret.allocSize;
1652  } else {
1653  ret.numEntries = k_numRowEntries_(myRow);
1654  }
1655  }
1656  else {
1657  ret.offset1D = STINV;
1658  if (isLocallyIndexed ()) {
1659  ret.allocSize = lclInds2D_[myRow].size ();
1660  }
1661  else {
1662  ret.allocSize = gblInds2D_[myRow].size ();
1663  }
1664  ret.numEntries = k_numRowEntries_(myRow);
1665  }
1666  }
1667  else if (nodeNumAllocated_ == 0) {
1668  // have performed allocation, but the graph has no allocation or entries
1669  ret.allocSize = 0;
1670  ret.numEntries = 0;
1671  ret.offset1D = STINV;
1672  }
1673  else if (! indicesAreAllocated ()) {
1674  // haven't performed allocation yet; probably won't hit this code
1675  //
1676  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1677  // allocate, rather than doing lazy allocation at first insert.
1678  // This will make k_numAllocPerRow_ obsolete.
1679  const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0);
1680  if (useNumAllocPerRow) {
1681  ret.allocSize = k_numAllocPerRow_(myRow); // this is a host View
1682  } else {
1683  ret.allocSize = numAllocForAllRows_;
1684  }
1685  ret.numEntries = 0;
1686  ret.offset1D = STINV;
1687  }
1688  else {
1689  // don't know how we ended up here...
1690  TEUCHOS_TEST_FOR_EXCEPT(true);
1691  }
1692  return ret;
1693  }
1694 
1695 
1696  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1697  void
1699  staticAssertions () const
1700  {
1701  using Teuchos::OrdinalTraits;
1702  typedef LocalOrdinal LO;
1703  typedef GlobalOrdinal GO;
1704  typedef global_size_t GST;
1705 
1706  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1707  // This is so that we can store local indices in the memory
1708  // formerly occupied by global indices.
1709  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1710  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1711  // Assumption: max(size_t) >= max(LocalOrdinal)
1712  // This is so that we can represent any LocalOrdinal as a size_t.
1713  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1714  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1715  static_assert (sizeof(GST) >= sizeof(size_t),
1716  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1717 
1718  // FIXME (mfh 30 Sep 2015) We're not using
1719  // Teuchos::CompileTimeAssert any more. Can we do these checks
1720  // with static_assert?
1721 
1722  // can't call max() with CompileTimeAssert, because it isn't a
1723  // constant expression; will need to make this a runtime check
1724  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1725  "given template arguments: size assumptions are not valid.";
1726  TEUCHOS_TEST_FOR_EXCEPTION(
1727  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1728  std::runtime_error, msg);
1729  TEUCHOS_TEST_FOR_EXCEPTION(
1730  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1731  std::runtime_error, msg);
1732  TEUCHOS_TEST_FOR_EXCEPTION(
1733  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1734  std::runtime_error, msg);
1735  TEUCHOS_TEST_FOR_EXCEPTION(
1736  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1737  std::runtime_error, msg);
1738  }
1739 
1740 
1741  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1742  size_t
1743  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
1744  insertIndices (const RowInfo& rowinfo,
1745  const SLocalGlobalViews &newInds,
1746  const ELocalGlobal lg,
1747  const ELocalGlobal I)
1748  {
1749  using Teuchos::ArrayView;
1750 #ifdef HAVE_TPETRA_DEBUG
1751  TEUCHOS_TEST_FOR_EXCEPTION(
1752  lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1753  "Tpetra::CrsGraph::insertIndices: lg must be either GlobalIndices or "
1754  "LocalIndices.");
1755 #endif // HAVE_TPETRA_DEBUG
1756  size_t numNewInds = 0;
1757  if (lg == GlobalIndices) { // input indices are global
1758  ArrayView<const GlobalOrdinal> new_ginds = newInds.ginds;
1759  numNewInds = new_ginds.size();
1760  if (I == GlobalIndices) { // store global indices
1761  ArrayView<GlobalOrdinal> gind_view = getGlobalViewNonConst(rowinfo);
1762  std::copy(new_ginds.begin(), new_ginds.end(), gind_view.begin()+rowinfo.numEntries);
1763  }
1764  else if (I == LocalIndices) { // store local indices
1765  ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo);
1766  typename ArrayView<const GlobalOrdinal>::iterator in = new_ginds.begin();
1767  const typename ArrayView<const GlobalOrdinal>::iterator stop = new_ginds.end();
1768  typename ArrayView<LocalOrdinal>::iterator out = lind_view.begin()+rowinfo.numEntries;
1769  while (in != stop) {
1770  *out++ = colMap_->getLocalElement (*in++);
1771  }
1772  }
1773  }
1774  else if (lg == LocalIndices) { // input indices are local
1775  ArrayView<const LocalOrdinal> new_linds = newInds.linds;
1776  numNewInds = new_linds.size();
1777  if (I == LocalIndices) { // store local indices
1778  ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo);
1779  std::copy(new_linds.begin(), new_linds.end(), lind_view.begin()+rowinfo.numEntries);
1780  }
1781  else if (I == GlobalIndices) {
1782  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Tpetra::CrsGraph::"
1783  "insertIndices: the case where the input indices are local and the "
1784  "indices to write are global (lg=LocalIndices, I=GlobalIndices) is "
1785  "not implemented, because it does not make sense." << std::endl <<
1786  "If you have correct local column indices, that means the graph has "
1787  "a column Map. In that case, you should be storing local indices.");
1788  }
1789  }
1790 
1791  k_numRowEntries_(rowinfo.localRow) += numNewInds;
1792  nodeNumEntries_ += numNewInds;
1793  setLocallyModified ();
1794  return numNewInds;
1795  }
1796 
1797 
1798  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1799  void
1801  insertGlobalIndicesImpl (const LocalOrdinal myRow,
1802  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
1803  {
1804  using Kokkos::subview;
1805  typedef Kokkos::pair<size_t, size_t> range_type;
1806  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1807 
1808  RowInfo rowInfo = getRowInfo(myRow);
1809  size_t numNewInds = indices.size();
1810  size_t newNumEntries = rowInfo.numEntries + numNewInds;
1811 
1812  if (newNumEntries > rowInfo.allocSize) {
1813  if (getProfileType () == StaticProfile) {
1814 
1815  // Count how many new indices are just duplicates of the old
1816  // ones. If enough are duplicates, then we're safe.
1817  //
1818  // TODO (09 Sep 2016) CrsGraph never supported this use case
1819  // before. Thus, we're justified in not optimizing it. We
1820  // could use binary search if the graph's current entries are
1821  // sorted, for example, but we just use linear search for now.
1822 
1823  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1824  (rowInfo.numEntries > rowInfo.allocSize, std::logic_error,
1825  "For local row " << myRow << ", rowInfo.numEntries = "
1826  << rowInfo.numEntries << " > rowInfo.allocSize = "
1827  << rowInfo.allocSize
1828  << ". Please report this bug to the Tpetra developers.");
1829 
1830  size_t dupCount = 0;
1831  if (k_gblInds1D_.dimension_0 () != 0) {
1832  const size_t curOffset = rowInfo.offset1D;
1833  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1834  (static_cast<size_t> (k_gblInds1D_.dimension_0 ()) < curOffset,
1835  std::logic_error, "k_gblInds1D_.dimension_0() = " <<
1836  k_gblInds1D_.dimension_0 () << " < offset1D = " << curOffset
1837  << ". Please report this bug to the Tpetra developers.");
1838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1839  (static_cast<size_t> (k_gblInds1D_.dimension_0 ()) <
1840  curOffset + rowInfo.numEntries,
1841  std::logic_error, "k_gblInds1D_.dimension_0() = " <<
1842  k_gblInds1D_.dimension_0 () << " < offset1D (= " << curOffset <<
1843  ") + rowInfo.numEntries (= " << rowInfo.numEntries << "). Please "
1844  "report this bug to the Tpetra developers.");
1845  const Kokkos::pair<size_t, size_t>
1846  range (curOffset, curOffset + rowInfo.numEntries);
1847 
1848  auto gblIndsCur = subview (k_gblInds1D_, range);
1849  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1850  (static_cast<size_t> (gblIndsCur.dimension_0 ()) != rowInfo.numEntries,
1851  std::logic_error, "gblIndsCur.dimension_0() = " <<
1852  gblIndsCur.dimension_0 () << " != rowInfo.numEntries = " <<
1853  rowInfo.numEntries << ". Please report this bug to the Tpetra "
1854  "developers.");
1855 
1856  const size_t numInput = static_cast<size_t> (indices.size ());
1857  for (size_t k_new = 0; k_new < numInput; ++k_new) {
1858  const GlobalOrdinal gblIndToInsert = indices[k_new];
1859  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
1860  if (gblIndsCur[k_old] == gblIndToInsert) {
1861  // Input could itself have duplicates. Input is
1862  // const, so we can't remove duplicates. That's OK
1863  // here, though, because dupCount just refers to the
1864  // number of input entries that actually need to be
1865  // inserted.
1866  ++dupCount;
1867  }
1868  }
1869  }
1870  }
1871  else {
1872  Teuchos::ArrayView<GlobalOrdinal> gblInds = (gblInds2D_[myRow]) ();
1873  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1874  (rowInfo.allocSize != static_cast<size_t> (gblInds.size ()),
1875  std::logic_error, "rowInfo.allocSize = " << rowInfo.allocSize
1876  << " != gblInds.size() = " << gblInds.size ()
1877  << ". Please report this bug to the Tpetra developers.");
1878  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1879  (rowInfo.numEntries > static_cast<size_t> (gblInds.size ()),
1880  std::logic_error, "rowInfo.numEntries = " << rowInfo.numEntries
1881  << " > gblInds.size() = " << gblInds.size ()
1882  << ". Please report this bug to the Tpetra developers.");
1883  auto gblIndsCur = gblInds (0, rowInfo.numEntries);
1884 
1885  const size_t numInput = static_cast<size_t> (indices.size ());
1886  for (size_t k_new = 0; k_new < numInput; ++k_new) {
1887  const GlobalOrdinal gblIndToInsert = indices[k_new];
1888  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
1889  if (gblIndsCur[k_old] == gblIndToInsert) {
1890  // Input could itself have duplicates. Input is
1891  // const, so we can't remove duplicates. That's OK
1892  // here, though, because dupCount just refers to the
1893  // number of input entries that actually need to be
1894  // inserted.
1895  ++dupCount;
1896  }
1897  }
1898  }
1899  }
1900 
1901  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1902  (static_cast<size_t> (indices.size ()) < dupCount, std::logic_error,
1903  "indices.size() = " << indices.size () << " < dupCount = " <<
1904  dupCount << ". Please report this bug to the Tpetra developers.");
1905  const size_t numNewToInsert =
1906  static_cast<size_t> (indices.size ()) - dupCount;
1907 
1908  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1909  (rowInfo.numEntries + numNewToInsert > rowInfo.allocSize,
1910  std::runtime_error, "For local row " << myRow << " on Process " <<
1911  this->getComm ()->getRank () << ", even after excluding " << dupCount
1912  << " duplicate(s) in input, the new number of entries " <<
1913  (rowInfo.numEntries + numNewToInsert) << " still exceeds this row's "
1914  "static allocation size " << rowInfo.allocSize << ". You must "
1915  "either fix the upper bound on the number of entries in this row, "
1916  "or switch from StaticProfile to DynamicProfile.");
1917 
1918  if (k_gblInds1D_.dimension_0 () != 0) {
1919  const size_t curOffset = rowInfo.offset1D;
1920  auto gblIndsCur =
1921  subview (k_gblInds1D_, range_type (curOffset,
1922  curOffset + rowInfo.numEntries));
1923  auto gblIndsNew =
1924  subview (k_gblInds1D_, range_type (curOffset + rowInfo.numEntries,
1925  curOffset + rowInfo.allocSize));
1926 
1927  size_t curPos = 0;
1928  for (size_t k_new = 0; k_new < numNewInds; ++k_new) {
1929  const GlobalOrdinal gblIndToInsert = indices[k_new];
1930 
1931  bool isAlreadyInOld = false;
1932  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
1933  if (gblIndsCur[k_old] == gblIndToInsert) {
1934  isAlreadyInOld = true;
1935  break;
1936  }
1937  }
1938  if (! isAlreadyInOld) {
1939  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1940  (curPos >= numNewToInsert, std::logic_error, "curPos = " <<
1941  curPos << " >= numNewToInsert = " << numNewToInsert << ". "
1942  "Please report this bug to the Tpetra developers.");
1943  gblIndsNew[curPos] = gblIndToInsert;
1944  ++curPos;
1945  }
1946  } // for each input column index
1947  }
1948  else {
1949  Teuchos::ArrayView<GlobalOrdinal> gblInds = (gblInds2D_[myRow]) ();
1950  // Teuchos::ArrayView::operator() takes (offset, size).
1951  auto gblIndsCur = gblInds (0, rowInfo.numEntries);
1952  auto gblIndsNew = gblInds (rowInfo.numEntries,
1953  rowInfo.allocSize - rowInfo.numEntries);
1954 
1955  size_t curPos = 0;
1956  for (size_t k_new = 0; k_new < numNewInds; ++k_new) {
1957  const GlobalOrdinal gblIndToInsert = indices[k_new];
1958 
1959  bool isAlreadyInOld = false;
1960  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
1961  if (gblIndsCur[k_old] == gblIndToInsert) {
1962  isAlreadyInOld = true;
1963  break;
1964  }
1965  }
1966  if (! isAlreadyInOld) {
1967  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1968  (curPos >= numNewToInsert, std::logic_error, "curPos = " <<
1969  curPos << " >= numNewToInsert = " << numNewToInsert << ". "
1970  "Please report this bug to the Tpetra developers.");
1971  gblIndsNew[curPos] = gblIndToInsert;
1972  ++curPos;
1973  }
1974  } // for each input column index
1975  }
1976 
1977  k_numRowEntries_(myRow) = rowInfo.numEntries + numNewToInsert;
1978  nodeNumEntries_ += numNewToInsert;
1979  setLocallyModified ();
1980 
1981 #ifdef HAVE_TPETRA_DEBUG
1982  newNumEntries = rowInfo.numEntries + numNewToInsert;
1983  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
1984  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1985  (chkNewNumEntries != newNumEntries, std::logic_error,
1986  "After inserting new entries, getNumEntriesInLocalRow(" << myRow <<
1987  ") = " << chkNewNumEntries << " != newNumEntries = " << newNumEntries
1988  << ". Please report this bug to the Tpetra developers.");
1989 #endif // HAVE_TPETRA_DEBUG
1990 
1991  return; // all done!
1992  }
1993  else {
1994  // update allocation, doubling size to reduce # reallocations
1995  size_t newAllocSize = 2*rowInfo.allocSize;
1996  if (newAllocSize < newNumEntries) {
1997  newAllocSize = newNumEntries;
1998  }
1999  gblInds2D_[myRow].resize(newAllocSize);
2000  nodeNumAllocated_ += (newAllocSize - rowInfo.allocSize);
2001  }
2002  }
2003 
2004  // Copy new indices at end of global index array
2005  if (k_gblInds1D_.dimension_0 () != 0) {
2006  const size_t numIndsToCopy = static_cast<size_t> (indices.size ());
2007  const size_t offset = rowInfo.offset1D + rowInfo.numEntries;
2008  for (size_t k = 0; k < numIndsToCopy; ++k) {
2009  k_gblInds1D_[offset + k] = indices[k];
2010  }
2011  }
2012  else {
2013  std::copy(indices.begin(), indices.end(),
2014  gblInds2D_[myRow].begin()+rowInfo.numEntries);
2015  }
2016 
2017  k_numRowEntries_(myRow) += numNewInds;
2018  nodeNumEntries_ += numNewInds;
2019  setLocallyModified ();
2020 
2021 #ifdef HAVE_TPETRA_DEBUG
2022  {
2023  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
2024  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2025  chkNewNumEntries != newNumEntries, std::logic_error,
2026  ": Internal logic error. Please contact Tpetra team.");
2027  }
2028 #endif
2029  }
2030 
2031 
2032  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2033  void
2034  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
2035  insertLocalIndicesImpl (const LocalOrdinal myRow,
2036  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2037  {
2038  using Kokkos::MemoryUnmanaged;
2039  using Kokkos::subview;
2040  using Kokkos::View;
2041  typedef LocalOrdinal LO;
2042  const char* tfecfFuncName ("insertLocallIndicesImpl");
2043 
2044  const RowInfo rowInfo = this->getRowInfo(myRow);
2045  const size_t numNewInds = indices.size();
2046  const size_t newNumEntries = rowInfo.numEntries + numNewInds;
2047  if (newNumEntries > rowInfo.allocSize) {
2048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2049  getProfileType() == StaticProfile, std::runtime_error,
2050  ": new indices exceed statically allocated graph structure.");
2051 
2052  // update allocation, doubling size to reduce number of reallocations
2053  size_t newAllocSize = 2*rowInfo.allocSize;
2054  if (newAllocSize < newNumEntries)
2055  newAllocSize = newNumEntries;
2056  lclInds2D_[myRow].resize(newAllocSize);
2057  nodeNumAllocated_ += (newAllocSize - rowInfo.allocSize);
2058  }
2059 
2060  // Store the new indices at the end of row myRow.
2061  if (k_lclInds1D_.dimension_0 () != 0) {
2062  typedef View<const LO*, execution_space, MemoryUnmanaged> input_view_type;
2063  typedef View<LO*, execution_space, MemoryUnmanaged> row_view_type;
2064 
2065  input_view_type inputInds (indices.getRawPtr (), indices.size ());
2066  const size_t start = rowInfo.offset1D + rowInfo.numEntries; // end of row
2067  const std::pair<size_t, size_t> rng (start, start + newNumEntries);
2068  // mfh 23 Nov 2015: Don't just create a subview of k_lclInds1D_
2069  // directly, because that first creates a _managed_ subview,
2070  // then returns an unmanaged version of that. That touches the
2071  // reference count, which costs performance in a measurable way.
2072  row_view_type myInds = subview (row_view_type (k_lclInds1D_), rng);
2073  Kokkos::deep_copy (myInds, inputInds);
2074  }
2075  else {
2076  std::copy (indices.begin (), indices.end (),
2077  lclInds2D_[myRow].begin () + rowInfo.numEntries);
2078  }
2079 
2080  k_numRowEntries_(myRow) += numNewInds;
2081  nodeNumEntries_ += numNewInds;
2082  setLocallyModified ();
2083 #ifdef HAVE_TPETRA_DEBUG
2084  {
2085  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
2086  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2087  chkNewNumEntries != newNumEntries, std::logic_error,
2088  ": Internal logic error. Please contact Tpetra team.");
2089  }
2090 #endif
2091  }
2092 
2093 
2094  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2095  void
2096  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
2097  sortRowIndices (const RowInfo rowinfo)
2098  {
2099  if (rowinfo.numEntries > 0) {
2100  Teuchos::ArrayView<LocalOrdinal> inds_view =
2101  this->getLocalViewNonConst (rowinfo);
2102  std::sort (inds_view.begin (), inds_view.begin () + rowinfo.numEntries);
2103  }
2104  }
2105 
2106 
2107  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2108  void
2111  {
2112  using Teuchos::ArrayView;
2113  const char tfecfFuncName[] = "mergeRowIndices: ";
2114  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2115  isStorageOptimized (), std::logic_error, "The graph is already storage "
2116  "optimized, so we shouldn't be merging any indices. "
2117  "Please report this bug to the Tpetra developers.");
2118 
2119  ArrayView<LocalOrdinal> inds_view = this->getLocalViewNonConst (rowinfo);
2120  typename ArrayView<LocalOrdinal>::iterator beg, end, newend;
2121  beg = inds_view.begin();
2122  end = inds_view.begin() + rowinfo.numEntries;
2123  newend = std::unique(beg,end);
2124  const size_t mergedEntries = newend - beg;
2125 #ifdef HAVE_TPETRA_DEBUG
2126  // merge should not have eliminated any entries; if so, the
2127  // assignment below will destroy the packed structure
2128  TEUCHOS_TEST_FOR_EXCEPT( isStorageOptimized () && mergedEntries != rowinfo.numEntries );
2129 #endif // HAVE_TPETRA_DEBUG
2130 
2131  k_numRowEntries_(rowinfo.localRow) = mergedEntries;
2132  nodeNumEntries_ -= (rowinfo.numEntries - mergedEntries);
2133  }
2134 
2135 
2136  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2137  void
2139  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2140  const Teuchos::RCP<const map_type>& rangeMap)
2141  {
2142  // simple pointer comparison for equality
2143  if (domainMap_ != domainMap) {
2144  domainMap_ = domainMap;
2145  importer_ = Teuchos::null;
2146  }
2147  if (rangeMap_ != rangeMap) {
2148  rangeMap_ = rangeMap;
2149  exporter_ = Teuchos::null;
2150  }
2151  }
2152 
2153 
2154  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2155  void
2158  {
2159  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2160  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2161  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2162  haveGlobalConstants_ = false;
2163  }
2164 
2165 
2166  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2167  void
2168  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
2169  checkInternalState () const
2170  {
2171 #ifdef HAVE_TPETRA_DEBUG
2172  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2173  const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid ();
2174  const char err[] = "Tpetra::CrsGraph::checkInternalState: Likely internal "
2175  "logic error. Please contact Tpetra team.";
2176  // check the internal state of this data structure
2177  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2178  // always remains in a valid state
2179  // the graph should have been allocated with a row map
2180  TEUCHOS_TEST_FOR_EXCEPTION( rowMap_ == Teuchos::null, std::logic_error, err );
2181  // am either complete or active
2182  TEUCHOS_TEST_FOR_EXCEPTION( isFillActive() == isFillComplete(), std::logic_error, err );
2183  // if the graph has been fill completed, then all maps should be present
2184  TEUCHOS_TEST_FOR_EXCEPTION( isFillComplete() == true && (colMap_ == Teuchos::null || rangeMap_ == Teuchos::null || domainMap_ == Teuchos::null), std::logic_error, err );
2185  // if storage has been optimized, then indices should have been allocated (even if trivially so)
2186  TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() == true && indicesAreAllocated() == false, std::logic_error, err );
2187  // if storage has been optimized, then number of allocated is now the number of entries
2188  TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() == true && getNodeAllocationSize() != getNodeNumEntries(), std::logic_error, err );
2189  // if graph doesn't have the global constants, then they should all be marked as invalid
2190  TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == false && ( globalNumEntries_ != GSTI || globalNumDiags_ != GSTI || globalMaxNumRowEntries_ != GSTI ), std::logic_error, err );
2191  // if the graph has global cosntants, then they should be valid.
2192  TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == true && ( globalNumEntries_ == GSTI || globalNumDiags_ == GSTI || globalMaxNumRowEntries_ == GSTI ), std::logic_error, err );
2193  TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == true && ( globalNumEntries_ < nodeNumEntries_ || globalNumDiags_ < nodeNumDiags_ || globalMaxNumRowEntries_ < nodeMaxNumRowEntries_ ),
2194  std::logic_error, err );
2195  // if indices are allocated, then the allocation specifications should have been released
2196  TEUCHOS_TEST_FOR_EXCEPTION(
2197  indicesAreAllocated () && (numAllocForAllRows_ != 0 ||
2198  k_numAllocPerRow_.dimension_0 () != 0),
2199  std::logic_error, err );
2200  // if indices are not allocated, then information dictating allocation quantities should be present
2201  TEUCHOS_TEST_FOR_EXCEPTION(
2202  ! indicesAreAllocated () && (nodeNumAllocated_ != STI ||
2203  nodeNumEntries_ != 0),
2204  std::logic_error, err );
2205  // if storage is optimized, then profile should be static
2206  TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() && pftype_ != StaticProfile, std::logic_error, err );
2207 
2208  // If k_rowPtrs_ exists (has nonzero size), it must have N+1 rows,
2209  // and k_rowPtrs_(N) must equal k_gblInds1D_.dimension_0() (if
2210  // globally indexed) or k_lclInds1D_.dimension_0() (if locally
2211  // indexed).
2212 
2213  TEUCHOS_TEST_FOR_EXCEPTION(
2214  isGloballyIndexed () && k_rowPtrs_.dimension_0 () != 0 &&
2215  (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1 ||
2216  k_rowPtrs_(getNodeNumRows ()) != static_cast<size_t> (k_gblInds1D_.dimension_0 ())),
2217  std::logic_error, err );
2218 
2219  TEUCHOS_TEST_FOR_EXCEPTION(
2220  isLocallyIndexed () && k_rowPtrs_.dimension_0 () != 0 &&
2221  (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1 ||
2222  k_rowPtrs_(getNodeNumRows ()) != static_cast<size_t> (k_lclInds1D_.dimension_0 ())),
2223  std::logic_error, err );
2224 
2225  // If profile is dynamic and indices are allocated, then 2-D
2226  // allocations of column index storage (either local or global)
2227  // must be present.
2228  TEUCHOS_TEST_FOR_EXCEPTION(
2229  pftype_ == DynamicProfile &&
2230  indicesAreAllocated () &&
2231  getNodeNumRows () > 0 &&
2232  lclInds2D_.is_null () && gblInds2D_.is_null (),
2233  std::logic_error, err );
2234 
2235  // If profile is dynamic and the calling process owns nonzero
2236  // rows, then k_numRowEntries_ and 2-D storage of column indices
2237  // (whether local or global) must be present.
2238  TEUCHOS_TEST_FOR_EXCEPTION(
2239  pftype_ == DynamicProfile &&
2240  indicesAreAllocated () &&
2241  getNodeNumRows () > 0 &&
2242  (k_numRowEntries_.dimension_0 () == 0 ||
2243  (lclInds2D_.is_null () && gblInds2D_.is_null ())),
2244  std::logic_error, err );
2245 
2246  // if profile is dynamic, then 1D allocations should not be present
2247  TEUCHOS_TEST_FOR_EXCEPTION(
2248  pftype_ == DynamicProfile &&
2249  (k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0),
2250  std::logic_error, err );
2251  // if profile is dynamic, then row offsets should not be present
2252  TEUCHOS_TEST_FOR_EXCEPTION(
2253  pftype_ == DynamicProfile && k_rowPtrs_.dimension_0 () != 0,
2254  std::logic_error, err );
2255  // if profile is static and we have allocated non-trivially, then
2256  // 1D allocations should be present
2257  TEUCHOS_TEST_FOR_EXCEPTION(
2258  pftype_ == StaticProfile && indicesAreAllocated () &&
2259  getNodeAllocationSize () > 0 && k_lclInds1D_.dimension_0 () == 0 &&
2260  k_gblInds1D_.dimension_0 () == 0,
2261  std::logic_error, err);
2262  // if profile is static, then 2D allocations should not be present
2263  TEUCHOS_TEST_FOR_EXCEPTION(
2264  pftype_ == StaticProfile && (lclInds2D_ != Teuchos::null || gblInds2D_ != Teuchos::null),
2265  std::logic_error, err );
2266 
2267  // if indices are not allocated, then none of the buffers should be.
2268  TEUCHOS_TEST_FOR_EXCEPTION(
2269  ! indicesAreAllocated () &&
2270  ((k_rowPtrs_.dimension_0 () != 0 || k_numRowEntries_.dimension_0 () != 0) ||
2271  k_lclInds1D_.dimension_0 () != 0 || lclInds2D_ != Teuchos::null ||
2272  k_gblInds1D_.dimension_0 () != 0 || gblInds2D_ != Teuchos::null),
2273  std::logic_error, err );
2274 
2275  // indices may be local or global only if they are allocated
2276  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2277  // indicesAreGlobal_)
2278  TEUCHOS_TEST_FOR_EXCEPTION( (indicesAreLocal_ || indicesAreGlobal_) && ! indicesAreAllocated_, std::logic_error, err );
2279  // indices may be local or global, but not both
2280  TEUCHOS_TEST_FOR_EXCEPTION( indicesAreLocal_ == true && indicesAreGlobal_ == true, std::logic_error, err );
2281  // if indices are local, then global allocations should not be present
2282  TEUCHOS_TEST_FOR_EXCEPTION(
2283  indicesAreLocal_ && (k_gblInds1D_.dimension_0 () != 0 || gblInds2D_ != Teuchos::null),
2284  std::logic_error, err );
2285  // if indices are global, then local allocations should not be present
2286  TEUCHOS_TEST_FOR_EXCEPTION(
2287  indicesAreGlobal_ && (k_lclInds1D_.dimension_0 () != 0 || lclInds2D_ != Teuchos::null),
2288  std::logic_error, err );
2289  // if indices are local, then local allocations should be present
2290  TEUCHOS_TEST_FOR_EXCEPTION(
2291  indicesAreLocal_ && getNodeAllocationSize () > 0 &&
2292  k_lclInds1D_.dimension_0 () == 0 && getNodeNumRows () > 0 &&
2293  lclInds2D_.is_null (),
2294  std::logic_error, err);
2295  // if indices are global, then global allocations should be present
2296  TEUCHOS_TEST_FOR_EXCEPTION(
2297  indicesAreGlobal_ && getNodeAllocationSize () > 0 &&
2298  k_gblInds1D_.dimension_0 () == 0 && getNodeNumRows () > 0 &&
2299  gblInds2D_.is_null (),
2300  std::logic_error, err);
2301  // if indices are allocated, then we should have recorded how many were allocated
2302  TEUCHOS_TEST_FOR_EXCEPTION( indicesAreAllocated() == true && nodeNumAllocated_ == STI, std::logic_error, err );
2303  // if indices are not allocated, then the allocation size should be marked invalid
2304  TEUCHOS_TEST_FOR_EXCEPTION( indicesAreAllocated() == false && nodeNumAllocated_ != STI, std::logic_error, err );
2305  // check the actual allocations
2306  if (indicesAreAllocated()) {
2307  size_t actualNumAllocated = 0;
2308  if (pftype_ == DynamicProfile) {
2309  if (isGloballyIndexed() && gblInds2D_ != Teuchos::null) {
2310  for (size_t r = 0; r < getNodeNumRows(); ++r) {
2311  actualNumAllocated += gblInds2D_[r].size();
2312  }
2313  }
2314  else if (isLocallyIndexed() && lclInds2D_ != Teuchos::null) {
2315  for (size_t r = 0; r < getNodeNumRows(); ++r) {
2316  actualNumAllocated += lclInds2D_[r].size();
2317  }
2318  }
2319  TEUCHOS_TEST_FOR_EXCEPTION(actualNumAllocated != nodeNumAllocated_, std::logic_error, err );
2320  }
2321  else if (k_rowPtrs_.dimension_0 () != 0) { // pftype_ == StaticProfile
2322  TEUCHOS_TEST_FOR_EXCEPTION(
2323  static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1,
2324  std::logic_error, err);
2325 
2326  actualNumAllocated = k_rowPtrs_(getNodeNumRows ());
2327  TEUCHOS_TEST_FOR_EXCEPTION(
2328  isLocallyIndexed () &&
2329  static_cast<size_t> (k_lclInds1D_.dimension_0 ()) != actualNumAllocated,
2330  std::logic_error, err );
2331  TEUCHOS_TEST_FOR_EXCEPTION(
2332  isGloballyIndexed () &&
2333  static_cast<size_t> (k_gblInds1D_.dimension_0 ()) != actualNumAllocated,
2334  std::logic_error, err );
2335  TEUCHOS_TEST_FOR_EXCEPTION(actualNumAllocated != nodeNumAllocated_, std::logic_error, err );
2336  }
2337  }
2338 #endif // HAVE_TPETRA_DEBUG
2339  }
2340 
2341 
2342  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2343  size_t
2345  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2346  {
2347  using Teuchos::OrdinalTraits;
2348  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2349  if (hasRowInfo () && lrow != OrdinalTraits<LocalOrdinal>::invalid ()) {
2350  const RowInfo rowinfo = this->getRowInfo (lrow);
2351  return rowinfo.numEntries;
2352  } else {
2353  return OrdinalTraits<size_t>::invalid ();
2354  }
2355  }
2356 
2357 
2358  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2359  size_t
2361  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2362  {
2363  if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) {
2364  const RowInfo rowinfo = this->getRowInfo (localRow);
2365  return rowinfo.numEntries;
2366  } else {
2367  return Teuchos::OrdinalTraits<size_t>::invalid ();
2368  }
2369  }
2370 
2371 
2372  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2373  size_t
2375  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2376  {
2377  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2378  if (hasRowInfo () && lrow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2379  const RowInfo rowinfo = this->getRowInfo (lrow);
2380  return rowinfo.allocSize;
2381  } else {
2382  return Teuchos::OrdinalTraits<size_t>::invalid ();
2383  }
2384  }
2385 
2386 
2387  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2388  size_t
2390  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2391  {
2392  if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) {
2393  const RowInfo rowinfo = this->getRowInfo (localRow);
2394  return rowinfo.allocSize;
2395  } else {
2396  return Teuchos::OrdinalTraits<size_t>::invalid();
2397  }
2398  }
2399 
2400 
2401  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2402  Teuchos::ArrayRCP<const size_t>
2405  {
2406  using Kokkos::ViewAllocateWithoutInitializing;
2407  using Kokkos::create_mirror_view;
2408  using Teuchos::ArrayRCP;
2409  typedef typename local_graph_type::row_map_type row_map_type;
2410  typedef typename row_map_type::non_const_value_type row_offset_type;
2411 #ifdef HAVE_TPETRA_DEBUG
2412  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2413  const char suffix[] = " Please report this bug to the Tpetra developers.";
2414 #endif // HAVE_TPETRA_DEBUG
2415  const size_t size = k_rowPtrs_.dimension_0 ();
2416  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
2417 
2418  if (size == 0) {
2419  return ArrayRCP<const size_t> ();
2420  }
2421 
2422  ArrayRCP<const row_offset_type> ptr_rot;
2423  ArrayRCP<const size_t> ptr_st;
2424  if (same) { // size_t == row_offset_type
2425  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2426  // of create_mirror_view might actually be a new allocation.
2427  // This helps with debugging when there are two memory spaces.
2428  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2429  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2430 #ifdef HAVE_TPETRA_DEBUG
2431  TEUCHOS_TEST_FOR_EXCEPTION(
2432  ptr_h.dimension_0 () != k_rowPtrs_.dimension_0 (), std::logic_error,
2433  prefix << "size_t == row_offset_type, but ptr_h.dimension_0() = "
2434  << ptr_h.dimension_0 () << " != k_rowPtrs_.dimension_0() = "
2435  << k_rowPtrs_.dimension_0 () << ".");
2436  TEUCHOS_TEST_FOR_EXCEPTION(
2437  same && size != 0 && k_rowPtrs_.ptr_on_device () == NULL, std::logic_error,
2438  prefix << "size_t == row_offset_type and k_rowPtrs_.dimension_0() = "
2439  << size << " != 0, but k_rowPtrs_.ptr_on_device() == NULL." << suffix);
2440  TEUCHOS_TEST_FOR_EXCEPTION(
2441  same && size != 0 && ptr_h.ptr_on_device () == NULL, std::logic_error,
2442  prefix << "size_t == row_offset_type and k_rowPtrs_.dimension_0() = "
2443  << size << " != 0, but create_mirror_view(k_rowPtrs_).ptr_on_device() "
2444  "== NULL." << suffix);
2445 #endif // HAVE_TPETRA_DEBUG
2446  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2447  }
2448  else { // size_t != row_offset_type
2449  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2450  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2451  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2452  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2453  Kokkos::deep_copy (ptr_h, ptr_d);
2454  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2455  }
2456 #ifdef HAVE_TPETRA_DEBUG
2457  TEUCHOS_TEST_FOR_EXCEPTION(
2458  same && size != 0 && ptr_rot.is_null (), std::logic_error,
2459  prefix << "size_t == row_offset_type and size = " << size
2460  << " != 0, but ptr_rot is null." << suffix);
2461  TEUCHOS_TEST_FOR_EXCEPTION(
2462  ! same && size != 0 && ptr_st.is_null (), std::logic_error,
2463  prefix << "size_t != row_offset_type and size = " << size
2464  << " != 0, but ptr_st is null." << suffix);
2465 #endif // HAVE_TPETRA_DEBUG
2466 
2467  // If size_t == row_offset_type, return a persisting host view of
2468  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2469 #ifdef HAVE_TPETRA_DEBUG
2470  ArrayRCP<const size_t> retval =
2471  Kokkos::Impl::if_c<same,
2472  ArrayRCP<const row_offset_type>,
2473  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2474  TEUCHOS_TEST_FOR_EXCEPTION(
2475  size != 0 && retval.is_null (), std::logic_error,
2476  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2477  return retval;
2478 #else
2479  return Kokkos::Impl::if_c<same,
2480  ArrayRCP<const row_offset_type>,
2481  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2482 #endif // HAVE_TPETRA_DEBUG
2483  }
2484 
2485 
2486  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2487  Teuchos::ArrayRCP<const LocalOrdinal>
2490  {
2491  return Kokkos::Compat::persistingView (k_lclInds1D_);
2492  }
2493 
2494 
2495  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2496  void
2498  getLocalRowCopy (LocalOrdinal localRow,
2499  const Teuchos::ArrayView<LocalOrdinal>&indices,
2500  size_t& numEntries) const
2501  {
2502  using Teuchos::ArrayView;
2503  typedef LocalOrdinal LO;
2504  typedef GlobalOrdinal GO;
2505  const char tfecfFuncName[] = "getLocalRowCopy: ";
2506 
2507  TEUCHOS_TEST_FOR_EXCEPTION(
2508  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2509  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2510  "does not have a column Map yet. That means we don't have local indices "
2511  "for columns yet, so it doesn't make sense to call this method. If the "
2512  "graph doesn't have a column Map yet, you should call fillComplete on "
2513  "it first.");
2514 #ifdef HAVE_TPETRA_DEBUG
2515  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2516  ! hasRowInfo(), std::runtime_error,
2517  "Graph row information was deleted at fillComplete.");
2518 #endif // HAVE_TPETRA_DEBUG
2519 
2520  // This does the right thing (reports an empty row) if the input
2521  // row is invalid.
2522  const RowInfo rowinfo = getRowInfo (localRow);
2523  // No side effects on error.
2524  const size_t theNumEntries = rowinfo.numEntries;
2525  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2526  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2527  "Specified storage (size==" << indices.size () << ") does not suffice "
2528  "to hold all " << theNumEntries << " entry/ies for this row.");
2529  numEntries = theNumEntries;
2530 
2531  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2532  if (isLocallyIndexed ()) {
2533  ArrayView<const LO> lview = getLocalView (rowinfo);
2534  for (size_t j = 0; j < theNumEntries; ++j) {
2535  indices[j] = lview[j];
2536  }
2537  }
2538  else if (isGloballyIndexed ()) {
2539  ArrayView<const GO> gview = getGlobalView (rowinfo);
2540  for (size_t j = 0; j < theNumEntries; ++j) {
2541  indices[j] = colMap_->getLocalElement (gview[j]);
2542  }
2543  }
2544  }
2545  }
2546 
2547 
2548  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2549  void
2551  getGlobalRowCopy (GlobalOrdinal globalRow,
2552  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2553  size_t& numEntries) const
2554  {
2555  using Teuchos::ArrayView;
2556  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2557 #ifdef HAVE_TPETRA_DEBUG
2558  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2559  ! hasRowInfo (), std::runtime_error,
2560  "Graph row information was deleted at fillComplete.");
2561 #endif // HAVE_TPETRA_DEBUG
2562 
2563  // This does the right thing (reports an empty row) if the input
2564  // row is invalid.
2565  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2566  const size_t theNumEntries = rowinfo.numEntries;
2567  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2568  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2569  "Specified storage (size==" << indices.size () << ") does not suffice "
2570  "to hold all " << theNumEntries << " entry/ies for this row.");
2571  numEntries = theNumEntries; // first side effect
2572 
2573  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2574  if (isLocallyIndexed ()) {
2575  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
2576  for (size_t j = 0; j < theNumEntries; ++j) {
2577  indices[j] = colMap_->getGlobalElement (lview[j]);
2578  }
2579  }
2580  else if (isGloballyIndexed ()) {
2581  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
2582  for (size_t j = 0; j < theNumEntries; ++j) {
2583  indices[j] = gview[j];
2584  }
2585  }
2586  }
2587  }
2588 
2589 
2590  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2591  void
2593  getLocalRowView (LocalOrdinal localRow,
2594  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2595  {
2596  const char tfecfFuncName[] = "getLocalRowView: ";
2597  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2598  isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2599  "currently stored as global indices, so we cannot return a view with "
2600  "local column indices, whether or not the graph has a column Map. If "
2601  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2602 #ifdef HAVE_TPETRA_DEBUG
2603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2604  ! hasRowInfo (), std::runtime_error, "Graph row information was "
2605  "deleted at fillComplete().");
2606 #endif // HAVE_TPETRA_DEBUG
2607 
2608  // This does the right thing (reports an empty row) if the input
2609  // row is invalid.
2610  const RowInfo rowInfo = getRowInfo (localRow);
2611  indices = Teuchos::null;
2612  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2613  rowInfo.numEntries > 0) {
2614  indices = this->getLocalView (rowInfo);
2615  // getLocalView returns a view of the _entire_ row, including
2616  // any extra space at the end (which 1-D unpacked storage
2617  // might have, for example). That's why we have to take a
2618  // subview of the returned view.
2619  indices = indices (0, rowInfo.numEntries);
2620  }
2621 
2622 #ifdef HAVE_TPETRA_DEBUG
2623  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2624  (static_cast<size_t> (indices.size ()) !=
2625  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2626  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
2627  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
2628  ". Please report this bug to the Tpetra developers.");
2629 #endif // HAVE_TPETRA_DEBUG
2630  }
2631 
2632 
2633  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2634  void
2636  getGlobalRowView (GlobalOrdinal globalRow,
2637  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
2638  {
2639  const char tfecfFuncName[] = "getGlobalRowView: ";
2640  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2641  isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2642  "currently stored as local indices, so we cannot return a view with "
2643  "global column indices. Use getGlobalRowCopy() instead.");
2644 #ifdef HAVE_TPETRA_DEBUG
2645  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2646  ! hasRowInfo (), std::runtime_error,
2647  "Graph row information was deleted at fillComplete().");
2648 #endif // HAVE_TPETRA_DEBUG
2649 
2650  // This does the right thing (reports an empty row) if the input
2651  // row is invalid.
2652  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2653  indices = Teuchos::null;
2654  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2655  rowInfo.numEntries > 0) {
2656  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2657  }
2658 
2659 #ifdef HAVE_TPETRA_DEBUG
2660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2661  (static_cast<size_t> (indices.size ()) != getNumEntriesInGlobalRow (globalRow),
2662  std::logic_error, "indices.size() = " << indices.size ()
2663  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2664  << getNumEntriesInGlobalRow (globalRow)
2665  << ". Please report this bug to the Tpetra developers.");
2666 #endif // HAVE_TPETRA_DEBUG
2667  }
2668 
2669 
2670  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2671  void
2673  insertLocalIndices (const LocalOrdinal localRow,
2674  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2675  {
2676  const char tfecfFuncName[] = "insertLocalIndices";
2677 
2678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2679  ! isFillActive (), std::runtime_error,
2680  ": requires that fill is active.");
2681  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2682  isGloballyIndexed (), std::runtime_error,
2683  ": graph indices are global; use insertGlobalIndices().");
2684  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2685  ! hasColMap (), std::runtime_error,
2686  ": cannot insert local indices without a column map.");
2687  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2688  ! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2689  ": row does not belong to this node.");
2690  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2691  ! hasRowInfo (), std::runtime_error,
2692  ": graph row information was deleted at fillComplete().");
2693  if (! indicesAreAllocated ()) {
2694  allocateIndices (LocalIndices);
2695  }
2696 
2697 #ifdef HAVE_TPETRA_DEBUG
2698  // In a debug build, if the graph has a column Map, test whether
2699  // any of the given column indices are not in the column Map.
2700  // Keep track of the invalid column indices so we can tell the
2701  // user about them.
2702  if (hasColMap ()) {
2703  using Teuchos::Array;
2704  using Teuchos::toString;
2705  using std::endl;
2706  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2707 
2708  const map_type& colMap = * (getColMap ());
2709  Array<LocalOrdinal> badColInds;
2710  bool allInColMap = true;
2711  for (size_type k = 0; k < indices.size (); ++k) {
2712  if (! colMap.isNodeLocalElement (indices[k])) {
2713  allInColMap = false;
2714  badColInds.push_back (indices[k]);
2715  }
2716  }
2717  if (! allInColMap) {
2718  std::ostringstream os;
2719  os << "Tpetra::CrsMatrix::insertLocalIndices: You attempted to insert "
2720  "entries in owned row " << localRow << ", at the following column "
2721  "indices: " << toString (indices) << "." << endl;
2722  os << "Of those, the following indices are not in the column Map on "
2723  "this process: " << toString (badColInds) << "." << endl << "Since "
2724  "the graph has a column Map already, it is invalid to insert entries "
2725  "at those locations.";
2726  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2727  }
2728  }
2729 #endif // HAVE_TPETRA_DEBUG
2730 
2731  insertLocalIndicesImpl (localRow, indices);
2732 
2733 #ifdef HAVE_TPETRA_DEBUG
2734  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2735  indicesAreAllocated() == false || isLocallyIndexed() == false,
2736  std::logic_error,
2737  ": Violated stated post-conditions. Please contact Tpetra team.");
2738 #endif
2739  }
2740 
2741  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2742  void
2744  insertLocalIndices (const LocalOrdinal localRow,
2745  const LocalOrdinal numEnt,
2746  const LocalOrdinal inds[])
2747  {
2748  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2749  this->insertLocalIndices (localRow, indsT);
2750  }
2751 
2752  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2753  void
2755  insertLocalIndicesFiltered (const LocalOrdinal localRow,
2756  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2757  {
2758  typedef LocalOrdinal LO;
2759  const char tfecfFuncName[] = "insertLocalIndicesFiltered";
2760 
2761  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2762  isFillActive() == false, std::runtime_error,
2763  ": requires that fill is active.");
2764  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2765  isGloballyIndexed() == true, std::runtime_error,
2766  ": graph indices are global; use insertGlobalIndices().");
2767  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2768  hasColMap() == false, std::runtime_error,
2769  ": cannot insert local indices without a column map.");
2770  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2771  rowMap_->isNodeLocalElement(localRow) == false, std::runtime_error,
2772  ": row does not belong to this node.");
2773  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2774  ! hasRowInfo (), std::runtime_error,
2775  ": graph row information was deleted at fillComplete().");
2776  if (! indicesAreAllocated ()) {
2777  allocateIndices (LocalIndices);
2778  }
2779 
2780  // If we have a column map, use it to filter the entries.
2781  if (hasColMap ()) {
2782  Teuchos::Array<LO> filtered_indices (indices);
2783  SLocalGlobalViews inds_view;
2784  SLocalGlobalNCViews inds_ncview;
2785  inds_ncview.linds = filtered_indices();
2786  const size_t numFilteredEntries =
2787  filterIndices<LocalIndices>(inds_ncview);
2788  inds_view.linds = filtered_indices (0, numFilteredEntries);
2789  insertLocalIndicesImpl(localRow, inds_view.linds);
2790  }
2791  else {
2792  insertLocalIndicesImpl(localRow, indices);
2793  }
2794 #ifdef HAVE_TPETRA_DEBUG
2795  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2796  indicesAreAllocated() == false || isLocallyIndexed() == false,
2797  std::logic_error,
2798  ": Violated stated post-conditions. Please contact Tpetra team.");
2799 #endif
2800  }
2801 
2802 
2803  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2804  void
2806  insertGlobalIndices (const GlobalOrdinal grow,
2807  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
2808  {
2809  using Teuchos::Array;
2810  using Teuchos::ArrayView;
2811  typedef LocalOrdinal LO;
2812  typedef GlobalOrdinal GO;
2813  typedef typename ArrayView<const GO>::size_type size_type;
2814  const char tfecfFuncName[] = "insertGlobalIndices";
2815 
2816  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2817  isLocallyIndexed() == true, std::runtime_error,
2818  ": graph indices are local; use insertLocalIndices().");
2819  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2820  ! hasRowInfo (), std::runtime_error,
2821  ": graph row information was deleted at fillComplete().");
2822  // This can't really be satisfied for now, because if we are
2823  // fillComplete(), then we are local. In the future, this may
2824  // change. However, the rule that modification require active
2825  // fill will not change.
2826  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2827  isFillActive() == false, std::runtime_error,
2828  ": You are not allowed to call this method if fill is not active. "
2829  "If fillComplete has been called, you must first call resumeFill "
2830  "before you may insert indices.");
2831  if (! indicesAreAllocated ()) {
2832  allocateIndices (GlobalIndices);
2833  }
2834  const LO myRow = rowMap_->getLocalElement (grow);
2835  if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) {
2836 #ifdef HAVE_TPETRA_DEBUG
2837  if (hasColMap ()) {
2838  using std::endl;
2839  const map_type& colMap = * (getColMap ());
2840  // In a debug build, keep track of the nonowned ("bad") column
2841  // indices, so that we can display them in the exception
2842  // message. In a release build, just ditch the loop early if
2843  // we encounter a nonowned column index.
2844  Array<GO> badColInds;
2845  bool allInColMap = true;
2846  for (size_type k = 0; k < indices.size (); ++k) {
2847  if (! colMap.isNodeGlobalElement (indices[k])) {
2848  allInColMap = false;
2849  badColInds.push_back (indices[k]);
2850  }
2851  }
2852  if (! allInColMap) {
2853  std::ostringstream os;
2854  os << "Tpetra::CrsGraph::insertGlobalIndices: You attempted to insert "
2855  "entries in owned row " << grow << ", at the following column "
2856  "indices: " << toString (indices) << "." << endl;
2857  os << "Of those, the following indices are not in the column Map on "
2858  "this process: " << toString (badColInds) << "." << endl << "Since "
2859  "the matrix has a column Map already, it is invalid to insert "
2860  "entries at those locations.";
2861  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2862  }
2863  }
2864 #endif // HAVE_TPETRA_DEBUG
2865  insertGlobalIndicesImpl (myRow, indices);
2866  }
2867  else { // a nonlocal row
2868  const size_type numIndices = indices.size ();
2869  // This creates the Array if it doesn't exist yet. std::map's
2870  // operator[] does a lookup each time, so it's better to pull
2871  // nonlocals_[grow] out of the loop.
2872  std::vector<GO>& nonlocalRow = nonlocals_[grow];
2873  for (size_type k = 0; k < numIndices; ++k) {
2874  nonlocalRow.push_back (indices[k]);
2875  }
2876  }
2877 #ifdef HAVE_TPETRA_DEBUG
2878  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2879  indicesAreAllocated() == false || isGloballyIndexed() == false,
2880  std::logic_error,
2881  ": Violated stated post-conditions. Please contact Tpetra team.");
2882 #endif
2883  }
2884 
2885 
2886  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2887  void
2889  insertGlobalIndices (const GlobalOrdinal globalRow,
2890  const LocalOrdinal numEnt,
2891  const GlobalOrdinal inds[])
2892  {
2893  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2894  this->insertGlobalIndices (globalRow, indsT);
2895  }
2896 
2897 
2898  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2899  void
2901  insertGlobalIndicesFiltered (const GlobalOrdinal grow,
2902  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
2903  {
2904  using Teuchos::Array;
2905  using Teuchos::ArrayView;
2906  typedef LocalOrdinal LO;
2907  typedef GlobalOrdinal GO;
2908  const char tfecfFuncName[] = "insertGlobalIndicesFiltered";
2909 
2910  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2911  isLocallyIndexed() == true, std::runtime_error,
2912  ": graph indices are local; use insertLocalIndices().");
2913  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2914  ! hasRowInfo (), std::runtime_error,
2915  ": graph row information was deleted at fillComplete().");
2916  // This can't really be satisfied for now, because if we are
2917  // fillComplete(), then we are local. In the future, this may
2918  // change. However, the rule that modification require active
2919  // fill will not change.
2920  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2921  isFillActive() == false, std::runtime_error,
2922  ": You are not allowed to call this method if fill is not active. "
2923  "If fillComplete has been called, you must first call resumeFill "
2924  "before you may insert indices.");
2925  if (! indicesAreAllocated ()) {
2926  allocateIndices (GlobalIndices);
2927  }
2928  const LO myRow = rowMap_->getLocalElement (grow);
2929  if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) {
2930  // If we have a column map, use it to filter the entries.
2931  if (hasColMap ()) {
2932  Array<GO> filtered_indices(indices);
2933  SLocalGlobalViews inds_view;
2934  SLocalGlobalNCViews inds_ncview;
2935  inds_ncview.ginds = filtered_indices();
2936  const size_t numFilteredEntries =
2937  filterIndices<GlobalIndices> (inds_ncview);
2938  inds_view.ginds = filtered_indices (0, numFilteredEntries);
2939  insertGlobalIndicesImpl(myRow, inds_view.ginds);
2940  }
2941  else {
2942  insertGlobalIndicesImpl(myRow, indices);
2943  }
2944  }
2945  else { // a nonlocal row
2946  typedef typename ArrayView<const GO>::size_type size_type;
2947  const size_type numIndices = indices.size ();
2948  for (size_type k = 0; k < numIndices; ++k) {
2949  nonlocals_[grow].push_back (indices[k]);
2950  }
2951  }
2952 #ifdef HAVE_TPETRA_DEBUG
2953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2954  indicesAreAllocated() == false || isGloballyIndexed() == false,
2955  std::logic_error,
2956  ": Violated stated post-conditions. Please contact Tpetra team.");
2957 #endif
2958  }
2959 
2960 
2961  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2962  void
2964  removeLocalIndices (LocalOrdinal lrow)
2965  {
2966  const char tfecfFuncName[] = "removeLocalIndices: ";
2967  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2968  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2969  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2970  isStorageOptimized (), std::runtime_error,
2971  "cannot remove indices after optimizeStorage() has been called.");
2972  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2973  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2974  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2975  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2976  "Local row " << lrow << " is not in the row Map on the calling process.");
2977  if (! indicesAreAllocated ()) {
2978  allocateIndices (LocalIndices);
2979  }
2980 
2981  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
2982  // all processes?
2983  clearGlobalConstants ();
2984 
2985  if (k_numRowEntries_.dimension_0 () != 0) {
2986  const size_t oldNumEntries = k_numRowEntries_(lrow);
2987  nodeNumEntries_ -= oldNumEntries;
2988  k_numRowEntries_(lrow) = 0;
2989  }
2990 #ifdef HAVE_TPETRA_DEBUG
2991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2992  getNumEntriesInLocalRow (lrow) != 0 ||
2993  ! indicesAreAllocated () ||
2994  ! isLocallyIndexed (), std::logic_error,
2995  ": Violated stated post-conditions. Please contact Tpetra team.");
2996 #endif // HAVE_TPETRA_DEBUG
2997  }
2998 
2999 
3000  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3001  void
3003  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3004  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3005  {
3006  const char tfecfFuncName[] = "setAllIndices: ";
3007  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3008  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3009  "The graph must have a column Map before you may call this method.");
3010  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3011  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3012  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3013  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3014  ".");
3015 
3016  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3017  // since the future model will be allocation at construction, not
3018  // lazy allocation on first insert.
3019  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3020  k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0,
3021  std::runtime_error, "You may not call this method if 1-D data structures "
3022  "are already allocated.");
3023 
3024  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3025  lclInds2D_ != Teuchos::null || gblInds2D_ != Teuchos::null,
3026  std::runtime_error, "You may not call this method if 2-D data structures "
3027  "are already allocated.");
3028 
3029  const size_t localNumEntries = rowPointers(getNodeNumRows ());
3030 
3031  indicesAreAllocated_ = true;
3032  indicesAreLocal_ = true;
3033  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3034  k_lclInds1D_ = columnIndices;
3035  k_rowPtrs_ = rowPointers;
3036  nodeNumAllocated_ = localNumEntries;
3037  nodeNumEntries_ = localNumEntries;
3038 
3039  // Build the local graph.
3040  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
3041 
3042  // These normally get cleared out at the end of allocateIndices.
3043  // It makes sense to clear them out here, because at the end of
3044  // this method, the graph is allocated on the calling process.
3045  numAllocForAllRows_ = 0;
3046  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3047 
3048  checkInternalState ();
3049  }
3050 
3051 
3052  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3053  void
3055  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3056  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3057  {
3058  using Kokkos::View;
3059  typedef typename local_graph_type::row_map_type row_map_type;
3060  typedef typename row_map_type::array_layout layout_type;
3061  typedef typename row_map_type::non_const_value_type row_offset_type;
3062  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3063  Kokkos::MemoryUnmanaged> input_view_type;
3064  typedef typename row_map_type::non_const_type nc_row_map_type;
3065 
3066  const size_t size = static_cast<size_t> (rowPointers.size ());
3067  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
3068  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3069 
3070  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3071 
3072  if (same) { // size_t == row_offset_type
3073  // This compile-time logic ensures that the compiler never sees
3074  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3075  // ...> unless size_t == row_offset_type.
3076  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3077  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3078  nc_row_map_type,
3079  input_view_type>::select (ptr_rot, ptr_decoy),
3080  ptr_in);
3081  }
3082  else { // size_t != row_offset_type
3083  // CudaUvmSpace != HostSpace, so this will be false in that case.
3084  const bool inHostMemory =
3085  Kokkos::Impl::is_same<typename row_map_type::memory_space,
3086  Kokkos::HostSpace>::value;
3087  if (inHostMemory) {
3088  // Copy (with cast from size_t to row_offset_type, with bounds
3089  // checking if necessary) to ptr_rot.
3090  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3091  }
3092  else { // Copy input row offsets to device first.
3093  //
3094  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3095  // execution space would avoid the double copy.
3096  //
3097  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3098  Kokkos::deep_copy (ptr_st, ptr_in);
3099  // Copy on device (casting from size_t to row_offset_type,
3100  // with bounds checking if necessary) to ptr_rot. This
3101  // executes in the output View's execution space, which is the
3102  // same as execution_space.
3103  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3104  }
3105  }
3106 
3107  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3108  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3109  setAllIndices (ptr_rot, k_ind);
3110  }
3111 
3112 
3113  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3114  void
3116  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3117  size_t& boundForAllLocalRows,
3118  bool& boundSameForAllLocalRows) const
3119  {
3120  // The three output arguments. We assign them to the actual
3121  // output arguments at the end, in order to implement
3122  // transactional semantics.
3123  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3124  size_t numEntriesForAll = 0;
3125  bool allRowsSame = true;
3126 
3127  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3128 
3129  if (! this->indicesAreAllocated ()) {
3130  if (k_numAllocPerRow_.dimension_0 () != 0) {
3131  // This is a shallow copy; the ArrayRCP wraps the View in a
3132  // custom destructor, which ensures correct deallocation if
3133  // that is the only reference to the View. Furthermore, this
3134  // View is a host View, so this doesn't assume UVM.
3135  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3136  allRowsSame = false; // conservatively; we don't check the array
3137  }
3138  else {
3139  numEntriesForAll = numAllocForAllRows_;
3140  allRowsSame = true;
3141  }
3142  }
3143  else if (k_numRowEntries_.dimension_0 () != 0) {
3144  // This is a shallow copy; the ArrayRCP wraps the View in a
3145  // custom destructor, which ensures correct deallocation if that
3146  // is the only reference to the View. Furthermore, this View is
3147  // a host View, so this doesn't assume UVM.
3148  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3149  allRowsSame = false; // conservatively; we don't check the array
3150  }
3151  else if (this->nodeNumAllocated_ == 0) {
3152  numEntriesForAll = 0;
3153  allRowsSame = true;
3154  }
3155  else {
3156  // left with the case that we have optimized storage. in this
3157  // case, we have to construct a list of row sizes.
3158  TEUCHOS_TEST_FOR_EXCEPTION(
3159  this->getProfileType () != StaticProfile, std::logic_error,
3160  "Tpetra::CrsGraph::getNumEntriesPerRowUpperBound: "
3161  "The graph is not StaticProfile, but storage appears to be optimized. "
3162  "Please report this bug to the Tpetra developers.");
3163  TEUCHOS_TEST_FOR_EXCEPTION(
3164  numRows != 0 && k_rowPtrs_.dimension_0 () == 0, std::logic_error,
3165  "Tpetra::CrsGraph::getNumEntriesPerRowUpperBound: "
3166  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3167  << " on the calling process, but the k_rowPtrs_ array has zero entries. "
3168  "Please report this bug to the Tpetra developers.");
3169 
3170  Teuchos::ArrayRCP<size_t> numEnt;
3171  if (numRows != 0) {
3172  numEnt = Teuchos::arcp<size_t> (numRows);
3173  }
3174 
3175  // We have to iterate through the row offsets anyway, so we
3176  // might as well check whether all rows' bounds are the same.
3177  bool allRowsReallySame = false;
3178  for (ptrdiff_t i = 0; i < numRows; ++i) {
3179  numEnt[i] = k_rowPtrs_(i+1) - k_rowPtrs_(i);
3180  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3181  allRowsReallySame = false;
3182  }
3183  }
3184  if (allRowsReallySame) {
3185  if (numRows == 0) {
3186  numEntriesForAll = 0;
3187  } else {
3188  numEntriesForAll = numEnt[1] - numEnt[0];
3189  }
3190  allRowsSame = true;
3191  }
3192  else {
3193  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3194  allRowsSame = false; // conservatively; we don't check the array
3195  }
3196  }
3197 
3198  TEUCHOS_TEST_FOR_EXCEPTION(
3199  numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3200  "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: "
3201  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3202  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3203  "size " << numEntriesPerRow.size () << ". "
3204  "Please report this bug to the Tpetra developers.");
3205  TEUCHOS_TEST_FOR_EXCEPTION(
3206  numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3207  "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: "
3208  "numEntriesForAll and allRowsSame are not consistent. The former "
3209  "is nonzero (" << numEntriesForAll << "), but the latter is false. "
3210  "Please report this bug to the Tpetra developers.");
3211  TEUCHOS_TEST_FOR_EXCEPTION(
3212  numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3213  "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: "
3214  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3215  "nonzero length " << numEntriesForAll << ", but the latter is true. "
3216  "Please report this bug to the Tpetra developers.");
3217 
3218  boundPerLocalRow = numEntriesPerRow;
3219  boundForAllLocalRows = numEntriesForAll;
3220  boundSameForAllLocalRows = allRowsSame;
3221  }
3222 
3223 
3224  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3225  void
3228  {
3229  using Teuchos::Comm;
3230  using Teuchos::outArg;
3231  using Teuchos::RCP;
3232  using Teuchos::rcp;
3233  using Teuchos::REDUCE_MAX;
3234  using Teuchos::REDUCE_MIN;
3235  using Teuchos::reduceAll;
3237  typedef LocalOrdinal LO;
3238  typedef GlobalOrdinal GO;
3239  typedef typename Teuchos::Array<GO>::size_type size_type;
3240  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3241 
3242  RCP<const Comm<int> > comm = getComm ();
3243 
3244  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3245  (! isFillActive (), std::runtime_error, "Fill must be active before "
3246  "you may call this method.");
3247 
3248  const size_t myNumNonlocalRows = nonlocals_.size ();
3249 
3250  // If no processes have nonlocal rows, then we don't have to do
3251  // anything. Checking this is probably cheaper than constructing
3252  // the Map of nonlocal rows (see below) and noticing that it has
3253  // zero global entries.
3254  {
3255  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3256  int someoneHasNonlocalRows = 0;
3257  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3258  outArg (someoneHasNonlocalRows));
3259  if (someoneHasNonlocalRows == 0) {
3260  return; // no process has nonlocal rows, so nothing to do
3261  }
3262  }
3263 
3264  // 1. Create a list of the "nonlocal" rows on each process. this
3265  // requires iterating over nonlocals_, so while we do this,
3266  // deduplicate the entries and get a count for each nonlocal
3267  // row on this process.
3268  // 2. Construct a new row Map corresponding to those rows. This
3269  // Map is likely overlapping. We know that the Map is not
3270  // empty on all processes, because the above all-reduce and
3271  // return exclude that case.
3272 
3273  RCP<const map_type> nonlocalRowMap;
3274  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3275  Teuchos::ArrayRCP<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3276  {
3277  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3278  size_type curPos = 0;
3279  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3280  ++mapIter, ++curPos) {
3281  myNonlocalGblRows[curPos] = mapIter->first;
3282  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3283  std::sort (gblCols.begin (), gblCols.end ());
3284  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3285  gblCols.erase (vecLast, gblCols.end ());
3286  numEntPerNonlocalRow[curPos] = gblCols.size ();
3287  }
3288 
3289  // Currently, Map requires that its indexBase be the global min
3290  // of all its global indices. Map won't compute this for us, so
3291  // we must do it. If our process has no nonlocal rows, set the
3292  // "min" to the max possible GO value. This ensures that if
3293  // some process has at least one nonlocal row, then it will pick
3294  // that up as the min. We know that at least one process has a
3295  // nonlocal row, since the all-reduce and return at the top of
3296  // this method excluded that case.
3297  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3298  {
3299  auto iter = std::min_element (myNonlocalGblRows.begin (),
3300  myNonlocalGblRows.end ());
3301  if (iter != myNonlocalGblRows.end ()) {
3302  myMinNonlocalGblRow = *iter;
3303  }
3304  }
3305  GO gblMinNonlocalGblRow = 0;
3306  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3307  outArg (gblMinNonlocalGblRow));
3308  const GO indexBase = gblMinNonlocalGblRow;
3309  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3310  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3311  }
3312 
3313  // 3. Use the column indices for each nonlocal row, as stored in
3314  // nonlocals_, to construct a CrsGraph corresponding to
3315  // nonlocal rows. We may use StaticProfile, since we have
3316  // exact counts of the number of entries in each nonlocal row.
3317 
3318  RCP<crs_graph_type> nonlocalGraph =
3319  rcp (new crs_graph_type (nonlocalRowMap, numEntPerNonlocalRow,
3320  StaticProfile));
3321  {
3322  size_type curPos = 0;
3323  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3324  ++mapIter, ++curPos) {
3325  const GO gblRow = mapIter->first;
3326  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3327  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3328  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3329  }
3330  }
3331  // There's no need to fill-complete the nonlocals graph.
3332  // We just use it as a temporary container for the Export.
3333 
3334  // 4. If the original row Map is one to one, then we can Export
3335  // directly from nonlocalGraph into this. Otherwise, we have
3336  // to create a temporary graph with a one-to-one row Map,
3337  // Export into that, then Import from the temporary graph into
3338  // *this.
3339 
3340  auto origRowMap = this->getRowMap ();
3341  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3342 
3343  if (origRowMapIsOneToOne) {
3344  export_type exportToOrig (nonlocalRowMap, origRowMap);
3345  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3346  // We're done at this point!
3347  }
3348  else {
3349  // If you ask a Map whether it is one to one, it does some
3350  // communication and stashes intermediate results for later use
3351  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3352  // much more then the original cost of calling isOneToOne.
3353  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3354  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3355 
3356  // Create a temporary graph with the one-to-one row Map.
3357  //
3358  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3359  // row, to avoid reallocation during the Export operation.
3360  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3361  // Export from graph of nonlocals into the temp one-to-one graph.
3362  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3363 
3364  // We don't need the graph of nonlocals anymore, so get rid of
3365  // it, to keep the memory high-water mark down.
3366  nonlocalGraph = Teuchos::null;
3367 
3368  // Import from the one-to-one graph to the original graph.
3369  import_type importToOrig (oneToOneRowMap, origRowMap);
3370  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3371  }
3372 
3373  // It's safe now to clear out nonlocals_, since we've already
3374  // committed side effects to *this. The standard idiom for
3375  // clearing a Container like std::map, is to swap it with an empty
3376  // Container and let the swapped Container fall out of scope.
3377  decltype (nonlocals_) newNonlocals;
3378  std::swap (nonlocals_, newNonlocals);
3379 
3380  checkInternalState ();
3381  }
3382 
3383 
3384  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3385  void
3387  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3388  {
3389  const char tfecfFuncName[] = "resumeFill";
3390  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasRowInfo(), std::runtime_error,
3391  ": Sorry, you cannot resume fill of the CrsGraph, since the graph's row "
3392  "information was deleted in fillComplete().");
3393 
3394 #ifdef HAVE_TPETRA_DEBUG
3395  Teuchos::barrier( *rowMap_->getComm() );
3396 #endif // HAVE_TPETRA_DEBUG
3397  clearGlobalConstants();
3398  if (params != Teuchos::null) this->setParameterList (params);
3399  lowerTriangular_ = false;
3400  upperTriangular_ = false;
3401  // either still sorted/merged or initially sorted/merged
3402  indicesAreSorted_ = true;
3403  noRedundancies_ = true;
3404  fillComplete_ = false;
3405 #ifdef HAVE_TPETRA_DEBUG
3406  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3407  ! isFillActive() || isFillComplete(), std::logic_error,
3408  "::resumeFill(): At end of method, either fill is not active or fill is "
3409  "complete. This violates stated post-conditions. Please report this bug "
3410  "to the Tpetra developers.");
3411 #endif // HAVE_TPETRA_DEBUG
3412  }
3413 
3414 
3415  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3416  void
3418  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3419  {
3420  // If the graph already has domain and range Maps, don't clobber
3421  // them. If it doesn't, use the current row Map for both the
3422  // domain and range Maps.
3423  //
3424  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3425  // column Map, and column indices are inserted which are not in
3426  // the row Map on any process, this will cause troubles. However,
3427  // that is not a common case for most applications that we
3428  // encounter, and checking for it might require more
3429  // communication.
3430  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3431  if (domMap.is_null ()) {
3432  domMap = this->getRowMap ();
3433  }
3434  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3435  if (ranMap.is_null ()) {
3436  ranMap = this->getRowMap ();
3437  }
3438  this->fillComplete (domMap, ranMap, params);
3439  }
3440 
3441 
3442  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3443  void
3445  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3446  const Teuchos::RCP<const map_type>& rangeMap,
3447  const Teuchos::RCP<Teuchos::ParameterList>& params)
3448  {
3449  const char tfecfFuncName[] = "fillComplete";
3450 
3451 #ifdef HAVE_TPETRA_DEBUG
3452  rowMap_->getComm ()->barrier ();
3453 #endif // HAVE_TPETRA_DEBUG
3454 
3455  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
3456  std::runtime_error, ": Graph fill state must be active (isFillActive() "
3457  "must be true) before calling fillComplete().");
3458 
3459  const int numProcs = getComm ()->getSize ();
3460 
3461  //
3462  // Read and set parameters
3463  //
3464 
3465  // Does the caller want to sort remote GIDs (within those owned by
3466  // the same process) in makeColMap()?
3467  if (! params.is_null ()) {
3468  if (params->isParameter ("sort column map ghost gids")) {
3469  sortGhostsAssociatedWithEachProcessor_ =
3470  params->get<bool> ("sort column map ghost gids",
3471  sortGhostsAssociatedWithEachProcessor_);
3472  }
3473  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3474  sortGhostsAssociatedWithEachProcessor_ =
3475  params->get<bool> ("Sort column Map ghost GIDs",
3476  sortGhostsAssociatedWithEachProcessor_);
3477  }
3478  }
3479 
3480  // If true, the caller promises that no process did nonlocal
3481  // changes since the last call to fillComplete.
3482  bool assertNoNonlocalInserts = false;
3483  if (! params.is_null ()) {
3484  assertNoNonlocalInserts =
3485  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3486  }
3487 
3488  //
3489  // Allocate indices, if they haven't already been allocated
3490  //
3491  if (! indicesAreAllocated ()) {
3492  if (hasColMap ()) {
3493  // We have a column Map, so use local indices.
3494  allocateIndices (LocalIndices);
3495  } else {
3496  // We don't have a column Map, so use global indices.
3497  allocateIndices (GlobalIndices);
3498  }
3499  }
3500 
3501  //
3502  // Do global assembly, if requested and if the communicator
3503  // contains more than one process.
3504  //
3505  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3506  if (mayNeedGlobalAssemble) {
3507  // This first checks if we need to do global assembly.
3508  // The check costs a single all-reduce.
3509  globalAssemble ();
3510  }
3511  else {
3512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3513  numProcs > 1 && nonlocals_.size() > 0, std::runtime_error,
3514  ":" << std::endl << "The graph's communicator contains only one "
3515  "process, but there are nonlocal entries. " << std::endl <<
3516  "This probably means that invalid entries were added to the graph.");
3517  }
3518 
3519  // Set domain and range Map. This may clear the Import / Export
3520  // objects if the new Maps differ from any old ones.
3521  setDomainRangeMaps (domainMap, rangeMap);
3522 
3523  // If the graph does not already have a column Map (either from
3524  // the user constructor calling the version of the constructor
3525  // that takes a column Map, or from a previous fillComplete call),
3526  // then create it.
3527  if (! hasColMap ()) {
3528  makeColMap ();
3529  }
3530 
3531  // Make indices local, if they aren't already.
3532  // The method doesn't do any work if the indices are already local.
3533  makeIndicesLocal ();
3534 
3535  if (! isSorted ()) {
3536  // If this process has no indices, then CrsGraph considers it
3537  // already trivially sorted. Thus, this method need not be
3538  // called on all processes in the row Map's communicator.
3539  sortAllIndices ();
3540  }
3541 
3542  if (! isMerged()) {
3543  mergeAllIndices ();
3544  }
3545  makeImportExport (); // Make Import and Export objects, if necessary
3546  computeGlobalConstants ();
3547  fillLocalGraph (params);
3548  fillComplete_ = true;
3549 
3550 #ifdef HAVE_TPETRA_DEBUG
3551  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3552  isFillActive() == true || isFillComplete() == false, std::logic_error,
3553  ": Violated stated post-conditions. Please contact Tpetra team.");
3554 #endif
3555 
3556  checkInternalState ();
3557  }
3558 
3559 
3560  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3561  void
3563  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3564  const Teuchos::RCP<const map_type>& rangeMap,
3565  const Teuchos::RCP<const import_type>& importer,
3566  const Teuchos::RCP<const export_type>& exporter,
3567  const Teuchos::RCP<Teuchos::ParameterList>& params)
3568  {
3569  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3570 #ifdef HAVE_TPETRA_MMM_TIMINGS
3571  std::string label;
3572  if(!params.is_null())
3573  label = params->get("Timer Label",label);
3574  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3575  using Teuchos::TimeMonitor;
3576  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3577 #endif
3578 
3579 
3580  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3581  domainMap.is_null () || rangeMap.is_null (),
3582  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3584  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
3585  "method unless the graph is StaticProfile.");
3586  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3587  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3588  "call this method unless the graph has a column Map.");
3589  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3590  getNodeNumRows () > 0 && k_rowPtrs_.dimension_0 () == 0,
3591  std::runtime_error, "The calling process has getNodeNumRows() = "
3592  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3593  "been set.");
3594  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3595  static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1,
3596  std::runtime_error, "The row offsets array has length " <<
3597  k_rowPtrs_.dimension_0 () << " != getNodeNumRows()+1 = " <<
3598  (getNodeNumRows () + 1) << ".");
3599 
3600  // Note: We don't need to do the following things which are normally done in fillComplete:
3601  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAllIndices, mergeAllIndices
3602 
3603  // Note: Need to do this so computeGlobalConstants & fillLocalGraph work
3604  //
3605  // The first assignment is always true if the graph has 1-D
3606  // storage (StaticProfile). The second assignment is only true if
3607  // storage is packed.
3608  nodeNumAllocated_ = k_rowPtrs_(getNodeNumRows ());
3609  nodeNumEntries_ = nodeNumAllocated_;
3610 
3611  // Constants from allocateIndices
3612  //
3613  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3614  // away once the graph is allocated. expertStaticFillComplete
3615  // either presumes that the graph is allocated, or "allocates" it.
3616  //
3617  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3618  // version of CrsGraph is to allocate in the constructor, not
3619  // lazily on first insert. That will make both
3620  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3621  numAllocForAllRows_ = 0;
3622  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3623  indicesAreAllocated_ = true;
3624 
3625  // Constants from makeIndicesLocal
3626  //
3627  // The graph has a column Map, so its indices had better be local.
3628  indicesAreLocal_ = true;
3629  indicesAreGlobal_ = false;
3630 
3631  // set domain/range map: may clear the import/export objects
3632 #ifdef HAVE_TPETRA_MMM_TIMINGS
3633  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3634 #endif
3635  setDomainRangeMaps (domainMap, rangeMap);
3636 
3637  // Presume the user sorted and merged the arrays first
3638  indicesAreSorted_ = true;
3639  noRedundancies_ = true;
3640 
3641  // makeImportExport won't create a new importer/exporter if I set one here first.
3642 #ifdef HAVE_TPETRA_MMM_TIMINGS
3643  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3644 #endif
3645 
3646  importer_ = Teuchos::null;
3647  exporter_ = Teuchos::null;
3648  if (importer != Teuchos::null) {
3649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3650  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3651  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3652  std::invalid_argument,": importer does not match matrix maps.");
3653  importer_ = importer;
3654 
3655  }
3656 
3657 #ifdef HAVE_TPETRA_MMM_TIMINGS
3658  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3659 #endif
3660 
3661  if (exporter != Teuchos::null) {
3662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3663  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3664  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3665  std::invalid_argument,": exporter does not match matrix maps.");
3666  exporter_ = exporter;
3667  }
3668 
3669 #ifdef HAVE_TPETRA_MMM_TIMINGS
3670  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3671 #endif
3672 
3673  makeImportExport ();
3674 
3675  // Compute the constants
3676 #ifdef HAVE_TPETRA_MMM_TIMINGS
3677  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC"))));
3678 #endif
3679  computeGlobalConstants ();
3680 
3681  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
3682 #ifdef HAVE_TPETRA_MMM_TIMINGS
3683  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3684 #endif
3685  fillLocalGraph (params);
3686  fillComplete_ = true;
3687 
3688 #ifdef HAVE_TPETRA_MMM_TIMINGS
3689  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3690 #endif
3691  checkInternalState ();
3692  }
3693 
3694 
3695  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3696  void
3698  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3699  {
3701  using Kokkos::create_mirror_view;
3702  typedef decltype (k_numRowEntries_) row_entries_type;
3703  typedef typename local_graph_type::row_map_type row_map_type;
3704  typedef typename row_map_type::non_const_type non_const_row_map_type;
3705  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
3706 #ifdef HAVE_TPETRA_DEBUG
3707  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3708  "expertStaticFillComplete): ";
3709 #endif // HAVE_TPETRA_DEBUG
3710  const size_t lclNumRows = this->getNodeNumRows ();
3711 
3712  // This method's goal is to fill in the two arrays (compressed
3713  // sparse row format) that define the sparse graph's structure.
3714  //
3715  // Use the nonconst version of row_map_type for ptr_d, because
3716  // the latter is const and we need to modify ptr_d here.
3717  non_const_row_map_type ptr_d;
3718  row_map_type ptr_d_const;
3719  lclinds_1d_type ind_d;
3720 
3721  bool requestOptimizedStorage = true;
3722  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3723  requestOptimizedStorage = false;
3724  }
3725  if (getProfileType () == DynamicProfile) {
3726  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
3727  //
3728  // DynamicProfile means that the graph's column indices are
3729  // currently stored in a 2-D "unpacked" format, in the
3730  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
3731  // (ind_d) and then copy from 2-D storage (lclInds2D_) into 1-D
3732  // storage (ind_d).
3733 #ifdef HAVE_TPETRA_DEBUG
3734  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3735  (static_cast<size_t> (k_numRowEntries_.dimension_0 ()) != lclNumRows,
3736  std::logic_error, "(DynamicProfile branch) "
3737  "k_numRowEntries_.dimension_0() = "
3738  << k_numRowEntries_.dimension_0 ()
3739  << " != getNodeNumRows() = " << lclNumRows << "");
3740 #endif // HAVE_TPETRA_DEBUG
3741 
3742  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3743  // array of valid entry counts per row (k_numRowEntries_). The
3744  // pack method can handle its counts input being a host View.
3745  //
3746  // Total number of entries in the matrix on the calling
3747  // process. We will compute this in the loop below. It's
3748  // cheap to compute and useful as a sanity check.
3749  size_t lclTotalNumEntries = 0;
3750  {
3751  // Allocate the packed row offsets array.
3752  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
3753  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3754  // This function can handle that numRowEnt_h lives on host.
3755  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3756  ptr_d_const = ptr_d;
3757  }
3758 
3759 #ifdef HAVE_TPETRA_DEBUG
3760  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3761  (static_cast<size_t> (ptr_d.dimension_0 ()) != lclNumRows + 1,
3762  std::logic_error, "(DynamicProfile branch) After packing ptr_d, "
3763  "ptr_d.dimension_0() = " << ptr_d.dimension_0 () << " != "
3764  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
3765  {
3766  // mfh 26 Jun 2016: Don't assume UVM. ptr_d lives in device
3767  // memory, so if we want to check its last entry on host, copy
3768  // it back to host explicitly.
3769  auto ptr_d_ent_d = Kokkos::subview (ptr_d, lclNumRows);
3770  auto ptr_d_ent_h = create_mirror_view (ptr_d_ent_d);
3771  Kokkos::deep_copy (ptr_d_ent_h, ptr_d_ent_d);
3772  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3773  (ptr_d_ent_h () != lclTotalNumEntries, std::logic_error,
3774  "(DynamicProfile branch) After packing ptr_d, ptr_d(lclNumRows = "
3775  << lclNumRows << ") = " << ptr_d_ent_h () << " != total number of "
3776  "entries on the calling process = " << lclTotalNumEntries << ".");
3777  }
3778 #endif // HAVE_TPETRA_DEBUG
3779 
3780  // Allocate the array of packed column indices.
3781  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
3782  // Pack the column indices. We have to do this sequentially on
3783  // host, since lclInds2D_ is an ArrayRCP<Array<LO>>, which
3784  // doesn't work in parallel kernels (its iterators aren't even
3785  // thread safe in debug mode).
3786  {
3787  auto ptr_h = create_mirror_view (ptr_d);
3788  Kokkos::deep_copy (ptr_h, ptr_d); // we need the entries on host
3789  auto ind_h = create_mirror_view (ind_d); // we'll fill this on host
3790 
3791  // k_numRowEntries_ is a host View already, so we can use it here.
3792  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3793  for (size_t row = 0; row < lclNumRows; ++row) {
3794  const size_t numEnt = numRowEnt_h(row);
3795  std::copy (lclInds2D_[row].begin (),
3796  lclInds2D_[row].begin () + numEnt,
3797  ind_h.ptr_on_device () + ptr_h(row));
3798  }
3799  Kokkos::deep_copy (ind_d, ind_h);
3800  }
3801 
3802 #ifdef HAVE_TPETRA_DEBUG
3803  // Sanity check of packed row offsets.
3804  if (ptr_d.dimension_0 () != 0) {
3805  const size_t numOffsets = static_cast<size_t> (ptr_d.dimension_0 ());
3806 
3807  // mfh 26 Jun 2016: Don't assume UVM. ptr_d lives in device
3808  // memory, so if we want to check its last entry on host, copy
3809  // it back to host explicitly.
3810  auto ptr_d_ent_d = Kokkos::subview (ptr_d, numOffsets - 1);
3811  auto ptr_d_ent_h = create_mirror_view (ptr_d_ent_d);
3812  Kokkos::deep_copy (ptr_d_ent_h, ptr_d_ent_d);
3813  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3814  (static_cast<size_t> (ptr_d_ent_h ()) != ind_d.dimension_0 (),
3815  std::logic_error, "(DynamicProfile branch) After packing column "
3816  "indices, ptr_d(" << (numOffsets-1) << ") = " << ptr_d_ent_h ()
3817  << " != ind_d.dimension_0() = " << ind_d.dimension_0 () << ".");
3818  }
3819 #endif // HAVE_TPETRA_DEBUG
3820  }
3821  else if (getProfileType () == StaticProfile) {
3822  // StaticProfile means that the graph's column indices are
3823  // currently stored in a 1-D format, with row offsets in
3824  // k_rowPtrs_ and local column indices in k_lclInds1D_.
3825 
3826 #ifdef HAVE_TPETRA_DEBUG
3827  // StaticProfile also means that the graph's array of row
3828  // offsets must already be allocated.
3829  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3830  (k_rowPtrs_.dimension_0 () == 0, std::logic_error,
3831  "(StaticProfile branch) k_rowPtrs_ has size zero, but shouldn't");
3832  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3833  (k_rowPtrs_.dimension_0 () != lclNumRows + 1, std::logic_error,
3834  "(StaticProfile branch) k_rowPtrs_.dimension_0() = "
3835  << k_rowPtrs_.dimension_0 () << " != (lclNumRows + 1) = "
3836  << (lclNumRows + 1) << ".");
3837  {
3838  const size_t numOffsets = k_rowPtrs_.dimension_0 ();
3839 
3840  // mfh 26 Jun 2016: Don't assume UVM. k_rowPtrs_ lives in
3841  // device memory, so if we want to check its last entry on
3842  // host, copy it back to host explicitly.
3843  auto k_rowPtrs_ent_d = Kokkos::subview (k_rowPtrs_, numOffsets - 1);
3844  auto k_rowPtrs_ent_h = create_mirror_view (k_rowPtrs_ent_d);
3845  Kokkos::deep_copy (k_rowPtrs_ent_h, k_rowPtrs_ent_d);
3846 
3847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3848  (numOffsets != 0 &&
3849  k_lclInds1D_.dimension_0 () != k_rowPtrs_ent_h (),
3850  std::logic_error, "(StaticProfile branch) numOffsets = " <<
3851  numOffsets << " != 0 and k_lclInds1D_.dimension_0() = " <<
3852  k_lclInds1D_.dimension_0 () << " != k_rowPtrs_(" << numOffsets <<
3853  ") = " << k_rowPtrs_ent_h () << ".");
3854  }
3855 #endif // HAVE_TPETRA_DEBUG
3856 
3857  if (nodeNumEntries_ != nodeNumAllocated_) {
3858  // The graph's current 1-D storage is "unpacked." This means
3859  // the row offsets may differ from what the final row offsets
3860  // should be. This could happen, for example, if the user
3861  // specified StaticProfile in the constructor and set an upper
3862  // bound on the number of entries in each row, but didn't fill
3863  // all those entries.
3864 
3865 #ifdef HAVE_TPETRA_DEBUG
3866  if (k_rowPtrs_.dimension_0 () != 0) {
3867  const size_t numOffsets =
3868  static_cast<size_t> (k_rowPtrs_.dimension_0 ());
3869  // mfh 26 Jun 2016: Don't assume UVM. k_rowPtrs_ lives in
3870  // device memory, so if we want to check its last entry on
3871  // host, copy it back to host explicitly.
3872  auto k_rowPtrs_ent_d = Kokkos::subview (k_rowPtrs_, numOffsets - 1);
3873  auto k_rowPtrs_ent_h = create_mirror_view (k_rowPtrs_ent_d);
3874  Kokkos::deep_copy (k_rowPtrs_ent_h, k_rowPtrs_ent_d);
3875  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3876  (k_rowPtrs_ent_h () != static_cast<size_t> (k_lclInds1D_.dimension_0 ()),
3877  std::logic_error, "(StaticProfile unpacked branch) Before "
3878  "allocating or packing, k_rowPtrs_(" << (numOffsets-1) << ") = "
3879  << k_rowPtrs_ent_h () << " != k_lclInds1D_.dimension_0() = "
3880  << k_lclInds1D_.dimension_0 () << ".");
3881  }
3882 #endif // HAVE_TPETRA_DEBUG
3883 
3884  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3885  // array of valid entry counts per row (k_numRowEntries_).
3886 
3887  // Total number of entries in the matrix on the calling
3888  // process. We will compute this in the loop below. It's
3889  // cheap to compute and useful as a sanity check.
3890  size_t lclTotalNumEntries = 0;
3891  {
3892  // Allocate the packed row offsets array.
3893  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3894  ptr_d_const = ptr_d;
3895 
3896  // It's ok that k_numRowEntries_ is a host View; the
3897  // function can handle this.
3898  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3899 #ifdef HAVE_TPETRA_DEBUG
3900  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3901  (static_cast<size_t> (numRowEnt_h.dimension_0 ()) != lclNumRows,
3902  std::logic_error, "(StaticProfile unpacked branch) "
3903  "numRowEnt_h.dimension_0() = " << numRowEnt_h.dimension_0 ()
3904  << " != getNodeNumRows() = " << lclNumRows << "");
3905 #endif // HAVE_TPETRA_DEBUG
3906 
3907  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3908 
3909 #ifdef HAVE_TPETRA_DEBUG
3910  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3911  (static_cast<size_t> (ptr_d.dimension_0 ()) != lclNumRows + 1,
3912  std::logic_error, "(StaticProfile unpacked branch) After "
3913  "allocating ptr_d, ptr_d.dimension_0() = " << ptr_d.dimension_0 ()
3914  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3915  {
3916  // mfh 26 Jun 2016: Don't assume UVM. ptr_d lives in device
3917  // memory, so if we want to check its last entry on host, copy
3918  // it back to host explicitly.
3919  auto ptr_d_ent_d = Kokkos::subview (ptr_d, lclNumRows);
3920  auto ptr_d_ent_h = create_mirror_view (ptr_d_ent_d);
3921  Kokkos::deep_copy (ptr_d_ent_h, ptr_d_ent_d);
3922  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3923  (ptr_d_ent_h () != lclTotalNumEntries, std::logic_error,
3924  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
3925  "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3926  << ") = " << ptr_d_ent_h () << " != total number of entries on "
3927  "the calling process = " << lclTotalNumEntries << ".");
3928  }
3929 #endif // HAVE_TPETRA_DEBUG
3930  }
3931 
3932  // Allocate the array of packed column indices.
3933  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
3934 
3935  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
3936  // them, using the packed row offsets array ptr_d that we
3937  // created above.
3938  //
3939  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3940  // CrsMatrix?), we need to keep around the unpacked row
3941  // offsets and column indices.
3942 
3943  // Pack the column indices from unpacked k_lclInds1D_ into
3944  // packed ind_d. We will replace k_lclInds1D_ below.
3945  typedef pack_functor<
3946  typename local_graph_type::entries_type::non_const_type,
3947  row_map_type> inds_packer_type;
3948  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
3949  Kokkos::parallel_for (lclNumRows, f);
3950 
3951 #ifdef HAVE_TPETRA_DEBUG
3952  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3953  (ptr_d.dimension_0 () == 0, std::logic_error, "(StaticProfile "
3954  "\"Optimize Storage\"=true branch) After packing, "
3955  "ptr_d.dimension_0() = 0. This probably means k_rowPtrs_ was "
3956  "never allocated.");
3957  if (ptr_d.dimension_0 () != 0) {
3958  const size_t numOffsets = static_cast<size_t> (ptr_d.dimension_0 ());
3959  // mfh 26 Jun 2016: Don't assume UVM. ptr_d lives in device
3960  // memory, so if we want to check its last entry on host, copy
3961  // it back to host explicitly.
3962  auto ptr_d_ent_d = Kokkos::subview (ptr_d, numOffsets - 1);
3963  auto ptr_d_ent_h = create_mirror_view (ptr_d_ent_d);
3964  Kokkos::deep_copy (ptr_d_ent_h, ptr_d_ent_d);
3965  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3966  (static_cast<size_t> (ptr_d_ent_h ()) != ind_d.dimension_0 (),
3967  std::logic_error, "(StaticProfile \"Optimize Storage\"=true "
3968  "branch) After packing, ptr_d(" << (numOffsets-1) << ") = " <<
3969  ptr_d_ent_h () << " != ind_d.dimension_0() = " <<
3970  ind_d.dimension_0 () << ".");
3971  }
3972 #endif // HAVE_TPETRA_DEBUG
3973  }
3974  else { // We don't have to pack, so just set the pointers.
3975  ptr_d_const = k_rowPtrs_;
3976  ind_d = k_lclInds1D_;
3977 
3978 #ifdef HAVE_TPETRA_DEBUG
3979  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3980  (ptr_d_const.dimension_0 () == 0, std::logic_error, "(StaticProfile "
3981  "\"Optimize Storage\"=false branch) ptr_d_const.dimension_0() = 0. "
3982  "This probably means that k_rowPtrs_ was never allocated.");
3983  if (ptr_d_const.dimension_0 () != 0) {
3984  const size_t numOffsets =
3985  static_cast<size_t> (ptr_d_const.dimension_0 ());
3986  // mfh 26 Jun 2016: Don't assume UVM. ptr_d_const lives in
3987  // device memory, so if we want to check its last entry on
3988  // host, copy it back to host explicitly.
3989  auto ptr_d_const_ent_d = Kokkos::subview (ptr_d_const, numOffsets - 1);
3990  auto ptr_d_const_ent_h = create_mirror_view (ptr_d_const_ent_d);
3991  Kokkos::deep_copy (ptr_d_const_ent_h, ptr_d_const_ent_d);
3992  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3993  (static_cast<size_t> (ptr_d_const_ent_h ()) != ind_d.dimension_0 (),
3994  std::logic_error, "(StaticProfile \"Optimize Storage\"=false "
3995  "branch) ptr_d_const(" << (numOffsets-1) << ") = " <<
3996  ptr_d_const_ent_h () << " != ind_d.dimension_0() = " <<
3997  ind_d.dimension_0 () << ".");
3998  }
3999 #endif // HAVE_TPETRA_DEBUG
4000  }
4001  }
4002 
4003 #ifdef HAVE_TPETRA_DEBUG
4004  // Extra sanity checks.
4005  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4006  (static_cast<size_t> (ptr_d_const.dimension_0 ()) != lclNumRows + 1,
4007  std::logic_error, "After packing, ptr_d_const.dimension_0() = " <<
4008  ptr_d_const.dimension_0 () << " != lclNumRows+1 = " << (lclNumRows+1)
4009  << ".");
4010  if (ptr_d_const.dimension_0 () != 0) {
4011  const size_t numOffsets = static_cast<size_t> (ptr_d_const.dimension_0 ());
4012  // mfh 26 Jun 2016: Don't assume UVM. ptr_d_const lives in
4013  // device memory, so if we want to check its last entry on
4014  // host, copy it back to host explicitly.
4015  auto ptr_d_const_ent_d = Kokkos::subview (ptr_d_const, numOffsets - 1);
4016  auto ptr_d_const_ent_h = create_mirror_view (ptr_d_const_ent_d);
4017  Kokkos::deep_copy (ptr_d_const_ent_h, ptr_d_const_ent_d);
4018  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4019  (static_cast<size_t> (ptr_d_const_ent_h ()) != ind_d.dimension_0 (),
4020  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1) <<
4021  ") = " << ptr_d_const_ent_h () << " != ind_d.dimension_0() = " <<
4022  ind_d.dimension_0 () << ".");
4023  }
4024 #endif // HAVE_TPETRA_DEBUG
4025 
4026  if (requestOptimizedStorage) {
4027  // With optimized storage, we don't need to store the 2-D column
4028  // indices array-of-arrays, or the array of row entry counts.
4029 
4030  // Free graph data structures that are only needed for 2-D or
4031  // unpacked 1-D storage.
4032  lclInds2D_ = Teuchos::null;
4033  k_numRowEntries_ = row_entries_type ();
4034 
4035  // Keep the new 1-D packed allocations.
4036  k_rowPtrs_ = ptr_d_const;
4037  k_lclInds1D_ = ind_d;
4038 
4039  // Storage is packed now, so the number of allocated entries is
4040  // the same as the actual number of entries.
4041  nodeNumAllocated_ = nodeNumEntries_;
4042  // The graph is definitely StaticProfile now, whether or not it
4043  // was before.
4044  pftype_ = StaticProfile;
4045  }
4046 
4047  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4048 
4049  // Build the local graph.
4050  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4051 
4052  // TODO (mfh 13 Mar 2014) getNodeNumDiags(), isUpperTriangular(),
4053  // and isLowerTriangular() depend on computeGlobalConstants(), in
4054  // particular the part where it looks at the local matrix. You
4055  // have to use global indices to determine which entries are
4056  // diagonal, or above or below the diagonal. However, lower or
4057  // upper triangularness is a local property.
4058  }
4059 
4060  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4061  void
4062  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
4063  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4064  {
4065  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4066  //
4067  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4068  // has no entries, because in that case, currently it is neither
4069  // locally nor globally indexed. This will change once we get rid
4070  // of lazy allocation (so that the constructor allocates indices
4071  // and therefore commits to local vs. global).
4072  const char tfecfFuncName[] = "replaceColMap: ";
4073  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4074  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4075  "Requires matching maps and non-static graph.");
4076  colMap_ = newColMap;
4077  }
4078 
4079  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4080  void
4082  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4083  const Teuchos::RCP<const import_type>& newImport,
4084  const bool sortIndicesInEachRow)
4085  {
4086  using Teuchos::REDUCE_MIN;
4087  using Teuchos::reduceAll;
4088  using Teuchos::RCP;
4089  typedef GlobalOrdinal GO;
4090  typedef LocalOrdinal LO;
4091  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4092  const char tfecfFuncName[] = "reindexColumns: ";
4093 
4094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4095  isFillComplete (), std::runtime_error, "The graph is fill complete "
4096  "(isFillComplete() returns true). You must call resumeFill() before "
4097  "you may call this method.");
4098 
4099  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4100  // doesn't claim to do the work of an Import or Export. This
4101  // means that for all processes, the calling process MUST own all
4102  // column indices, in both the old column Map (if it exists) and
4103  // the new column Map. We check this via an all-reduce.
4104  //
4105  // Some processes may be globally indexed, others may be locally
4106  // indexed, and others (that have no graph entries) may be
4107  // neither. This method will NOT change the graph's current
4108  // state. If it's locally indexed, it will stay that way, and
4109  // vice versa. It would easy to add an option to convert indices
4110  // from global to local, so as to save a global-to-local
4111  // conversion pass. However, we don't do this here. The intended
4112  // typical use case is that the graph already has a column Map and
4113  // is locally indexed, and this is the case for which we optimize.
4114 
4115  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4116 
4117  // Attempt to convert indices to the new column Map's version of
4118  // local. This will fail if on the calling process, the graph has
4119  // indices that are not on that process in the new column Map.
4120  // After the local conversion attempt, we will do an all-reduce to
4121  // see if any processes failed.
4122 
4123  // If this is false, then either the graph contains a column index
4124  // which is invalid in the CURRENT column Map, or the graph is
4125  // locally indexed but currently has no column Map. In either
4126  // case, there is no way to convert the current local indices into
4127  // global indices, so that we can convert them into the new column
4128  // Map's local indices. It's possible for this to be true on some
4129  // processes but not others, due to replaceColMap.
4130  bool allCurColIndsValid = true;
4131  // On the calling process, are all valid current column indices
4132  // also in the new column Map on the calling process? In other
4133  // words, does local reindexing suffice, or should the user have
4134  // done an Import or Export instead?
4135  bool localSuffices = true;
4136 
4137  // Final arrays for the local indices. We will allocate exactly
4138  // one of these ONLY if the graph is locally indexed on the
4139  // calling process, and ONLY if the graph has one or more entries
4140  // (is not empty) on the calling process. In that case, we
4141  // allocate the first (1-D storage) if the graph has a static
4142  // profile, else we allocate the second (2-D storage).
4143  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4144  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4145 
4146  // If indices aren't allocated, that means the calling process
4147  // owns no entries in the graph. Thus, there is nothing to
4148  // convert, and it trivially succeeds locally.
4149  if (indicesAreAllocated ()) {
4150  if (isLocallyIndexed ()) {
4151  if (hasColMap ()) { // locally indexed, and currently has a column Map
4152  const map_type& oldColMap = * (getColMap ());
4153  if (pftype_ == StaticProfile) {
4154  // Allocate storage for the new local indices.
4155  RCP<node_type> node = getRowMap ()->getNode ();
4156  newLclInds1D =
4157  col_inds_type ("Tpetra::CrsGraph::ind", nodeNumAllocated_);
4158  // Attempt to convert the new indices locally.
4159  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4160  const RowInfo rowInfo = this->getRowInfo (lclRow);
4161  const size_t beg = rowInfo.offset1D;
4162  const size_t end = beg + rowInfo.numEntries;
4163  for (size_t k = beg; k < end; ++k) {
4164  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4165  // use a DualView instead.
4166  const LO oldLclCol = k_lclInds1D_(k);
4167  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4168  allCurColIndsValid = false;
4169  break; // Stop at the first invalid index
4170  }
4171  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4172 
4173  // The above conversion MUST succeed. Otherwise, the
4174  // current local index is invalid, which means that
4175  // the graph was constructed incorrectly.
4176  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4177  allCurColIndsValid = false;
4178  break; // Stop at the first invalid index
4179  }
4180  else {
4181  const LO newLclCol = newColMap->getLocalElement (gblCol);
4182  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4183  localSuffices = false;
4184  break; // Stop at the first invalid index
4185  }
4186  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4187  // use a DualView instead.
4188  newLclInds1D(k) = newLclCol;
4189  }
4190  } // for each entry in the current row
4191  } // for each locally owned row
4192  }
4193  else { // pftype_ == DynamicProfile
4194  // Allocate storage for the new local indices. We only
4195  // allocate the outer array here; we will allocate the
4196  // inner arrays below.
4197  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4198 
4199  // Attempt to convert the new indices locally.
4200  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4201  const RowInfo rowInfo = this->getRowInfo (lclRow);
4202  newLclInds2D.resize (rowInfo.allocSize);
4203 
4204  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4205  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4206 
4207  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4208  const LO oldLclCol = oldLclRowView[k];
4209  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4210  allCurColIndsValid = false;
4211  break; // Stop at the first invalid index
4212  }
4213  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4214 
4215  // The above conversion MUST succeed. Otherwise, the
4216  // local index is invalid and the graph is wrong.
4217  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4218  allCurColIndsValid = false;
4219  break; // Stop at the first invalid index
4220  }
4221  else {
4222  const LO newLclCol = newColMap->getLocalElement (gblCol);
4223  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4224  localSuffices = false;
4225  break; // Stop at the first invalid index.
4226  }
4227  newLclRowView[k] = newLclCol;
4228  }
4229  } // for each entry in the current row
4230  } // for each locally owned row
4231  } // pftype_
4232  }
4233  else { // locally indexed, but no column Map
4234  // This case is only possible if replaceColMap() was called
4235  // with a null argument on the calling process. It's
4236  // possible, but it means that this method can't possibly
4237  // succeed, since we have no way of knowing how to convert
4238  // the current local indices to global indices.
4239  allCurColIndsValid = false;
4240  }
4241  }
4242  else { // globally indexed
4243  // If the graph is globally indexed, we don't need to save
4244  // local indices, but we _do_ need to know whether the current
4245  // global indices are valid in the new column Map. We may
4246  // need to do a getRemoteIndexList call to find this out.
4247  //
4248  // In this case, it doesn't matter whether the graph currently
4249  // has a column Map. We don't need the old column Map to
4250  // convert from global indices to the _new_ column Map's local
4251  // indices. Furthermore, we can use the same code, whether
4252  // the graph is static or dynamic profile.
4253 
4254  // Test whether the current global indices are in the new
4255  // column Map on the calling process.
4256  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4257  const RowInfo rowInfo = this->getRowInfo (lclRow);
4258  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4259  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4260  const GO gblCol = oldGblRowView[k];
4261  if (! newColMap->isNodeGlobalElement (gblCol)) {
4262  localSuffices = false;
4263  break; // Stop at the first invalid index
4264  }
4265  } // for each entry in the current row
4266  } // for each locally owned row
4267  } // locally or globally indexed
4268  } // whether indices are allocated
4269 
4270  // Do an all-reduce to check both possible error conditions.
4271  int lclSuccess[2];
4272  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4273  lclSuccess[1] = localSuffices ? 1 : 0;
4274  int gblSuccess[2];
4275  gblSuccess[0] = 0;
4276  gblSuccess[1] = 0;
4277  RCP<const Teuchos::Comm<int> > comm =
4278  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4279  if (! comm.is_null ()) {
4280  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4281  }
4282 
4283  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4284  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4285  " The most likely reason is that the graph is locally indexed, but the "
4286  "column Map is missing (null) on some processes, due to a previous call "
4287  "to replaceColMap().");
4288 
4289  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4290  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4291  "contains column indices that are in the old column Map, but not in the "
4292  "new column Map (on that process). This method does NOT redistribute "
4293  "data; it does not claim to do the work of an Import or Export operation."
4294  " This means that for all processess, the calling process MUST own all "
4295  "column indices, in both the old column Map and the new column Map. In "
4296  "this case, you will need to do an Import or Export operation to "
4297  "redistribute data.");
4298 
4299  // Commit the results.
4300  if (isLocallyIndexed ()) {
4301  if (pftype_ == StaticProfile) {
4302  k_lclInds1D_ = newLclInds1D;
4303  } else { // dynamic profile
4304  lclInds2D_ = newLclInds2D;
4305  }
4306  // We've reindexed, so we don't know if the indices are sorted.
4307  //
4308  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4309  // since we're already going through all the indices above. We
4310  // could also sort each row in place; that way, we would only
4311  // have to make one pass over the rows.
4312  indicesAreSorted_ = false;
4313  if (sortIndicesInEachRow) {
4314  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4315  // order to call this method.
4316  //
4317  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4318  // guarantee. It would be better to sort the new index arrays
4319  // before committing them.
4320  sortAllIndices ();
4321  }
4322  }
4323  colMap_ = newColMap;
4324 
4325  if (newImport.is_null ()) {
4326  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4327  // check whether the input Import is null on any process.
4328  //
4329  // If the domain Map hasn't been set yet, we can't compute a new
4330  // Import object. Leave it what it is; it should be null, but
4331  // it doesn't matter. If the domain Map _has_ been set, then
4332  // compute a new Import object if necessary.
4333  if (! domainMap_.is_null ()) {
4334  if (! domainMap_->isSameAs (* newColMap)) {
4335  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4336  } else {
4337  importer_ = Teuchos::null; // don't need an Import
4338  }
4339  }
4340  } else {
4341  // The caller gave us an Import object. Assume that it's valid.
4342  importer_ = newImport;
4343  }
4344  }
4345 
4346 
4347  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4348  void
4350  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4351  const Teuchos::RCP<const import_type>& newImporter)
4352  {
4353  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4354  TEUCHOS_TEST_FOR_EXCEPTION(
4355  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4356  "this method unless the graph already has a column Map.");
4357  TEUCHOS_TEST_FOR_EXCEPTION(
4358  newDomainMap.is_null (), std::invalid_argument,
4359  prefix << "The new domain Map must be nonnull.");
4360 
4361 #ifdef HAVE_TPETRA_DEBUG
4362  if (newImporter.is_null ()) {
4363  // It's not a good idea to put expensive operations in a macro
4364  // clause, even if they are side effect - free, because macros
4365  // don't promise that they won't evaluate their arguments more
4366  // than once. It's polite for them to do so, but not required.
4367  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4368  TEUCHOS_TEST_FOR_EXCEPTION(
4369  colSameAsDom, std::invalid_argument, "If the new Import is null, "
4370  "then the new domain Map must be the same as the current column Map.");
4371  }
4372  else {
4373  const bool colSameAsTgt =
4374  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4375  const bool newDomSameAsSrc =
4376  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4377  TEUCHOS_TEST_FOR_EXCEPTION(
4378  ! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4379  "new Import is nonnull, then the current column Map must be the same "
4380  "as the new Import's target Map, and the new domain Map must be the "
4381  "same as the new Import's source Map.");
4382  }
4383 #endif // HAVE_TPETRA_DEBUG
4384 
4385  domainMap_ = newDomainMap;
4386  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4387  }
4388 
4389  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4393  {
4394  return lclGraph_;
4395  }
4396 
4397  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4398  void
4401  {
4402  using Teuchos::ArrayView;
4403  using Teuchos::outArg;
4404  using Teuchos::reduceAll;
4405  typedef LocalOrdinal LO;
4406  typedef GlobalOrdinal GO;
4407  typedef global_size_t GST;
4408 
4409 #ifdef HAVE_TPETRA_DEBUG
4410  TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap(), std::logic_error, "Tpetra::"
4411  "CrsGraph::computeGlobalConstants: At this point, the graph should have "
4412  "a column Map, but it does not. Please report this bug to the Tpetra "
4413  "developers.");
4414 #endif // HAVE_TPETRA_DEBUG
4415 
4416  // If necessary, (re)compute the local constants: nodeNumDiags_,
4417  // lowerTriangular_, upperTriangular_, and nodeMaxNumRowEntries_.
4418  if (! haveLocalConstants_) {
4419  // We have actually already computed nodeNumEntries_.
4420  // nodeNumEntries_ gets updated by methods that insert or remove
4421  // indices (including setAllIndices and
4422  // expertStaticFillComplete). Before fillComplete, its count
4423  // may include duplicate column indices in the same row.
4424  // However, mergeRowIndices and mergeRowIndicesAndValues both
4425  // subtract off merged indices in each row from the total count.
4426  // Thus, nodeNumEntries_ _should_ be accurate at this point,
4427  // meaning that we don't have to re-count it here.
4428 
4429  // Reset local properties
4430  upperTriangular_ = true;
4431  lowerTriangular_ = true;
4432  nodeMaxNumRowEntries_ = 0;
4433  nodeNumDiags_ = 0;
4434 
4435  // At this point, we know that we have both a row Map and a column Map.
4436  const map_type& rowMap = *rowMap_;
4437  const map_type& colMap = *colMap_;
4438 
4439  // Go through all the entries of the graph. Count the number of
4440  // diagonal elements we encounter, and figure out whether the
4441  // graph is lower or upper triangular. Diagonal elements are
4442  // determined using global indices, with respect to the whole
4443  // graph. However, lower or upper triangularity is a local
4444  // property, and is determined using local indices.
4445  //
4446  // At this point, indices have already been sorted in each row.
4447  // That makes finding out whether the graph is lower / upper
4448  // triangular easier.
4449  if (indicesAreAllocated () && nodeNumAllocated_ > 0) {
4450  const LO numLocalRows = static_cast<LO> (this->getNodeNumRows ());
4451  for (LO localRow = 0; localRow < numLocalRows; ++localRow) {
4452  const GO globalRow = rowMap.getGlobalElement (localRow);
4453  // Find the local (column) index for the diagonal entry.
4454  // This process might not necessarily own _any_ entries in
4455  // the current row. If it doesn't, skip this row. It won't
4456  // affect any of the attributes (nodeNumDiagons_,
4457  // upperTriangular_, lowerTriangular_, or
4458  // nodeMaxNumRowEntries_) which this loop sets.
4459  const LO rlcid = colMap.getLocalElement (globalRow);
4460  // This process owns one or more entries in the current row.
4461  const RowInfo rowInfo = this->getRowInfo (localRow);
4462  ArrayView<const LO> rview = getLocalView (rowInfo);
4463  typename ArrayView<const LO>::iterator beg, end, cur;
4464  beg = rview.begin();
4465  end = beg + rowInfo.numEntries;
4466  if (beg != end) {
4467  for (cur = beg; cur != end; ++cur) {
4468  // is this the diagonal?
4469  if (rlcid == *cur) ++nodeNumDiags_;
4470  }
4471  // Local column indices are sorted in each row. That means
4472  // the smallest column index in this row (on this process)
4473  // is *beg, and the largest column index in this row (on
4474  // this process) is *(end - 1). We know that end - 1 is
4475  // valid because beg != end.
4476  const size_t smallestCol = static_cast<size_t> (*beg);
4477  const size_t largestCol = static_cast<size_t> (*(end - 1));
4478 
4479  if (smallestCol < static_cast<size_t> (localRow)) {
4480  upperTriangular_ = false;
4481  }
4482  if (static_cast<size_t> (localRow) < largestCol) {
4483  lowerTriangular_ = false;
4484  }
4485  }
4486  // Update the max number of entries over all rows.
4487  nodeMaxNumRowEntries_ = std::max (nodeMaxNumRowEntries_, rowInfo.numEntries);
4488  }
4489  }
4490  haveLocalConstants_ = true;
4491  } // if my process doesn't have local constants
4492 
4493  // Compute global constants from local constants. Processes that
4494  // already have local constants still participate in the
4495  // all-reduces, using their previously computed values.
4496  if (haveGlobalConstants_ == false) {
4497  // Promote all the nodeNum* and nodeMaxNum* quantities from
4498  // size_t to global_size_t, when doing the all-reduces for
4499  // globalNum* / globalMaxNum* results.
4500  //
4501  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4502  // this in two all-reduces (one for the sum and the other for
4503  // the max), or use a custom MPI_Op that combines the sum and
4504  // the max. The latter might even be slower than two
4505  // all-reduces on modern network hardware. It would also be a
4506  // good idea to use nonblocking all-reduces (MPI 3), so that we
4507  // don't have to wait around for the first one to finish before
4508  // starting the second one.
4509  GST lcl[2], gbl[2];
4510  lcl[0] = static_cast<GST> (nodeNumEntries_);
4511  lcl[1] = static_cast<GST> (nodeNumDiags_);
4512  reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_SUM,
4513  2, lcl, gbl);
4514  globalNumEntries_ = gbl[0];
4515  globalNumDiags_ = gbl[1];
4516  reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_MAX,
4517  static_cast<GST> (nodeMaxNumRowEntries_),
4518  outArg (globalMaxNumRowEntries_));
4519  haveGlobalConstants_ = true;
4520  }
4521  }
4522 
4523 
4524  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4525  void
4526  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
4527  makeIndicesLocal ()
4528  {
4529  using Teuchos::arcp;
4530  using Teuchos::Array;
4531  typedef LocalOrdinal LO;
4532  typedef GlobalOrdinal GO;
4533  typedef device_type DT;
4534  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
4535  typedef decltype (k_numRowEntries_) row_entries_type;
4536  typedef typename row_entries_type::non_const_value_type num_ent_type;
4537  typedef typename local_graph_type::entries_type::non_const_type
4538  lcl_col_inds_type;
4539  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
4540  device_type> gbl_col_inds_type;
4541 
4542  const char tfecfFuncName[] = "makeIndicesLocal: ";
4543 
4544  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4545  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4546  "column Map yet. This method should never be called in that case. "
4547  "Please report this bug to the Tpetra developers.");
4548  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4549  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4550  "that it has a column Map, because hasColMap() returns true. However, "
4551  "the result of getColMap() is null. This should never happen. Please "
4552  "report this bug to the Tpetra developers.");
4553 
4554  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4555  const map_type& colMap = * (this->getColMap ());
4556 
4557  if (isGloballyIndexed () && lclNumRows != 0) {
4558  // This is a host View.
4559  typename row_entries_type::const_type h_numRowEnt = k_numRowEntries_;
4560 
4561  // allocate data for local indices
4562  if (getProfileType () == StaticProfile) {
4563  // If GO and LO are the same size, we can reuse the existing
4564  // array of 1-D index storage to convert column indices from
4565  // GO to LO. Otherwise, we'll just allocate a new buffer.
4566  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
4567  if (nodeNumAllocated_ && LO_GO_same) {
4568  // This prevents a build error (illegal assignment) if
4569  // LO_GO_same is _not_ true.
4570  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
4571  t_GlobalOrdinal_1D,
4572  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
4573  }
4574  else {
4575  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4576  (k_rowPtrs_.dimension_0 () == 0, std::logic_error,
4577  "k_rowPtrs_.dimension_0() == 0. This should never happen at this "
4578  "point. Please report this bug to the Tpetra developers.");
4579 
4580  // Don't assume UVM. Get a ("0-D") View of the last entry
4581  // of k_rowPtrs_, get its host mirror View, and use that to
4582  // get the number of entries.
4583  auto lastEnt_d = Kokkos::subview (k_rowPtrs_, lclNumRows);
4584  auto lastEnt_h = Kokkos::create_mirror_view (lastEnt_d);
4585  Kokkos::deep_copy (lastEnt_h, lastEnt_d);
4586  const auto numEnt = lastEnt_h ();
4587 
4588  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::lclind", numEnt);
4589  }
4590 
4591  auto lclColMap = colMap.getLocalMap ();
4592  // This is a "device mirror" of the host View h_numRowEnt.
4593  //
4594  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4595  // Device instance is to use its default constructor. See the
4596  // following Kokkos issue:
4597  //
4598  // https://github.com/kokkos/kokkos/issues/442
4599  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
4600 
4602  const size_t numBad =
4603  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
4604  k_gblInds1D_,
4605  k_rowPtrs_,
4606  lclColMap,
4607  k_numRowEnt);
4608  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4609  (numBad != 0, std::runtime_error, "When converting column indices "
4610  "from global to local, we encountered " << numBad
4611  << "ind" << (numBad != static_cast<size_t> (1) ? "ices" : "ex")
4612  << " that do" << (numBad != static_cast<size_t> (1) ? "es" : "")
4613  << " not live in the column Map on this process.");
4614 
4615  // We've converted column indices from global to local, so we
4616  // can deallocate the global column indices (which we know are
4617  // in 1-D storage, because the graph has static profile).
4618  k_gblInds1D_ = gbl_col_inds_type ();
4619  }
4620  else { // the graph has dynamic profile (2-D index storage)
4621  lclInds2D_ = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4622  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4623  if (! gblInds2D_[lclRow].empty ()) {
4624  const GO* const ginds = gblInds2D_[lclRow].getRawPtr ();
4625  // NOTE (mfh 26 Jun 2016) It's always legal to cast the
4626  // number of entries in a row to LO, as long as the row
4627  // doesn't have too many duplicate entries.
4628  const LO rna = static_cast<LO> (gblInds2D_[lclRow].size ());
4629  const LO numEnt = static_cast<LO> (h_numRowEnt(lclRow));
4630  lclInds2D_[lclRow].resize (rna);
4631  LO* const linds = lclInds2D_[lclRow].getRawPtr ();
4632  for (LO j = 0; j < numEnt; ++j) {
4633  const GO gid = ginds[j];
4634  const LO lid = colMap.getLocalElement (gid);
4635  linds[j] = lid;
4636 #ifdef HAVE_TPETRA_DEBUG
4637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4638  (linds[j] == Teuchos::OrdinalTraits<LO>::invalid(),
4639  std::logic_error,
4640  "Global column ginds[j=" << j << "]=" << ginds[j]
4641  << " of local row " << lclRow << " is not in the column Map. "
4642  "This should never happen. Please report this bug to the "
4643  "Tpetra developers.");
4644 #endif // HAVE_TPETRA_DEBUG
4645  }
4646  }
4647  }
4648  gblInds2D_ = Teuchos::null;
4649  }
4650  } // globallyIndexed() && lclNumRows > 0
4651 
4652  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
4653  indicesAreLocal_ = true;
4654  indicesAreGlobal_ = false;
4655  checkInternalState ();
4656  }
4657 
4658 
4659  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4660  void
4661  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
4662  sortAllIndices ()
4663  {
4664  typedef LocalOrdinal LO;
4665 
4666  // this should be called only after makeIndicesLocal()
4667  TEUCHOS_TEST_FOR_EXCEPT( isGloballyIndexed () );
4668  if (isSorted () == false) {
4669  // FIXME (mfh 06 Mar 2014) This would be a good place for a
4670  // thread-parallel kernel.
4671  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4672  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4673  const RowInfo rowInfo = this->getRowInfo (lclRow);
4674  this->sortRowIndices (rowInfo);
4675  }
4676  }
4677  indicesAreSorted_ = true; // we just sorted every row
4678  }
4679 
4680 
4681  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4682  void
4685  {
4686  using Teuchos::Array;
4687  using Teuchos::ArrayView;
4688  using Teuchos::outArg;
4689  using Teuchos::rcp;
4690  using Teuchos::REDUCE_MAX;
4691  using Teuchos::reduceAll;
4692  using std::endl;
4693  typedef LocalOrdinal LO;
4694  typedef GlobalOrdinal GO;
4695  const char tfecfFuncName[] = "makeColMap";
4696 
4697  if (hasColMap ()) { // The graph already has a column Map.
4698  // FIXME (mfh 26 Feb 2013): This currently prevents structure
4699  // changes that affect the column Map.
4700  return;
4701  }
4702 
4703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4704  isLocallyIndexed (), std::runtime_error,
4705  ": The graph is locally indexed. Calling makeColMap() to make the "
4706  "column Map requires that the graph be globally indexed.");
4707 
4708  // After the calling process is done going through all of the rows
4709  // it owns, myColumns will contain the list of indices owned by
4710  // this process in the column Map.
4711  Array<GO> myColumns;
4712 
4713  // If we reach this point, we don't have a column Map yet, so the
4714  // graph can't be locally indexed. Thus, isGloballyIndexed() ==
4715  // false means that the graph is empty on this process, so
4716  // myColumns will be left empty.
4717  if (isGloballyIndexed ()) {
4718  // Go through all the rows, finding the populated column indices.
4719  //
4720  // Our final list of indices for the column Map constructor will
4721  // have the following properties (all of which are with respect
4722  // to the calling process):
4723  //
4724  // 1. Indices in the domain Map go first.
4725  // 2. Indices not in the domain Map follow, ordered first
4726  // contiguously by their owning process rank (in the domain
4727  // Map), then in increasing order within that.
4728  // 3. No duplicate indices.
4729  //
4730  // This imitates the ordering used by Aztec(OO) and Epetra.
4731  // Storing indices owned by the same process (in the domain Map)
4732  // contiguously permits the use of contiguous send and receive
4733  // buffers.
4734  //
4735  // We begin by partitioning the column indices into "local" GIDs
4736  // (owned by the domain Map) and "remote" GIDs (not owned by the
4737  // domain Map). We use the same order for local GIDs as the
4738  // domain Map, so we track them in place in their array. We use
4739  // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
4740  // that we don't have to merge duplicates later.
4741  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
4742  size_t numLocalColGIDs = 0, numRemoteColGIDs = 0;
4743 
4744  // GIDisLocal[lid] == 0 if and only if local index lid in the
4745  // domain Map is remote (not local).
4746  Array<char> GIDisLocal (domainMap_->getNodeNumElements (), 0);
4747  std::set<GO> RemoteGIDSet;
4748  // This preserves the not-sorted Epetra order of GIDs.
4749  std::vector<GO> RemoteGIDUnorderedVector;
4750  const LO myNumRows = this->getNodeNumRows ();
4751  for (LO r = 0; r < myNumRows; ++r) {
4752  const RowInfo rowinfo = this->getRowInfo (r);
4753  if (rowinfo.numEntries > 0) {
4754  // NOTE (mfh 02 Sep 2014) getGlobalView() returns a view of
4755  // all the space in the row, not just the occupied entries.
4756  // (This matters for the case of unpacked 1-D storage. We
4757  // might not have packed it yet.) That's why we need to
4758  // take a subview.
4759  ArrayView<const GO> rowGids = getGlobalView (rowinfo);
4760  rowGids = rowGids (0, rowinfo.numEntries);
4761 
4762  for (size_t k = 0; k < rowinfo.numEntries; ++k) {
4763  const GO gid = rowGids[k];
4764  const LO lid = domainMap_->getLocalElement (gid);
4765  if (lid != LINV) {
4766  const char alreadyFound = GIDisLocal[lid];
4767  if (alreadyFound == 0) {
4768  GIDisLocal[lid] = static_cast<char> (1);
4769  ++numLocalColGIDs;
4770  }
4771  }
4772  else {
4773  const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
4774  if (notAlreadyFound) { // gid did not exist in the set before
4775  if (! sortGhostsAssociatedWithEachProcessor_) {
4776  // The user doesn't want to sort remote GIDs (for
4777  // each remote process); they want us to keep remote
4778  // GIDs in their original order. We do this by
4779  // stuffing each remote GID into an array as we
4780  // encounter it for the first time. The std::set
4781  // helpfully tracks first encounters.
4782  RemoteGIDUnorderedVector.push_back (gid);
4783  }
4784  ++numRemoteColGIDs;
4785  }
4786  }
4787  } // for each entry k in row r
4788  } // if row r contains > 0 entries
4789  } // for each locally owned row r
4790 
4791  // Possible short-circuit for serial scenario:
4792  //
4793  // If all domain GIDs are present as column indices, then set
4794  // ColMap=DomainMap. By construction, LocalGIDs is a subset of
4795  // DomainGIDs.
4796  //
4797  // If we have
4798  // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
4799  // and
4800  // * Number of local GIDs is number of domain GIDs
4801  // then
4802  // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
4803  // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
4804  // on the calling process.
4805  //
4806  // We will concern ourselves only with the special case of a
4807  // serial DomainMap, obviating the need for communication.
4808  //
4809  // If
4810  // * DomainMap has a serial communicator
4811  // then we can set the column Map as the domain Map
4812  // return. Benefit: this graph won't need an Import object
4813  // later.
4814  //
4815  // Note, for a serial domain map, there can be no RemoteGIDs,
4816  // because there are no remote processes. Likely explanations
4817  // for this are:
4818  // * user submitted erroneous column indices
4819  // * user submitted erroneous domain Map
4820  if (domainMap_->getComm ()->getSize () == 1) {
4821  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4822  numRemoteColGIDs != 0, std::runtime_error,
4823  ": " << numRemoteColGIDs << " column "
4824  << (numRemoteColGIDs != 1 ? "indices are" : "index is")
4825  << " not in the domain Map." << endl
4826  << "Either these indices are invalid or the domain Map is invalid."
4827  << endl << "Remember that nonsquare matrices, or matrices where the "
4828  "row and range Maps are different, require calling the version of "
4829  "fillComplete that takes the domain and range Maps as input.");
4830  if (numLocalColGIDs == domainMap_->getNodeNumElements()) {
4831  colMap_ = domainMap_;
4832  checkInternalState ();
4833  return;
4834  }
4835  }
4836 
4837  // Populate myColumns with a list of all column GIDs. Put
4838  // locally owned (in the domain Map) GIDs at the front: they
4839  // correspond to "same" and "permuted" entries between the
4840  // column Map and the domain Map. Put remote GIDs at the back.
4841  myColumns.resize (numLocalColGIDs + numRemoteColGIDs);
4842  // get pointers into myColumns for each part
4843  ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
4844  ArrayView<GO> RemoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);
4845 
4846  // Copy the remote GIDs into myColumns
4847  if (sortGhostsAssociatedWithEachProcessor_) {
4848  // The std::set puts GIDs in increasing order.
4849  std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
4850  RemoteColGIDs.begin());
4851  } else {
4852  // Respect the originally encountered order.
4853  std::copy (RemoteGIDUnorderedVector.begin(),
4854  RemoteGIDUnorderedVector.end(), RemoteColGIDs.begin());
4855  }
4856 
4857  // Make a list of process ranks corresponding to the remote GIDs.
4858  Array<int> RemoteImageIDs (numRemoteColGIDs);
4859  // Look up the remote process' ranks in the domain Map.
4860  {
4861  const LookupStatus stat =
4862  domainMap_->getRemoteIndexList (RemoteColGIDs, RemoteImageIDs ());
4863 #ifdef HAVE_TPETRA_DEBUG
4864  // If any process returns IDNotPresent, then at least one of
4865  // the remote indices was not present in the domain Map. This
4866  // means that the Import object cannot be constructed, because
4867  // of incongruity between the column Map and domain Map.
4868  // This has two likely causes:
4869  // - The user has made a mistake in the column indices
4870  // - The user has made a mistake with respect to the domain Map
4871  const int missingID_lcl = (stat == IDNotPresent ? 1 : 0);
4872  int missingID_gbl = 0;
4873  reduceAll<int, int> (*getComm (), REDUCE_MAX, missingID_lcl,
4874  outArg (missingID_gbl));
4875  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4876  missingID_gbl == 1, std::runtime_error,
4877  ": Some column indices are not in the domain Map." << endl
4878  << "Either these column indices are invalid or the domain Map is "
4879  "invalid." << endl << "Likely cause: For a nonsquare matrix, you "
4880  "must give the domain and range Maps as input to fillComplete.");
4881 #else
4882  (void) stat; // forestall compiler warning for unused variable
4883 #endif // HAVE_TPETRA_DEBUG
4884  }
4885  // Sort incoming remote column indices by their owning process
4886  // rank, so that all columns coming from a given remote process
4887  // are contiguous. This means the Import's Distributor doesn't
4888  // need to reorder data.
4889  //
4890  // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so
4891  // that it respects either of the possible orderings of GIDs
4892  // (sorted, or original order) specified above.
4893  sort2 (RemoteImageIDs.begin(), RemoteImageIDs.end(), RemoteColGIDs.begin());
4894 
4895  // Copy the local GIDs into myColumns. Two cases:
4896  // 1. If the number of Local column GIDs is the same as the
4897  // number of Local domain GIDs, we can simply read the domain
4898  // GIDs into the front part of ColIndices (see logic above
4899  // from the serial short circuit case)
4900  // 2. We step through the GIDs of the DomainMap, checking to see
4901  // if each domain GID is a column GID. We want to do this to
4902  // maintain a consistent ordering of GIDs between the columns
4903  // and the domain.
4904 
4905  const size_t numDomainElts = domainMap_->getNodeNumElements ();
4906  if (numLocalColGIDs == numDomainElts) {
4907  // If the number of locally owned GIDs are the same as the
4908  // number of local domain Map elements, then the local domain
4909  // Map elements are the same as the locally owned GIDs.
4910  if (domainMap_->isContiguous ()) {
4911  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case
4912  // that the domain Map is contiguous, it's more efficient to
4913  // avoid calling getNodeElementList(), since that
4914  // permanently constructs and caches the GID list in the
4915  // contiguous Map.
4916  GO curColMapGid = domainMap_->getMinGlobalIndex ();
4917  for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
4918  LocalColGIDs[k] = curColMapGid;
4919  }
4920  }
4921  else {
4922  ArrayView<const GO> domainElts = domainMap_->getNodeElementList ();
4923  std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
4924  }
4925  }
4926  else {
4927  // Count the number of locally owned GIDs, both to keep track
4928  // of the current array index, and as a sanity check.
4929  size_t numLocalCount = 0;
4930  if (domainMap_->isContiguous ()) {
4931  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case
4932  // that the domain Map is contiguous, it's more efficient to
4933  // avoid calling getNodeElementList(), since that
4934  // permanently constructs and caches the GID list in the
4935  // contiguous Map.
4936  GO curColMapGid = domainMap_->getMinGlobalIndex ();
4937  for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
4938  if (GIDisLocal[i]) {
4939  LocalColGIDs[numLocalCount++] = curColMapGid;
4940  }
4941  }
4942  }
4943  else {
4944  ArrayView<const GO> domainElts = domainMap_->getNodeElementList ();
4945  for (size_t i = 0; i < numDomainElts; ++i) {
4946  if (GIDisLocal[i]) {
4947  LocalColGIDs[numLocalCount++] = domainElts[i];
4948  }
4949  }
4950  }
4951  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4952  numLocalCount != numLocalColGIDs, std::logic_error,
4953  ": numLocalCount = " << numLocalCount << " != numLocalColGIDs = "
4954  << numLocalColGIDs << ". This should never happen. Please report "
4955  "this bug to the Tpetra developers.");
4956  }
4957 
4958  // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
4959  // information we collected above to construct the Import. In
4960  // particular, building an Import requires:
4961  //
4962  // 1. numSameIDs (length of initial contiguous sequence of GIDs
4963  // on this process that are the same in both Maps; this
4964  // equals the number of domain Map elements on this process)
4965  //
4966  // 2. permuteToLIDs and permuteFromLIDs (both empty in this
4967  // case, since there's no permutation going on; the column
4968  // Map starts with the domain Map's GIDs, and immediately
4969  // after them come the remote GIDs)
4970  //
4971  // 3. remoteGIDs (exactly those GIDs that we found out above
4972  // were not in the domain Map) and remoteLIDs (which we could
4973  // have gotten above by using the three-argument version of
4974  // getRemoteIndexList() that computes local indices as well
4975  // as process ranks, instead of the two-argument version that
4976  // was used above)
4977  //
4978  // 4. remotePIDs (which we have from the getRemoteIndexList()
4979  // call above)
4980  //
4981  // 5. Sorting remotePIDs, and applying that permutation to
4982  // remoteGIDs and remoteLIDs (by calling sort3 above instead
4983  // of sort2)
4984  //
4985  // 6. Everything after the sort3 call in Import::setupExport():
4986  // a. Create the Distributor via createFromRecvs(), which
4987  // computes exportGIDs and exportPIDs
4988  // b. Compute exportLIDs from exportGIDs (by asking the
4989  // source Map, in this case the domain Map, to convert
4990  // global to local)
4991  //
4992  // Steps 1-5 come for free, since we must do that work anyway in
4993  // order to compute the column Map. In particular, Step 3 is
4994  // even more expensive than Step 6a, since it involves both
4995  // creating and using a new Distributor object.
4996 
4997  } // if the graph is globally indexed
4998 
4999  const global_size_t gstInv =
5000  Teuchos::OrdinalTraits<global_size_t>::invalid ();
5001  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
5002  // be the same as the Map's min GID? If the first column is empty
5003  // (contains no entries), then the column Map's min GID won't
5004  // necessarily be the same as the domain Map's index base.
5005  const GO indexBase = domainMap_->getIndexBase ();
5006  colMap_ = rcp (new map_type (gstInv, myColumns, indexBase,
5007  domainMap_->getComm (),
5008  domainMap_->getNode ()));
5009 
5010  checkInternalState ();
5011  }
5012 
5013 
5014  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5015  void
5018  {
5019  TEUCHOS_TEST_FOR_EXCEPT( isGloballyIndexed() ); // call only after makeIndicesLocal()
5020  TEUCHOS_TEST_FOR_EXCEPT( ! isSorted() ); // call only after sortIndices()
5021  if (! isMerged ()) {
5022  const LocalOrdinal lclNumRows =
5023  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5024  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
5025  const RowInfo rowInfo = this->getRowInfo (row);
5026  mergeRowIndices(rowInfo);
5027  }
5028  // we just merged every row
5029  noRedundancies_ = true;
5030  }
5031  }
5032 
5033 
5034  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5035  void
5038  {
5039  using Teuchos::ParameterList;
5040  using Teuchos::RCP;
5041  using Teuchos::rcp;
5042 
5043  TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap (), std::logic_error, "Tpetra::"
5044  "CrsGraph::makeImportExport: This method may not be called unless the "
5045  "graph has a column Map.");
5046  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5047 
5048  // Don't do any checks to see if we need to create the Import, if
5049  // it exists already.
5050  //
5051  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5052  // change CrsGraph in the future to allow changing the column
5053  // Map after fillComplete. For now, the column Map is fixed
5054  // after the first fillComplete call.
5055  if (importer_.is_null ()) {
5056  // Create the Import instance if necessary.
5057  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5058  if (params.is_null () || ! params->isSublist ("Import")) {
5059  importer_ = rcp (new import_type (domainMap_, colMap_));
5060  } else {
5061  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5062  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5063  }
5064  }
5065  }
5066 
5067  // Don't do any checks to see if we need to create the Export, if
5068  // it exists already.
5069  if (exporter_.is_null ()) {
5070  // Create the Export instance if necessary.
5071  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5072  if (params.is_null () || ! params->isSublist ("Export")) {
5073  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5074  }
5075  else {
5076  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5077  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5078  }
5079  }
5080  }
5081  }
5082 
5083 
5084  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5085  std::string
5086  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
5087  description () const
5088  {
5089  std::ostringstream oss;
5090  oss << dist_object_type::description ();
5091  if (isFillComplete ()) {
5092  oss << "{status = fill complete"
5093  << ", global rows = " << getGlobalNumRows()
5094  << ", global cols = " << getGlobalNumCols()
5095  << ", global num entries = " << getGlobalNumEntries()
5096  << "}";
5097  }
5098  else {
5099  oss << "{status = fill not complete"
5100  << ", global rows = " << getGlobalNumRows()
5101  << "}";
5102  }
5103  return oss.str();
5104  }
5105 
5106 
5107  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5108  void
5110  describe (Teuchos::FancyOStream &out,
5111  const Teuchos::EVerbosityLevel verbLevel) const
5112  {
5113  const char tfecfFuncName[] = "describe()";
5114  using Teuchos::ArrayView;
5115  using Teuchos::Comm;
5116  using Teuchos::RCP;
5117  using Teuchos::VERB_DEFAULT;
5118  using Teuchos::VERB_NONE;
5119  using Teuchos::VERB_LOW;
5120  using Teuchos::VERB_MEDIUM;
5121  using Teuchos::VERB_HIGH;
5122  using Teuchos::VERB_EXTREME;
5123  using std::endl;
5124  using std::setw;
5125 
5126  Teuchos::EVerbosityLevel vl = verbLevel;
5127  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5128  RCP<const Comm<int> > comm = this->getComm();
5129  const int myImageID = comm->getRank(),
5130  numImages = comm->getSize();
5131  size_t width = 1;
5132  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5133  ++width;
5134  }
5135  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5136  Teuchos::OSTab tab (out);
5137  // none: print nothing
5138  // low: print O(1) info from node 0
5139  // medium: print O(P) info, num entries per node
5140  // high: print O(N) info, num entries per row
5141  // extreme: print O(NNZ) info: print graph indices
5142  //
5143  // for medium and higher, print constituent objects at specified verbLevel
5144  if (vl != VERB_NONE) {
5145  if (myImageID == 0) out << this->description() << std::endl;
5146  // O(1) globals, minus what was already printed by description()
5147  if (isFillComplete() && myImageID == 0) {
5148  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5149  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5150  }
5151  // constituent objects
5152  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5153  if (myImageID == 0) out << "\nRow map: " << std::endl;
5154  rowMap_->describe(out,vl);
5155  if (colMap_ != Teuchos::null) {
5156  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5157  colMap_->describe(out,vl);
5158  }
5159  if (domainMap_ != Teuchos::null) {
5160  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5161  domainMap_->describe(out,vl);
5162  }
5163  if (rangeMap_ != Teuchos::null) {
5164  if (myImageID == 0) out << "\nRange map: " << std::endl;
5165  rangeMap_->describe(out,vl);
5166  }
5167  }
5168  // O(P) data
5169  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5170  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5171  if (myImageID == imageCtr) {
5172  out << "Node ID = " << imageCtr << std::endl
5173  << "Node number of entries = " << nodeNumEntries_ << std::endl
5174  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5175  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5176  if (indicesAreAllocated()) {
5177  out << "Node number of allocated entries = " << nodeNumAllocated_ << std::endl;
5178  }
5179  else {
5180  out << "Indices are not allocated." << std::endl;
5181  }
5182  }
5183  comm->barrier();
5184  comm->barrier();
5185  comm->barrier();
5186  }
5187  }
5188  // O(N) and O(NNZ) data
5189  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5190  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5191  ! hasRowInfo (), std::runtime_error, ": reduce verbosity level; "
5192  "graph row information was deleted at fillComplete().");
5193  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5194  if (myImageID == imageCtr) {
5195  out << std::setw(width) << "Node ID"
5196  << std::setw(width) << "Global Row"
5197  << std::setw(width) << "Num Entries";
5198  if (vl == VERB_EXTREME) {
5199  out << " Entries";
5200  }
5201  out << std::endl;
5202  const LocalOrdinal lclNumRows =
5203  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5204  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5205  const RowInfo rowinfo = this->getRowInfo (r);
5206  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5207  out << std::setw(width) << myImageID
5208  << std::setw(width) << gid
5209  << std::setw(width) << rowinfo.numEntries;
5210  if (vl == VERB_EXTREME) {
5211  out << " ";
5212  if (isGloballyIndexed()) {
5213  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5214  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5215  }
5216  else if (isLocallyIndexed()) {
5217  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5218  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5219  }
5220  }
5221  out << std::endl;
5222  }
5223  }
5224  comm->barrier();
5225  comm->barrier();
5226  comm->barrier();
5227  }
5228  }
5229  }
5230  }
5231 
5232 
5233  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5234  bool
5237  {
5238  (void) source; // forestall "unused variable" compiler warnings
5239 
5240  // It's not clear what kind of compatibility checks on sizes can
5241  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5242  // compatibility.
5243  return true;
5244  }
5245 
5246 
5247  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5248  void
5251  size_t numSameIDs,
5252  const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
5253  const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
5254  {
5255  using Teuchos::Array;
5256  using Teuchos::ArrayView;
5257  typedef LocalOrdinal LO;
5258  typedef GlobalOrdinal GO;
5259  const char tfecfFuncName[] = "copyAndPermute";
5261  typedef RowGraph<LO, GO, node_type> row_graph_type;
5262 
5263  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5264  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
5265  ": permuteToLIDs and permuteFromLIDs must have the same size.");
5266  // Make sure that the source object has the right type. We only
5267  // actually need it to be a RowGraph, with matching first three
5268  // template parameters. If it's a CrsGraph, we can use view mode
5269  // instead of copy mode to get each row's data.
5270  //
5271  // FIXME (mfh 07 Jul 2013) It should not be necessary for any of
5272  // the template parameters but GO to match. GO has to match
5273  // because the graph has to send indices as global ordinals, if
5274  // the source and target graphs do not have the same column Map.
5275  // If LO doesn't match, the graphs could communicate using global
5276  // indices. It could be possible that Node affects the graph's
5277  // storage format, but packAndPrepare should assume a common
5278  // communication format in any case.
5279  const row_graph_type* srcRowGraph = dynamic_cast<const row_graph_type*> (&source);
5280  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5281  srcRowGraph == NULL, std::invalid_argument,
5282  ": The source object must be a RowGraph with matching first three "
5283  "template parameters.");
5284 
5285  // If the source object is actually a CrsGraph, we can use view
5286  // mode instead of copy mode to access the entries in each row,
5287  // if the graph is not fill complete.
5288  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5289 
5290  const map_type& srcRowMap = * (srcRowGraph->getRowMap ());
5291  const map_type& tgtRowMap = * (this->getRowMap ());
5292  const bool src_filled = srcRowGraph->isFillComplete ();
5293  Array<GO> row_copy;
5294  LO myid = 0;
5295 
5296  //
5297  // "Copy" part of "copy and permute."
5298  //
5299  if (src_filled || srcCrsGraph == NULL) {
5300  // If the source graph is fill complete, we can't use view mode,
5301  // because the data might be stored in a different format not
5302  // compatible with the expectations of view mode. Also, if the
5303  // source graph is not a CrsGraph, we can't use view mode,
5304  // because RowGraph only provides copy mode access to the data.
5305  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5306  const GO gid = srcRowMap.getGlobalElement (myid);
5307  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (gid);
5308  row_copy.resize (row_length);
5309  size_t check_row_length = 0;
5310  srcRowGraph->getGlobalRowCopy (gid, row_copy (), check_row_length);
5311  this->insertGlobalIndices (gid, row_copy ());
5312  }
5313  } else {
5314  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5315  const GO gid = srcRowMap.getGlobalElement (myid);
5316  ArrayView<const GO> row;
5317  srcCrsGraph->getGlobalRowView (gid, row);
5318  this->insertGlobalIndices (gid, row);
5319  }
5320  }
5321 
5322  //
5323  // "Permute" part of "copy and permute."
5324  //
5325  if (src_filled || srcCrsGraph == NULL) {
5326  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5327  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5328  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5329  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (srcgid);
5330  row_copy.resize (row_length);
5331  size_t check_row_length = 0;
5332  srcRowGraph->getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5333  this->insertGlobalIndices (mygid, row_copy ());
5334  }
5335  } else {
5336  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5337  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5338  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5339  ArrayView<const GO> row;
5340  srcCrsGraph->getGlobalRowView (srcgid, row);
5341  this->insertGlobalIndices (mygid, row);
5342  }
5343  }
5344  }
5345 
5346 
5347  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5348  void
5350  packAndPrepare (const SrcDistObject& source,
5351  const Teuchos::ArrayView<const LocalOrdinal> &exportLIDs,
5352  Teuchos::Array<GlobalOrdinal> &exports,
5353  const Teuchos::ArrayView<size_t> & numPacketsPerLID,
5354  size_t& constantNumPackets,
5355  Distributor &distor)
5356  {
5357  using Teuchos::Array;
5358  const char tfecfFuncName[] = "packAndPrepare";
5359  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5360  exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5361  ": exportLIDs and numPacketsPerLID must have the same size.");
5362  typedef RowGraph<LocalOrdinal, GlobalOrdinal, node_type> row_graph_type;
5363  const row_graph_type& srcGraph = dynamic_cast<const row_graph_type&> (source);
5364 
5365  // We don't check whether src_graph has had fillComplete called,
5366  // because it doesn't matter whether the *source* graph has been
5367  // fillComplete'd. The target graph can not be fillComplete'd yet.
5368  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5369  this->isFillComplete (), std::runtime_error,
5370  ": The target graph of an Import or Export must not be fill complete.");
5371 
5372  srcGraph.pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
5373  }
5374 
5375 
5376  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5377  void
5378  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
5379  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5380  Teuchos::Array<GlobalOrdinal>& exports,
5381  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5382  size_t& constantNumPackets,
5383  Distributor& distor) const
5384  {
5385  using Teuchos::Array;
5386  typedef LocalOrdinal LO;
5387  typedef GlobalOrdinal GO;
5388  const char tfecfFuncName[] = "pack";
5389  (void) distor; // forestall "unused argument" compiler warning
5390 
5391  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5392  exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5393  ": exportLIDs and numPacketsPerLID must have the same size.");
5394 
5395  const map_type& srcMap = * (this->getMap ());
5396  constantNumPackets = 0;
5397 
5398  // Set numPacketsPerLID[i] to the number of entries owned by the
5399  // calling process in (local) row exportLIDs[i] of the graph, that
5400  // the caller wants us to send out. Compute the total number of
5401  // packets (that is, entries) owned by this process in all the
5402  // rows that the caller wants us to send out.
5403  size_t totalNumPackets = 0;
5404  Array<GO> row;
5405  for (LO i = 0; i < exportLIDs.size (); ++i) {
5406  const GO GID = srcMap.getGlobalElement (exportLIDs[i]);
5407  size_t row_length = this->getNumEntriesInGlobalRow (GID);
5408  numPacketsPerLID[i] = row_length;
5409  totalNumPackets += row_length;
5410  }
5411 
5412  exports.resize (totalNumPackets);
5413 
5414  // Loop again over the rows to export, and pack rows of indices
5415  // into the output buffer.
5416  size_t exportsOffset = 0;
5417  for (LO i = 0; i < exportLIDs.size (); ++i) {
5418  const GO GID = srcMap.getGlobalElement (exportLIDs[i]);
5419  size_t row_length = this->getNumEntriesInGlobalRow (GID);
5420  row.resize (row_length);
5421  size_t check_row_length = 0;
5422  this->getGlobalRowCopy (GID, row (), check_row_length);
5423  typename Array<GO>::const_iterator row_iter = row.begin();
5424  typename Array<GO>::const_iterator row_end = row.end();
5425  size_t j = 0;
5426  for (; row_iter != row_end; ++row_iter, ++j) {
5427  exports[exportsOffset+j] = *row_iter;
5428  }
5429  exportsOffset += row.size();
5430  }
5431  }
5432 
5433 
5434  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5435  void
5437  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
5438  const Teuchos::ArrayView<const GlobalOrdinal> &imports,
5439  const Teuchos::ArrayView<size_t> &numPacketsPerLID,
5440  size_t constantNumPackets,
5441  Distributor& /* distor */,
5442  CombineMode /* CM */)
5443  {
5444  using Teuchos::ArrayView;
5445  typedef LocalOrdinal LO;
5446  typedef GlobalOrdinal GO;
5447 
5448  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5449  // reasonable meaning, whether or not the matrix is fill complete.
5450  // It's just more work to implement.
5451 
5452  // We are not checking the value of the CombineMode input
5453  // argument. For CrsGraph, we only support import/export
5454  // operations if fillComplete has not yet been called. Any
5455  // incoming column-indices are inserted into the target graph. In
5456  // this context, CombineMode values of ADD vs INSERT are
5457  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5458  // duplicate column-index is inserted, it will be compressed out
5459  // when fillComplete is called.
5460  //
5461  // Note: I think REPLACE means that an existing row is replaced by
5462  // the imported row, i.e., the existing indices are cleared. CGB,
5463  // 6/17/2010
5464 
5465  const char tfecfFuncName[] = "unpackAndCombine: ";
5466  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5467  importLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5468  "importLIDs and numPacketsPerLID must have the same size.");
5469  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5470  isFillComplete (), std::runtime_error,
5471  "Import or Export operations are not allowed on the destination "
5472  "CrsGraph if it is fill complete.");
5473  size_t importsOffset = 0;
5474 
5475  typedef typename ArrayView<const LO>::const_iterator iter_type;
5476  iter_type impLIDiter = importLIDs.begin();
5477  iter_type impLIDend = importLIDs.end();
5478 
5479  for (size_t i = 0; impLIDiter != impLIDend; ++impLIDiter, ++i) {
5480  LO row_length = numPacketsPerLID[i];
5481 
5482  const GO* const row_raw = (row_length == 0) ? NULL : &imports[importsOffset];
5483  ArrayView<const GlobalOrdinal> row (row_raw, row_length);
5484  insertGlobalIndicesFiltered (this->getMap ()->getGlobalElement (*impLIDiter), row);
5485  importsOffset += row_length;
5486  }
5487  }
5488 
5489 
5490  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5491  void
5493  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
5494  {
5495  using Teuchos::Comm;
5496  using Teuchos::null;
5497  using Teuchos::ParameterList;
5498  using Teuchos::RCP;
5499 
5500  // We'll set all the state "transactionally," so that this method
5501  // satisfies the strong exception guarantee. This object's state
5502  // won't be modified until the end of this method.
5503  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
5504  RCP<import_type> importer;
5505  RCP<export_type> exporter;
5506 
5507  rowMap = newMap;
5508  RCP<const Comm<int> > newComm =
5509  (newMap.is_null ()) ? null : newMap->getComm ();
5510 
5511  if (! domainMap_.is_null ()) {
5512  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5513  // Common case: original domain and row Maps are identical.
5514  // In that case, we need only replace the original domain Map
5515  // with the new Map. This ensures that the new domain and row
5516  // Maps _stay_ identical.
5517  domainMap = newMap;
5518  } else {
5519  domainMap = domainMap_->replaceCommWithSubset (newComm);
5520  }
5521  }
5522  if (! rangeMap_.is_null ()) {
5523  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5524  // Common case: original range and row Maps are identical. In
5525  // that case, we need only replace the original range Map with
5526  // the new Map. This ensures that the new range and row Maps
5527  // _stay_ identical.
5528  rangeMap = newMap;
5529  } else {
5530  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
5531  }
5532  }
5533  if (! colMap.is_null ()) {
5534  colMap = colMap_->replaceCommWithSubset (newComm);
5535  }
5536 
5537  // (Re)create the Export and / or Import if necessary.
5538  if (! newComm.is_null ()) {
5539  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5540  //
5541  // The operations below are collective on the new communicator.
5542  //
5543  // (Re)create the Export object if necessary. If I haven't
5544  // called fillComplete yet, I don't have a rangeMap, so I must
5545  // first check if the _original_ rangeMap is not null. Ditto
5546  // for the Import object and the domain Map.
5547  if (! rangeMap_.is_null () &&
5548  rangeMap != rowMap &&
5549  ! rangeMap->isSameAs (*rowMap)) {
5550  if (params.is_null () || ! params->isSublist ("Export")) {
5551  exporter = rcp (new export_type (rowMap, rangeMap));
5552  }
5553  else {
5554  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5555  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
5556  }
5557  }
5558  // (Re)create the Import object if necessary.
5559  if (! domainMap_.is_null () &&
5560  domainMap != colMap &&
5561  ! domainMap->isSameAs (*colMap)) {
5562  if (params.is_null () || ! params->isSublist ("Import")) {
5563  importer = rcp (new import_type (domainMap, colMap));
5564  } else {
5565  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5566  importer = rcp (new import_type (domainMap, colMap, importSublist));
5567  }
5568  }
5569  } // if newComm is not null
5570 
5571  // Defer side effects until the end. If no destructors throw
5572  // exceptions (they shouldn't anyway), then this method satisfies
5573  // the strong exception guarantee.
5574  exporter_ = exporter;
5575  importer_ = importer;
5576  rowMap_ = rowMap;
5577  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5578  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5579  // the same object. We might want to get rid of this redundant
5580  // pointer sometime, but for now, we'll leave it alone and just
5581  // set map_ to the same object.
5582  this->map_ = rowMap;
5583  domainMap_ = domainMap;
5584  rangeMap_ = rangeMap;
5585  colMap_ = colMap;
5586  }
5587 
5588  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5589  bool
5591  hasRowInfo () const
5592  {
5593  if (indicesAreAllocated () &&
5594  getProfileType () == StaticProfile &&
5595  k_rowPtrs_.dimension_0 () == 0) {
5596  return false;
5597  } else {
5598  return true;
5599  }
5600  }
5601 
5602  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5603  void
5605  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
5606  {
5607  typedef LocalOrdinal LO;
5608  typedef GlobalOrdinal GO;
5609  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5610 
5611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5612  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
5613  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5615  (static_cast<LO> (offsets.dimension_0 ()) < lclNumRows,
5616  std::invalid_argument, "offsets.dimension_0() = " <<
5617  offsets.dimension_0 () << " < getNodeNumRows() = " << lclNumRows << ".");
5618 
5619  const map_type& rowMap = * (this->getRowMap ());
5620  const map_type& colMap = * (this->getColMap ());
5621 
5622 #ifdef HAVE_TPETRA_DEBUG
5623  bool allRowMapDiagEntriesInColMap = true;
5624  bool allDiagEntriesFound = true;
5625  bool allOffsetsCorrect = true;
5626  bool noOtherWeirdness = true;
5627  std::vector<std::pair<LO, size_t> > wrongOffsets;
5628 #endif // HAVE_TPETRA_DEBUG
5629 
5630  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
5631  // the subset of Map functionality that we need below.
5632  auto lclRowMap = rowMap.getLocalMap ();
5633  auto lclColMap = colMap.getLocalMap ();
5634 
5635  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
5636  // setup, at least on the host. For CUDA, we have to use LocalMap
5637  // (that comes from each of the two Maps).
5638 
5639  const bool sorted = this->isSorted ();
5640  if (isFillComplete ()) {
5641  auto lclGraph = this->getLocalGraph ();
5642  Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
5643  lclGraph.row_map, lclGraph.entries,
5644  sorted);
5645  }
5646  else {
5647  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
5648  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
5649  const GO gblColInd = gblRowInd;
5650  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
5651 
5652  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5653 #ifdef HAVE_TPETRA_DEBUG
5654  allRowMapDiagEntriesInColMap = false;
5655 #endif // HAVE_TPETRA_DEBUG
5656  offsets[lclRowInd] = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
5657  }
5658  else {
5659  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
5660  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
5661  rowInfo.numEntries > 0) {
5662 
5663  auto colInds = this->getLocalKokkosRowView (rowInfo);
5664  const size_t hint = 0; // not needed for this algorithm
5665  const size_t offset =
5666  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
5667  lclColInd, hint, sorted);
5668  offsets(lclRowInd) = offset;
5669 
5670 #ifdef HAVE_TPETRA_DEBUG
5671  // Now that we have what we think is an offset, make sure
5672  // that it really does point to the diagonal entry. Offsets
5673  // are _relative_ to each row, not absolute (for the whole
5674  // (local) graph).
5675  Teuchos::ArrayView<const LO> lclColInds;
5676  try {
5677  this->getLocalRowView (lclRowInd, lclColInds);
5678  }
5679  catch (...) {
5680  noOtherWeirdness = false;
5681  }
5682  // Don't continue with error checking if the above failed.
5683  if (noOtherWeirdness) {
5684  const size_t numEnt = lclColInds.size ();
5685  if (offset >= numEnt) {
5686  // Offsets are relative to each row, so this means that
5687  // the offset is out of bounds.
5688  allOffsetsCorrect = false;
5689  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
5690  } else {
5691  const LO actualLclColInd = lclColInds[offset];
5692  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
5693  if (actualGblColInd != gblColInd) {
5694  allOffsetsCorrect = false;
5695  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
5696  }
5697  }
5698  }
5699 #endif // HAVE_TPETRA_DEBUG
5700  }
5701  else {
5702  offsets(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
5703 #ifdef HAVE_TPETRA_DEBUG
5704  allDiagEntriesFound = false;
5705 #endif // HAVE_TPETRA_DEBUG
5706  }
5707  }
5708  }
5709  }
5710 
5711 #ifdef HAVE_TPETRA_DEBUG
5712  if (wrongOffsets.size () != 0) {
5713  std::ostringstream os;
5714  os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: [";
5715  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
5716  os << "(" << wrongOffsets[k].first << ","
5717  << wrongOffsets[k].second << ")";
5718  if (k + 1 < wrongOffsets.size ()) {
5719  os << ", ";
5720  }
5721  }
5722  os << "]" << std::endl;
5723  std::cerr << os.str ();
5724  }
5725 #endif // HAVE_TPETRA_DEBUG
5726 
5727 #ifdef HAVE_TPETRA_DEBUG
5728  using Teuchos::reduceAll;
5729  using std::endl;
5730  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
5731  const bool localSuccess =
5732  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
5733  const int numResults = 5;
5734  int lclResults[5];
5735  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
5736  lclResults[1] = allDiagEntriesFound ? 1 : 0;
5737  lclResults[2] = allOffsetsCorrect ? 1 : 0;
5738  lclResults[3] = noOtherWeirdness ? 1 : 0;
5739  // min-all-reduce will compute least rank of all the processes
5740  // that didn't succeed.
5741  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
5742 
5743  int gblResults[5];
5744  gblResults[0] = 0;
5745  gblResults[1] = 0;
5746  gblResults[2] = 0;
5747  gblResults[3] = 0;
5748  gblResults[4] = 0;
5749  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
5750  numResults, lclResults, gblResults);
5751 
5752  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
5753  || gblResults[3] != 1) {
5754  std::ostringstream os; // build error message
5755  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
5756  "possibly among others): " << endl;
5757  if (gblResults[0] == 0) {
5758  os << " - The column Map does not contain at least one diagonal entry "
5759  "of the graph." << endl;
5760  }
5761  if (gblResults[1] == 0) {
5762  os << " - On one or more processes, some row does not contain a "
5763  "diagonal entry." << endl;
5764  }
5765  if (gblResults[2] == 0) {
5766  os << " - On one or more processes, some offsets are incorrect."
5767  << endl;
5768  }
5769  if (gblResults[3] == 0) {
5770  os << " - One or more processes had some other error."
5771  << endl;
5772  }
5773  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
5774  }
5775 #endif // HAVE_TPETRA_DEBUG
5776  }
5777 
5778  namespace { // (anonymous)
5779 
5780  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
5781  // below). The point is to avoid the deep copy between the input
5782  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
5783  // can't use UVM to avoid the deep copy with CUDA, because the
5784  // ArrayRCP is a host pointer, while the input to the graph's
5785  // getLocalDiagOffsets method is a device pointer. Assigning a
5786  // host pointer to a device pointer is incorrect unless the host
5787  // pointer points to host pinned memory. The goal is to get rid
5788  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
5789  // copy for backwards compatibility.
5790  //
5791  // We have to use template magic because
5792  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
5793  // if device_type::memory_space is not Kokkos::HostSpace (as is
5794  // the case with CUDA).
5795 
5796  template<class DeviceType,
5797  const bool memSpaceIsHostSpace =
5798  std::is_same<typename DeviceType::memory_space,
5799  Kokkos::HostSpace>::value>
5800  struct HelpGetLocalDiagOffsets {};
5801 
5802  template<class DeviceType>
5803  struct HelpGetLocalDiagOffsets<DeviceType, true> {
5804  typedef DeviceType device_type;
5805  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
5806  Kokkos::MemoryUnmanaged> device_offsets_type;
5807  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
5808  Kokkos::MemoryUnmanaged> host_offsets_type;
5809 
5810  static device_offsets_type
5811  getDeviceOffsets (const host_offsets_type& hostOffsets)
5812  {
5813  // Host and device are the same; no need to allocate a
5814  // temporary device View.
5815  return hostOffsets;
5816  }
5817 
5818  static void
5819  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
5820  const device_offsets_type& /* deviceOffsets */)
5821  { /* copy back not needed; host and device are the same */ }
5822  };
5823 
5824  template<class DeviceType>
5825  struct HelpGetLocalDiagOffsets<DeviceType, false> {
5826  typedef DeviceType device_type;
5827  // We have to do a deep copy, since host memory space != device
5828  // memory space. Thus, the device View is managed (we need to
5829  // allocate a temporary device View).
5830  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
5831  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
5832  Kokkos::MemoryUnmanaged> host_offsets_type;
5833 
5834  static device_offsets_type
5835  getDeviceOffsets (const host_offsets_type& hostOffsets)
5836  {
5837  // Host memory space != device memory space, so we must
5838  // allocate a temporary device View for the graph.
5839  return device_offsets_type ("offsets", hostOffsets.dimension_0 ());
5840  }
5841 
5842  static void
5843  copyBackIfNeeded (const host_offsets_type& hostOffsets,
5844  const device_offsets_type& deviceOffsets)
5845  {
5846  Kokkos::deep_copy (hostOffsets, deviceOffsets);
5847  }
5848  };
5849  } // namespace (anonymous)
5850 
5851 
5852  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5853  void
5854  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
5855  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
5856  {
5857  typedef LocalOrdinal LO;
5858  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5860  (! this->hasColMap (), std::runtime_error,
5861  "The graph does not yet have a column Map.");
5862  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
5863  if (static_cast<LO> (offsets.size ()) != myNumRows) {
5864  // NOTE (mfh 21 Jan 2016) This means that the method does not
5865  // satisfy the strong exception guarantee (no side effects
5866  // unless successful).
5867  offsets.resize (myNumRows);
5868  }
5869 
5870  // mfh 21 Jan 2016: This method unfortunately takes a
5871  // Teuchos::ArrayRCP, which is host memory. The graph wants a
5872  // device pointer. We can't access host memory from the device;
5873  // that's the wrong direction for UVM. (It's the right direction
5874  // for inefficient host pinned memory, but we don't want to use
5875  // that here.) Thus, if device memory space != host memory space,
5876  // we allocate and use a temporary device View to get the offsets.
5877  // If the two spaces are equal, the template magic makes the deep
5878  // copy go away.
5879  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
5880  typedef typename helper_type::host_offsets_type host_offsets_type;
5881  // Unmanaged host View that views the output array.
5882  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
5883  // Allocate temp device View if host != device, else reuse host array.
5884  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
5885  // NOT recursion; this calls the overload that takes a device View.
5886  this->getLocalDiagOffsets (deviceOffsets);
5887  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
5888  }
5889 
5890 } // namespace Tpetra
5891 
5892 //
5893 // Explicit instantiation macros
5894 //
5895 // Must be expanded from within the Tpetra namespace!
5896 //
5897 #define TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE) template class CrsGraph< LO , GO , NODE >;
5898 
5899 // WARNING: These macros exist only for backwards compatibility.
5900 // We will remove them at some point.
5901 #define TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
5902 #define TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
5903 #define TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE)
5904 #define TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE)
5905 
5906 #define TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE) \
5907  TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
5908  TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
5909  TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE) \
5910  TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE)
5911 
5912 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
GlobalOrdinal getIndexBase() const
Returns the index base for global indices for this graph.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object&#39;s Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
Teuchos::RCP< node_type > getNode() const
Returns the underlying node.
Implementation details of Tpetra.
size_t global_size_t
Global size_t object.
Kokkos::StaticCrsGraph< LO, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Traits class for "invalid" (flag) values of integer types that Tpetra uses as local ordinals or globa...
Insert new values that don&#39;t currently exist.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID is owned by only one process...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a (StaticProfile) CrsGraph&#39;s global column indices into local column indices.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Abstract base class for objects that can be the source of an Import or Export operation.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType &count)
Compute offsets from a constant count.
node_type node_type
This class&#39; Kokkos Node type.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< Node > getNode() const
Get this Map&#39;s Node object.
device_type::execution_space execution_space
This class&#39; Kokkos execution space.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.