Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
45 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
46 
54 
55 #include "Tpetra_Details_gathervPrint.hpp"
56 #include "Tpetra_RowGraph.hpp"
57 #include "Tpetra_CrsGraph.hpp"
58 #include "Tpetra_RowMatrix.hpp"
59 #include "Tpetra_Vector.hpp"
60 
61 namespace Tpetra {
62 namespace Details {
63 
64 // Work-around for #499: Implementation of one-argument (no offsets)
65 // getLocalDiagCopy for the NOT fill-complete case.
66 //
67 // NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
68 // functions! Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
69 // KOKKOS_FUNCTION here, because those attempt to mark the functions
70 // they modify as CUDA device functions. This functor is ONLY for
71 // non-CUDA execution spaces!
72 template<class SC, class LO, class GO, class NT>
73 class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
74 public:
75  typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
76  typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
77 
78  typedef typename vec_type::impl_scalar_type IST;
79  // The output Vector determines the execution space.
80  typedef typename vec_type::device_type device_type;
81 
82 private:
83  typedef typename vec_type::dual_view_type::host_mirror_space::execution_space host_execution_space;
84  typedef typename vec_type::map_type map_type;
85 
86  static bool
87  graphIsSorted (const row_matrix_type& A)
88  {
89  using Teuchos::RCP;
90  using Teuchos::rcp_dynamic_cast;
91  typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
92  typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
93 
94  // We conservatively assume not sorted. RowGraph lacks an
95  // "isSorted" predicate, so we can't know for sure unless the cast
96  // to CrsGraph succeeds.
97  bool sorted = false;
98 
99  RCP<const row_graph_type> G_row = A.getGraph ();
100  if (! G_row.is_null ()) {
101  RCP<const crs_graph_type> G_crs =
102  rcp_dynamic_cast<const crs_graph_type> (G_row);
103  if (! G_crs.is_null ()) {
104  sorted = G_crs->isSorted ();
105  }
106  }
107 
108  return sorted;
109  }
110 
111 public:
112  // lclNumErrs [out] Total count of errors on this process.
113  GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor (LO& lclNumErrs,
114  vec_type& diag,
115  const row_matrix_type& A) :
116  A_ (A),
117  lclRowMap_ (A.getRowMap ()->getLocalMap ()),
118  lclColMap_ (A.getColMap ()->getLocalMap ()),
119  sorted_ (graphIsSorted (A))
120  {
121  const LO lclNumRows = static_cast<LO> (diag.getLocalLength ());
122  {
123  const LO matLclNumRows =
124  static_cast<LO> (lclRowMap_.getNodeNumElements ());
125  TEUCHOS_TEST_FOR_EXCEPTION
126  (lclNumRows != matLclNumRows, std::invalid_argument,
127  "diag.getLocalLength() = " << lclNumRows << " != "
128  "A.getRowMap()->getNodeNumElements() = " << matLclNumRows << ".");
129  }
130 
131  // Side effects start below this point.
132 
133  diag.template modify<Kokkos::HostSpace> ();
134  D_lcl_ = diag.template getLocalView<Kokkos::HostSpace> ();
135  D_lcl_1d_ = Kokkos::subview (D_lcl_, Kokkos::ALL (), 0);
136 
137  Kokkos::RangePolicy<host_execution_space, LO> range (0, lclNumRows);
138  lclNumErrs = 0;
139  Kokkos::parallel_reduce (range, *this, lclNumErrs);
140 
141  // sync changes back to device, since the user doesn't know that
142  // we had to run on host.
143  diag.template sync<typename device_type::memory_space> ();
144  }
145 
146  void operator () (const LO& lclRowInd, LO& errCount) const {
147  using KokkosSparse::findRelOffset;
148 
149  D_lcl_1d_(lclRowInd) = Kokkos::Details::ArithTraits<IST>::zero ();
150  const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
151  const LO lclColInd = lclColMap_.getLocalElement (gblInd);
152 
153  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
154  errCount++;
155  }
156  else { // row index is also in the column Map on this process
157  LO numEnt;
158  const LO* lclColInds;
159  const SC* curVals;
160  const LO err = A_.getLocalRowViewRaw (lclRowInd, numEnt, lclColInds, curVals);
161  if (err != 0) {
162  errCount++;
163  }
164  else {
165  // The search hint is always zero, since we only call this
166  // once per row of the matrix.
167  const LO hint = 0;
168  const LO offset =
169  findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
170  if (offset == numEnt) { // didn't find the diagonal column index
171  errCount++;
172  }
173  else {
174  D_lcl_1d_(lclRowInd) = curVals[offset];
175  }
176  }
177  }
178  }
179 
180 private:
181  const row_matrix_type& A_;
182  typename map_type::local_map_type lclRowMap_;
183  typename map_type::local_map_type lclColMap_;
184  typename vec_type::dual_view_type::t_host D_lcl_;
185  decltype (Kokkos::subview (D_lcl_, Kokkos::ALL (), 0)) D_lcl_1d_;
186  const bool sorted_;
187 };
188 
189 
190 template<class SC, class LO, class GO, class NT>
191 LO
193  const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
194  const bool debug)
195 {
196  using ::Tpetra::Details::gathervPrint;
197  using Teuchos::outArg;
198  using Teuchos::REDUCE_MIN;
199  using Teuchos::reduceAll;
200  typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
201  LO, GO, NT> functor_type;
202 
203  // The functor's constructor does error checking and executes the
204  // thread-parallel kernel.
205 
206  LO lclNumErrs = 0;
207 
208  if (debug) {
209  int lclSuccess = 1;
210  int gblSuccess = 0;
211  std::ostringstream errStrm;
212  Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm ();
213  if (commPtr.is_null ()) {
214  return lclNumErrs; // this process does not participate
215  }
216  const Teuchos::Comm<int>& comm = *commPtr;
217 
218  try {
219  functor_type functor (lclNumErrs, diag, A);
220  }
221  catch (std::exception& e) {
222  lclSuccess = -1;
223  errStrm << "Process " << A.getComm ()->getRank () << ": "
224  << e.what () << std::endl;
225  }
226  if (lclNumErrs != 0) {
227  lclSuccess = 0;
228  }
229 
230  reduceAll<int, int> (comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
231  if (gblSuccess == -1) {
232  if (comm.getRank () == 0) {
233  // We gather into std::cerr, rather than using an
234  // std::ostringstream, because there might be a lot of MPI
235  // processes. It could take too much memory to gather all the
236  // messages to Process 0 before printing. gathervPrint gathers
237  // and prints one message at a time, thus saving memory. I
238  // don't want to run out of memory while trying to print an
239  // error message; that would hide the real problem.
240  std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
241  "exception on one or more MPI processes in the matrix's comunicator."
242  << std::endl;
243  }
244  gathervPrint (std::cerr, errStrm.str (), comm);
245  // Don't need to print anything here, since we've already
246  // printed to std::cerr above.
247  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
248  }
249  else if (gblSuccess == 0) {
250  TEUCHOS_TEST_FOR_EXCEPTION
251  (gblSuccess != 1, std::runtime_error,
252  "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
253  "one or more MPI processes in the matrix's communicator.");
254  }
255  }
256  else { // ! debug
257  functor_type functor (lclNumErrs, diag, A);
258  }
259 
260  return lclNumErrs;
261 }
262 
263 } // namespace Details
264 } // namespace Tpetra
265 
266 // Explicit template instantiation macro for
267 // getLocalDiagCopyWithoutOffsetsNotFillComplete. NOT FOR USERS!!!
268 // Must be used inside the Tpetra namespace.
269 #define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( SCALAR, LO, GO, NODE ) \
270  template LO \
271  Details::getLocalDiagCopyWithoutOffsetsNotFillComplete< SCALAR, LO, GO, NODE > \
272  ( ::Tpetra::Vector< SCALAR, LO, GO, NODE >& diag, \
273  const ::Tpetra::RowMatrix< SCALAR, LO, GO, NODE >& A, \
274  const bool debug);
275 
276 #endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
base_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
An abstract interface for graphs accessed by rows.
Details::LocalMap< LocalOrdinal, GlobalOrdinal, device_type > local_map_type
Type of the "local" Map.
Implementation details of Tpetra.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Traits class for "invalid" (flag) values of integer types that Tpetra uses as local ordinals or globa...
base_type::map_type map_type
The type of the Map specialization used by this class.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix&#39;s diagonal entries into a Tpetra::V...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
Node::device_type device_type
The Kokkos device type.