Tpetra_Experimental_BlockMultiVector_def.hpp
// @HEADER
// ***********************************************************************
//
// Tpetra: Templated Linear Algebra Services Package
// Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
#define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP

#include "Tpetra_Experimental_BlockMultiVector_decl.hpp"

namespace { // anonymous

  template<class MultiVectorType>
  struct RawHostPtrFromMultiVector {
    typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;

    static impl_scalar_type* getRawPtr (MultiVectorType& X) {
      // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
      // host as modified.  This is on purpose, because we don't want
      // the BlockMultiVector sync'd to host unnecessarily.
      // Otherwise, all the MultiVector and BlockMultiVector kernels
      // would run on host instead of device.  See Github Issue #428.
      auto X_view_host = X.template getLocalView<Kokkos::HostSpace> ();
      impl_scalar_type* X_raw = X_view_host.ptr_on_device ();
      return X_raw;
    }
  };

  template<class S, class LO, class GO, class N>
  typename Tpetra::MultiVector<S, LO, GO, N>::impl_scalar_type*
  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
    typedef Tpetra::MultiVector<S, LO, GO, N> MV;
    return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
  }

} // namespace (anonymous)

namespace Tpetra {
namespace Experimental {

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
BlockMultiVector<Scalar, LO, GO, Node>::
getMultiVectorView () const
{
  return mv_;
}

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
BlockMultiVector<Scalar, LO, GO, Node>::
getBlockMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
{
  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
  TEUCHOS_TEST_FOR_EXCEPTION(
    src_bmv == NULL, std::invalid_argument, "Tpetra::Experimental::"
    "BlockMultiVector: The source object of an Import or Export to a "
    "BlockMultiVector must also be a BlockMultiVector.");
  return Teuchos::rcp (src_bmv, false); // nonowning RCP
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const map_type& meshMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (makePointMap (meshMap, blockSize)),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (blockSize)
{
  // Make sure that mv_ has view semantics.
  mv_.setCopyOrView (Teuchos::View);
}
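
// Example usage (a hedged sketch; the Map "meshMap" and the concrete
// template arguments, including "node_type", are hypothetical and not
// part of this file):
//
//   typedef Tpetra::Experimental::BlockMultiVector<double, int, long long, node_type> BMV;
//   BMV X (meshMap, 3, 2); // 3 degrees of freedom per mesh point, 2 columns
//
// Internally this builds the point Map (one entry per degree of
// freedom) from the mesh Map via makePointMap below.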

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const map_type& meshMap,
                  const map_type& pointMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (pointMap),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (blockSize)
{
  // Make sure that mv_ has view semantics.
  mv_.setCopyOrView (Teuchos::View);
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const mv_type& X_mv,
                  const map_type& meshMap,
                  const LO blockSize) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  mvData_ (NULL), // just for now
  blockSize_ (blockSize)
{
  using Teuchos::RCP;

  if (X_mv.getCopyOrView () == Teuchos::View) {
    // The input MultiVector has view semantics, so assignment just
    // does a shallow copy.
    mv_ = X_mv;
  }
  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
    // The input MultiVector has copy semantics.  We can't change
    // that, but we can make a view of the input MultiVector and
    // change the view to have view semantics.  (That sounds silly;
    // shouldn't views always have view semantics?  But remember that
    // "view semantics" just governs the default behavior of the copy
    // constructor and assignment operator.)
    RCP<const mv_type> X_view_const;
    const size_t numCols = X_mv.getNumVectors ();
    if (numCols == 0) {
      Teuchos::Array<size_t> cols (0); // view will have zero columns
      X_view_const = X_mv.subView (cols ());
    } else { // Range1D is an inclusive range
      X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view_const.is_null (), std::logic_error, "Tpetra::Experimental::"
      "BlockMultiVector constructor: X_mv.subView(...) returned null.  This "
      "should never happen.  Please report this bug to the Tpetra developers.");

    // It's perfectly OK to cast away const here.  Those view methods
    // should be marked const anyway, because views can't change the
    // allocation (just the entries themselves).
    RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
    X_view->setCopyOrView (Teuchos::View);
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
      "Experimental::BlockMultiVector constructor: We just set a MultiVector "
      "to have view semantics, but it claims that it doesn't have view "
      "semantics.  This should never happen.  "
      "Please report this bug to the Tpetra developers.");
    mv_ = *X_view; // MultiVector::operator= does a shallow copy here
  }

  // At this point, mv_ has been assigned, so we can ignore X_mv.
  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
  if (! pointMap.is_null ()) {
    pointMap_ = *pointMap; // Map::operator= also does a shallow copy
  }
  mvData_ = getRawHostPtrFromMultiVector (mv_);
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& X,
                  const map_type& newMeshMap,
                  const map_type& newPointMap,
                  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (newPointMap),
  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (X.getBlockSize ())
{
  // Make sure that mv_ has view semantics.
  mv_.setCopyOrView (Teuchos::View);
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& X,
                  const map_type& newMeshMap,
                  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
  blockSize_ (X.getBlockSize ())
{
  // Make sure that mv_ has view semantics.
  mv_.setCopyOrView (Teuchos::View);
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector () :
  dist_object_type (Teuchos::null),
  mvData_ (NULL),
  blockSize_ (0)
{
  // Make sure that mv_ has view semantics.
  mv_.setCopyOrView (Teuchos::View);
}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::map_type
BlockMultiVector<Scalar, LO, GO, Node>::
makePointMap (const map_type& meshMap, const LO blockSize)
{
  typedef Tpetra::global_size_t GST;
  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;

  const GST gblNumMeshMapInds =
    static_cast<GST> (meshMap.getGlobalNumElements ());
  const size_t lclNumMeshMapIndices =
    static_cast<size_t> (meshMap.getNodeNumElements ());
  const GST gblNumPointMapInds =
    gblNumMeshMapInds * static_cast<GST> (blockSize);
  const size_t lclNumPointMapInds =
    lclNumMeshMapIndices * static_cast<size_t> (blockSize);
  const GO indexBase = meshMap.getIndexBase ();

  if (meshMap.isContiguous ()) {
    return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
                     meshMap.getComm (), meshMap.getNode ());
  }
  else {
    // "Hilbert's Hotel" trick: multiply each process' GIDs by
    // blockSize, and fill in.  That ensures correctness even if the
    // mesh Map is overlapping.
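    // For example (illustrative numbers), with indexBase = 0 and
    // blockSize = 2, a process owning mesh GIDs {0, 3, 5} gets point
    // GIDs {0,1, 6,7, 10,11}: each mesh GID g expands to the range
    // [g*blockSize, (g+1)*blockSize).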
    Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
    const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
    Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
    for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
      const GO meshGid = lclMeshGblInds[g];
      const GO pointGidStart = indexBase +
        (meshGid - indexBase) * static_cast<GO> (blockSize);
      const size_type offset = g * static_cast<size_type> (blockSize);
      for (LO k = 0; k < blockSize; ++k) {
        const GO pointGid = pointGidStart + static_cast<GO> (k);
        lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
      }
    }
    return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
                     meshMap.getComm (), meshMap.getNode ());
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
replaceLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[]) const
{
  auto X_dst = getLocalBlock (localRowIndex, colIndex);
  typename const_little_vec_type::HostMirror::const_type X_src
    (reinterpret_cast<const impl_scalar_type*> (vals), getBlockSize ());
  Kokkos::deep_copy (X_dst, X_src);
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
replaceLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[]) const
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
replaceGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[]) const
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[]) const
{
  auto X_dst = getLocalBlock (localRowIndex, colIndex);
  typename const_little_vec_type::HostMirror::const_type X_src
    (reinterpret_cast<const impl_scalar_type*> (vals), getBlockSize ());
  AXPY (STS::one (), X_src, X_dst);
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[]) const
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[]) const
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}
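
// Example usage (a hedged sketch; the BlockMultiVector "X" and the
// values are hypothetical): replace, then accumulate into, the block
// at local mesh row 0 of column 0, assuming blockSize == 3.
//
//   const double vals[] = { 1.0, 2.0, 3.0 };
//   bool ok = X.replaceLocalValues (0, 0, vals);  // block := vals
//   ok = ok && X.sumIntoLocalValues (0, 0, vals); // block += vals
//   // Both return false if the calling process does not own mesh row 0.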

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    auto X_ij = getLocalBlock (localRowIndex, colIndex);
    vals = reinterpret_cast<Scalar*> (X_ij.ptr_on_device ());
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    auto X_ij = getLocalBlock (localRowIndex, colIndex);
    vals = reinterpret_cast<Scalar*> (X_ij.ptr_on_device ());
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::little_vec_type::HostMirror
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalBlock (const LO localRowIndex,
               const LO colIndex) const
{
  // NOTE (mfh 07 Jul 2016) It should be correct to add the
  // commented-out test below.  However, I've conservatively commented
  // it out, since users might not realize that they need to have
  // things sync'd correctly.

// #ifdef HAVE_TPETRA_DEBUG
//   TEUCHOS_TEST_FOR_EXCEPTION
//     (mv_.template need_sync<Kokkos::HostSpace> (), std::runtime_error,
//      "Tpetra::Experimental::BlockMultiVector::getLocalBlock: This method "
//      "accesses host data, but the object is not in sync on host.");
// #endif // HAVE_TPETRA_DEBUG

  if (! isValidLocalMeshIndex (localRowIndex)) {
    return typename little_vec_type::HostMirror ();
  } else {
    const size_t blockSize = getBlockSize ();
    const size_t offset = colIndex * this->getStrideY () +
      localRowIndex * blockSize;
    impl_scalar_type* blockRaw = this->getRawPtr () + offset;
    return typename little_vec_type::HostMirror (blockRaw, blockSize);
  }
}
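
// Layout note: column colIndex of mv_ starts at offset
// colIndex * getStrideY() in the raw host array, and entries within a
// column are contiguous, so the block for (localRowIndex, colIndex) is
// the length-blockSize range starting at
//   colIndex * getStrideY() + localRowIndex * blockSize.
// For example (illustrative numbers), with blockSize = 2 and
// getStrideY() = 10, block (3, 1) covers entries 16 and 17.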

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
BlockMultiVector<Scalar, LO, GO, Node>::
getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
{
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;

  // The source object of an Import or Export must be a
  // BlockMultiVector or MultiVector (a Vector is a MultiVector).  Try
  // them in that order; one must succeed.  Note that the target of
  // the Import or Export calls checkSizes in DistObject's doTransfer.
  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
  if (srcBlkVec == NULL) {
    const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
    if (srcMultiVec == NULL) {
      // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
      // currently does a shallow copy by default.  This is why we
      // return by RCP, rather than by value.
      return rcp (new mv_type ());
    } else { // src is a MultiVector
      return rcp (srcMultiVec, false); // nonowning RCP
    }
  } else { // src is a BlockMultiVector
    return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
checkSizes (const Tpetra::SrcDistObject& src)
{
  return ! getMultiVectorFromSrcDistObject (src).is_null ();
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
copyAndPermute (const Tpetra::SrcDistObject& src,
                size_t numSameIDs,
                const Teuchos::ArrayView<const LO>& permuteToLIDs,
                const Teuchos::ArrayView<const LO>& permuteFromLIDs)
{
  const char tfecfFuncName[] = "copyAndPermute: ";
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    permuteToLIDs.size () != permuteFromLIDs.size (), std::runtime_error,
    "permuteToLIDs and permuteFromLIDs must have the same size."
    << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
    << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");

  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
  Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    srcAsBmvPtr.is_null (), std::invalid_argument,
    "The source of an Import or Export to a BlockMultiVector "
    "must also be a BlockMultiVector.");
  const BMV& srcAsBmv = *srcAsBmvPtr;

  // FIXME (mfh 23 Apr 2014) This implementation is sequential and
  // assumes UVM.

  const LO numVecs = getNumVectors ();
  const LO numSame = static_cast<LO> (numSameIDs);
  for (LO j = 0; j < numVecs; ++j) {
    for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
      deep_copy (getLocalBlock (lclRow, j), srcAsBmv.getLocalBlock (lclRow, j));
    }
  }

  // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on the
  // same process, this merges their values by replacement of the last
  // encountered GID, not by the specified merge rule (such as ADD).
  const LO numPermuteLIDs = static_cast<LO> (permuteToLIDs.size ());
  for (LO j = 0; j < numVecs; ++j) {
    for (LO k = numSame; k < numPermuteLIDs; ++k) {
      deep_copy (getLocalBlock (permuteToLIDs[k], j),
                 srcAsBmv.getLocalBlock (permuteFromLIDs[k], j));
    }
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
packAndPrepare (const Tpetra::SrcDistObject& src,
                const Teuchos::ArrayView<const LO>& exportLIDs,
                Teuchos::Array<impl_scalar_type>& exports,
                const Teuchos::ArrayView<size_t>& /* numPacketsPerLID */,
                size_t& constantNumPackets,
                Tpetra::Distributor& /* distor */)
{
  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
  const char tfecfFuncName[] = "packAndPrepare: ";

  Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    srcAsBmvPtr.is_null (), std::invalid_argument,
    "The source of an Import or Export to a BlockMultiVector "
    "must also be a BlockMultiVector.");
  const BMV& srcAsBmv = *srcAsBmvPtr;

  const LO numVecs = getNumVectors ();
  const LO blockSize = getBlockSize ();

  // The number of things to pack per LID is the block size, times the
  // number of columns.  Input LIDs correspond to the mesh points, not
  // the degrees of freedom (DOFs).
  constantNumPackets =
    static_cast<size_t> (blockSize) * static_cast<size_t> (numVecs);
  const size_type numMeshLIDs = exportLIDs.size ();

  const size_type requiredExportsSize = numMeshLIDs *
    static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
  exports.resize (requiredExportsSize);

  try {
    size_type curExportPos = 0;
    for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
      for (LO j = 0; j < numVecs; ++j, curExportPos += blockSize) {
        const LO meshLid = exportLIDs[meshLidIndex];
        impl_scalar_type* const curExportPtr = &exports[curExportPos];
        typename little_vec_type::HostMirror X_dst (curExportPtr, blockSize);
        auto X_src = srcAsBmv.getLocalBlock (meshLid, j);

        Kokkos::deep_copy (X_dst, X_src);
      }
    }
  } catch (std::exception& e) {
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      true, std::logic_error, "packAndPrepare on Process "
      << meshMap_.getComm ()->getRank () << " raised the following exception: "
      << e.what ());
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
unpackAndCombine (const Teuchos::ArrayView<const LO>& importLIDs,
                  const Teuchos::ArrayView<const impl_scalar_type>& imports,
                  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
                  size_t constantNumPackets,
                  Tpetra::Distributor& distor,
                  Tpetra::CombineMode CM)
{
  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
  const char tfecfFuncName[] = "unpackAndCombine: ";

  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX && CM != ZERO,
    std::invalid_argument, "Invalid CombineMode: " << CM << ".  Valid "
    "CombineMode values are ADD, REPLACE, INSERT, ABSMAX, and ZERO.");

  if (CM == ZERO) {
    return; // Combining does nothing, so we don't have to combine anything.
  }

  // The number of things to unpack per LID is the block size, times
  // the number of columns.  Input LIDs correspond to the mesh points,
  // not the DOFs.
  const size_type numMeshLIDs = importLIDs.size ();
  const LO blockSize = getBlockSize ();
  const LO numVecs = getNumVectors ();

  const size_type requiredImportsSize = numMeshLIDs *
    static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
    imports.size () < requiredImportsSize, std::logic_error,
    "imports.size() = " << imports.size ()
    << " < requiredImportsSize = " << requiredImportsSize << ".");

  size_type curImportPos = 0;
  for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
    for (LO j = 0; j < numVecs; ++j, curImportPos += blockSize) {
      const LO meshLid = importLIDs[meshLidIndex];
      const impl_scalar_type* const curImportPtr = &imports[curImportPos];

      typename const_little_vec_type::HostMirror::const_type X_src
        (curImportPtr, blockSize);
      auto X_dst = getLocalBlock (meshLid, j);

      if (CM == INSERT || CM == REPLACE) {
        deep_copy (X_dst, X_src);
      } else if (CM == ADD) {
        AXPY (STS::one (), X_src, X_dst);
      } else if (CM == ABSMAX) {
        Impl::absMax (X_dst, X_src);
      }
    }
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
putScalar (const Scalar& val)
{
  mv_.putScalar (val);
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
scale (const Scalar& val)
{
  mv_.scale (val);
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
update (const Scalar& alpha,
        const BlockMultiVector<Scalar, LO, GO, Node>& X,
        const Scalar& beta)
{
  mv_.update (alpha, X.mv_, beta);
}

namespace Impl {

// Y := alpha * D * X, block by block.
template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
struct BlockWiseMultiply {
  typedef typename ViewD::size_type Size;

private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  template <typename View>
  using UnmanagedView = Kokkos::View<typename View::data_type,
                                     typename View::array_layout,
                                     typename View::device_type,
                                     Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewX> UnMViewX;

  const Size block_size_;
  Scalar alpha_;
  UnMViewY Y_;
  UnMViewD D_;
  UnMViewX X_;

public:
  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
                     const ViewY& Y, const ViewD& D, const ViewX& X)
    : block_size_ (block_size), alpha_ (alpha), Y_ (Y), D_ (D), X_ (X)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (const Size k) const {
    const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero ();
    auto D_curBlk = Kokkos::subview (D_, k, Kokkos::ALL (), Kokkos::ALL ());
    const auto num_vecs = X_.dimension_1 ();
    for (Size i = 0; i < num_vecs; ++i) {
      Kokkos::pair<Size, Size> kslice (k*block_size_, (k+1)*block_size_);
      auto X_curBlk = Kokkos::subview (X_, kslice, i);
      auto Y_curBlk = Kokkos::subview (Y_, kslice, i);
      // Y_curBlk := alpha * D_curBlk * X_curBlk.
      // Recall that GEMV does an update (+=) of the last argument.
      Tpetra::Experimental::FILL (Y_curBlk, zero);
      Tpetra::Experimental::GEMV (alpha_, D_curBlk, X_curBlk, Y_curBlk);
    }
  }
};

template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
createBlockWiseMultiply (const int block_size, const Scalar& alpha,
                         const ViewY& Y, const ViewD& D, const ViewX& X) {
  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX> (block_size, alpha, Y, D, X);
}
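
// Usage sketch (hedged; Y_lcl, D, X_lcl, and numLocalMeshRows are
// hypothetical names): blockWiseMultiply below uses this factory
// roughly as follows, launching one parallel iteration per mesh row.
//
//   auto bwm = createBlockWiseMultiply (blockSize, alpha, Y_lcl, D, X_lcl);
//   Kokkos::RangePolicy<execution_space, LO> range (0, numLocalMeshRows);
//   Kokkos::parallel_for (range, bwm);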

template <typename ViewY,
          typename Scalar,
          typename ViewD,
          typename ViewZ,
          typename LO = typename ViewY::size_type>
class BlockJacobiUpdate {
private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  template <typename ViewType>
  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
                                     typename ViewType::array_layout,
                                     typename ViewType::device_type,
                                     Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewZ> UnMViewZ;

  const LO blockSize_;
  UnMViewY Y_;
  const Scalar alpha_;
  UnMViewD D_;
  UnMViewZ Z_;
  const Scalar beta_;

public:
  BlockJacobiUpdate (const ViewY& Y,
                     const Scalar& alpha,
                     const ViewD& D,
                     const ViewZ& Z,
                     const Scalar& beta) :
    blockSize_ (D.dimension_1 ()),
    // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.dimension_1 ())),
    Y_ (Y),
    alpha_ (alpha),
    D_ (D),
    Z_ (Z),
    beta_ (beta)
  {
    static_assert (static_cast<int> (ViewY::rank) == 1,
                   "Y must have rank 1.");
    static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
    static_assert (static_cast<int> (ViewZ::rank) == 1,
                   "Z must have rank 1.");
    // static_assert (static_cast<int> (ViewZ::rank) ==
    //                static_cast<int> (ViewY::rank),
    //                "Z must have the same rank as Y.");
  }

  KOKKOS_INLINE_FUNCTION void
  operator() (const LO& k) const
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    typedef Kokkos::pair<LO, LO> range_type;
    typedef Kokkos::Details::ArithTraits<Scalar> KAT;

    // We only have to implement the alpha != 0 case.

    auto D_curBlk = subview (D_, k, ALL (), ALL ());
    const range_type kslice (k*blockSize_, (k+1)*blockSize_);

    // Z.update (STS::one (), X, -STS::one ()); // assume this is done

    auto Z_curBlk = subview (Z_, kslice);
    auto Y_curBlk = subview (Y_, kslice);
    // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
    if (beta_ == KAT::zero ()) {
      Tpetra::Experimental::FILL (Y_curBlk, KAT::zero ());
    }
    else if (beta_ != KAT::one ()) {
      Tpetra::Experimental::SCAL (beta_, Y_curBlk);
    }
    Tpetra::Experimental::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
  }
};

template<class ViewY,
         class Scalar,
         class ViewD,
         class ViewZ,
         class LO = typename ViewD::size_type>
void
blockJacobiUpdate (const ViewY& Y,
                   const Scalar& alpha,
                   const ViewD& D,
                   const ViewZ& Z,
                   const Scalar& beta)
{
  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
                 "Y and Z must have the same rank.");
  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");

  const auto lclNumMeshRows = D.dimension_0 ();

#ifdef HAVE_TPETRA_DEBUG
  // D.dimension_0() is the (local) number of mesh rows.
  // D.dimension_1() is the block size.  Thus, their product should be
  // the local number of point rows, that is, the number of rows in Y.
  const auto blkSize = D.dimension_1 ();
  const auto lclNumPtRows = lclNumMeshRows * blkSize;
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.dimension_0 () != lclNumPtRows, std::invalid_argument,
     "blockJacobiUpdate: Y.dimension_0() = " << Y.dimension_0 () << " != "
     "D.dimension_0()*D.dimension_1() = " << lclNumMeshRows << " * " << blkSize
     << " = " << lclNumPtRows << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.dimension_0 () != Z.dimension_0 (), std::invalid_argument,
     "blockJacobiUpdate: Y.dimension_0() = " << Y.dimension_0 () << " != "
     "Z.dimension_0() = " << Z.dimension_0 () << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.dimension_1 () != Z.dimension_1 (), std::invalid_argument,
     "blockJacobiUpdate: Y.dimension_1() = " << Y.dimension_1 () << " != "
     "Z.dimension_1() = " << Z.dimension_1 () << ".");
#endif // HAVE_TPETRA_DEBUG

  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
  // lclNumMeshRows must fit in LO, else the Map would not be correct.
  range_type range (0, static_cast<LO> (lclNumMeshRows));
  Kokkos::parallel_for (range, functor);
}

} // namespace Impl

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
blockWiseMultiply (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
                   const BlockMultiVector<Scalar, LO, GO, Node>& X)
{
  using Kokkos::ALL;
  typedef typename device_type::execution_space execution_space;
  typedef typename device_type::memory_space memory_space;
  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();

  if (alpha == STS::zero ()) {
    this->putScalar (STS::zero ());
  }
  else { // alpha != 0
    const LO blockSize = this->getBlockSize ();
    const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
    auto X_lcl = X.mv_.template getLocalView<memory_space> ();
    auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
    auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);

    // Use an explicit RangePolicy with the desired execution space.
    // Otherwise, if you just use a number, it runs on the default
    // execution space.  For example, if the default execution space
    // is Cuda but the current execution space is Serial, using just a
    // number would incorrectly run with Cuda.
    Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
    Kokkos::parallel_for (range, bwm);
  }
}
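
// Example usage (a hedged sketch; Y, D, and X are hypothetical):
// Y := 2 * D * X, where D is a rank-3 view of the local block-diagonal
// entries, with dimensions (lclNumMeshRows, blockSize, blockSize).
//
//   Y.blockWiseMultiply (static_cast<Scalar> (2.0), D, X);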

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
blockJacobiUpdate (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
                   const BlockMultiVector<Scalar, LO, GO, Node>& X,
                   BlockMultiVector<Scalar, LO, GO, Node>& Z,
                   const Scalar& beta)
{
  using Kokkos::ALL;
  using Kokkos::subview;
  typedef typename device_type::memory_space memory_space;
  typedef impl_scalar_type IST;

  const IST alphaImpl = static_cast<IST> (alpha);
  const IST betaImpl = static_cast<IST> (beta);
  const LO numVecs = mv_.getNumVectors ();

  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();

  if (alpha == STS::zero ()) { // Y := beta * Y
    this->scale (beta);
  }
  else { // alpha != 0
    Z.update (STS::one (), X, -STS::one ());
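    // At this point Z = X - Z, so the loop below computes, for each
    // column j, Y(:,j) := beta * Y(:,j) + alpha * D * Z(:,j); that is,
    // the block Jacobi update Y := beta * Y + alpha * D * (X - Z).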
    for (LO j = 0; j < numVecs; ++j) {
      auto X_lcl_j = subview (X_lcl, ALL (), j);
      auto Y_lcl_j = subview (Y_lcl, ALL (), j);
      auto Z_lcl_j = subview (Z_lcl, ALL (), j);
      Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
    }
  }
}

} // namespace Experimental
} // namespace Tpetra

//
// Explicit instantiation macro
//
// Must be expanded from within the Tpetra namespace!
//
#define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
  template class Experimental::BlockMultiVector< S, LO, GO, NODE >;

#endif // TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP