Tpetra parallel linear algebra  Version of the Day
Tpetra_MultiVectorFiller.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 #ifndef __Tpetra_MultiVectorFiller_hpp
42 #define __Tpetra_MultiVectorFiller_hpp
43 
44 #include "Tpetra_MultiVector.hpp"
45 #include "Tpetra_Vector.hpp"
46 #include "Teuchos_CommHelpers.hpp"
47 #include <iterator>
48 #include <set>
49 
50 namespace Tpetra {
51 namespace Details {
52  // \fn sortAndMergeIn
53  // \brief Sort and merge newEntries into allEntries, and make unique.
54  //
55  // \param allEntries [in/out]: Array in which current entries have
56  // already been stored, and in which the new entries are to be
57  // stored. Passed by reference in case we need to resize it.
58  //
59  // \param currentEntries [in/out]: Current entries, which we assume
60  // have already been sorted and made unique. Aliases the
61  // beginning of allEntries.
62  //
63  // \param newEntries [in/out] New entries, which have not yet been
64  // sorted or made unique. This does <i>not</i> alias allEntries.
65  //
66  // Sort and make entries of newEntries unique. Resize allEntries if
67  // necessary to fit the unique entries of newEntries. Merge
68  // newEntries into allEntries and make the results unique. (This is
69  // cheaper than sorting the whole array.)
70  //
71  // \return A view of all the entries (current and new) in allEntries.
72  template<class T>
73  Teuchos::ArrayView<T>
74  sortAndMergeIn (Teuchos::Array<T>& allEntries,
75  Teuchos::ArrayView<T> currentEntries,
76  Teuchos::ArrayView<T> newEntries)
77  {
78  using Teuchos::ArrayView;
79  using Teuchos::as;
80  typedef typename ArrayView<T>::iterator iter_type;
81  typedef typename ArrayView<T>::size_type size_type;
82 
83  std::sort (newEntries.begin(), newEntries.end());
84  iter_type it = std::unique (newEntries.begin(), newEntries.end());
85  const size_type numNew = as<size_type> (it - newEntries.begin());
86  // View of the sorted, made-unique new entries to merge in.
87  ArrayView<T> newEntriesView = newEntries.view (0, numNew);
88 
89  const size_type numCur = currentEntries.size();
90  if (allEntries.size() < numCur + numNew) {
91  allEntries.resize (numCur + numNew);
92  }
93  ArrayView<T> allView = allEntries.view (0, numCur + numNew);
94  ArrayView<T> newView = allEntries.view (numCur, numNew); // target of copy
95 
96  std::copy (newEntries.begin(), newEntries.end(), newView.begin());
97  std::inplace_merge (allView.begin(), newView.begin(), allView.end());
98  iter_type it2 = std::unique (allView.begin(), allView.end());
99  const size_type numTotal = as<size_type> (it2 - allView.begin());
100 
101  return allEntries.view (0, numTotal);
102  }
103 
109  template<class MV>
111  public:
112  typedef typename MV::scalar_type scalar_type;
113  typedef typename MV::local_ordinal_type local_ordinal_type;
114  typedef typename MV::global_ordinal_type global_ordinal_type;
115  typedef typename MV::node_type node_type;
116 
117  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
118 
130  MultiVectorFillerData (const Teuchos::RCP<const map_type>& map) :
131  map_ (map),
132  numCols_ (0)
133  {}
134 
151  MultiVectorFillerData (const Teuchos::RCP<const map_type>& map,
152  const size_t numColumns) :
153  map_ (map),
154  numCols_ (numColumns),
155  sourceIndices_ (numColumns),
156  sourceValues_ (numColumns)
157  {}
158 
160  void
161  setNumColumns (const size_t newNumColumns)
162  {
163  const size_t oldNumColumns = getNumColumns();
164  if (newNumColumns >= oldNumColumns) {
165  for (size_t j = oldNumColumns; j < newNumColumns; ++j) {
166  sourceIndices_.push_back (Teuchos::Array<global_ordinal_type> ());
167  sourceValues_.push_back (Teuchos::Array<scalar_type> ());
168  }
169  }
170  else {
171  // This may not necessarily deallocate any data, but that's OK.
172  sourceIndices_.resize (newNumColumns);
173  sourceValues_.resize (newNumColumns);
174  }
175  numCols_ = oldNumColumns;
176  }
177 
178  void
179  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
180  const size_t column,
181  const Teuchos::ArrayView<const scalar_type>& values)
182  {
183  if (column >= getNumColumns()) {
184  for (size_t j = column; j < getNumColumns(); ++j) {
185  sourceIndices_.push_back (Teuchos::Array<global_ordinal_type> ());
186  sourceValues_.push_back (Teuchos::Array<scalar_type> ());
187  }
188  }
189  std::copy (rows.begin(), rows.end(), std::back_inserter (sourceIndices_[column]));
190  std::copy (values.begin(), values.end(), std::back_inserter (sourceValues_[column]));
191  }
192 
200  void
201  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
202  const Teuchos::ArrayView<const scalar_type>& values)
203  {
204  typedef global_ordinal_type GO;
205  typedef scalar_type ST;
206  typedef typename Teuchos::ArrayView<const GO>::const_iterator GoIter;
207  typedef typename Teuchos::ArrayView<const ST>::const_iterator StIter;
208 
209  const size_t numColumns = getNumColumns();
210  GoIter rowIter = rows.begin();
211  StIter valIter = values.begin();
212  for (size_t j = 0; j < numColumns; ++j) {
213  GoIter rowIterNext = rowIter + numColumns;
214  StIter valIterNext = valIter + numColumns;
215  std::copy (rowIter, rowIterNext, std::back_inserter (sourceIndices_[j]));
216  std::copy (valIter, valIterNext, std::back_inserter (sourceValues_[j]));
217  rowIter = rowIterNext;
218  valIter = valIterNext;
219  }
220  }
221 
246  template<class BinaryFunction>
247  void
248  locallyAssemble (MV& X, BinaryFunction& f)
249  {
250  using Teuchos::Array;
251  using Teuchos::ArrayRCP;
252  using Teuchos::ArrayView;
253  using Teuchos::RCP;
254  typedef local_ordinal_type LO;
255  typedef global_ordinal_type GO;
256  typedef scalar_type ST;
257  typedef node_type NT;
258 
259  RCP<const ::Tpetra::Map<LO, GO, NT> > map = X.getMap();
260  Array<LO> localIndices;
261  const size_t numColumns = getNumColumns();
262  for (size_t j = 0; j < numColumns; ++j) {
263  const typename Array<const GO>::size_type numIndices = sourceIndices_[j].size();
264  // Precompute all the local indices before saving to the
265  // vector. Hopefully this gives us a little bit of locality
266  // in the global->local conversion, at the expense of a little
267  // more storage.
268  if (localIndices.size() < numIndices) {
269  localIndices.resize (numIndices);
270  }
271  ArrayView<LO> localIndicesView = localIndices.view (0, numIndices);
272  ArrayView<const GO> globalIndicesView = sourceIndices_[j].view (0, numIndices);
273  for (typename ArrayView<const GO>::size_type i = 0; i < numIndices; ++i) {
274  localIndices[i] = map->getLocalElement (globalIndicesView[i]);
275  }
276 
277  ArrayRCP<ST> X_j = X.getDataNonConst (j);
278  ArrayView<const ST> localValues = sourceValues_[j].view (0, numIndices);
279  for (typename ArrayView<const GO>::size_type i = 0; i < numIndices; ++i) {
280  const LO localInd = localIndices[i];
281  X_j[localInd] = f (X_j[localInd], localValues[i]);
282  }
283  }
284  }
285 
287  void
289  {
290  std::plus<double> f;
291  locallyAssemble<std::plus<scalar_type> > (X, f);
292  }
293 
295  void clear() {
296  Teuchos::Array<Teuchos::Array<global_ordinal_type> > newSourceIndices;
297  Teuchos::Array<Teuchos::Array<scalar_type> > newSourceValues;
298  // The standard STL idiom for clearing the contents of a vector completely.
299  std::swap (sourceIndices_, newSourceIndices);
300  std::swap (sourceValues_, newSourceValues);
301  }
302 
304  Teuchos::Array<global_ordinal_type>
306  {
307  using Teuchos::Array;
308  using Teuchos::ArrayView;
309  using Teuchos::as;
310  typedef global_ordinal_type GO;
311  typedef typename Array<GO>::size_type size_type;
312 
313  Array<GO> allInds (0); // will resize below
314  const size_t numCols = getNumColumns();
315 
316  if (numCols == 1) {
317  // Special case for 1 column avoids copying indices twice.
318  // Pick the size of the array exactly the first time so there
319  // are at most two allocations (the constructor may choose to
320  // allocate).
321  const size_type numNew = sourceIndices_[0].size();
322  allInds.resize (allInds.size() + numNew);
323  std::copy (sourceIndices_[0].begin(), sourceIndices_[0].end(),
324  allInds.begin());
325  std::sort (allInds.begin(), allInds.end());
326  typename Array<GO>::iterator it =
327  std::unique (allInds.begin(), allInds.end());
328  const size_type numFinal = as<size_type> (it - allInds.begin());
329  allInds.resize (numFinal);
330  }
331  else {
332  // Carefully collect all the row indices one column at a time.
333  // This ensures that the total allocation size in this routine
334  // is independent of the number of columns. Also, only sort
335  // the current column's indices. Use merges to ensure sorted
336  // order in the collected final result.
337  ArrayView<GO> curIndsView = allInds.view (0, 0); // will grow
338  Array<GO> newInds;
339  for (size_t j = 0; j < numCols; ++j) {
340  const size_type numNew = sourceIndices_[j].size();
341  if (numNew > newInds.size()) {
342  newInds.resize (numNew);
343  }
344  ArrayView<GO> newIndsView = newInds.view (0, numNew);
345  std::copy (sourceIndices_[j].begin(), sourceIndices_[j].end(),
346  newIndsView.begin());
347  curIndsView = sortAndMergeIn<GO> (allInds, curIndsView, newIndsView);
348  }
349  }
350  return allInds;
351  }
352 
353  private:
354  Teuchos::RCP<const map_type> map_;
355  size_t numCols_;
356  Teuchos::Array<Teuchos::Array<global_ordinal_type> > sourceIndices_;
357  Teuchos::Array<Teuchos::Array<scalar_type> > sourceValues_;
358 
359  size_t getNumColumns() const { return numCols_; }
360  };
361 
368  template<class MV>
369  class MultiVectorFillerData2 : public Teuchos::Describable {
370  public:
371  typedef typename MV::scalar_type scalar_type;
372  typedef typename MV::local_ordinal_type local_ordinal_type;
373  typedef typename MV::global_ordinal_type global_ordinal_type;
374  typedef typename MV::node_type node_type;
375 
376  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
377 
388  MultiVectorFillerData2 (const Teuchos::RCP<const map_type>& map,
389  const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT,
390  const Teuchos::RCP<Teuchos::FancyOStream>& out=Teuchos::null) :
391  map_ (map),
392  numCols_ (0),
393  verbLevel_ (verbLevel),
394  out_ (out)
395  {}
396 
414  MultiVectorFillerData2 (const Teuchos::RCP<const map_type>& map,
415  const size_t numColumns,
416  const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT,
417  const Teuchos::RCP<Teuchos::FancyOStream>& out=Teuchos::null) :
418  map_ (map),
419  numCols_ (numColumns),
420  localVec_ (new MV (map, numColumns)),
421 #if 0
422  nonlocalIndices_ (numColumns),
423  nonlocalValues_ (numColumns),
424 #endif // 0
425  verbLevel_ (verbLevel),
426  out_ (out)
427  {}
428 
429  std::string
430  description() const
431  {
432  std::ostringstream oss;
433  oss << "Tpetra::MultiVectorFillerData2<"
434  << Teuchos::TypeNameTraits<MV>::name () << ">";
435  return oss.str();
436  }
437 
438  void
439  describe (Teuchos::FancyOStream& out,
440  const Teuchos::EVerbosityLevel verbLevel =
441  Teuchos::Describable::verbLevel_default) const
442  {
443  using std::endl;
444  using Teuchos::Array;
445  using Teuchos::ArrayRCP;
446  using Teuchos::ArrayView;
447  using Teuchos::RCP;
448  using Teuchos::VERB_DEFAULT;
449  using Teuchos::VERB_NONE;
450  using Teuchos::VERB_LOW;
451  using Teuchos::VERB_MEDIUM;
452  using Teuchos::VERB_HIGH;
453  using Teuchos::VERB_EXTREME;
454 
455  // Set default verbosity if applicable.
456  const Teuchos::EVerbosityLevel vl =
457  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
458 
459  RCP<const Teuchos::Comm<int> > comm = map_->getComm();
460  const int myImageID = comm->getRank();
461  const int numImages = comm->getSize();
462 
463  if (vl != VERB_NONE) {
464  // Don't set the tab level unless we're printing something.
465  Teuchos::OSTab tab (out);
466 
467  if (myImageID == 0) { // >= VERB_LOW prints description()
468  out << this->description() << endl;
469  }
470  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
471  if (myImageID == imageCtr) {
472  if (vl != VERB_LOW) {
473  // At verbosity > VERB_LOW, each process prints something.
474  out << "Process " << myImageID << ":" << endl;
475 
476  Teuchos::OSTab procTab (out);
477  // >= VERB_MEDIUM: print the local vector length.
478  out << "local length=" << localVec_->getLocalLength();
479  if (vl != VERB_MEDIUM) {
480  // >= VERB_HIGH: print isConstantStride() and getStride()
481  if (localVec_->isConstantStride()) {
482  out << ", constant stride=" << localVec_->getStride() << endl;
483  }
484  else {
485  out << ", not constant stride" << endl;
486  }
487  if (vl == VERB_EXTREME) {
488  // VERB_EXTREME: print all the values in the multivector.
489  out << "Local values:" << endl;
490  ArrayRCP<ArrayRCP<const scalar_type> > X = localVec_->get2dView();
491  for (size_t i = 0; i < localVec_->getLocalLength(); ++i) {
492  for (size_t j = 0; j < localVec_->getNumVectors(); ++j) {
493  out << X[j][i];
494  if (j < localVec_->getNumVectors() - 1) {
495  out << " ";
496  }
497  } // for each column
498  out << endl;
499  } // for each row
500 
501 #if 0
502  out << "Nonlocal indices and values:" << endl;
503  for (size_t j = 0; j < (size_t)nonlocalIndices_.size(); ++j) {
504  ArrayView<const global_ordinal_type> inds = nonlocalIndices_[j]();
505  ArrayView<const scalar_type> vals = nonlocalValues_[j]();
506  for (typename ArrayView<const global_ordinal_type>::size_type k = 0;
507  k < inds.size(); ++k) {
508  out << "X(" << inds[k] << "," << j << ") = " << vals[k] << endl;
509  }
510  }
511 #endif // 0
512  } // if vl == VERB_EXTREME
513  } // if (vl != VERB_MEDIUM)
514  else { // vl == VERB_LOW
515  out << endl;
516  }
517  } // if vl != VERB_LOW
518  } // if it is my process' turn to print
519  } // for each process in the communicator
520  } // if vl != VERB_NONE
521  }
522 
524  Teuchos::Array<global_ordinal_type>
525  getSourceIndices (const Teuchos::Comm<int>& comm,
526  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
527  const bool debug = false) const
528  {
529  using Teuchos::Array;
530  using Teuchos::ArrayView;
531  using Teuchos::RCP;
532  using Teuchos::rcp;
533  using Tpetra::global_size_t;
534  typedef global_ordinal_type GO;
535  const char prefix[] = "Tpetra::MultiVectorFiller::getSourceIndices: ";
536 
537  if (debug && ! out.is_null ()) {
538  std::ostringstream os;
539  os << "Proc " << comm.getRank () << ": getSourceIndices" << std::endl;
540  *out << os.str ();
541  }
542 
543  // Get the nonlocal row indices, sorted and made unique.
544  // It's fair to assume that these are not contiguous.
545  Array<GO> nonlocalIndices = getSortedUniqueNonlocalIndices (comm, out, debug);
546 
547  // Get the local row indices, not necessarily sorted or unique.
548  ArrayView<const GO> localIndices = getLocalIndices ();
549 
550  // Copy the local indices into the full indices array, and sort
551  // them there. We'll merge in the nonlocal indices below. This
552  // can be more efficient than just sorting all the indices, if
553  // there are a lot of nonlocal indices.
554  Array<GO> indices (localIndices.size () + nonlocalIndices.size ());
555  ArrayView<GO> localIndView = indices.view (0, localIndices.size ());
556  TEUCHOS_TEST_FOR_EXCEPTION
557  (localIndView.size () > indices.size (), std::logic_error,
558  prefix << "localIndView.size() = " << localIndView.size ()
559  << " > indices.size() = " << indices.size () << ".");
560  TEUCHOS_TEST_FOR_EXCEPTION
561  (localIndView.size () != localIndices.size (), std::logic_error,
562  prefix << "localIndView.size() = " << localIndView.size ()
563  << " != localIndices.size() = " << localIndices.size () << ".");
564 
565  std::copy (localIndices.begin (), localIndices.end (), localIndView.begin ());
566  std::sort (localIndView.begin (), localIndView.end ());
567 
568  if (debug && ! out.is_null ()) {
569  std::ostringstream os;
570  os << "Proc " << comm.getRank () << ": Right after copying and sorting" << std::endl;
571  *out << os.str ();
572  }
573 
574  // Merge the local and nonlocal indices.
575  if (nonlocalIndices.size () > 0) {
576  ArrayView<GO> nonlclIndView =
577  indices.view (localIndices.size (), nonlocalIndices.size ());
578 
579  // We need a view of the output array, because
580  // std::inplace_merge needs all its iterator inputs to have
581  // the same type. Debug mode builds are pickier than release
582  // mode builds, because the iterators in a debug mode build
583  // are of a different type that does run-time checking (they
584  // aren't just raw pointers).
585  ArrayView<GO> indView = indices.view (0, indices.size ());
586  if (debug && ! out.is_null ()) {
587  std::ostringstream os;
588  os << "Right before std::copy" << std::endl;
589  *out << os.str ();
590  }
591  std::copy (nonlocalIndices.begin (),
592  nonlocalIndices.end (),
593  nonlclIndView.begin ());
594  if (debug && ! out.is_null ()) {
595  std::ostringstream os;
596  os << "Proc " << comm.getRank () << ": Right before std::inplace_merge"
597  << std::endl;
598  *out << os.str ();
599  }
600  std::inplace_merge (indView.begin (),
601  indView.begin () + localIndices.size (),
602  indView.end ());
603  }
604 
605  if (debug && ! out.is_null ()) {
606  std::ostringstream os;
607  os << "Proc " << comm.getRank () << ": Done with getSourceIndices"
608  << std::endl;
609  *out << os.str ();
610  }
611  return indices;
612  }
613 
623  void
624  setNumColumns (const size_t newNumColumns)
625  {
626  using Teuchos::Array;
627  using Teuchos::Range1D;
628  using Teuchos::RCP;
629 
630  const size_t oldNumColumns = numCols_;
631  if (newNumColumns == oldNumColumns) {
632  return; // No side effects if no change.
633  }
634 
635  RCP<MV> newLclVec;
636  if (newNumColumns > oldNumColumns) {
637  newLclVec = rcp (new MV (map_, newNumColumns));
638  // Assign the contents of the old local multivector to the
639  // first oldNumColumns columns of the new local multivector,
640  // then get rid of the old local multivector.
641  RCP<MV> newLclVecView =
642  newLclVec->subViewNonConst (Range1D (0, oldNumColumns-1));
643  *newLclVecView = *localVec_;
644  }
645  else {
646  if (newNumColumns == 0) {
647  // Tpetra::MultiVector doesn't let you construct a
648  // multivector with zero columns.
649  newLclVec = Teuchos::null;
650  }
651  else {
652  newLclVec = localVec_->subViewNonConst (Range1D (0, newNumColumns-1));
653  }
654  }
655 
656  // Leave most side effects until the end, for exception safety.
657  localVec_ = newLclVec;
658  numCols_ = newNumColumns;
659  }
660 
666  void
667  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
668  const size_t columnIndex,
669  const Teuchos::ArrayView<const scalar_type>& values)
670  {
671  using Teuchos::ArrayView;
672  typedef local_ordinal_type LO;
673  typedef global_ordinal_type GO;
674  typedef scalar_type ST;
675  typedef decltype (rows.size ()) size_type;
676  const char prefix[] = "Tpetra::MultiVectorFiller::sumIntoGlobalValues: ";
677 
678  if (map_.is_null ()) {
679  return; // the calling process is not participating
680  }
681  const size_type numEnt = rows.size ();
682  TEUCHOS_TEST_FOR_EXCEPTION
683  (numEnt != values.size (), std::invalid_argument, prefix
684  << "rows.size() = " << numEnt << " != values.size() = "
685  << values.size () << ".");
686 
687  // FIXME (31 Aug 2015) Don't do this.
688  if (columnIndex >= getNumColumns()) {
689  // Automatically expand the number of columns. This
690  // implicitly ensures that localVec_ is not null.
691  setNumColumns (columnIndex + 1);
692  }
693 
694  // It would be a lot more efficient for users to get the Kokkos
695  // view outside of their fill loop, and fill into it directly.
696  auto X_j = localVec_->getVector (columnIndex);
697  auto X_j_2d = X_j->template getLocalView<typename MV::dual_view_type::t_host::memory_space> ();
698  auto X_j_1d = Kokkos::subview (X_j_2d, Kokkos::ALL (), 0);
699 
700  const map_type& theMap = *map_;
701  for (size_type k = 0; k < numEnt; ++k) {
702  const ST val = values[k];
703  const GO gblRowInd = rows[k];
704  const LO lclRowInd = theMap.getLocalElement (gblRowInd);
705  if (lclRowInd == Teuchos::OrdinalTraits<LO>::invalid ()) {
706  // The row doesn't belong to the calling process, so stuff
707  // its data in nonlocals_.
708 
709  auto& innerMap = nonlocals_[columnIndex];
710  auto innerIter = innerMap.find (gblRowInd);
711  if (innerIter == innerMap.end ()) {
712  innerMap.insert (std::make_pair (gblRowInd, values[k]));
713  } else {
714  innerIter->second += val;
715  }
716  }
717  else {
718  // FIXME (mfh 27 Feb 2012) Allow different combine modes.
719  // The current combine mode just adds to the current value
720  // at that location.
721  X_j_1d[lclRowInd] += val;
722  }
723  }
724  }
725 
735  void
736  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
737  const Teuchos::ArrayView<const scalar_type>& values)
738  {
739  using Teuchos::ArrayView;
740  typedef typename ArrayView<const global_ordinal_type>::size_type size_type;
741 
742  const size_t numCols = getNumColumns();
743  for (size_t j = 0; j < numCols; ++j) {
744  const size_type offset = numCols*j;
745  const size_type len = numCols;
746  this->sumIntoGlobalValues (rows.view (offset, len), j, values.view (offset, len));
747  }
748  }
749 
774  template<class BinaryFunction>
775  void
776  locallyAssemble (MV& X, BinaryFunction& f)
777  {
778  typedef scalar_type ST;
779  typedef local_ordinal_type LO;
780  typedef global_ordinal_type GO;
781  typedef typename MV::dual_view_type::t_host::memory_space host_memory_space;
782 
783  if (X.getMap ().is_null ()) {
784  return; // this process is not participating
785  }
786  const map_type& srcMap = * (X.getMap ());
787 
788 
789 
790  for (size_t j = 0; j < X.getNumVectors (); ++j) {
791  auto X_j = X.getVector (j);
792  auto X_j_2d = X_j->template getLocalView<host_memory_space> ();
793  auto X_j_1d = Kokkos::subview (X_j_2d, Kokkos::ALL (), 0);
794 
795  // First, assemble in the local components from localVec_.
796  if (! localVec_.is_null () && ! localVec_->getMap ().is_null ()
797  && j < localVec_->getNumVectors ()) {
798  auto lcl_j = localVec_->getVector (j);
799  auto lcl_j_2d = lcl_j->template getLocalView<host_memory_space> ();
800  auto lcl_j_1d = Kokkos::subview (lcl_j_2d, Kokkos::ALL (), 0);
801 
802  // Iterate over all rows of localVec_. i_lcl is a local
803  // index with respect to localVec_'s Map, which may differ
804  // from X's Map.
805  const map_type& lclMap = * (localVec_->getMap ());
806  const LO lclNumRows = static_cast<LO> (lcl_j->getLocalLength ());
807  for (LO i_lcl = 0; i_lcl < lclNumRows; ++i_lcl) {
808  const GO i_gbl = lclMap.getGlobalElement (i_lcl);
809  const LO i_X = srcMap.getLocalElement (i_gbl);
810  X_j_1d(i_X) = f (X_j_1d(i_X), lcl_j_1d(i_lcl));
811  }
812  }
813 
814  // Second, assemble in the nonlocal components.
815  auto outerIter = nonlocals_.find (j);
816  if (outerIter != nonlocals_.end ()) {
817  auto beg = outerIter->second.begin ();
818  auto end = outerIter->second.end ();
819  for (auto innerIter = beg; innerIter != end; ++innerIter) {
820  const GO gblRowInd = innerIter->first;
821  const LO lclRowInd = srcMap.getLocalElement (gblRowInd);
822 
823  if (lclRowInd != Teuchos::OrdinalTraits<LO>::invalid ()) {
824  const ST val = innerIter->second;
825  X_j_1d(lclRowInd) = f (X_j_1d(lclRowInd), val);
826  }
827  }
828  }
829  }
830  }
831 
837  void
839  {
840  std::plus<double> f;
841  locallyAssemble<std::plus<scalar_type> > (X, f);
842  }
843 
849  void clear ()
850  {
851  std::map<size_t, std::map<global_ordinal_type, scalar_type> > newNonlcls;
852  // The standard STL idiom for clearing the contents of a
853  // container completely. Setting the size to zero may not
854  // necessarily deallocate data.
855  std::swap (nonlocals_, newNonlcls);
856 
857  // Don't actually deallocate the multivector of local entries.
858  // Just fill it with zero. This is because the caller hasn't
859  // reset the number of columns.
860  if (! localVec_.is_null ()) {
861  localVec_->putScalar (Teuchos::ScalarTraits<scalar_type>::zero());
862  }
863  }
864 
865  private:
867  Teuchos::RCP<const map_type> map_;
868 
870  size_t numCols_;
871 
873  Teuchos::RCP<MV> localVec_;
874 
876  std::map<size_t, std::map<global_ordinal_type, scalar_type> > nonlocals_;
877 
879  Teuchos::EVerbosityLevel verbLevel_;
880 
882  Teuchos::RCP<Teuchos::FancyOStream> out_;
883 
885  size_t getNumColumns() const { return numCols_; }
886 
888  Teuchos::ArrayView<const global_ordinal_type>
889  getLocalIndices() const
890  {
891  return map_->getNodeElementList ();
892  }
893 
895  Teuchos::Array<global_ordinal_type>
896  getSortedUniqueNonlocalIndices (const Teuchos::Comm<int>& comm,
897  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
898  const bool debug = false) const
899  {
900  using Teuchos::Array;
901  using Teuchos::ArrayView;
902  using Teuchos::as;
903  typedef global_ordinal_type GO;
904 
905  if (debug && ! out.is_null ()) {
906  std::ostringstream os;
907  os << "Proc " << comm.getRank () << ": getSortedUniqueNonlocalIndices"
908  << std::endl;
909  *out << os.str ();
910  }
911 
912  // FIXME (mfh 30 Aug 2015) The std::set algorithm is harder to
913  // thread-parallelize, but it should be easier to make the new
914  // test pass with this.
915  std::set<GO> indsOut;
916  const size_t numCols = getNumColumns ();
917  for (size_t j = 0; j < numCols; ++j) {
918  auto outerIter = nonlocals_.find (j);
919  if (outerIter != nonlocals_.end ()) {
920  auto beg = outerIter->second.begin ();
921  auto end = outerIter->second.end ();
922  for (auto innerIter = beg; innerIter != end; ++innerIter) {
923  // *innerIter: (global row index, value) pair.
924  indsOut.insert (innerIter->first);
925  }
926  }
927  }
928 
929  if (debug && ! out.is_null ()) {
930  std::ostringstream os;
931  os << "Proc " << comm.getRank () << ": Made nonlocals set" << std::endl;
932  *out << os.str ();
933  }
934 
935  Array<GO> allNonlocals (indsOut.begin (), indsOut.end ());
936  if (debug && ! out.is_null ()) {
937  std::ostringstream os;
938  os << "Proc " << comm.getRank () << ": Done with "
939  "getSortedUniqueNonlocalIndices" << std::endl;
940  *out << os.str ();
941  }
942  return allNonlocals;
943  }
944  };
945 } // namespace Details
946 } // namespace Tpetra
947 
948 namespace Tpetra {
949 
974  template<class MV>
976  public:
977  typedef typename MV::scalar_type scalar_type;
978  typedef typename MV::local_ordinal_type local_ordinal_type;
979  typedef typename MV::global_ordinal_type global_ordinal_type;
980  typedef typename MV::node_type node_type;
981  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
982 
1012  MultiVectorFiller (const Teuchos::RCP<const map_type>& map,
1013  const size_t numCols);
1014 
1034  void
1035  globalAssemble (MV& X_out, const bool forceReuseMap = false);
1036 
1038  void clear () {
1039  data_.clear ();
1040  }
1041 
1042  void
1043  describe (Teuchos::FancyOStream& out,
1044  const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
1045  {
1046  data_.describe (out, verbLevel);
1047  }
1048 
1059  void
1060  sumIntoGlobalValues (Teuchos::ArrayView<const global_ordinal_type> rows,
1061  size_t column,
1062  Teuchos::ArrayView<const scalar_type> values)
1063  {
1064  data_.sumIntoGlobalValues (rows, column, values);
1065  }
1066 
1082  void
1083  sumIntoGlobalValues (Teuchos::ArrayView<const global_ordinal_type> rows,
1084  Teuchos::ArrayView<const scalar_type> values)
1085  {
1086  data_.sumIntoGlobalValues (rows, values);
1087  }
1088 
1089  private:
1091  Teuchos::RCP<const map_type> ctorMap_;
1092 
1097  Teuchos::RCP<const map_type> sourceMap_;
1098 
1104  Teuchos::RCP<const map_type> targetMap_;
1105 
1118  Teuchos::RCP<MV> sourceVec_;
1119 
1125 
1127  Teuchos::RCP<export_type> exporter_;
1128 
1134  void locallyAssemble (MV& X_in) {
1135  data_.locallyAssemble (X_in);
1136  }
1137 
1139  Teuchos::Array<global_ordinal_type>
1140  getSourceIndices (const Teuchos::Comm<int>& comm,
1141  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
1142  const bool debug = false) const
1143  {
1144  return data_.getSourceIndices (comm, out, debug);
1145  }
1146 
1155  Teuchos::RCP<const map_type>
1156  computeSourceMap (const global_ordinal_type indexBase,
1157  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1158  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
1159  const bool debug = false);
1160  };
1161 
1162  template<class MV>
1163  MultiVectorFiller<MV>::MultiVectorFiller (const Teuchos::RCP<const typename MultiVectorFiller<MV>::map_type>& map, const size_t numCols)
1164  : ctorMap_ (map),
1165  sourceMap_ (Teuchos::null),
1166  targetMap_ (Teuchos::null),
1167  data_ (map, numCols),
1168  exporter_ (Teuchos::null)
1169  {}
1170 
1171  template<class MV>
1172  Teuchos::RCP<const typename MultiVectorFiller<MV>::map_type>
1174  computeSourceMap (const global_ordinal_type indexBase,
1175  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1176  const Teuchos::RCP<Teuchos::FancyOStream>& out,
1177  const bool debug)
1178  {
1179  using Teuchos::Array;
1180  using Teuchos::ArrayView;
1181  using Teuchos::rcp;
1182  typedef global_ordinal_type GO;
1183 
1184  if (debug && ! out.is_null ()) {
1185  out->pushTab ();
1186  std::ostringstream os;
1187  const int myRank = comm->getRank ();
1188  os << "Proc " << myRank << ": computeSourceMap" << std::endl;
1189  *out << os.str ();
1190  }
1191 
1192  Array<GO> indices = getSourceIndices (*comm, out, debug);
1193 
1194  if (debug && ! out.is_null ()) {
1195  std::ostringstream os;
1196  const int myRank = comm->getRank ();
1197  os << "Proc " << myRank << ": computeSourceMap: About to create Map"
1198  << std::endl;
1199  *out << os.str ();
1200  out->popTab ();
1201  }
1202 
1203  // Passing "invalid" for the numGlobalElements argument of the Map
1204  // constructor tells the Map to compute the global number of
1205  // elements itself.
1206  typedef Tpetra::global_size_t GST;
1207  const GST INV = Teuchos::OrdinalTraits<GST>::invalid ();
1208  return rcp (new map_type (INV, indices, indexBase, comm));
1209  }
1210 
1211  template<class MV>
1212  void
1214  globalAssemble (MV& X_out, const bool forceReuseMap)
1215  {
1216  using Teuchos::ArrayView;
1217  using Teuchos::Array;
1218  using Teuchos::Range1D;
1219  using Teuchos::RCP;
1220  using Teuchos::rcp;
1221  RCP<Teuchos::FancyOStream> outPtr;
1222  const bool debug = false;
1223 
1224  if (debug) {
1225  outPtr = Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr));
1226  outPtr->pushTab ();
1227  }
1228 
1229  const size_t numVecs = X_out.getNumVectors ();
1230  if (numVecs == 0) {
1231  // Nothing to do! Of course, this does not check for whether
1232  // X_out has the right number of rows. That's OK, though.
1233  // Compare to the definition of the BLAS' _AXPY for an input
1234  // vector containing NaNs, but multiplied by alpha=0.
1235  return;
1236  }
1237  //
1238  // Get the target Map of the Export. If X_out's Map is the same
1239  // as the target Map from last time, then we may be able to
1240  // recycle the Export from last time, if the source Map is also
1241  // the same.
1242  //
1243  RCP<const map_type> targetMap;
1244  bool assumeSameTargetMap = false;
1245  if (targetMap_.is_null()) {
1246  targetMap_ = X_out.getMap();
1247  targetMap = targetMap_;
1248  assumeSameTargetMap = false;
1249  }
1250  else {
1251  if (forceReuseMap) {
1252  targetMap = targetMap_;
1253  assumeSameTargetMap = true;
1254  }
1255  else {
1256  // If X_out's Map is the same as targetMap_, we may be able to
1257  // reuse the Export. Constructing the Export may be more
1258  // expensive than calling isSameAs() (which requires just a
1259  // few reductions and reading through the lists of owned
1260  // global indices), so it's worth checking.
1261  if (targetMap_->isSameAs (*(X_out.getMap()))) {
1262  assumeSameTargetMap = true;
1263  targetMap = targetMap_;
1264  }
1265  }
1266  }
1267 
1268  if (debug && ! outPtr.is_null ()) {
1269  std::ostringstream os;
1270  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1271  const int myRank = comm.getRank ();
1272  os << "Proc " << myRank << ": Right before getting source Map" << std::endl;
1273  *outPtr << os.str ();
1274  }
1275 
1276  //
1277  // Get the source Map of the Export. If the source Map of the
1278  // Export is the same as last time, then we may be able to recycle
1279  // the Export from last time, if the target Map is also the same.
1280  //
1281  RCP<const map_type> sourceMap;
1282  bool computedSourceMap = false;
1283  {
1284  if (forceReuseMap && ! sourceMap_.is_null()) {
1285  sourceMap = sourceMap_;
1286  }
1287  else {
1288  sourceMap = computeSourceMap (ctorMap_->getIndexBase (),
1289  ctorMap_->getComm (),
1290  outPtr, debug);
1291  computedSourceMap = true;
1292  }
1293  }
1294  if (computedSourceMap) {
1295  sourceMap_ = sourceMap;
1296  }
1297 
1298  if (debug && ! outPtr.is_null ()) {
1299  std::ostringstream os;
1300  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1301  const int myRank = comm.getRank ();
1302  os << "Proc " << myRank << ": Right before computing Export" << std::endl;
1303  *outPtr << os.str ();
1304  }
1305 
1306  //
1307  // Now that we have the source and target Maps of the Export, we
1308  // can check whether we can recycle the Export from last time.
1309  //
1310  const bool mustComputeExport =
1311  (exporter_.is_null() || (assumeSameTargetMap && ! computedSourceMap));
1312  if (mustComputeExport) {
1313  exporter_ = rcp (new export_type (sourceMap_, targetMap_));
1314  }
1315 
1316  if (debug && ! outPtr.is_null ()) {
1317  std::ostringstream os;
1318  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1319  const int myRank = comm.getRank ();
1320  os << "Proc " << myRank << ": Right after computing Export" << std::endl;
1321  *outPtr << os.str ();
1322  }
1323 
1324  // Source multivector for the Export.
1325  RCP<MV> X_in;
1326  const bool mustReallocateVec = sourceVec_.is_null() ||
1327  sourceVec_->getNumVectors() < numVecs || ! assumeSameTargetMap;
1328 
1329  if (mustReallocateVec) {
1330  // Reallocating nonlocalVec_ ensures that it has the right Map.
1331  sourceVec_ = rcp (new MV (sourceMap_, numVecs));
1332  X_in = sourceVec_;
1333  } else {
1334  if (sourceVec_->getNumVectors() == numVecs) {
1335  X_in = sourceVec_;
1336  } else { // sourceVec_ has more vectors than needed.
1337  X_in = sourceVec_->subViewNonConst (Range1D (0, numVecs-1));
1338  }
1339  }
1340 
1341  if (debug && ! outPtr.is_null ()) {
1342  std::ostringstream os;
1343  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1344  const int myRank = comm.getRank ();
1345  os << "Proc " << myRank << ": Right before locallyAssemble" << std::endl;
1346  *outPtr << os.str ();
1347  }
1348 
1349  // "Locally assemble" the data into X_in by summing together
1350  // entries with the same indices.
1351  locallyAssemble (*X_in);
1352 
1353  if (debug && ! outPtr.is_null ()) {
1354  std::ostringstream os;
1355  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1356  const int myRank = comm.getRank ();
1357  os << "Proc " << myRank << ": Right after locallyAssemble" << std::endl;
1358  *outPtr << os.str ();
1359  }
1360 
1361  // Do the Export.
1362  const Tpetra::CombineMode combineMode = Tpetra::ADD;
1363  X_out.doExport (*X_in, *exporter_, combineMode);
1364 
1365  if (debug && ! outPtr.is_null ()) {
1366  std::ostringstream os;
1367  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1368  const int myRank = comm.getRank ();
1369  os << "Proc " << myRank << ": Right after Export" << std::endl;
1370  *outPtr << os.str ();
1371  }
1372 
1373  // Clean out the locally owned data for next time.
1374  X_in->putScalar (Teuchos::ScalarTraits<scalar_type>::zero ());
1375 
1376  // FIXME (mfh 27 Aug 2015) Clean out the remote data for next
1377  // time. See Bug 6398.
1378 
1379  if (debug && ! outPtr.is_null ()) {
1380  std::ostringstream os;
1381  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1382  const int myRank = comm.getRank ();
1383  os << "Proc " << myRank << ": Done with globalAssemble" << std::endl;
1384  *outPtr << os.str ();
1385  outPtr->pushTab ();
1386  }
1387  }
1388 
1389  namespace Test {
1390 
1396  template<class MV>
1398  public:
1399  typedef typename MV::scalar_type scalar_type;
1400  typedef typename MV::local_ordinal_type local_ordinal_type;
1401  typedef typename MV::global_ordinal_type global_ordinal_type;
1402  typedef typename MV::node_type node_type;
1403  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
1404 
1413  static void
1414  testSameMap (const Teuchos::RCP<const map_type>& targetMap,
1415  const global_ordinal_type eltSize, // Must be odd
1416  const size_t numCols,
1417  const Teuchos::RCP<Teuchos::FancyOStream>& outStream=Teuchos::null,
1418  const Teuchos::EVerbosityLevel verbosityLevel=Teuchos::VERB_DEFAULT)
1419  {
1420  using Teuchos::Array;
1421  using Teuchos::ArrayRCP;
1422  using Teuchos::ArrayView;
1423  using Teuchos::as;
1424  using Teuchos::Comm;
1425  using Teuchos::FancyOStream;
1426  using Teuchos::getFancyOStream;
1427  using Teuchos::oblackholestream;
1428  using Teuchos::ptr;
1429  using Teuchos::RCP;
1430  using Teuchos::rcp;
1431  using Teuchos::rcpFromRef;
1432  using Teuchos::REDUCE_SUM;
1433  using Teuchos::reduceAll;
1434  using std::cerr;
1435  using std::endl;
1436 
1437  typedef local_ordinal_type LO;
1438  typedef global_ordinal_type GO;
1439  typedef scalar_type ST;
1440  typedef Teuchos::ScalarTraits<ST> STS;
1441 
1442  TEUCHOS_TEST_FOR_EXCEPTION(eltSize % 2 == 0, std::invalid_argument,
1443  "Element size (eltSize) argument must be odd.");
1444  TEUCHOS_TEST_FOR_EXCEPTION(numCols == 0, std::invalid_argument,
1445  "Number of columns (numCols) argument must be nonzero.");
1446  // Default behavior is to print nothing out.
1447  RCP<FancyOStream> out = outStream.is_null() ?
1448  getFancyOStream (rcp (new oblackholestream)) : outStream;
1449  const Teuchos::EVerbosityLevel verbLevel =
1450  (verbosityLevel == Teuchos::VERB_DEFAULT) ?
1451  Teuchos::VERB_NONE : verbosityLevel;
1452 
1453  //RCP<MV> X = rcp (new MV (targetMap, numCols));
1454 
1455  const GO indexBase = targetMap->getIndexBase();
1456  Array<GO> rows (eltSize);
1457  Array<ST> values (eltSize);
1458  std::fill (values.begin(), values.end(), STS::one());
1459 
1460  // Make this a pointer so we can free its contents, in case
1461  // those contents depend on the input to globalAssemble().
1462  RCP<MultiVectorFiller<MV> > filler =
1463  rcp (new MultiVectorFiller<MV> (targetMap, numCols));
1464 
1465  TEUCHOS_TEST_FOR_EXCEPTION(! targetMap->isContiguous(),
1466  std::invalid_argument, "MultiVectorFiller test currently only works "
1467  "for contiguous Maps.");
1468 
1469  const GO minGlobalIndex = targetMap->getMinGlobalIndex();
1470  const GO maxGlobalIndex = targetMap->getMaxGlobalIndex();
1471  const GO minAllGlobalIndex = targetMap->getMinAllGlobalIndex();
1472  const GO maxAllGlobalIndex = targetMap->getMaxAllGlobalIndex();
1473  for (size_t j = 0; j < numCols; ++j) {
1474  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1475  // Overlap over processes, without running out of bounds.
1476  const GO start = std::max (i - eltSize/2, minAllGlobalIndex);
1477  const GO end = std::min (i + eltSize/2, maxAllGlobalIndex);
1478  const GO len = end - start + 1;
1479 
1480  TEUCHOS_TEST_FOR_EXCEPTION(len > eltSize, std::logic_error,
1481  "At start,end = " << start << "," << end << ", len = " << len
1482  << " > eltSize = " << eltSize << ".");
1483 
1484  for (GO k = 0; k < len; ++k) {
1485  rows[k] = start + k;
1486  }
1487  if (verbLevel == Teuchos::VERB_EXTREME) {
1488  *out << "Inserting: "
1489  << Teuchos::toString (rows.view(0,len)) << ", "
1490  << Teuchos::toString (values.view(0, len)) << std::endl;
1491  }
1492  filler->sumIntoGlobalValues (rows.view(0, len), j, values.view(0, len));
1493  }
1494  }
1495 
1496  if (verbLevel == Teuchos::VERB_EXTREME) {
1497  *out << "Filler:" << std::endl;
1498  filler->describe (*out, verbLevel);
1499  *out << std::endl;
1500  }
1501 
1502  MV X_out (targetMap, numCols);
1503  filler->globalAssemble (X_out);
1504  filler = Teuchos::null;
1505 
1506  if (verbLevel == Teuchos::VERB_EXTREME) {
1507  *out << "X_out:" << std::endl;
1508  X_out.describe (*out, verbLevel);
1509  *out << std::endl;
1510  }
1511 
1512  // Create multivector for comparison.
1513  MV X_expected (targetMap, numCols);
1514  const scalar_type two = STS::one() + STS::one();
1515  for (size_t j = 0; j < numCols; ++j) {
1516  ArrayRCP<ST> X_j = X_expected.getDataNonConst (j);
1517 
1518  // Each entry of the column should have the value eltSize,
1519  // except for the first and last few entries in the whole
1520  // column (globally, not locally).
1521  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1522  const LO localIndex = targetMap->getLocalElement (i);
1523  TEUCHOS_TEST_FOR_EXCEPTION(i == Teuchos::OrdinalTraits<LO>::invalid(),
1524  std::logic_error, "Global index " << i << " is not in the "
1525  "multivector's Map.");
1526 
1527  if (i <= minAllGlobalIndex + eltSize/2) {
1528  X_j[localIndex] = two + as<ST>(i) - as<ST>(indexBase);
1529  }
1530  else if (i >= maxAllGlobalIndex - eltSize/2) {
1531  X_j[localIndex] = two + as<ST>(maxAllGlobalIndex) - as<ST>(i);
1532  }
1533  else {
1534  X_j[localIndex] = as<ST>(eltSize);
1535  }
1536  } // for each global index which my process owns
1537  } // for each column of the multivector
1538 
1539  if (verbLevel == Teuchos::VERB_EXTREME) {
1540  *out << "X_expected:" << std::endl;
1541  X_expected.describe (*out, verbLevel);
1542  *out << std::endl;
1543  }
1544 
1545  Array<GO> errorLocations;
1546  for (size_t j = 0; j < numCols; ++j) {
1547  ArrayRCP<const ST> X_out_j = X_out.getData (j);
1548  ArrayRCP<const ST> X_expected_j = X_expected.getData (j);
1549 
1550  // Each entry of the column should have the value eltSize,
1551  // except for the first and last few entries in the whole
1552  // column (globally, not locally).
1553  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1554  const LO localIndex = targetMap->getLocalElement (i);
1555  TEUCHOS_TEST_FOR_EXCEPTION(i == Teuchos::OrdinalTraits<LO>::invalid(),
1556  std::logic_error, "Global index " << i << " is not in the "
1557  "multivector's Map.");
1558 
1559  // The floating-point additions should be exact in this
1560  // case, except for very large values of eltSize.
1561  if (X_out_j[localIndex] != X_expected_j[localIndex]) {
1562  errorLocations.push_back (i);
1563  }
1564  } // for each global index which my process owns
1565 
1566  const typename Array<GO>::size_type localNumErrors = errorLocations.size();
1567  typename Array<GO>::size_type globalNumErrors = 0;
1568  RCP<const Comm<int> > comm = targetMap->getComm();
1569  reduceAll (*comm, REDUCE_SUM, localNumErrors, ptr (&globalNumErrors));
1570 
1571  if (globalNumErrors != 0) {
1572  std::ostringstream os;
1573  os << "Proc " << comm->getRank() << ": " << localNumErrors
1574  << " incorrect value" << (localNumErrors != 1 ? "s" : "")
1575  << ". Error locations: [ ";
1576  std::copy (errorLocations.begin(), errorLocations.end(),
1577  std::ostream_iterator<GO> (os, " "));
1578  os << "].";
1579  // Iterate through all processes in the communicator,
1580  // printing out each process' local errors.
1581  for (int p = 0; p < comm->getSize(); ++p) {
1582  if (p == comm->getRank()) {
1583  cerr << os.str() << endl;
1584  }
1585  // Barriers to let output finish.
1586  comm->barrier();
1587  comm->barrier();
1588  comm->barrier();
1589  }
1590  TEUCHOS_TEST_FOR_EXCEPTION(globalNumErrors != 0, std::logic_error,
1591  "Over all procs: " << globalNumErrors << " total error"
1592  << (globalNumErrors != 1 ? "s" : "") << ".");
1593  } // if there were any errors in column j
1594  } // for each column j
1595  }
1596  };
1597 
1599  template<class ScalarType,
1600  class LocalOrdinalType,
1601  class GlobalOrdinalType,
1602  class NodeType>
1603  void
1604  testMultiVectorFiller (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1605  const size_t unknownsPerNode,
1606  const GlobalOrdinalType unknownsPerElt,
1607  const size_t numCols,
1608  const Teuchos::RCP<Teuchos::FancyOStream>& outStream,
1609  const Teuchos::EVerbosityLevel verbLevel)
1610  {
1612  using Teuchos::FancyOStream;
1613  using Teuchos::getFancyOStream;
1614  using Teuchos::oblackholestream;
1615  using Teuchos::ParameterList;
1616  using Teuchos::RCP;
1617  using Teuchos::rcp;
1618  using std::cerr;
1619  using std::endl;
1620 
1621  typedef ScalarType ST;
1622  typedef LocalOrdinalType LO;
1623  typedef GlobalOrdinalType GO;
1624  typedef NodeType NT;
1625  typedef ::Tpetra::Map<LO, GO, NT> map_type;
1627  typedef Tpetra::global_size_t GST;
1628 
1629  RCP<FancyOStream> out = outStream.is_null() ?
1630  getFancyOStream (rcp (new oblackholestream)) : outStream;
1631  const GST INV = Teuchos::OrdinalTraits<GST>::invalid ();
1632  const GO indexBase = 0;
1633  RCP<const map_type> targetMap =
1634  rcp (new map_type (INV, unknownsPerNode, indexBase, comm));
1635 
1636  std::ostringstream os;
1637  try {
1638  MultiVectorFillerTester<MV>::testSameMap (targetMap, unknownsPerElt, numCols, out, verbLevel);
1639  } catch (std::exception& e) {
1640  os << e.what();
1641  }
1642 
1643  for (int p = 0; p < comm->getSize(); ++p) {
1644  if (p == comm->getRank()) {
1645  *out << "On process " << comm->getRank() << ": " << os.str() << endl;
1646  }
1647  comm->barrier();
1648  comm->barrier();
1649  comm->barrier();
1650  }
1651  }
1652 
1658  void
1659  testSortAndMergeIn ();
1660 
1661  } // namespace Test
1662 } // namespace Tpetra
1663 
1664 
1665 #endif // __Tpetra_MultiVectorFiller_hpp
void locallyAssemble(MV &X)
Do the "local" (not MPI) part of assembly.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
static void testSameMap(const Teuchos::RCP< const map_type > &targetMap, const global_ordinal_type eltSize, const size_t numCols, const Teuchos::RCP< Teuchos::FancyOStream > &outStream=Teuchos::null, const Teuchos::EVerbosityLevel verbosityLevel=Teuchos::VERB_DEFAULT)
Test global assembly when constructor Map = target Map.
Second implementation of fill and local assembly for MultiVectorFiller.
void clear()
Clear the contents of the vector, making it implicitly a vector of zeros.
MultiVectorFiller(const Teuchos::RCP< const map_type > &map, const size_t numCols)
Constructor.
void locallyAssemble(MV &X, BinaryFunction &f)
Locally assemble into X, with user-specified combine mode.
Teuchos::Array< global_ordinal_type > getSourceIndices(const Teuchos::Comm< int > &comm, const Teuchos::RCP< Teuchos::FancyOStream > &out=Teuchos::null, const bool debug=false) const
All source indices (local and nonlocal) of the source Map, sorted and unique.
One or more distributed dense vectors.
void locallyAssemble(MV &X)
Overload of locallyAssemble() for the usual ADD combine mode.
void sumIntoGlobalValues(const Teuchos::ArrayView< const global_ordinal_type > &rows, const Teuchos::ArrayView< const scalar_type > &values)
void sumIntoGlobalValues(Teuchos::ArrayView< const global_ordinal_type > rows, Teuchos::ArrayView< const scalar_type > values)
Sum data into the multivector.
Adds nonlocal sum-into functionality to Tpetra::MultiVector.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Implementation details of Tpetra.
MultiVectorFillerData(const Teuchos::RCP< const map_type > &map, const size_t numColumns)
Constructor.
void sumIntoGlobalValues(const Teuchos::ArrayView< const global_ordinal_type > &rows, const size_t columnIndex, const Teuchos::ArrayView< const scalar_type > &values)
Set entry (rows[i],columnIndex) to values[i], for all i.
void locallyAssemble(MV &X, BinaryFunction &f)
Locally assemble into X.
size_t global_size_t
Global size_t object.
void sumIntoGlobalValues(const Teuchos::ArrayView< const global_ordinal_type > &rows, const Teuchos::ArrayView< const scalar_type > &values)
Set entry (rows[i],j) to values[i], for all i and j.
Implementation of fill and local assembly for MultiVectorFiller.
MultiVectorFillerData2(const Teuchos::RCP< const map_type > &map, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT, const Teuchos::RCP< Teuchos::FancyOStream > &out=Teuchos::null)
Default constructor (sets number of columns to zero).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=defaultArgNode< Node >())
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map with a user-spec...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void globalAssemble(MV &X_out, const bool forceReuseMap=false)
Assemble all the data (local and nonlocal) into X_out.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
void clear()
Clear contents, in preparation for another fill cycle.
Describes a parallel distribution of objects over processes.
MultiVectorFillerData2(const Teuchos::RCP< const map_type > &map, const size_t numColumns, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT, const Teuchos::RCP< Teuchos::FancyOStream > &out=Teuchos::null)
Constructor.
MultiVectorFillerData(const Teuchos::RCP< const map_type > &map)
Default constructor (sets number of columns to zero).
void sumIntoGlobalValues(Teuchos::ArrayView< const global_ordinal_type > rows, size_t column, Teuchos::ArrayView< const scalar_type > values)
Sum data into the multivector.
void setNumColumns(const size_t newNumColumns)
Set the number of columns in the output multivector.
void setNumColumns(const size_t newNumColumns)
Set the number of columns in the output multivector.
void clear()
Clear the contents of the multivector.
Teuchos::Array< global_ordinal_type > getSourceIndices() const
All source indices (local and nonlocal) of the source Map, sorted and unique.