Tpetra parallel linear algebra  Version of the Day
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef __Tpetra_DirectoryImpl_def_hpp
43 #define __Tpetra_DirectoryImpl_def_hpp
44 
47 
49 #include <Tpetra_Distributor.hpp>
50 #include <Tpetra_Map.hpp>
51 #include <Tpetra_TieBreak.hpp>
52 
53 #include <Tpetra_Details_FixedHashTable.hpp>
54 #include <Tpetra_HashTable.hpp>
55 
56 
57 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
58 #ifdef HAVE_MPI
59 # include "mpi.h"
60 #endif // HAVE_MPI
61 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
62 
63 
64 namespace Tpetra {
65  namespace Details {
66  template<class LO, class GO, class NT>
69 
// getEntries: public entry point for looking up the owners of global IDs.
//
// Checks that nodeIDs (and localIDs, when computeLIDs is true) have the
// same size as globalIDs and throws std::invalid_argument otherwise, before
// any output is touched. It then pre-fills the outputs with "invalid"
// markers and forwards all five arguments to getEntriesImpl(), whose
// result it returns unchanged.
//
// NOTE(review): the return type and the class qualifier of this definition
// are not visible in this listing (the embedded numbering jumps from 70 to
// 73) -- confirm against the real header.
70  template<class LO, class GO, class NT>
73  getEntries (const map_type& map,
74  const Teuchos::ArrayView<const GO> &globalIDs,
75  const Teuchos::ArrayView<int> &nodeIDs,
76  const Teuchos::ArrayView<LO> &localIDs,
77  const bool computeLIDs) const
78  {
79  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
80  // all have the same size, before modifying any output arguments.
81  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
82  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
83  "Output arrays do not have the right sizes. nodeIDs.size() = "
84  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
85  << ".");
// localIDs is only size-checked when the caller asked for local IDs.
86  TEUCHOS_TEST_FOR_EXCEPTION(
87  computeLIDs && localIDs.size() != globalIDs.size(),
88  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
89  "Output array do not have the right sizes. localIDs.size() = "
90  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
91  << ".");
92 
93  // Initially, fill nodeIDs and localIDs (if applicable) with
94  // invalid values. The "invalid" process ID is -1 (this means
95  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
96  // "invalid" process ID); the invalid local ID comes from
97  // OrdinalTraits.
98  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
99  if (computeLIDs) {
100  std::fill (localIDs.begin(), localIDs.end(),
101  Teuchos::OrdinalTraits<LO>::invalid ());
102  }
103  // Actually do the work: the implementation only needs to overwrite
// the entries it can resolve; the rest keep the invalid markers above.
104  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
105  }
106 
107 
// Constructor fragment: initializes numProcs_ with the number of processes
// in the input Map's communicator.
// NOTE(review): the constructor's signature lines are missing from this
// listing (embedded numbering jumps 108 -> 111); presumably this is the
// ReplicatedDirectory (const map_type&) constructor -- confirm.
108  template<class LO, class GO, class NT>
111  numProcs_ (map.getComm ()->getSize ())
112  {}
113 
114 
// Constructor fragment: leaves numProcs_ at 0 as a placeholder.
// NOTE(review): the constructor's signature lines are missing from this
// listing (embedded numbering jumps 115 -> 118); presumably the default
// constructor of the same class as the fragment above -- confirm.
115  template<class LO, class GO, class NT>
118  numProcs_ (0) // to be set later
119  {}
120 
121 
// isOneToOne: returns true exactly when numProcs_ == 1.
// The `comm` argument is not read here; the answer depends only on the
// process count stored in numProcs_.
// NOTE(review): the class-qualifier line is missing from this listing
// (embedded numbering jumps 123 -> 125).
122  template<class LO, class GO, class NT>
123  bool
125  isOneToOne (const Teuchos::Comm<int>& comm) const
126  {
127  // A locally replicated Map is one-to-one only if there is no
128  // replication, that is, only if the Map's communicator only has
129  // one process.
130  return (numProcs_ == 1);
131  }
132 
133 
// description: builds and returns the string
// "ReplicatedDirectory<LO-name, GO-name, NT-name>", where each name comes
// from Teuchos::TypeNameTraits for the corresponding template parameter.
// NOTE(review): the qualified function-name line is missing from this
// listing (embedded numbering jumps 135 -> 137).
134  template<class LO, class GO, class NT>
135  std::string
137  {
138  std::ostringstream os;
139  os << "ReplicatedDirectory"
140  << "<" << Teuchos::TypeNameTraits<LO>::name ()
141  << ", " << Teuchos::TypeNameTraits<GO>::name ()
142  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
143  return os.str ();
144  }
145 
146 
// Constructor body: rejects, with std::invalid_argument, any Map for which
// isContiguous() or isUniform() returns false. Nothing else is done here.
// NOTE(review): the constructor's signature lines are missing from this
// listing (embedded numbering jumps 147 -> 150); presumably the
// ContiguousUniformDirectory (const map_type&) constructor -- confirm.
147  template<class LO, class GO, class NT>
150  {
151  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
152  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
153  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
154  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
155  }
156 
157 
// description: builds and returns the string
// "ContiguousUniformDirectory<LO-name, GO-name, NT-name>", where each name
// comes from Teuchos::TypeNameTraits for the corresponding template
// parameter.
// NOTE(review): the qualified function-name line is missing from this
// listing (embedded numbering jumps 159 -> 161).
158  template<class LO, class GO, class NT>
159  std::string
161  {
162  std::ostringstream os;
163  os << "ContiguousUniformDirectory"
164  << "<" << Teuchos::TypeNameTraits<LO>::name ()
165  << ", " << Teuchos::TypeNameTraits<GO>::name ()
166  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
167  return os.str ();
168  }
169 
170 
171  template<class LO, class GO, class NT>
175  const Teuchos::ArrayView<const GO> &globalIDs,
176  const Teuchos::ArrayView<int> &nodeIDs,
177  const Teuchos::ArrayView<LO> &localIDs,
178  const bool computeLIDs) const
179  {
180  using Teuchos::Comm;
181  using Teuchos::RCP;
182  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
183  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
185 
186  RCP<const Comm<int> > comm = map.getComm ();
187  const GO g_min = map.getMinAllGlobalIndex ();
188 
189  // Let N_G be the global number of elements in the Map,
190  // and P be the number of processes in its communicator.
191  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
192  //
193  // The first R processes own N_L+1 elements.
194  // The remaining P-R processes own N_L elements.
195  //
196  // Let g be the current GID, g_min be the global minimum GID,
197  // and g_0 = g - g_min. If g is a valid GID in this Map, then
198  // g_0 is in [0, N_G - 1].
199  //
200  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
201  // the rank of the process that owns g is floor(g_0 / (N_L +
202  // 1)), and its corresponding local index on that process is g_0
203  // mod (N_L + 1).
204  //
205  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
206  // and g_0 >= R*(N_L + 1), then the rank of the process that
207  // owns g is then R + floor(g_R / N_L), and its corresponding
208  // local index on that process is g_R mod N_L.
209 
210  const size_type N_G =
211  static_cast<size_type> (map.getGlobalNumElements ());
212  const size_type P = static_cast<size_type> (comm->getSize ());
213  const size_type N_L = N_G / P;
214  const size_type R = N_G - N_L * P; // N_G mod P
215  const size_type N_R = R * (N_L + static_cast<size_type> (1));
216 
217 #ifdef HAVE_TPETRA_DEBUG
218  TEUCHOS_TEST_FOR_EXCEPTION(
219  N_G != P*N_L + R, std::logic_error,
220  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
221  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
222  << " = " << P*N_L + R << ". "
223  "Please report this bug to the Tpetra developers.");
224 #endif // HAVE_TPETRA_DEBUG
225 
226  const size_type numGids = globalIDs.size (); // for const loop bound
227  // Avoid signed/unsigned comparisons below, in case GO is
228  // unsigned. (Integer literals are generally signed.)
229  const GO ONE = static_cast<GO> (1);
230 
231  if (computeLIDs) {
232  for (size_type k = 0; k < numGids; ++k) {
233  const GO g_0 = globalIDs[k] - g_min;
234 
235  // The first test is a little strange just in case GO is
236  // unsigned. Compilers raise a warning on tests like "x <
237  // 0" if x is unsigned, but don't usually raise a warning if
238  // the expression is a bit more complicated than that.
239  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
240  nodeIDs[k] = -1;
241  localIDs[k] = invalidLid;
242  res = IDNotPresent;
243  }
244  else if (g_0 < static_cast<GO> (N_R)) {
245  // The GID comes from the initial sequence of R processes.
246  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
247  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
248  }
249  else if (g_0 >= static_cast<GO> (N_R)) {
250  // The GID comes from the remaining P-R processes.
251  const GO g_R = g_0 - static_cast<GO> (N_R);
252  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
253  localIDs[k] = static_cast<int> (g_R % N_L);
254  }
255 #ifdef HAVE_TPETRA_DEBUG
256  else {
257  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
258  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
259  "should never get here. "
260  "Please report this bug to the Tpetra developers.");
261  }
262 #endif // HAVE_TPETRA_DEBUG
263  }
264  }
265  else { // don't compute local indices
266  for (size_type k = 0; k < numGids; ++k) {
267  const GO g_0 = globalIDs[k] - g_min;
268  // The first test is a little strange just in case GO is
269  // unsigned. Compilers raise a warning on tests like "x <
270  // 0" if x is unsigned, but don't usually raise a warning if
271  // the expression is a bit more complicated than that.
272  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
273  nodeIDs[k] = -1;
274  res = IDNotPresent;
275  }
276  else if (g_0 < static_cast<GO> (N_R)) {
277  // The GID comes from the initial sequence of R processes.
278  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
279  }
280  else if (g_0 >= static_cast<GO> (N_R)) {
281  // The GID comes from the remaining P-R processes.
282  const GO g_R = g_0 - static_cast<GO> (N_R);
283  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
284  }
285 #ifdef HAVE_TPETRA_DEBUG
286  else {
287  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
288  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
289  "should never get here. "
290  "Please report this bug to the Tpetra developers.");
291  }
292 #endif // HAVE_TPETRA_DEBUG
293  }
294  }
295  return res;
296  }
297 
298  template<class LO, class GO, class NT>
301  {
302  using Teuchos::arcp;
303  using Teuchos::gatherAll;
304  using Teuchos::RCP;
305 
306  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
307 
308  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
309  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
310  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
311  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
312 
313  const int numProcs = comm->getSize ();
314 
315  // Make room for the min global ID on each process, plus one
316  // entry at the end for the "max cap."
317  allMinGIDs_ = arcp<GO> (numProcs + 1);
318  // Get my process' min global ID.
319  GO minMyGID = map.getMinGlobalIndex ();
320  // Gather all of the min global IDs into the first numProcs
321  // entries of allMinGIDs_.
322 
323  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
324  //
325  // The purpose of this giant hack is that gatherAll appears to
326  // interpret the "receive count" argument differently than
327  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
328  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
329  // which could relate either to this, or to OpenMPI.
330 #ifdef HAVE_MPI
331  MPI_Datatype rawMpiType = MPI_INT;
332  bool useRawMpi = true;
333  if (typeid (GO) == typeid (int)) {
334  rawMpiType = MPI_INT;
335  } else if (typeid (GO) == typeid (long)) {
336  rawMpiType = MPI_LONG;
337  } else {
338  useRawMpi = false;
339  }
340  if (useRawMpi) {
341  using Teuchos::rcp_dynamic_cast;
342  using Teuchos::MpiComm;
343  RCP<const MpiComm<int> > mpiComm =
344  rcp_dynamic_cast<const MpiComm<int> > (comm);
345  // It could be a SerialComm instead, even in an MPI build, so
346  // be sure to check.
347  if (! comm.is_null ()) {
348  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
349  const int err =
350  MPI_Allgather (&minMyGID, 1, rawMpiType,
351  allMinGIDs_.getRawPtr (), 1, rawMpiType,
352  rawMpiComm);
353  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
354  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
355  } else {
356  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
357  }
358  } else {
359  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
360  }
361 #else // NOT HAVE_MPI
362  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
363 #endif // HAVE_MPI
364  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
365 
366  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
367 
368  // Put the max cap at the end. Adding one lets us write loops
369  // over the global IDs with the usual strict less-than bound.
370  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
371  + Teuchos::OrdinalTraits<GO>::one ();
372  }
373 
// description: builds and returns the string
// "DistributedContiguousDirectory<LO-name, GO-name, NT-name>", where each
// name comes from Teuchos::TypeNameTraits for the corresponding template
// parameter.
// NOTE(review): the qualified function-name line is missing from this
// listing (embedded numbering jumps 375 -> 377).
374  template<class LO, class GO, class NT>
375  std::string
377  {
378  std::ostringstream os;
379  os << "DistributedContiguousDirectory"
380  << "<" << Teuchos::TypeNameTraits<LO>::name ()
381  << ", " << Teuchos::TypeNameTraits<GO>::name ()
382  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
383  return os.str ();
384  }
385 
// getEntriesImpl for a Map that lives on one process or is locally
// replicated: every lookup is answered from the calling process' own Map.
//
// For each entry of globalIDs, if map.isNodeGlobalElement() accepts it,
// nodeIDs gets the calling process' rank and (when computeLIDs is true)
// localIDs gets map.getLocalElement() of that GID. Otherwise the output
// slots are skipped without being written, and res is set to IDNotPresent.
// Returns res.
//
// NOTE(review): the signature lines and the declaration / initial value of
// `res` are not visible in this listing (embedded numbering jumps
// 386 -> 390 and 401 -> 403). Skipped slots rely on the caller having
// pre-filled the outputs with invalid values -- getEntries() in this file
// does so before delegating.
386  template<class LO, class GO, class NT>
390  const Teuchos::ArrayView<const GO> &globalIDs,
391  const Teuchos::ArrayView<int> &nodeIDs,
392  const Teuchos::ArrayView<LO> &localIDs,
393  const bool computeLIDs) const
394  {
395  using Teuchos::Array;
396  using Teuchos::ArrayRCP;
397  using Teuchos::ArrayView;
398  using Teuchos::as;
399  using Teuchos::Comm;
400  using Teuchos::RCP;
401 
403  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
404  const int myRank = comm->getRank ();
405 
406  // Map is on one process or is locally replicated.
// procIter and lidIter walk the output arrays in lockstep with gidIter;
// lidIter is only advanced when computeLIDs is true.
407  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
408  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
409  typename ArrayView<const GO>::iterator gidIter;
410  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
411  if (map.isNodeGlobalElement (*gidIter)) {
412  *procIter++ = myRank;
413  if (computeLIDs) {
414  *lidIter++ = map.getLocalElement (*gidIter);
415  }
416  }
417  else {
418  // Advance the pointers, leaving these values set to invalid
419  procIter++;
420  if (computeLIDs) {
421  lidIter++;
422  }
423  res = IDNotPresent;
424  }
425  }
426  return res;
427  }
428 
// getEntriesImpl for a distributed, contiguous Map.
//
// For each entry of globalIDs, searches allMinGIDs_ for the rank r with
// allMinGIDs_[r] <= GID < allMinGIDs_[r + 1]. The search is a linear walk
// that starts from an initial guess and moves one rank up or down per
// step; it stops when the interval is found or when the candidate rank
// leaves [0, numProcs).
//
// Outputs:
//   nodeIDs  - the rank found, or -1 if none.
//   localIDs - only written when computeLIDs is true: GID minus the owning
//              rank's minimum GID, or OrdinalTraits<LO>::invalid().
// Returns res, which is set to IDNotPresent when any GID is not found.
//
// NOTE(review): the signature lines and the declaration / initial value of
// `res` are not visible in this listing (embedded numbering jumps
// 429 -> 433 and 448 -> 450).
429  template<class LO, class GO, class NT>
433  const Teuchos::ArrayView<const GO> &globalIDs,
434  const Teuchos::ArrayView<int> &nodeIDs,
435  const Teuchos::ArrayView<LO> &localIDs,
436  const bool computeLIDs) const
437  {
438  using Teuchos::Array;
439  using Teuchos::ArrayRCP;
440  using Teuchos::ArrayView;
441  using Teuchos::as;
442  using Teuchos::Comm;
443  using Teuchos::RCP;
444 
445  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
446  const int numProcs = comm->getSize ();
// Average number of elements per process; used only for the initial guess.
447  const global_size_t nOverP = map.getGlobalNumElements () / numProcs;
448  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
450 
451  // Map is distributed but contiguous.
452  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
453  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
454  typename ArrayView<const GO>::iterator gidIter;
455  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
456  LO LID = LINVALID; // Assume not found until proven otherwise
457  int image = -1;
458  GO GID = *gidIter;
459  // Guess uniform distribution and start a little above it
460  // TODO: replace by a binary search
461  int curRank;
462  { // We go through all this trouble to avoid overflow and
463  // signed / unsigned casting mistakes (that were made in
464  // previous versions of this code).
465  const GO one = as<GO> (1);
466  const GO two = as<GO> (2);
467  const GO nOverP_GID = as<GO> (nOverP);
// max(nOverP, 1) guards the division when nOverP is zero; the guess is
// computed from the GID itself and then capped at the last rank.
468  const GO lowerBound = GID / std::max(nOverP_GID, one) + two;
469  curRank = as<int>(std::min(lowerBound, as<GO>(numProcs - 1)));
470  }
471  bool found = false;
// Linear walk: step up while GID is at or past the next rank's minimum,
// step down while GID is below this rank's minimum.
472  while (curRank >= 0 && curRank < numProcs) {
473  if (allMinGIDs_[curRank] <= GID) {
474  if (GID < allMinGIDs_[curRank + 1]) {
475  found = true;
476  break;
477  }
478  else {
479  curRank++;
480  }
481  }
482  else {
483  curRank--;
484  }
485  }
486  if (found) {
487  image = curRank;
// Local index is the offset from the owning rank's minimum GID.
488  LID = as<LO> (GID - allMinGIDs_[image]);
489  }
490  else {
491  res = IDNotPresent;
492  }
493  *procIter++ = image;
494  if (computeLIDs) {
495  *lidIter++ = LID;
496  }
497  }
498  return res;
499  }
500 
// Constructor (Map only): sets the three status members to their starting
// values and then calls initialize() with a null tie-break pointer, which
// revises them.
// NOTE(review): the constructor's signature lines are missing from this
// listing (embedded numbering jumps 501 -> 504); the class name is not
// visible here -- confirm against the real header.
501  template<class LO, class GO, class NT>
504  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
505  locallyOneToOne_ (true), // to be revised below
506  useHashTables_ (false) // to be revised below
507  {
508  initialize (map, Teuchos::null);
509  }
510 
// Constructor (Map plus tie-break object): sets the three status members to
// their starting values and then calls initialize() with a non-owning
// pointer to the caller's tie_break object (Teuchos::ptrFromRef).
// NOTE(review): the first signature lines are missing from this listing
// (embedded numbering jumps 511 -> 514); the class name is not visible
// here -- confirm against the real header.
511  template<class LO, class GO, class NT>
514  const tie_break_type& tie_break) :
515  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
516  locallyOneToOne_ (true), // to be revised below
517  useHashTables_ (false) // to be revised below
518  {
519  initialize (map, Teuchos::ptrFromRef (tie_break));
520  }
521 
522  template<class LO, class GO, class NT>
523  void
525  initialize (const map_type& map,
526  Teuchos::Ptr<const tie_break_type> tie_break)
527  {
528  using Teuchos::arcp;
529  using Teuchos::Array;
530  using Teuchos::ArrayRCP;
531  using Teuchos::ArrayView;
532  using Teuchos::as;
533  using Teuchos::RCP;
534  using Teuchos::rcp;
535  using Teuchos::typeName;
536  using Teuchos::TypeNameTraits;
537  using std::cerr;
538  using std::endl;
539  typedef Array<int>::size_type size_type;
540 
541  // This class' implementation of getEntriesImpl() currently
542  // encodes the following assumptions:
543  //
544  // 1. global_size_t >= GO
545  // 2. global_size_t >= int
546  // 3. global_size_t >= LO
547  //
548  // We check these assumptions here.
549  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
550  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
551  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
552  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
553  << ".");
554  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
555  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
556  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
557  << sizeof(int) << ".");
558  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
559  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
560  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
561  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
562  << ".");
563 
564  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
565  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
566  const GO minAllGID = map.getMinAllGlobalIndex ();
567  const GO maxAllGID = map.getMaxAllGlobalIndex ();
568 
569  // The "Directory Map" (see below) will have a range of elements
570  // from the minimum to the maximum GID of the user Map, and a
571  // minimum GID of minAllGID from the user Map. It doesn't
572  // actually have to store all those entries, though do beware of
573  // calling getNodeElementList on it (see Bug 5822).
574  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
575 
576  // We can't afford to replicate the whole directory on each
577  // process, so create the "Directory Map", a uniform contiguous
578  // Map that describes how we will distribute the directory over
579  // processes.
580  //
581  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
582  // the index base. The index base should be separate from the
583  // minimum GID.
584  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
585  GloballyDistributed, map.getNode ()));
586  // The number of Directory elements that my process owns.
587  const size_t dir_numMyEntries = directoryMap_->getNodeNumElements ();
588 
589  // Fix for Bug 5822: If the input Map is "sparse," that is if
590  // the difference between the global min and global max GID is
591  // much larger than the global number of elements in the input
592  // Map, then it's possible that the Directory Map might have
593  // many more entries than the input Map on this process. This
594  // can cause memory scalability issues. In that case, we switch
595  // from the array-based implementation of Directory storage to
596  // the hash table - based implementation. We don't use hash
597  // tables all the time, because they are slower in the common
598  // case of a nonsparse Map.
599  //
600  // NOTE: This is a per-process decision. Some processes may use
601  // array-based storage, whereas others may use hash table -
602  // based storage.
603 
604  // A hash table takes a constant factor more space, more or
605  // less, than an array. Thus, it's not worthwhile, even in
606  // terms of memory usage, always to use a hash table.
607  // Furthermore, array lookups are faster than hash table
608  // lookups, so it may be worthwhile to use an array even if it
609  // takes more space. The "sparsity threshold" governs when to
610  // switch to a hash table - based implementation.
611  const size_t inverseSparsityThreshold = 10;
612  useHashTables_ =
613  dir_numMyEntries >= inverseSparsityThreshold * map.getNodeNumElements ();
614 
615  // Get list of process IDs that own the directory entries for the
616  // Map GIDs. These will be the targets of the sends that the
617  // Distributor will do.
618  const int myRank = comm->getRank ();
619  const size_t numMyEntries = map.getNodeNumElements ();
620  Array<int> sendImageIDs (numMyEntries);
621  ArrayView<const GO> myGlobalEntries = map.getNodeElementList ();
622  // An ID not present in this lookup indicates that it lies outside
623  // of the range [minAllGID,maxAllGID] (from map_). this means
624  // something is wrong with map_, our fault.
625  const LookupStatus lookupStatus =
626  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
627  TEUCHOS_TEST_FOR_EXCEPTION(
628  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
629  << " constructor: the Directory Map could not find out where one or "
630  "more of my Map's indices should go. The input to getRemoteIndexList "
631  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
632  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
633  "the following entries on the calling process " <<
634  map.getComm ()->getRank () << ": " <<
635  Teuchos::toString (map.getNodeElementList ()) << ", and has "
636  << map.getGlobalNumElements () << " total global indices in ["
637  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
638  << "]. The Directory Map has "
639  << directoryMap_->getGlobalNumElements () << " total global indices in "
640  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
641  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
642  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
643  directoryMap_->getMaxGlobalIndex () << "]. "
644  "This probably means there is a bug in Map or Directory. "
645  "Please report this bug to the Tpetra developers.");
646 
647  // Initialize the distributor using the list of process IDs to
648  // which to send. We'll use the distributor to send out triples
649  // of (GID, process ID, LID). We're sending the entries to the
650  // processes that the Directory Map says should own them, which is
651  // why we called directoryMap_->getRemoteIndexList() above.
652  Distributor distor (comm);
653  const size_t numReceives = distor.createFromSends (sendImageIDs);
654 
655  // NOTE (mfh 21 Mar 2012) The following code assumes that
656  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
657  //
658  // Create and fill buffer of (GID, PID, LID) triples to send
659  // out. We pack the (GID, PID, LID) triples into a single Array
660  // of GO, casting the PID from int to GO and the LID from LO to
661  // GO as we do so.
662  //
663  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
664  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
665  // required, and the latter is generally the case, but we should
666  // still check for this.
667  const int packetSize = 3; // We're sending triples, so packet size is 3.
668  Array<GO> exportEntries (packetSize * numMyEntries); // data to send out
669  {
670  size_type exportIndex = 0;
671  for (size_type i = 0; i < static_cast<size_type> (numMyEntries); ++i) {
672  exportEntries[exportIndex++] = myGlobalEntries[i];
673  exportEntries[exportIndex++] = as<GO> (myRank);
674  exportEntries[exportIndex++] = as<GO> (i);
675  }
676  }
677  // Buffer of data to receive. The Distributor figured out for
678  // us how many packets we're receiving, when we called its
679  // createFromSends() method to set up the distribution plan.
680  Array<GO> importElements (packetSize * distor.getTotalReceiveLength ());
681 
682  // Distribute the triples of (GID, process ID, LID).
683  distor.doPostsAndWaits (exportEntries ().getConst (), packetSize, importElements ());
684 
685  // Unpack the redistributed data. Both implementations of
686  // Directory storage map from an LID in the Directory Map (which
687  // is the LID of the GID to store) to either a PID or an LID in
688  // the input Map. Each "packet" (contiguous chunk of
689  // importElements) contains a triple: (GID, PID, LID).
690  if (useHashTables_) {
691  // Create the hash tables. We know exactly how many elements
692  // to expect in each hash table. FixedHashTable's constructor
693  // currently requires all the keys and values at once, so we
694  // have to extract them in temporary arrays. It may be
695  // possible to rewrite FixedHashTable to use a "start fill" /
696  // "end fill" approach that avoids the temporary arrays, but
697  // we won't try that for now.
698 
699  // The constructors of Array and ArrayRCP that take a number
700  // of elements all initialize the arrays. Instead, allocate
701  // raw arrays, then hand them off to ArrayRCP, to avoid the
702  // initial unnecessary initialization without losing the
703  // benefit of exception safety (and bounds checking, in a
704  // debug build).
705  LO* tableKeysRaw = NULL;
706  LO* tableLidsRaw = NULL;
707  int* tablePidsRaw = NULL;
708  try {
709  tableKeysRaw = new LO [numReceives];
710  tableLidsRaw = new LO [numReceives];
711  tablePidsRaw = new int [numReceives];
712  } catch (...) {
713  if (tableKeysRaw != NULL) {
714  delete [] tableKeysRaw;
715  }
716  if (tableLidsRaw != NULL) {
717  delete [] tableLidsRaw;
718  }
719  if (tablePidsRaw != NULL) {
720  delete [] tablePidsRaw;
721  }
722  throw;
723  }
724  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
725  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
726  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
727 
728  if (tie_break.is_null ()) {
729  // Fill the temporary arrays of keys and values.
730  size_type importIndex = 0;
731  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
732  const GO curGID = importElements[importIndex++];
733  const LO curLID = directoryMap_->getLocalElement (curGID);
734  TEUCHOS_TEST_FOR_EXCEPTION(
735  curLID == LINVALID, std::logic_error,
736  Teuchos::typeName(*this) << " constructor: Incoming global index "
737  << curGID << " does not have a corresponding local index in the "
738  "Directory Map. Please report this bug to the Tpetra developers.");
739  tableKeys[i] = curLID;
740  tablePids[i] = importElements[importIndex++];
741  tableLids[i] = importElements[importIndex++];
742  }
743  // Set up the hash tables. The hash tables' constructor
744  // detects whether there are duplicates, so that we can set
745  // locallyOneToOne_.
746  typedef Kokkos::Device<typename NT::execution_space,
747  typename NT::memory_space> DT;
748  lidToPidTable_ =
749  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
750  tablePids ()));
751  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
752  lidToLidTable_ =
753  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
754  tableLids ()));
755  }
756  else { // tie_break is NOT null
757 
758  // For each directory Map LID received, collect all the
759  // corresponding (PID,LID) pairs. If the input Map is not
760  // one-to-one, corresponding directory Map LIDs will have
761  // more than one pair. In that case, we will use the
762  // TieBreak object to pick exactly one pair.
763  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
764  pair_table_type ownedPidLidPairs;
765 
766  // For each directory Map LID received, collect the zero or
767  // more input Map (PID,LID) pairs into ownedPidLidPairs.
768  size_type importIndex = 0;
769  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
770  const GO curGID = importElements[importIndex++];
771  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
772  TEUCHOS_TEST_FOR_EXCEPTION(
773  dirMapLid == LINVALID, std::logic_error,
774  Teuchos::typeName(*this) << " constructor: Incoming global index "
775  << curGID << " does not have a corresponding local index in the "
776  "Directory Map. Please report this bug to the Tpetra developers.");
777  tableKeys[i] = dirMapLid;
778  const int PID = importElements[importIndex++];
779  const int LID = importElements[importIndex++];
780 
781  // These may change below. We fill them in just to ensure
782  // that they won't have invalid values.
783  tablePids[i] = PID;
784  tableLids[i] = LID;
785 
786  // For every directory Map LID, we have to remember all
787  // (PID, LID) pairs. The TieBreak object will arbitrate
788  // between them in the loop below.
789  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
790  }
791 
792  // Use TieBreak to arbitrate between (PID,LID) pairs
793  // corresponding to each directory Map LID.
794  //
795  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
796  // as the directory Map LID?
797  const size_type numPairs =
798  static_cast<size_type> (ownedPidLidPairs.size ());
799  for (size_type i = 0; i < numPairs; ++i) {
800  const LO dirMapLid = static_cast<LO> (i);
801  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
802  const std::vector<std::pair<int, LO> >& pidLidList =
803  ownedPidLidPairs[i];
804  const size_t listLen = pidLidList.size ();
805  if (listLen > 0) {
806  if (listLen > 1) {
807  locallyOneToOne_ = false;
808  }
809  // If there is some (PID,LID) pair for the current input
810  // Map LID, then it makes sense to invoke the TieBreak
811  // object to arbitrate between the options. Even if
812  // there is only one (PID,LID) pair, we still want to
813  // give the TieBreak object a chance to do whatever it
814  // likes to do, in terms of side effects (e.g., track
815  // (PID,LID) pairs).
816  const size_type index =
817  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
818  pidLidList));
819  tablePids[i] = pidLidList[index].first;
820  tableLids[i] = pidLidList[index].second;
821  }
822  }
823 
824  // Set up the hash tables.
825  typedef Kokkos::Device<typename NT::execution_space,
826  typename NT::memory_space> DT;
827  lidToPidTable_ =
828  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
829  tablePids ()));
830  lidToLidTable_ =
831  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
832  tableLids ()));
833  }
834  }
835  else {
836  if (tie_break.is_null ()) {
837  // Use array-based implementation of Directory storage.
838  // Allocate these arrays and fill them with invalid values,
839  // in case the input Map's GID list is sparse (i.e., does
840  // not populate all GIDs from minAllGID to maxAllGID).
841  PIDs_ = arcp<int> (dir_numMyEntries);
842  std::fill (PIDs_.begin (), PIDs_.end (), -1);
843  LIDs_ = arcp<LO> (dir_numMyEntries);
844  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
845  // Fill in the arrays with PIDs resp. LIDs.
846  size_type importIndex = 0;
847  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
848  const GO curGID = importElements[importIndex++];
849  const LO curLID = directoryMap_->getLocalElement (curGID);
850  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
851  Teuchos::typeName(*this) << " constructor: Incoming global index "
852  << curGID << " does not have a corresponding local index in the "
853  "Directory Map. Please report this bug to the Tpetra developers.");
854 
855  // If PIDs_[curLID] is not -1, then curGID is a duplicate
856  // on the calling process, so the Directory is not locally
857  // one-to-one.
858  if (PIDs_[curLID] != -1) {
859  locallyOneToOne_ = false;
860  }
861  PIDs_[curLID] = importElements[importIndex++];
862  LIDs_[curLID] = importElements[importIndex++];
863  }
864  }
865  else {
866  PIDs_ = arcp<int> (dir_numMyEntries);
867  LIDs_ = arcp<LO> (dir_numMyEntries);
868  std::fill (PIDs_.begin (), PIDs_.end (), -1);
869 
870  // All received (PID, LID) pairs go into ownedPidLidPairs.
871  // This is a map from the directory Map's LID to the (PID,
872  // LID) pair (where the latter LID comes from the input Map,
873  // not the directory Map). If the input Map is not
874  // one-to-one, corresponding LIDs will have
875  // ownedPidLidPairs[curLID].size() > 1. In that case, we
876  // will use the TieBreak object to pick exactly one pair.
877  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
878  size_type importIndex = 0;
879  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
880  const GO GID = importElements[importIndex++];
881  const int PID = importElements[importIndex++];
882  const LO LID = importElements[importIndex++];
883 
884  const LO dirMapLid = directoryMap_->getLocalElement (GID);
885  TEUCHOS_TEST_FOR_EXCEPTION(
886  dirMapLid == LINVALID, std::logic_error,
887  Teuchos::typeName(*this) << " constructor: Incoming global index "
888  << GID << " does not have a corresponding local index in the "
889  "Directory Map. Please report this bug to the Tpetra developers.");
890  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
891  }
892 
893  // Use TieBreak to arbitrate between (PID,LID) pairs
894  // corresponding to each directory Map LID.
895  //
896  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
897  // as the directory Map LID?
898  const size_type numPairs =
899  static_cast<size_type> (ownedPidLidPairs.size ());
900  for (size_type i = 0; i < numPairs; ++i) {
901  const LO dirMapLid = static_cast<LO> (i);
902  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
903  const std::vector<std::pair<int, LO> >& pidLidList =
904  ownedPidLidPairs[i];
905  const size_t listLen = pidLidList.size ();
906  if (listLen > 0) {
907  if (listLen > 1) {
908  locallyOneToOne_ = false;
909  }
910  // If there is some (PID,LID) pair for the current input
911  // Map LID, then it makes sense to invoke the TieBreak
912  // object to arbitrate between the options. Even if
913  // there is only one (PID,LID) pair, we still want to
914  // give the TieBreak object a chance to do whatever it
915  // likes to do, in terms of side effects (e.g., track
916  // (PID,LID) pairs).
917  const size_type index =
918  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
919  pidLidList));
920  PIDs_[i] = pidLidList[index].first;
921  LIDs_[i] = pidLidList[index].second;
922  }
923  // else no GID specified by source map
924  }
925  }
926  }
927  }
928 
// Returns a one-line description of the form
// "DistributedNoncontiguousDirectory<LO, GO, NT>", where the three template
// arguments are spelled using Teuchos::TypeNameTraits<...>::name ().
//
// NOTE(review): listing line 931 (the qualified function name) is absent
// from this extract; presumably it reads
// DistributedNoncontiguousDirectory<LO, GO, NT>::description () const.
// Confirm against the original header before compiling.
929  template<class LO, class GO, class NT>
930  std::string
932  {
933  std::ostringstream os;
934  os << "DistributedNoncontiguousDirectory"
935  << "<" << Teuchos::TypeNameTraits<LO>::name ()
936  << ", " << Teuchos::TypeNameTraits<GO>::name ()
937  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
938  return os.str ();
939  }
940 
// Find process IDs and (optionally) local IDs for the given global IDs.
//
// Outline of what the body does:
//   1. Ask directoryMap_ which process holds the directory entry for each
//      requested global index (getRemoteIndexList).
//   2. Build a Distributor plan from those requests (createFromRecvs).
//   3. Pack replies for the global indices this process was asked about,
//      as (GID, PID) pairs or (GID, PID, LID) triples.
//   4. Exchange the packets (doPostsAndWaits) and scatter the received
//      values back into nodeIDs / localIDs at the positions of the
//      matching entries of globalIDs.
//
// Arguments visible in this listing:
//   globalIDs   - global indices to look up (input).
//   nodeIDs     - output; nodeIDs[i] is set to -1 when directoryMap_ reports
//                 no process for globalIDs[i], otherwise to the received PID.
//   localIDs    - output; written only when computeLIDs is true (LINVALID
//                 for indices that directoryMap_ could not place).
//   computeLIDs - whether local indices are packed, sent and stored.
//
// Returns `res`: the status from getRemoteIndexList, overwritten with
// IDNotPresent if any received process ID equals -1.
//
// NOTE(review): listing lines 942-944 (return type, qualified name and,
// presumably, the leading `map` parameter used below) and line 962
// (presumably the declaration of `res`) are absent from this extract --
// confirm against the original header.
941  template<class LO, class GO, class NT>
945  const Teuchos::ArrayView<const GO> &globalIDs,
946  const Teuchos::ArrayView<int> &nodeIDs,
947  const Teuchos::ArrayView<LO> &localIDs,
948  const bool computeLIDs) const
949  {
950  using Teuchos::Array;
951  using Teuchos::ArrayRCP;
952  using Teuchos::ArrayView;
953  using Teuchos::as;
954  using Teuchos::RCP;
955  using std::cerr;
956  using std::endl;
957  typedef typename Array<GO>::size_type size_type;
958 
959  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
960  const size_t numEntries = globalIDs.size ();
961  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
963 
964  //
965  // Set up directory structure.
966  //
967 
968  // If we're computing LIDs, we also have to include them in each
969  // packet, along with the GID and process ID.
970  const int packetSize = computeLIDs ? 3 : 2;
971 
972  // For data distribution, we use: Surprise! A Distributor!
973  Distributor distor (comm);
974 
975  // Get directory locations for the requested list of entries.
976  Array<int> dirImages (numEntries);
977  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
978  // Check for unfound globalIDs and set corresponding nodeIDs to -1
979  size_t numMissing = 0;
980  if (res == IDNotPresent) {
981  for (size_t i=0; i < numEntries; ++i) {
982  if (dirImages[i] == -1) {
983  nodeIDs[i] = -1;
984  if (computeLIDs) {
985  localIDs[i] = LINVALID;
986  }
987  numMissing++;
988  }
989  }
990  }
991 
// createFromRecvs takes the requested GIDs and their directory processes
// and fills sendGIDs / sendImages (its two output arguments); sendGIDs is
// then the list of GIDs this process packs replies for below.
992  Array<GO> sendGIDs;
993  Array<int> sendImages;
994  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
995  const size_type numSends = sendGIDs.size ();
996 
997  //
998  // mfh 13 Nov 2012:
999  //
1000  // The code below temporarily stores LO, GO, and int values in
1001  // an array of global_size_t. If one of the signed types (LO
1002  // and GO should both be signed) happened to be -1 (or some
1003  // negative number, but -1 is the one that came up today), then
1004  // conversion to global_size_t will result in a huge
1005  // global_size_t value, and thus conversion back may overflow.
1006  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1007  // all along.)
1008  //
1009  // The overflow normally would not be a problem, since it would
1010  // just go back to -1 again. However, Teuchos::as does range
1011  // checking on conversions in a debug build, so it throws an
1012  // exception (std::range_error) in this case. Range checking is
1013  // generally useful in debug mode, so we don't want to disable
1014  // this behavior globally.
1015  //
1016  // We solve this problem by forgoing use of Teuchos::as for the
1017  // conversions below from LO, GO, or int to global_size_t, and
1018  // the later conversions back from global_size_t to LO, GO, or
1019  // int.
1020  //
1021  // I've recorded this discussion as Bug 5760.
1022  //
1023 
1024  // global_size_t >= GO
1025  // global_size_t >= size_t >= int
1026  // global_size_t >= size_t >= LO
1027  // Therefore, we can safely store all of these in a global_size_t
1028  Array<global_size_t> exports (packetSize * numSends);
1029  {
1030  // Packet format:
1031  // - If computing LIDs: (GID, PID, LID)
1032  // - Otherwise: (GID, PID)
1033  //
1034  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1035 
1036  // Current position to which to write in exports array. If
1037  // sending pairs, we pack the (GID, PID) pair for gid =
1038  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1039  // triples, we pack the (GID, PID, LID) pair for gid =
1040  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1041  size_type exportsIndex = 0;
1042 
// The two branches below pack the same packet layout; they differ only in
// where the PID / LID come from: the lidToPidTable_ / lidToLidTable_ hash
// tables, or the PIDs_ / LIDs_ arrays indexed by directory-Map local index.
1043  if (useHashTables_) {
1044  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1045  const GO curGID = sendGIDs[gidIndex];
1046  // Don't use as() here (see above note).
1047  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1048  const LO curLID = directoryMap_->getLocalElement (curGID);
1049  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
1050  Teuchos::typeName (*this) << "::getEntriesImpl(): The Directory "
1051  "Map's global index " << curGID << " does not have a corresponding "
1052  "local index. Please report this bug to the Tpetra developers.");
1053  // Don't use as() here (see above note).
1054  exports[exportsIndex++] = static_cast<global_size_t> (lidToPidTable_->get (curLID));
1055  if (computeLIDs) {
1056  // Don't use as() here (see above note).
1057  exports[exportsIndex++] = static_cast<global_size_t> (lidToLidTable_->get (curLID));
1058  }
1059  }
1060  } else {
1061  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1062  const GO curGID = sendGIDs[gidIndex];
1063  // Don't use as() here (see above note).
1064  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1065  const LO curLID = directoryMap_->getLocalElement (curGID);
1066  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
1067  Teuchos::typeName (*this) << "::getEntriesImpl(): The Directory "
1068  "Map's global index " << curGID << " does not have a corresponding "
1069  "local index. Please report this bug to the Tpetra developers.");
1070  // Don't use as() here (see above note).
1071  exports[exportsIndex++] = static_cast<global_size_t> (PIDs_[curLID]);
1072  if (computeLIDs) {
1073  // Don't use as() here (see above note).
1074  exports[exportsIndex++] = static_cast<global_size_t> (LIDs_[curLID]);
1075  }
1076  }
1077  }
1078 
// Sanity check performed after packing: the write cursor must not have
// passed the end of the exports array.
1079  TEUCHOS_TEST_FOR_EXCEPTION(
1080  exportsIndex > exports.size (), std::logic_error,
1081  Teuchos::typeName (*this) << "::getEntriesImpl(): On Process " <<
1082  comm->getRank () << ", exportsIndex = " << exportsIndex <<
1083  " > exports.size() = " << exports.size () <<
1084  ". Please report this bug to the Tpetra developers.");
1085  }
1086 
// Guards the unsigned subtraction numEntries - numMissing just below.
1087  TEUCHOS_TEST_FOR_EXCEPTION(
1088  numEntries < numMissing, std::logic_error,
1089  Teuchos::typeName (*this) << "::getEntriesImpl(): On Process "
1090  << comm->getRank () << ", numEntries = " << numEntries
1091  << " < numMissing = " << numMissing
1092  << ". Please report this bug to the Tpetra developers.");
1093 
1094  //
1095  // mfh 13 Nov 2012: See note above on conversions between
1096  // global_size_t and LO, GO, or int.
1097  //
1098  const size_t numRecv = numEntries - numMissing;
1099 
1100  {
1101  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1102  const size_t requiredImportLen = numRecv * packetSize;
1103  const int myRank = comm->getRank ();
1104  TEUCHOS_TEST_FOR_EXCEPTION(
1105  importLen < requiredImportLen, std::logic_error,
1106  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1107  "On Process " << myRank << ": The 'imports' array must have length "
1108  "at least " << requiredImportLen << ", but its actual length is " <<
1109  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1110  packetSize << ", numEntries (# GIDs): " << numEntries <<
1111  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1112  << distor.getTotalReceiveLength () << ". " << std::endl <<
1113  "Distributor description: " << distor.description () << ". "
1114  << std::endl <<
1115  "Please report this bug to the Tpetra developers.");
1116  }
1117 
1118  Array<global_size_t> imports (packetSize * distor.getTotalReceiveLength ());
1119  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1120  // with communication, by splitting this call into doPosts and
1121  // doWaits. The code is still correct in this form, however.
1122  distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ());
1123 
// Replies are matched back to request positions by GID: sort a copy of
// globalIDs and carry the original positions along in `offset`, so that
// offset[j] is the index in globalIDs of sortedIDs[j].
1124  Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1125  Array<GO> offset (numEntries); // permutation array (sort2 output)
1126  for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1127  offset[ii] = ii;
1128  }
1129  sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1130 
1131  size_t importsIndex = 0;
1132  //typename Array<global_size_t>::iterator ptr = imports.begin();
1133  typedef typename Array<GO>::iterator IT;
1134 
// NOTE(review): the PID (and LID) fields of a packet are consumed only when
// curGID is found in sortedIDs. If a received GID were ever absent, the
// cursor would stay inside that packet and later packets would be misread;
// presumably this cannot happen because replies only cover requested GIDs
// -- confirm.
// NOTE(review): only the first element of the equal range is used, so
// duplicate entries in globalIDs all resolve to the same output slot
// -- confirm this is intended.
1135  // we know these conversions are in range, because we loaded this data
1136  for (size_t i = 0; i < numRecv; ++i) {
1137  // Don't use as() here (see above note).
1138  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1139  std::pair<IT, IT> p1 = std::equal_range (sortedIDs.begin(), sortedIDs.end(), curGID);
1140  if (p1.first != p1.second) {
1141  const size_t j = p1.first - sortedIDs.begin();
1142  // Don't use as() here (see above note).
1143  nodeIDs[offset[j]] = static_cast<int> (imports[importsIndex++]);
1144  if (computeLIDs) {
1145  // Don't use as() here (see above note).
1146  localIDs[offset[j]] = static_cast<LO> (imports[importsIndex++]);
1147  }
1148  if (nodeIDs[offset[j]] == -1) {
1149  res = IDNotPresent;
1150  }
1151  }
1152  }
1153 
// Sanity check performed after unpacking: the read cursor must not have
// passed the end of the imports array.
1154  TEUCHOS_TEST_FOR_EXCEPTION(
1155  static_cast<size_t> (importsIndex) > static_cast<size_t> (imports.size ()),
1156  std::logic_error,
1157  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1158  "On Process " << comm->getRank () << ": importsIndex = " <<
1159  importsIndex << " > imports.size() = " << imports.size () << ". "
1160  "numRecv: " << numRecv << ", packetSize: " << packetSize << ", "
1161  "numEntries (# GIDs): " << numEntries << ", numMissing: " << numMissing
1162  << ": distor.getTotalReceiveLength(): "
1163  << distor.getTotalReceiveLength () << ". Please report this bug to "
1164  "the Tpetra developers.");
1165 
1166  return res;
1167  }
1168 
1169 
// Whether the Directory's input Map is (globally) one to one.
//
// The answer is computed once and stored in oneToOneResult_. While that
// member still holds ONE_TO_ONE_NOT_CALLED_YET, the local flag from
// isLocallyOneToOne () is combined across `comm` with a REDUCE_MIN
// reduceAll, so the global result is true only if every process reports 1.
// Later calls skip the reduction and return the stored result.
//
// NOTE(review): because the first call performs a reduceAll over `comm`,
// presumably all processes in `comm` must make that first call together
// -- confirm with callers.
// NOTE(review): listing line 1172 (the class qualifier for this member
// definition) is absent from this extract -- confirm against the header.
1170  template<class LO, class GO, class NT>
1171  bool
1173  isOneToOne (const Teuchos::Comm<int>& comm) const
1174  {
1175  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
// Encode the local answer as an int so that REDUCE_MIN acts as a logical AND.
1176  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1177  int gbl121 = 0;
1178  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1179  Teuchos::outArg (gbl121));
1180  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1181  }
1182  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1183  }
1184  } // namespace Details
1185 } // namespace Tpetra
1186 
1187 //
1188 // Explicit instantiation macro
1189 //
1190 // Must be expanded from within the Tpetra::Details namespace!
1191 //
// Expands to explicit instantiations of the five Directory implementation
// class templates listed below for one (LO, GO, NODE) combination.
// The last line of the macro ends with a continuation backslash, so the
// line that follows it is part of the macro and must stay empty.
1192 #define TPETRA_DIRECTORY_IMPL_INSTANT(LO,GO,NODE) \
1193  template class Directory< LO , GO , NODE >; \
1194  template class ReplicatedDirectory< LO , GO , NODE >; \
1195  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1196  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1197  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1198 
1199 #endif // __Tpetra_DirectoryImpl_def_hpp
Interface for breaking ties in ownership.
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
GlobalOrdinal getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
std::string description() const
A one-line human-readable description of this object.
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
Interface for breaking ties in ownership.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Implementation of Directory for a distributed noncontiguous Map.
Implementation of Directory for a distributed contiguous Map.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
ReplicatedDirectory()
Constructor (that takes no arguments).
Implementation details of Tpetra.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
std::string description() const
A one-line human-readable description of this object.
Sets up and executes a communication plan for a Tpetra DistObject.
std::string description() const
A one-line human-readable description of this object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const
A one-line human-readable description of this object.
GlobalOrdinal getMinGlobalIndex() const
The minimum global index owned by the calling process.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
Describes a parallel distribution of objects over processes.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
virtual bool isOneToOne(const Teuchos::Comm< int > &comm) const
Whether the Directory's input Map is (globally) one to one.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const
Return a one-line description of this object.
virtual bool isOneToOne(const Teuchos::Comm< int > &comm) const
Whether the Directory's input Map is (globally) one to one.
Declaration of implementation details of Tpetra::Directory.
Implementation of Directory for a contiguous, uniformly distributed Map.
bool isUniform() const
Whether the range of global indices is uniform.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.