Tpetra parallel linear algebra  Version of the Day
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
46 
47 #ifndef TPETRA_MAP_DEF_HPP
48 #define TPETRA_MAP_DEF_HPP
49 
50 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
51 #include "Tpetra_Details_FixedHashTable.hpp"
52 #include "Tpetra_Details_gathervPrint.hpp"
53 #include "Tpetra_Util.hpp"
54 #include "Teuchos_as.hpp"
55 #include <stdexcept>
56 
57 namespace Tpetra {
58  template <class LocalOrdinal, class GlobalOrdinal, class Node>
60  Map () :
61  comm_ (new Teuchos::SerialComm<int> ()),
62  node_ (defaultArgNode<Node> ()),
63  indexBase_ (0),
64  numGlobalElements_ (0),
65  numLocalElements_ (0),
66  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
67  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
68  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
69  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
70  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
71  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
72  uniform_ (false), // trivially
73  contiguous_ (false),
74  distributed_ (false), // no communicator yet
75  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
76  {}
77 
78  template <class LocalOrdinal, class GlobalOrdinal, class Node>
80  Map (global_size_t numGlobalElements,
81  GlobalOrdinal indexBase,
82  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
83  LocalGlobal lOrG,
84  const Teuchos::RCP<Node> &node) :
85  comm_ (comm),
86  node_ (node),
87  uniform_ (true),
88  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
89  {
90  using Teuchos::as;
91  using Teuchos::broadcast;
92  using Teuchos::outArg;
93  using Teuchos::reduceAll;
94  using Teuchos::REDUCE_MIN;
95  using Teuchos::REDUCE_MAX;
96  using Teuchos::typeName;
97  typedef GlobalOrdinal GO;
98  typedef global_size_t GST;
100 
101  // Make sure that Kokkos has been initialized (Github Issue #513).
102  TEUCHOS_TEST_FOR_EXCEPTION
103  (! execution_space::is_initialized (), std::runtime_error,
104  "Tpetra::Map constructor: The Kokkos execution space has not been "
105  "initialized. Please initialize it before creating a Map.")
106 
107 #ifdef HAVE_TPETRA_DEBUG
108  // In debug mode only, check whether numGlobalElements and
109  // indexBase are the same over all processes in the communicator.
110  {
111  GST proc0NumGlobalElements = numGlobalElements;
112  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
113  GST minNumGlobalElements = numGlobalElements;
114  GST maxNumGlobalElements = numGlobalElements;
115  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements, outArg (minNumGlobalElements));
116  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements, outArg (maxNumGlobalElements));
117  TEUCHOS_TEST_FOR_EXCEPTION(
118  minNumGlobalElements != maxNumGlobalElements || numGlobalElements != minNumGlobalElements,
119  std::invalid_argument,
120  "Tpetra::Map constructor: All processes must provide the same number "
121  "of global elements. Process 0 set numGlobalElements = "
122  << proc0NumGlobalElements << ". The calling process "
123  << comm->getRank () << " set numGlobalElements = " << numGlobalElements
124  << ". The min and max values over all processes are "
125  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
126 
127  GO proc0IndexBase = indexBase;
128  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
129  GO minIndexBase = indexBase;
130  GO maxIndexBase = indexBase;
131  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
132  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
133  TEUCHOS_TEST_FOR_EXCEPTION(
134  minIndexBase != maxIndexBase || indexBase != minIndexBase,
135  std::invalid_argument,
136  "Tpetra::Map constructor: "
137  "All processes must provide the same indexBase argument. "
138  "Process 0 set indexBase = " << proc0IndexBase << ". The calling "
139  "process " << comm->getRank () << " set indexBase = " << indexBase
140  << ". The min and max values over all processes are "
141  << minIndexBase << " resp. " << maxIndexBase << ".");
142  }
143 #endif // HAVE_TPETRA_DEBUG
144 
145  // Distribute the elements across the processes in the given
146  // communicator so that global IDs (GIDs) are
147  //
148  // - Nonoverlapping (only one process owns each GID)
149  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
150  // adjacent GIDs differ by more than one)
151  // - As evenly distributed as possible (the numbers of GIDs on two
152  // different processes do not differ by more than one)
153 
154  // All processes have the same numGlobalElements, but we still
155  // need to check that it is valid. numGlobalElements must be
156  // positive and not the "invalid" value (GSTI).
157  //
158  // This comparison looks funny, but it avoids compiler warnings
159  // for comparing unsigned integers (numGlobalElements_in is a
160  // GST, which is unsigned) while still working if we
161  // later decide to make GST signed.
162  TEUCHOS_TEST_FOR_EXCEPTION(
163  (numGlobalElements < 1 && numGlobalElements != 0),
164  std::invalid_argument,
165  "Tpetra::Map constructor: numGlobalElements (= "
166  << numGlobalElements << ") must be nonnegative.");
167 
168  TEUCHOS_TEST_FOR_EXCEPTION(
169  numGlobalElements == GSTI, std::invalid_argument,
170  "Tpetra::Map constructor: You provided numGlobalElements = Teuchos::"
171  "OrdinalTraits<Tpetra::global_size_t>::invalid(). This version of the "
172  "constructor requires a valid value of numGlobalElements. You "
173  "probably mistook this constructor for the \"contiguous nonuniform\" "
174  "constructor, which can compute the global number of elements for you "
175  "if you set numGlobalElements to that value.");
176 
177  size_t numLocalElements = 0; // will set below
178  if (lOrG == GloballyDistributed) {
179  // Compute numLocalElements:
180  //
181  // If numGlobalElements == numProcs * B + remainder,
182  // then Proc r gets B+1 elements if r < remainder,
183  // and B elements if r >= remainder.
184  //
185  // This strategy is valid for any value of numGlobalElements and
186  // numProcs, including the following border cases:
187  // - numProcs == 1
188  // - numLocalElements < numProcs
189  //
190  // In the former case, remainder == 0 && numGlobalElements ==
191  // numLocalElements. In the latter case, remainder ==
192  // numGlobalElements && numLocalElements is either 0 or 1.
193  const GST numProcs = static_cast<GST> (comm_->getSize ());
194  const GST myRank = static_cast<GST> (comm_->getRank ());
195  const GST quotient = numGlobalElements / numProcs;
196  const GST remainder = numGlobalElements - quotient * numProcs;
197 
198  GO startIndex;
199  if (myRank < remainder) {
200  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
201  // myRank was originally an int, so it should never overflow
202  // reasonable GO types.
203  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
204  } else {
205  numLocalElements = as<size_t> (quotient);
206  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
207  as<GO> (remainder);
208  }
209 
210  minMyGID_ = indexBase + startIndex;
211  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
212  minAllGID_ = indexBase;
213  maxAllGID_ = indexBase + numGlobalElements - 1;
214  distributed_ = (numProcs > 1);
215  }
216  else { // lOrG == LocallyReplicated
217  numLocalElements = as<size_t> (numGlobalElements);
218  minMyGID_ = indexBase;
219  maxMyGID_ = indexBase + numGlobalElements - 1;
220  distributed_ = false;
221  }
222 
223  minAllGID_ = indexBase;
224  maxAllGID_ = indexBase + numGlobalElements - 1;
225  indexBase_ = indexBase;
226  numGlobalElements_ = numGlobalElements;
227  numLocalElements_ = numLocalElements;
228  firstContiguousGID_ = minMyGID_;
229  lastContiguousGID_ = maxMyGID_;
230  contiguous_ = true;
231 
232  // Create the Directory on demand in getRemoteIndexList().
233  //setupDirectory ();
234  }
235 
236  template <class LocalOrdinal, class GlobalOrdinal, class Node>
238  Map (global_size_t numGlobalElements,
239  size_t numLocalElements,
240  GlobalOrdinal indexBase,
241  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
242  const Teuchos::RCP<Node> &node) :
243  comm_ (comm),
244  node_ (node),
245  uniform_ (false),
246  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
247  {
248  using Teuchos::as;
249  using Teuchos::broadcast;
250  using Teuchos::outArg;
251  using Teuchos::reduceAll;
252  using Teuchos::REDUCE_MIN;
253  using Teuchos::REDUCE_MAX;
254  using Teuchos::REDUCE_SUM;
255  using Teuchos::scan;
256  typedef GlobalOrdinal GO;
257  typedef global_size_t GST;
259 
260  // Make sure that Kokkos has been initialized (Github Issue #513).
261  TEUCHOS_TEST_FOR_EXCEPTION
262  (! execution_space::is_initialized (), std::runtime_error,
263  "Tpetra::Map constructor: The Kokkos execution space has not been "
264  "initialized. Please initialize it before creating a Map.")
265 
266 #ifdef HAVE_TPETRA_DEBUG
267  // Global sum of numLocalElements over all processes.
268  // Keep this for later debug checks.
269  const GST debugGlobalSum =
270  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
271  indexBase, comm);
272 #endif // HAVE_TPETRA_DEBUG
273 
274  // Distribute the elements across the nodes so that they are
275  // - non-overlapping
276  // - contiguous
277 
278  // This differs from the first Map constructor (that only takes a
279  // global number of elements) in that the user has specified the
280  // number of local elements, so that the elements are not
281  // (necessarily) evenly distributed over the processes.
282 
283  // Compute my local offset. This is an inclusive scan, so to get
284  // the final offset, we subtract off the input.
285  GO scanResult = 0;
286  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
287  const GO myOffset = scanResult - numLocalElements;
288 
289  if (numGlobalElements != GSTI) {
290  numGlobalElements_ = numGlobalElements; // Use the user's value.
291  } else {
292  // Inclusive scan means that the last process has the final sum.
293  // Rather than doing a reduceAll to get the sum of
294  // numLocalElements, we can just have the last process broadcast
295  // its result. That saves us a round of log(numProcs) messages.
296  const int numProcs = comm->getSize ();
297  GST globalSum = scanResult;
298  if (numProcs > 1) {
299  broadcast (*comm, numProcs - 1, outArg (globalSum));
300  }
301  numGlobalElements_ = globalSum;
302 
303 #ifdef HAVE_TPETRA_DEBUG
304  // No need for an all-reduce here; both come from collectives.
305  TEUCHOS_TEST_FOR_EXCEPTION(
306  globalSum != debugGlobalSum, std::logic_error,
307  "Tpetra::Map constructor (contiguous nonuniform): "
308  "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum
309  << ". Please report this bug to the Tpetra developers.");
310 #endif // HAVE_TPETRA_DEBUG
311  }
312  numLocalElements_ = numLocalElements;
313  indexBase_ = indexBase;
314  minAllGID_ = indexBase;
315  // numGlobalElements might be GSTI; use numGlobalElements_;
316  maxAllGID_ = indexBase + numGlobalElements_ - 1;
317  minMyGID_ = indexBase + myOffset;
318  maxMyGID_ = indexBase + myOffset + numLocalElements - 1;
319  firstContiguousGID_ = minMyGID_;
320  lastContiguousGID_ = maxMyGID_;
321  contiguous_ = true;
322  distributed_ = checkIsDist ();
323 
324  // Create the Directory on demand in getRemoteIndexList().
325  //setupDirectory ();
326  }
327 
328  template <class LocalOrdinal, class GlobalOrdinal, class Node>
331  initialNonuniformDebugCheck (const global_size_t numGlobalElements,
332  const size_t numLocalElements,
333  const GlobalOrdinal indexBase,
334  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) const
335  {
336 #ifdef HAVE_TPETRA_DEBUG
337  using Teuchos::broadcast;
338  using Teuchos::outArg;
339  using Teuchos::ptr;
340  using Teuchos::REDUCE_MAX;
341  using Teuchos::REDUCE_MIN;
342  using Teuchos::REDUCE_SUM;
343  using Teuchos::reduceAll;
344  typedef GlobalOrdinal GO;
345  typedef global_size_t GST;
347 
348  // The user has specified the distribution of indices over the
349  // processes. The distribution is not necessarily contiguous or
350  // equally shared over the processes.
351  //
352  // We assume that the number of local elements can be stored in a
353  // size_t. The instance member numLocalElements_ is a size_t, so
354  // this variable and that should have the same type.
355 
356  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
357  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
358  outArg (debugGlobalSum));
359  // In debug mode only, check whether numGlobalElements and
360  // indexBase are the same over all processes in the communicator.
361  {
362  GST proc0NumGlobalElements = numGlobalElements;
363  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
364  GST minNumGlobalElements = numGlobalElements;
365  GST maxNumGlobalElements = numGlobalElements;
366  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
367  outArg (minNumGlobalElements));
368  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
369  outArg (maxNumGlobalElements));
370  TEUCHOS_TEST_FOR_EXCEPTION(
371  minNumGlobalElements != maxNumGlobalElements ||
372  numGlobalElements != minNumGlobalElements,
373  std::invalid_argument,
374  "Tpetra::Map constructor: All processes must provide the same number "
375  "of global elements. This is true even if that argument is Teuchos::"
376  "OrdinalTraits<global_size_t>::invalid() to signal that the Map should "
377  "compute the global number of elements. Process 0 set numGlobalElements"
378  " = " << proc0NumGlobalElements << ". The calling process "
379  << comm->getRank () << " set numGlobalElements = " << numGlobalElements
380  << ". The min and max values over all processes are "
381  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
382 
383  GO proc0IndexBase = indexBase;
384  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
385  GO minIndexBase = indexBase;
386  GO maxIndexBase = indexBase;
387  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
388  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
389  TEUCHOS_TEST_FOR_EXCEPTION(
390  minIndexBase != maxIndexBase || indexBase != minIndexBase,
391  std::invalid_argument,
392  "Tpetra::Map constructor: "
393  "All processes must provide the same indexBase argument. "
394  "Process 0 set indexBase = " << proc0IndexBase << ". The calling "
395  "process " << comm->getRank () << " set indexBase = " << indexBase
396  << ". The min and max values over all processes are "
397  << minIndexBase << " resp. " << maxIndexBase << ".");
398 
399  // Make sure that the sum of numLocalElements over all processes
400  // equals numGlobalElements.
401  TEUCHOS_TEST_FOR_EXCEPTION
402  (numGlobalElements != GSTI && debugGlobalSum != numGlobalElements,
403  std::invalid_argument, "Tpetra::Map constructor: The sum of each "
404  "process' number of indices over all processes, " << debugGlobalSum
405  << " != numGlobalElements = " << numGlobalElements << ". If you "
406  "would like this constructor to compute numGlobalElements for you, "
407  "you may set numGlobalElements = "
408  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() on input. "
409  "Please note that this is NOT necessarily -1.");
410  }
411 
412  return debugGlobalSum;
413 #else
414  return static_cast<global_size_t> (0);
415 #endif // HAVE_TPETRA_DEBUG
416  }
417 
418  template <class LocalOrdinal, class GlobalOrdinal, class Node>
419  void
420  Map<LocalOrdinal,GlobalOrdinal,Node>::
421  initWithNonownedHostIndexList (const global_size_t numGlobalElements,
422  const Kokkos::View<const GlobalOrdinal*,
423  Kokkos::LayoutLeft,
424  Kokkos::HostSpace,
425  Kokkos::MemoryUnmanaged>& entryList_host,
426  const GlobalOrdinal indexBase,
427  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
428  {
429  using Kokkos::LayoutLeft;
430  using Kokkos::subview;
431  using Kokkos::View;
432  using Teuchos::as;
433  using Teuchos::broadcast;
434  using Teuchos::outArg;
435  using Teuchos::ptr;
436  using Teuchos::REDUCE_MAX;
437  using Teuchos::REDUCE_MIN;
438  using Teuchos::REDUCE_SUM;
439  using Teuchos::reduceAll;
440  typedef LocalOrdinal LO;
441  typedef GlobalOrdinal GO;
442  typedef global_size_t GST;
444 
445  // Make sure that Kokkos has been initialized (Github Issue #513).
446  TEUCHOS_TEST_FOR_EXCEPTION
447  (! execution_space::is_initialized (), std::runtime_error,
448  "Tpetra::Map constructor: The Kokkos execution space has not been "
449  "initialized. Please initialize it before creating a Map.")
450 
451  // The user has specified the distribution of indices over the
452  // processes, via the input array of global indices on each
453  // process. The distribution is not necessarily contiguous or
454  // equally shared over the processes.
455 
456  // The length of the input array on this process is the number of
457  // local indices to associate with this process, even though the
458  // input array contains global indices. We assume that the number
459  // of local indices on a process can be stored in a size_t;
460  // numLocalElements_ is a size_t, so this variable and that should
461  // have the same type.
462  const size_t numLocalElements = static_cast<size_t> (entryList_host.size ());
463 
464  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
465  indexBase, comm);
466 
467  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
468  // reduction is redundant, since the directory Map will have to do
469  // the same thing. Thus, we could do the scan and broadcast for
470  // the directory Map here, and give the computed offsets to the
471  // directory Map's constructor. However, a reduction costs less
472  // than a scan and broadcast, so this still saves time if users of
473  // this Map don't ever need the Directory (i.e., if they never
474  // call getRemoteIndexList on this Map).
475  if (numGlobalElements != GSTI) {
476  numGlobalElements_ = numGlobalElements; // Use the user's value.
477  }
478  else { // The user wants us to compute the sum.
479  reduceAll<int, GST> (*comm, REDUCE_SUM,
480  static_cast<GST> (numLocalElements),
481  outArg (numGlobalElements_));
482  }
483 
484  // mfh 20 Feb 2013: We've never quite done the right thing for
485  // duplicate GIDs here. Duplicate GIDs have always been counted
486  // distinctly in numLocalElements_, and thus should get a
487  // different LID. However, we've always used std::map or a hash
488  // table for the GID -> LID lookup table, so distinct GIDs always
489  // map to the same LID. Furthermore, the order of the input GID
490  // list matters, so it's not desirable to sort for determining
491  // uniqueness.
492  //
493  // I've chosen for now to write this code as if the input GID list
494  // contains no duplicates. If this is not desired, we could use
495  // the lookup table itself to determine uniqueness: If we haven't
496  // seen the GID before, it gets a new LID and it's added to the
497  // LID -> GID and GID -> LID tables. If we have seen the GID
498  // before, it doesn't get added to either table. I would
499  // implement this, but it would cost more to do the double lookups
500  // in the table (one to check, and one to insert).
501  //
502  // More importantly, since we build the GID -> LID table in (a
503  // thread-) parallel (way), the order in which duplicate GIDs may
504  // get inserted is not defined. This would make the assignment of
505  // LID to GID nondeterministic.
506 
507  numLocalElements_ = numLocalElements;
508  indexBase_ = indexBase;
509 
510  minMyGID_ = indexBase_;
511  maxMyGID_ = indexBase_;
512 
513  // NOTE (mfh 27 May 2015): While finding the initial contiguous
514  // GID range requires looking at all the GIDs in the range,
515  // dismissing an interval of GIDs only requires looking at the
516  // first and last GIDs. Thus, we could do binary search backwards
517  // from the end in order to catch the common case of a contiguous
518  // interval followed by noncontiguous entries. On the other hand,
519  // we could just expose this case explicitly as yet another Map
520  // constructor, and avoid the trouble of detecting it.
521  if (numLocalElements_ > 0) {
522  // Find contiguous GID range, with the restriction that the
523  // beginning of the range starts with the first entry. While
524  // doing so, fill in the LID -> GID table.
525  View<GO*, LayoutLeft, device_type> lgMap ("lgMap", numLocalElements_);
526  auto lgMap_host = Kokkos::create_mirror_view (lgMap);
527 
528  // The input array entryList_host is already on host, so we
529  // don't need to take a host view of it.
530  // auto entryList_host = Kokkos::create_mirror_view (entryList);
531  // Kokkos::deep_copy (entryList_host, entryList);
532 
533  firstContiguousGID_ = entryList_host[0];
534  lastContiguousGID_ = firstContiguousGID_+1;
535 
536  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
537  // anyway, so we have to look at them all. The logical way to
538  // find the first noncontiguous entry would thus be to "reduce,"
539  // where the local reduction result is whether entryList[i] + 1
540  // == entryList[i+1].
541 
542  lgMap_host[0] = firstContiguousGID_;
543  size_t i = 1;
544  for ( ; i < numLocalElements_; ++i) {
545  const GO curGid = entryList_host[i];
546  const LO curLid = as<LO> (i);
547 
548  if (lastContiguousGID_ != curGid) break;
549 
550  // Add the entry to the LID->GID table only after we know that
551  // the current GID is in the initial contiguous sequence, so
552  // that we don't repeat adding it in the first iteration of
553  // the loop below over the remaining noncontiguous GIDs.
554  lgMap_host[curLid] = curGid;
555  ++lastContiguousGID_;
556  }
557  --lastContiguousGID_;
558 
559  // [firstContiguousGID_, lastContigousGID_] is the initial
560  // sequence of contiguous GIDs. We can start the min and max
561  // GID using this range.
562  minMyGID_ = firstContiguousGID_;
563  maxMyGID_ = lastContiguousGID_;
564 
565  // Compute the GID -> LID lookup table, _not_ including the
566  // initial sequence of contiguous GIDs.
567  {
568  const std::pair<size_t, size_t> ncRange (i, entryList_host.dimension_0 ());
569  auto nonContigGids_host = subview (entryList_host, ncRange);
570  TEUCHOS_TEST_FOR_EXCEPTION
571  (static_cast<size_t> (nonContigGids_host.dimension_0 ()) !=
572  static_cast<size_t> (entryList_host.dimension_0 () - i),
573  std::logic_error, "Tpetra::Map noncontiguous constructor: "
574  "nonContigGids_host.dimension_0() = "
575  << nonContigGids_host.dimension_0 ()
576  << " != entryList_host.dimension_0() - i = "
577  << (entryList_host.dimension_0 () - i) << " = "
578  << entryList_host.dimension_0 () << " - " << i
579  << ". Please report this bug to the Tpetra developers.");
580 
581  // FixedHashTable's constructor expects an owned device View,
582  // so we must deep-copy the subview of the input indices.
583  View<GO*, LayoutLeft, device_type>
584  nonContigGids ("nonContigGids", nonContigGids_host.size ());
585  Kokkos::deep_copy (nonContigGids, nonContigGids_host);
586 
587  glMap_ = global_to_local_table_type (nonContigGids,
588  firstContiguousGID_,
589  lastContiguousGID_,
590  static_cast<LO> (i));
591  }
592 
593  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
594  // table above, we have to look at all the (noncontiguous) input
595  // indices anyway. Thus, why not have the constructor compute
596  // and return the min and max?
597 
598  for ( ; i < numLocalElements_; ++i) {
599  const GO curGid = entryList_host[i];
600  const LO curLid = as<LO> (i);
601  lgMap_host[curLid] = curGid; // LID -> GID table
602 
603  // While iterating through entryList, we compute its
604  // (process-local) min and max elements.
605  if (curGid < minMyGID_) {
606  minMyGID_ = curGid;
607  }
608  if (curGid > maxMyGID_) {
609  maxMyGID_ = curGid;
610  }
611  }
612 
613  // We filled lgMap on host above; now sync back to device.
614  Kokkos::deep_copy (lgMap, lgMap_host);
615 
616  // "Commit" the local-to-global lookup table we filled in above.
617  lgMap_ = lgMap;
618  // We've already created this, so use it.
619  lgMapHost_ = lgMap_host;
620  }
621  else {
622  // This insures tests for GIDs in the range
623  // [firstContiguousGID_, lastContiguousGID_] fail for processes
624  // with no local elements.
625  firstContiguousGID_ = indexBase_+1;
626  lastContiguousGID_ = indexBase_;
627  // glMap_ was default constructed, so it's already empty.
628  }
629 
630  // Compute the min and max of all processes' GIDs. If
631  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
632  // are both indexBase_. This is wrong, but fixing it would
633  // require either a fancy sparse all-reduce, or a custom reduction
634  // operator that ignores invalid values ("invalid" means
635  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
636  //
637  // Also, while we're at it, use the same all-reduce to figure out
638  // if the Map is distributed. "Distributed" means that there is
639  // at least one process with a number of local elements less than
640  // the number of global elements.
641  //
642  // We're computing the min and max of all processes' GIDs using a
643  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
644  // and y are signed). (This lets us combine the min and max into
645  // a single all-reduce.) If each process sets localDist=1 if its
646  // number of local elements is strictly less than the number of
647  // global elements, and localDist=0 otherwise, then a MAX
648  // all-reduce on localDist tells us if the Map is distributed (1
649  // if yes, 0 if no). Thus, we can append localDist onto the end
650  // of the data and get the global result from the all-reduce.
651  if (std::numeric_limits<GO>::is_signed) {
652  // Does my process NOT own all the elements?
653  const GO localDist =
654  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
655 
656  GO minMaxInput[3];
657  minMaxInput[0] = -minMyGID_;
658  minMaxInput[1] = maxMyGID_;
659  minMaxInput[2] = localDist;
660 
661  GO minMaxOutput[3];
662  minMaxOutput[0] = 0;
663  minMaxOutput[1] = 0;
664  minMaxOutput[2] = 0;
665  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
666  minAllGID_ = -minMaxOutput[0];
667  maxAllGID_ = minMaxOutput[1];
668  const GO globalDist = minMaxOutput[2];
669  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
670  }
671  else { // unsigned; use two reductions
672  // This is always correct, no matter the signedness of GO.
673  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
674  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
675  distributed_ = checkIsDist ();
676  }
677 
678  contiguous_ = false; // "Contiguous" is conservative.
679 
680  TEUCHOS_TEST_FOR_EXCEPTION(
681  minAllGID_ < indexBase_,
682  std::invalid_argument,
683  "Tpetra::Map constructor (noncontiguous): "
684  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
685  "less than the given indexBase = " << indexBase_ << ".");
686 
687  // Create the Directory on demand in getRemoteIndexList().
688  //setupDirectory ();
689  }
690 
691  template <class LocalOrdinal, class GlobalOrdinal, class Node>
692  Map<LocalOrdinal,GlobalOrdinal,Node>::
693  Map (const global_size_t numGlobalElements,
694  const GlobalOrdinal indexList[],
695  const LocalOrdinal indexListSize,
696  const GlobalOrdinal indexBase,
697  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
698  comm_ (comm),
699  node_ (defaultArgNode<Node> ()),
700  uniform_ (false),
701  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
702  {
703  // Make sure that Kokkos has been initialized (Github Issue #513).
704  TEUCHOS_TEST_FOR_EXCEPTION
705  (! execution_space::is_initialized (), std::runtime_error,
706  "Tpetra::Map constructor: The Kokkos execution space has not been "
707  "initialized. Please initialize it before creating a Map.")
708 
709  // Not quite sure if I trust all code to behave correctly if the
710  // pointer is nonnull but the array length is nonzero, so I'll
711  // make sure the raw pointer is null if the length is zero.
712  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
713  Kokkos::View<const GlobalOrdinal*,
714  Kokkos::LayoutLeft,
715  Kokkos::HostSpace,
716  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
717  initWithNonownedHostIndexList (numGlobalElements, inds, indexBase, comm);
718  }
719 
720  template <class LocalOrdinal, class GlobalOrdinal, class Node>
722  Map (const global_size_t numGlobalElements,
723  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
724  const GlobalOrdinal indexBase,
725  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
726  const Teuchos::RCP<Node>& node) :
727  comm_ (comm),
728  node_ (node),
729  uniform_ (false),
730  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
731  {
732  // Make sure that Kokkos has been initialized (Github Issue #513).
733  TEUCHOS_TEST_FOR_EXCEPTION
734  (! execution_space::is_initialized (), std::runtime_error,
735  "Tpetra::Map constructor: The Kokkos execution space has not been "
736  "initialized. Please initialize it before creating a Map.")
737 
738  const size_t numLclInds = static_cast<size_t> (entryList.size ());
739 
740  // Not quite sure if I trust both ArrayView and View to behave
741  // correctly if the pointer is nonnull but the array length is
742  // nonzero, so I'll make sure it's null if the length is zero.
743  const GlobalOrdinal* const indsRaw =
744  numLclInds == 0 ? NULL : entryList.getRawPtr ();
745  Kokkos::View<const GlobalOrdinal*,
746  Kokkos::LayoutLeft,
747  Kokkos::HostSpace,
748  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
749  initWithNonownedHostIndexList (numGlobalElements, inds, indexBase, comm);
750  }
751 
752  template <class LocalOrdinal, class GlobalOrdinal, class Node>
754  Map (const global_size_t numGlobalElements,
755  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
756  const GlobalOrdinal indexBase,
757  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
758  comm_ (comm),
759  node_ (defaultArgNode<Node> ()),
760  uniform_ (false),
761  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
762  {
763  using Kokkos::LayoutLeft;
764  using Kokkos::subview;
765  using Kokkos::View;
766  using Teuchos::arcp;
767  using Teuchos::ArrayView;
768  using Teuchos::as;
769  using Teuchos::broadcast;
770  using Teuchos::outArg;
771  using Teuchos::ptr;
772  using Teuchos::REDUCE_MAX;
773  using Teuchos::REDUCE_MIN;
774  using Teuchos::REDUCE_SUM;
775  using Teuchos::reduceAll;
776  using Teuchos::typeName;
777  typedef LocalOrdinal LO;
778  typedef GlobalOrdinal GO;
779  typedef global_size_t GST;
781 
782  // Make sure that Kokkos has been initialized (Github Issue #513).
783  TEUCHOS_TEST_FOR_EXCEPTION
784  (! execution_space::is_initialized (), std::runtime_error,
785  "Tpetra::Map constructor: The Kokkos execution space has not been "
786  "initialized. Please initialize it before creating a Map.")
787 
788  // The user has specified the distribution of indices over the
789  // processes, via the input array of global indices on each
790  // process. The distribution is not necessarily contiguous or
791  // equally shared over the processes.
792 
793  // The length of the input array on this process is the number of
794  // local indices to associate with this process, even though the
795  // input array contains global indices. We assume that the number
796  // of local indices on a process can be stored in a size_t;
797  // numLocalElements_ is a size_t, so this variable and that should
798  // have the same type.
799  const size_t numLocalElements = static_cast<size_t> (entryList.size ());
800 
801  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
802  indexBase, comm);
803 
804  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
805  // reduction is redundant, since the directory Map will have to do
806  // the same thing. Thus, we could do the scan and broadcast for
807  // the directory Map here, and give the computed offsets to the
808  // directory Map's constructor. However, a reduction costs less
809  // than a scan and broadcast, so this still saves time if users of
810  // this Map don't ever need the Directory (i.e., if they never
811  // call getRemoteIndexList on this Map).
812  if (numGlobalElements != GSTI) {
813  numGlobalElements_ = numGlobalElements; // Use the user's value.
814  }
815  else { // The user wants us to compute the sum.
816  reduceAll<int, GST> (*comm, REDUCE_SUM,
817  static_cast<GST> (numLocalElements),
818  outArg (numGlobalElements_));
819  }
820 
821  // mfh 20 Feb 2013: We've never quite done the right thing for
822  // duplicate GIDs here. Duplicate GIDs have always been counted
823  // distinctly in numLocalElements_, and thus should get a
824  // different LID. However, we've always used std::map or a hash
825  // table for the GID -> LID lookup table, so distinct GIDs always
826  // map to the same LID. Furthermore, the order of the input GID
827  // list matters, so it's not desirable to sort for determining
828  // uniqueness.
829  //
830  // I've chosen for now to write this code as if the input GID list
831  // contains no duplicates. If this is not desired, we could use
832  // the lookup table itself to determine uniqueness: If we haven't
833  // seen the GID before, it gets a new LID and it's added to the
834  // LID -> GID and GID -> LID tables. If we have seen the GID
835  // before, it doesn't get added to either table. I would
836  // implement this, but it would cost more to do the double lookups
837  // in the table (one to check, and one to insert).
838  //
839  // More importantly, since we build the GID -> LID table in (a
840  // thread-) parallel (way), the order in which duplicate GIDs may
841  // get inserted is not defined. This would make the assignment of
842  // LID to GID nondeterministic.
843 
844  numLocalElements_ = numLocalElements;
845  indexBase_ = indexBase;
846 
847  minMyGID_ = indexBase_;
848  maxMyGID_ = indexBase_;
849 
850  // NOTE (mfh 27 May 2015): While finding the initial contiguous
851  // GID range requires looking at all the GIDs in the range,
852  // dismissing an interval of GIDs only requires looking at the
853  // first and last GIDs. Thus, we could do binary search backwards
854  // from the end in order to catch the common case of a contiguous
855  // interval followed by noncontiguous entries. On the other hand,
856  // we could just expose this case explicitly as yet another Map
857  // constructor, and avoid the trouble of detecting it.
858  if (numLocalElements_ > 0) {
859  // Find contiguous GID range, with the restriction that the
860  // beginning of the range starts with the first entry. While
861  // doing so, fill in the LID -> GID table.
862  View<GO*, LayoutLeft, device_type> lgMap ("lgMap", numLocalElements_);
863  auto lgMap_host = Kokkos::create_mirror_view (lgMap);
864 
865  // Creating the mirror view is trivial, and the deep_copy is a
866  // no-op, if entryList is on host already.
867  auto entryList_host = Kokkos::create_mirror_view (entryList);
868  Kokkos::deep_copy (entryList_host, entryList);
869 
870  firstContiguousGID_ = entryList_host[0];
871  lastContiguousGID_ = firstContiguousGID_+1;
872 
873  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
874  // anyway, so we have to look at them all. The logical way to
875  // find the first noncontiguous entry would thus be to "reduce,"
876  // where the local reduction result is whether entryList[i] + 1
877  // == entryList[i+1].
878 
879  lgMap_host[0] = firstContiguousGID_;
880  size_t i = 1;
881  for ( ; i < numLocalElements_; ++i) {
882  const GO curGid = entryList_host[i];
883  const LO curLid = as<LO> (i);
884 
885  if (lastContiguousGID_ != curGid) break;
886 
887  // Add the entry to the LID->GID table only after we know that
888  // the current GID is in the initial contiguous sequence, so
889  // that we don't repeat adding it in the first iteration of
890  // the loop below over the remaining noncontiguous GIDs.
891  lgMap_host[curLid] = curGid;
892  ++lastContiguousGID_;
893  }
894  --lastContiguousGID_;
895 
896  // [firstContiguousGID_, lastContigousGID_] is the initial
897  // sequence of contiguous GIDs. We can start the min and max
898  // GID using this range.
899  minMyGID_ = firstContiguousGID_;
900  maxMyGID_ = lastContiguousGID_;
901 
902  // Compute the GID -> LID lookup table, _not_ including the
903  // initial sequence of contiguous GIDs.
904  {
905  const std::pair<size_t, size_t> ncRange (i, entryList.dimension_0 ());
906  auto nonContigGids = subview (entryList, ncRange);
907  TEUCHOS_TEST_FOR_EXCEPTION
908  (static_cast<size_t> (nonContigGids.dimension_0 ()) !=
909  static_cast<size_t> (entryList.dimension_0 () - i),
910  std::logic_error, "Tpetra::Map noncontiguous constructor: "
911  "nonContigGids.dimension_0() = "
912  << nonContigGids.dimension_0 ()
913  << " != entryList.dimension_0() - i = "
914  << (entryList.dimension_0 () - i) << " = "
915  << entryList.dimension_0 () << " - " << i
916  << ". Please report this bug to the Tpetra developers.");
917 
918  glMap_ = global_to_local_table_type (nonContigGids,
919  firstContiguousGID_,
920  lastContiguousGID_,
921  static_cast<LO> (i));
922  }
923 
924  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
925  // table above, we have to look at all the (noncontiguous) input
926  // indices anyway. Thus, why not have the constructor compute
927  // and return the min and max?
928 
929  for ( ; i < numLocalElements_; ++i) {
930  const GO curGid = entryList_host[i];
931  const LO curLid = static_cast<LO> (i);
932  lgMap_host[curLid] = curGid; // LID -> GID table
933 
934  // While iterating through entryList, we compute its
935  // (process-local) min and max elements.
936  if (curGid < minMyGID_) {
937  minMyGID_ = curGid;
938  }
939  if (curGid > maxMyGID_) {
940  maxMyGID_ = curGid;
941  }
942  }
943 
944  // We filled lgMap on host above; now sync back to device.
945  Kokkos::deep_copy (lgMap, lgMap_host);
946 
947  // "Commit" the local-to-global lookup table we filled in above.
948  lgMap_ = lgMap;
949  // We've already created this, so use it.
950  lgMapHost_ = lgMap_host;
951  }
952  else {
953  // This insures tests for GIDs in the range
954  // [firstContiguousGID_, lastContiguousGID_] fail for processes
955  // with no local elements.
956  firstContiguousGID_ = indexBase_+1;
957  lastContiguousGID_ = indexBase_;
958  // glMap_ was default constructed, so it's already empty.
959  }
960 
961  // Compute the min and max of all processes' GIDs. If
962  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
963  // are both indexBase_. This is wrong, but fixing it would
964  // require either a fancy sparse all-reduce, or a custom reduction
965  // operator that ignores invalid values ("invalid" means
966  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
967  //
968  // Also, while we're at it, use the same all-reduce to figure out
969  // if the Map is distributed. "Distributed" means that there is
970  // at least one process with a number of local elements less than
971  // the number of global elements.
972  //
973  // We're computing the min and max of all processes' GIDs using a
974  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
975  // and y are signed). (This lets us combine the min and max into
976  // a single all-reduce.) If each process sets localDist=1 if its
977  // number of local elements is strictly less than the number of
978  // global elements, and localDist=0 otherwise, then a MAX
979  // all-reduce on localDist tells us if the Map is distributed (1
980  // if yes, 0 if no). Thus, we can append localDist onto the end
981  // of the data and get the global result from the all-reduce.
982  if (std::numeric_limits<GO>::is_signed) {
983  // Does my process NOT own all the elements?
984  const GO localDist =
985  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
986 
987  GO minMaxInput[3];
988  minMaxInput[0] = -minMyGID_;
989  minMaxInput[1] = maxMyGID_;
990  minMaxInput[2] = localDist;
991 
992  GO minMaxOutput[3];
993  minMaxOutput[0] = 0;
994  minMaxOutput[1] = 0;
995  minMaxOutput[2] = 0;
996  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
997  minAllGID_ = -minMaxOutput[0];
998  maxAllGID_ = minMaxOutput[1];
999  const GO globalDist = minMaxOutput[2];
1000  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1001  }
1002  else { // unsigned; use two reductions
1003  // This is always correct, no matter the signedness of GO.
1004  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1005  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1006  distributed_ = checkIsDist ();
1007  }
1008 
1009  contiguous_ = false; // "Contiguous" is conservative.
1010 
1011  TEUCHOS_TEST_FOR_EXCEPTION(
1012  minAllGID_ < indexBase_,
1013  std::invalid_argument,
1014  "Tpetra::Map constructor (noncontiguous): "
1015  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1016  "less than the given indexBase = " << indexBase_ << ".");
1017 
1018  // Create the Directory on demand in getRemoteIndexList().
1019  //setupDirectory ();
1020  }
1021 
1022 
1023  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1025  {}
1026 
1027 
1028  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1029  bool
1031  {
1032  TEUCHOS_TEST_FOR_EXCEPTION(
1033  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1034  "getComm() returns null. Please report this bug to the Tpetra "
1035  "developers.");
1036 
1037  // This is a collective operation, if it hasn't been called before.
1038  setupDirectory ();
1039  return directory_->isOneToOne (*this);
1040  }
1041 
1042 
1043  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1044  LocalOrdinal
1046  getLocalElement (GlobalOrdinal globalIndex) const
1047  {
1048  if (isContiguous ()) {
1049  if (globalIndex < getMinGlobalIndex () ||
1050  globalIndex > getMaxGlobalIndex ()) {
1052  }
1053  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1054  }
1055  else if (globalIndex >= firstContiguousGID_ &&
1056  globalIndex <= lastContiguousGID_) {
1057  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1058  }
1059  else {
1060  // If the given global index is not in the table, this returns
1061  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1062  return glMap_.get (globalIndex);
1063  }
1064  }
1065 
1066  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1067  GlobalOrdinal
1069  getGlobalElement (LocalOrdinal localIndex) const
1070  {
1071  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1073  }
1074  if (isContiguous ()) {
1075  return getMinGlobalIndex () + localIndex;
1076  }
1077  else {
1078  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1079  // involvement. As a result, it is thread safe.
1080  //
1081  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1082  return lgMapHost_[localIndex];
1083  }
1084  }
1085 
1086  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1087  bool
1089  isNodeLocalElement (LocalOrdinal localIndex) const
1090  {
1091  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1092  return false;
1093  } else {
1094  return true;
1095  }
1096  }
1097 
1098  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1099  bool
1101  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1102  return this->getLocalElement (globalIndex) !=
1104  }
1105 
1106  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1108  return uniform_;
1109  }
1110 
1111  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1113  return contiguous_;
1114  }
1115 
1116 
1117  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1121  {
1122  return local_map_type (glMap_, lgMap_, getIndexBase (),
1123  getMinGlobalIndex (), getMaxGlobalIndex (),
1124  firstContiguousGID_, lastContiguousGID_,
1125  getNodeNumElements (), isContiguous ());
1126  }
1127 
1128  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1129  bool
1132  {
1133  using Teuchos::outArg;
1134  using Teuchos::REDUCE_MIN;
1135  using Teuchos::reduceAll;
1136  //
1137  // Tests that avoid the Boolean all-reduce below by using
1138  // globally consistent quantities.
1139  //
1140  if (this == &map) {
1141  // Pointer equality on one process always implies pointer
1142  // equality on all processes, since Map is immutable.
1143  return true;
1144  }
1145  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1146  // The two communicators have different numbers of processes.
1147  // It's not correct to call isCompatible() in that case. This
1148  // may result in the all-reduce hanging below.
1149  return false;
1150  }
1151  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1152  // Two Maps are definitely NOT compatible if they have different
1153  // global numbers of indices.
1154  return false;
1155  }
1156  else if (isContiguous () && isUniform () &&
1157  map.isContiguous () && map.isUniform ()) {
1158  // Contiguous uniform Maps with the same number of processes in
1159  // their communicators, and with the same global numbers of
1160  // indices, are always compatible.
1161  return true;
1162  }
1163  else if (! isContiguous () && ! map.isContiguous () &&
1164  lgMap_.dimension_0 () != 0 && map.lgMap_.dimension_0 () != 0 &&
1165  lgMap_.ptr_on_device () == map.lgMap_.ptr_on_device ()) {
1166  // Noncontiguous Maps whose global index lists are nonempty and
1167  // have the same pointer must be the same (and therefore
1168  // contiguous).
1169  //
1170  // Nonempty is important. For example, consider a communicator
1171  // with two processes, and two Maps that share this
1172  // communicator, with zero global indices on the first process,
1173  // and different nonzero numbers of global indices on the second
1174  // process. In that case, on the first process, the pointers
1175  // would both be NULL.
1176  return true;
1177  }
1178 
1179  TEUCHOS_TEST_FOR_EXCEPTION(
1180  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1181  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1182  "checked that this condition is true above, but it's false here. "
1183  "Please report this bug to the Tpetra developers.");
1184 
1185  // Do both Maps have the same number of indices on each process?
1186  const int locallyCompat =
1187  (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0;
1188 
1189  int globallyCompat = 0;
1190  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1191  return (globallyCompat == 1);
1192  }
1193 
1194  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1195  bool
1198  {
1199  using Teuchos::ArrayView;
1200  typedef GlobalOrdinal GO;
1201  typedef typename ArrayView<const GO>::size_type size_type;
1202 
1203  // If both Maps are contiguous, we can compare their GID ranges
1204  // easily by looking at the min and max GID on this process.
1205  // Otherwise, we'll compare their GID lists. If only one Map is
1206  // contiguous, then we only have to call getNodeElementList() on
1207  // the noncontiguous Map. (It's best to avoid calling it on a
1208  // contiguous Map, since it results in unnecessary storage that
1209  // persists for the lifetime of the Map.)
1210 
1211  if (this == &map) {
1212  // Pointer equality on one process always implies pointer
1213  // equality on all processes, since Map is immutable.
1214  return true;
1215  }
1216  else if (getNodeNumElements () != map.getNodeNumElements ()) {
1217  return false;
1218  }
1219  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1220  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1221  return false;
1222  }
1223  else {
1224  if (isContiguous ()) {
1225  if (map.isContiguous ()) {
1226  return true; // min and max match, so the ranges match.
1227  }
1228  else { // *this is contiguous, but map is not contiguous
1229  TEUCHOS_TEST_FOR_EXCEPTION(
1230  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1231  "Tpetra::Map::locallySameAs: BUG");
1232  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1233  const GO minLhsGid = this->getMinGlobalIndex ();
1234  const size_type numRhsElts = rhsElts.size ();
1235  for (size_type k = 0; k < numRhsElts; ++k) {
1236  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1237  if (curLhsGid != rhsElts[k]) {
1238  return false; // stop on first mismatch
1239  }
1240  }
1241  return true;
1242  }
1243  }
1244  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1245  TEUCHOS_TEST_FOR_EXCEPTION(
1246  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1247  "Tpetra::Map::locallySameAs: BUG");
1248  ArrayView<const GO> lhsElts = this->getNodeElementList ();
1249  const GO minRhsGid = map.getMinGlobalIndex ();
1250  const size_type numLhsElts = lhsElts.size ();
1251  for (size_type k = 0; k < numLhsElts; ++k) {
1252  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1253  if (curRhsGid != lhsElts[k]) {
1254  return false; // stop on first mismatch
1255  }
1256  }
1257  return true;
1258  }
1259  else if (this->lgMap_.ptr_on_device () == map.lgMap_.ptr_on_device ()) {
1260  // Pointers to LID->GID "map" (actually just an array) are the
1261  // same, and the number of GIDs are the same.
1262  return this->getNodeNumElements () == map.getNodeNumElements ();
1263  }
1264  else { // we actually have to compare the GIDs
1265  if (this->getNodeNumElements () != map.getNodeNumElements ()) {
1266  return false; // We already checked above, but check just in case
1267  }
1268  else {
1269  ArrayView<const GO> lhsElts = getNodeElementList ();
1270  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1271 
1272  // std::equal requires that the latter range is as large as
1273  // the former. We know the ranges have equal length, because
1274  // they have the same number of local entries.
1275  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1276  }
1277  }
1278  }
1279  }
1280 
1281  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1282  bool
1285  {
1286  using Teuchos::outArg;
1287  using Teuchos::REDUCE_MIN;
1288  using Teuchos::reduceAll;
1289  //
1290  // Tests that avoid the Boolean all-reduce below by using
1291  // globally consistent quantities.
1292  //
1293  if (this == &map) {
1294  // Pointer equality on one process always implies pointer
1295  // equality on all processes, since Map is immutable.
1296  return true;
1297  }
1298  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1299  // The two communicators have different numbers of processes.
1300  // It's not correct to call isSameAs() in that case. This
1301  // may result in the all-reduce hanging below.
1302  return false;
1303  }
1304  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1305  // Two Maps are definitely NOT the same if they have different
1306  // global numbers of indices.
1307  return false;
1308  }
1309  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1310  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1311  getIndexBase () != map.getIndexBase ()) {
1312  // If the global min or max global index doesn't match, or if
1313  // the index base doesn't match, then the Maps aren't the same.
1314  return false;
1315  }
1316  else if (isDistributed () != map.isDistributed ()) {
1317  // One Map is distributed and the other is not, which means that
1318  // the Maps aren't the same.
1319  return false;
1320  }
1321  else if (isContiguous () && isUniform () &&
1322  map.isContiguous () && map.isUniform ()) {
1323  // Contiguous uniform Maps with the same number of processes in
1324  // their communicators, with the same global numbers of indices,
1325  // and with matching index bases and ranges, must be the same.
1326  return true;
1327  }
1328 
1329  // The two communicators must have the same number of processes,
1330  // with process ranks occurring in the same order. This uses
1331  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1332  // "Operations that access communicators are local and their
1333  // execution does not require interprocess communication."
1334  // However, just to be sure, I'll put this call after the above
1335  // tests that don't communicate.
1336  if (! Details::congruent (*comm_, * (map.getComm ()))) {
1337  return false;
1338  }
1339 
1340  // If we get this far, we need to check local properties and then
1341  // communicate local sameness across all processes.
1342  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1343 
1344  // Return true if and only if all processes report local sameness.
1345  int isSame_gbl = 0;
1346  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1347  return isSame_gbl == 1;
1348  }
1349 
1350  namespace { // (anonymous)
1351  template <class LO, class GO, class DT>
1352  class FillLgMap {
1353  public:
1354  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1355  const GO startGid) :
1356  lgMap_ (lgMap), startGid_ (startGid)
1357  {
1358  Kokkos::RangePolicy<LO, typename DT::execution_space>
1359  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1360  Kokkos::parallel_for (range, *this);
1361  }
1362 
1363  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1364  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1365  }
1366 
1367  private:
1368  const Kokkos::View<GO*, DT> lgMap_;
1369  const GO startGid_;
1370  };
1371 
1372  } // namespace (anonymous)
1373 
1374 
1375  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1376  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1378  {
1379  typedef LocalOrdinal LO;
1380  typedef GlobalOrdinal GO;
1381  typedef device_type DT;
1382 
1383  typedef decltype (lgMap_) const_lg_view_type;
1384  typedef typename const_lg_view_type::non_const_type lg_view_type;
1385 
1386  // If the local-to-global mapping doesn't exist yet, and if we
1387  // have local entries, then create and fill the local-to-global
1388  // mapping.
1389  const bool needToCreateLocalToGlobalMapping =
1390  lgMap_.dimension_0 () == 0 && numLocalElements_ > 0;
1391 
1392  if (needToCreateLocalToGlobalMapping) {
1393 #ifdef HAVE_TEUCHOS_DEBUG
1394  // The local-to-global mapping should have been set up already
1395  // for a noncontiguous map.
1396  TEUCHOS_TEST_FOR_EXCEPTION( ! isContiguous(), std::logic_error,
1397  "Tpetra::Map::getNodeElementList: The local-to-global mapping (lgMap_) "
1398  "should have been set up already for a noncontiguous Map. Please report"
1399  " this bug to the Tpetra team.");
1400 #endif // HAVE_TEUCHOS_DEBUG
1401 
1402  const LO numElts = static_cast<LO> (getNodeNumElements ());
1403 
1404  lg_view_type lgMap ("lgMap", numElts);
1405  FillLgMap<LO, GO, DT> fillIt (lgMap, minMyGID_);
1406 
1407  auto lgMapHost = Kokkos::create_mirror_view (lgMap);
1408  Kokkos::deep_copy (lgMapHost, lgMap);
1409 
1410  // "Commit" the local-to-global lookup table we filled in above.
1411  lgMap_ = lgMap;
1412  lgMapHost_ = lgMapHost;
1413  }
1414 
1415  return lgMap_;
1416  }
1417 
1418  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1419  Teuchos::ArrayView<const GlobalOrdinal>
1421  {
1422  typedef GlobalOrdinal GO; // convenient abbreviation
1423 
1424  // If the local-to-global mapping doesn't exist yet, and if we
1425  // have local entries, then create and fill the local-to-global
1426  // mapping.
1427  (void) this->getMyGlobalIndices ();
1428 
1429  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1430  const GO* lgMapHostRawPtr = lgMapHost_.ptr_on_device ();
1431  // The third argument forces ArrayView not to try to track memory
1432  // in a debug build. We have to use it because the memory does
1433  // not belong to a Teuchos memory management class.
1434  return Teuchos::ArrayView<const GO> (lgMapHostRawPtr,
1435  lgMapHost_.dimension_0 (),
1436  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1437  }
1438 
1439  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1441  return distributed_;
1442  }
1443 
1444  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1446  using Teuchos::TypeNameTraits;
1447  std::ostringstream os;
1448 
1449  os << "Tpetra::Map: {"
1450  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1451  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1452  << ", NodeType: " << TypeNameTraits<Node>::name ();
1453  if (this->getObjectLabel () != "") {
1454  os << ", Label: \"" << this->getObjectLabel () << "\"";
1455  }
1456  os << ", Global number of entries: " << getGlobalNumElements ()
1457  << ", Number of processes: " << getComm ()->getSize ()
1458  << ", Uniform: " << (isUniform () ? "true" : "false")
1459  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1460  << ", Distributed: " << (isDistributed () ? "true" : "false")
1461  << "}";
1462  return os.str ();
1463  }
1464 
1469  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1470  std::string
1472  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1473  {
1474  typedef LocalOrdinal LO;
1475  using std::endl;
1476 
1477  // This preserves current behavior of Map.
1478  if (vl < Teuchos::VERB_HIGH) {
1479  return std::string ();
1480  }
1481  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1482  Teuchos::RCP<Teuchos::FancyOStream> outp =
1483  Teuchos::getFancyOStream (outStringP);
1484  Teuchos::FancyOStream& out = *outp;
1485 
1486  auto comm = this->getComm ();
1487  const int myRank = comm->getRank ();
1488  const int numProcs = comm->getSize ();
1489  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1490  Teuchos::OSTab tab1 (out);
1491 
1492  const LO numEnt = static_cast<LO> (this->getNodeNumElements ());
1493  out << "My number of entries: " << numEnt << endl
1494  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1495  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1496 
1497  if (vl == Teuchos::VERB_EXTREME) {
1498  out << "My global indices: [";
1499  const LO minLclInd = this->getMinLocalIndex ();
1500  for (LO k = 0; k < numEnt; ++k) {
1501  out << minLclInd + this->getGlobalElement (k);
1502  if (k + 1 < numEnt) {
1503  out << ", ";
1504  }
1505  }
1506  out << "]" << endl;
1507  }
1508 
1509  out.flush (); // make sure the ostringstream got everything
1510  return outStringP->str ();
1511  }
1512 
1513  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1514  void
1515  Map<LocalOrdinal,GlobalOrdinal,Node>::
1516  describe (Teuchos::FancyOStream &out,
1517  const Teuchos::EVerbosityLevel verbLevel) const
1518  {
1519  using Teuchos::TypeNameTraits;
1520  using Teuchos::VERB_DEFAULT;
1521  using Teuchos::VERB_NONE;
1522  using Teuchos::VERB_LOW;
1523  using Teuchos::VERB_HIGH;
1524  using std::endl;
1525  typedef LocalOrdinal LO;
1526  typedef GlobalOrdinal GO;
1527  const Teuchos::EVerbosityLevel vl =
1528  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1529 
1530  if (vl == VERB_NONE) {
1531  return; // don't print anything
1532  }
1533  // If this Map's Comm is null, then the Map does not participate
1534  // in collective operations with the other processes. In that
1535  // case, it is not even legal to call this method. The reasonable
1536  // thing to do in that case is nothing.
1537  auto comm = this->getComm ();
1538  if (comm.is_null ()) {
1539  return;
1540  }
1541  const int myRank = comm->getRank ();
1542  const int numProcs = comm->getSize ();
1543 
1544  // Only Process 0 should touch the output stream, but this method
1545  // in general may need to do communication. Thus, we may need to
1546  // preserve the current tab level across multiple "if (myRank ==
1547  // 0) { ... }" inner scopes. This is why we sometimes create
1548  // OSTab instances by pointer, instead of by value. We only need
1549  // to create them by pointer if the tab level must persist through
1550  // multiple inner scopes.
1551  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1552 
1553  if (myRank == 0) {
1554  // At every verbosity level but VERB_NONE, Process 0 prints.
1555  // By convention, describe() always begins with a tab before
1556  // printing.
1557  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1558  out << "\"Tpetra::Map\":" << endl;
1559  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1560  {
1561  out << "Template parameters:" << endl;
1562  Teuchos::OSTab tab2 (out);
1563  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1564  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1565  << "Node: " << TypeNameTraits<Node>::name () << endl;
1566  }
1567  const std::string label = this->getObjectLabel ();
1568  if (label != "") {
1569  out << "Label: \"" << label << "\"" << endl;
1570  }
1571  out << "Global number of entries: " << getGlobalNumElements () << endl
1572  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1573  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1574  << "Index base: " << getIndexBase () << endl
1575  << "Number of processes: " << numProcs << endl
1576  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1577  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1578  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1579  }
1580 
1581  // This is collective over the Map's communicator.
1582  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1583  const std::string lclStr = this->localDescribeToString (vl);
1584  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1585  }
1586  }
1587 
1588  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1589  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1591  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1592  {
1593  using Teuchos::RCP;
1594  using Teuchos::rcp;
1595  typedef global_size_t GST;
1596  typedef LocalOrdinal LO;
1597  typedef GlobalOrdinal GO;
1598  typedef Map<LO, GO, Node> map_type;
1599 
1600  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1601  // the Map by calling its ordinary public constructor, using the
1602  // original Map's data. This only involves O(1) all-reduces over
1603  // the new communicator, which in the common case only includes a
1604  // small number of processes.
1605 
1606  // Create the Map to return.
1607  if (newComm.is_null () || newComm->getSize () < 1) {
1608  return Teuchos::null; // my process does not participate in the new Map
1609  }
1610  else if (newComm->getSize () == 1) {
1611  // The case where the new communicator has only one process is
1612  // easy. We don't have to communicate to get all the
1613  // information we need. Use the default comm to create the new
1614  // Map, then fill in all the fields directly.
1615  RCP<map_type> newMap (new map_type ());
1616 
1617  newMap->comm_ = newComm;
1618  newMap->node_ = this->node_;
1619  // mfh 07 Oct 2016: Preserve original behavior, even though the
1620  // original index base may no longer be the globally min global
1621  // index. See #616 for why this doesn't matter so much anymore.
1622  newMap->indexBase_ = this->indexBase_;
1623  newMap->numGlobalElements_ = this->numLocalElements_;
1624  newMap->numLocalElements_ = this->numLocalElements_;
1625  newMap->minMyGID_ = this->minMyGID_;
1626  newMap->maxMyGID_ = this->maxMyGID_;
1627  newMap->minAllGID_ = this->minMyGID_;
1628  newMap->maxAllGID_ = this->maxMyGID_;
1629  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1630  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1631  // Since the new communicator has only one process, neither
1632  // uniformity nor contiguity have changed.
1633  newMap->uniform_ = this->uniform_;
1634  newMap->contiguous_ = this->contiguous_;
1635  // The new communicator only has one process, so the new Map is
1636  // not distributed.
1637  newMap->distributed_ = false;
1638  newMap->lgMap_ = this->lgMap_;
1639  newMap->lgMapHost_ = this->lgMapHost_;
1640  newMap->glMap_ = this->glMap_;
1641  // It's OK not to initialize the new Map's Directory.
1642  // This is initialized lazily, on first call to getRemoteIndexList.
1643 
1644  return newMap;
1645  }
1646  else { // newComm->getSize() != 1
1647  // Even if the original Map is contiguous, the new Map might not
1648  // be, especially if the excluded processes have ranks != 0 or
1649  // newComm->getSize()-1. The common case for this method is to
1650  // exclude many (possibly even all but one) processes, so it
1651  // likely doesn't pay to do the global communication (over the
1652  // original communicator) to figure out whether we can optimize
1653  // the result Map. Thus, we just set up the result Map as
1654  // noncontiguous.
1655  //
1656  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1657  // the global-to-local table, etc. Optimize this code path to
1658  // avoid unnecessary local work.
1659 
1660  // Make Map (re)compute the global number of elements.
1661  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1662  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1663  // to use the noncontiguous Map constructor, since the new Map
1664  // might not be contiguous. Even if the old Map was contiguous,
1665  // some process in the "middle" might have been excluded. If we
1666  // want to avoid local work, we either have to do the setup by
1667  // hand, or write a new Map constructor.
1668 #if 1
1669  // The disabled code here throws the following exception in
1670  // Map's replaceCommWithSubset test:
1671  //
1672  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ())
1673  // 10:
1674  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1675  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1676 
1677  auto lgMap = this->getMyGlobalIndices ();
1678  typedef typename std::decay<decltype (lgMap.dimension_0 ()) >::type size_type;
1679  const size_type lclNumInds =
1680  static_cast<size_type> (this->getNodeNumElements ());
1681  using Teuchos::TypeNameTraits;
1682  TEUCHOS_TEST_FOR_EXCEPTION
1683  (lgMap.dimension_0 () != lclNumInds, std::logic_error,
1684  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1685  "has length " << lgMap.dimension_0 () << " (of type " <<
1686  TypeNameTraits<size_type>::name () << ") != this->getNodeNumElements()"
1687  " = " << this->getNodeNumElements () << ". The latter, upon being "
1688  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
1689  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
1690  "developers.");
1691 #else
1692  Teuchos::ArrayView<const GO> lgMap = this->getNodeElementList ();
1693 #endif // 1
1694 
1695  const GO indexBase = this->getIndexBase ();
1696  return rcp (new map_type (RECOMPUTE, lgMap, indexBase, newComm));
1697  }
1698  }
1699 
1700  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1701  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1704  {
1705  using Teuchos::Comm;
1706  using Teuchos::null;
1707  using Teuchos::outArg;
1708  using Teuchos::RCP;
1709  using Teuchos::rcp;
1710  using Teuchos::REDUCE_MIN;
1711  using Teuchos::reduceAll;
1712 
1713  // Create the new communicator. split() returns a valid
1714  // communicator on all processes. On processes where color == 0,
1715  // ignore the result. Passing key == 0 tells MPI to order the
1716  // processes in the new communicator by their rank in the old
1717  // communicator.
1718  const int color = (numLocalElements_ == 0) ? 0 : 1;
1719  // MPI_Comm_split must be called collectively over the original
1720  // communicator. We can't just call it on processes with color
1721  // one, even though we will ignore its result on processes with
1722  // color zero.
1723  RCP<const Comm<int> > newComm = comm_->split (color, 0);
1724  if (color == 0) {
1725  newComm = null;
1726  }
1727 
1728  // Create the Map to return.
1729  if (newComm.is_null ()) {
1730  return null; // my process does not participate in the new Map
1731  } else {
1732  // The default constructor that's useful for clone() above is
1733  // also useful here.
1734  RCP<Map> map = rcp (new Map ());
1735 
1736  map->comm_ = newComm;
1737  map->indexBase_ = indexBase_;
1738  map->numGlobalElements_ = numGlobalElements_;
1739  map->numLocalElements_ = numLocalElements_;
1740  map->minMyGID_ = minMyGID_;
1741  map->maxMyGID_ = maxMyGID_;
1742  map->minAllGID_ = minAllGID_;
1743  map->maxAllGID_ = maxAllGID_;
1744  map->firstContiguousGID_= firstContiguousGID_;
1745  map->lastContiguousGID_ = lastContiguousGID_;
1746 
1747  // Uniformity and contiguity have not changed. The directory
1748  // has changed, but we've taken care of that above.
1749  map->uniform_ = uniform_;
1750  map->contiguous_ = contiguous_;
1751 
1752  // If the original Map was NOT distributed, then the new Map
1753  // cannot be distributed.
1754  //
1755  // If the number of processes in the new communicator is 1, then
1756  // the new Map is not distributed.
1757  //
1758  // Otherwise, we have to check the new Map using an all-reduce
1759  // (over the new communicator). For example, the original Map
1760  // may have had some processes with zero elements, and all other
1761  // processes with the same number of elements as in the whole
1762  // Map. That Map is technically distributed, because of the
1763  // processes with zero elements. Removing those processes would
1764  // make the new Map locally replicated.
1765  if (! distributed_ || newComm->getSize () == 1) {
1766  map->distributed_ = false;
1767  } else {
1768  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
1769  int allProcsOwnAllGids = 0;
1770  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
1771  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
1772  }
1773 
1774  map->lgMap_ = lgMap_;
1775  map->lgMapHost_ = lgMapHost_;
1776  map->glMap_ = glMap_;
1777  map->node_ = node_;
1778 
1779  // Map's default constructor creates an uninitialized Directory.
1780  // The Directory will be initialized on demand in
1781  // getRemoteIndexList().
1782  //
1783  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
1784  // directory more efficiently than just recreating it. If
1785  // directory recreation proves a bottleneck, we can always
1786  // revisit this. On the other hand, Directory creation is only
1787  // collective over the new, presumably much smaller
1788  // communicator, so it may not be worth the effort to optimize.
1789 
1790  return map;
1791  }
1792  }
1793 
1794  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1795  void
1797  {
1798  TEUCHOS_TEST_FOR_EXCEPTION(
1799  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
1800  "The Directory is null. "
1801  "Please report this bug to the Tpetra developers.");
1802 
1803  // Only create the Directory if it hasn't been created yet.
1804  // This is a collective operation.
1805  if (! directory_->initialized ()) {
1806  directory_->initialize (*this);
1807  }
1808  }
1809 
1810  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1811  LookupStatus
1812  Map<LocalOrdinal,GlobalOrdinal,Node>::
1813  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
1814  const Teuchos::ArrayView<int>& PIDs,
1815  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
1816  {
1818  typedef Teuchos::ArrayView<int>::size_type size_type;
1819 
1820  // Empty Maps (i.e., containing no indices on any processes in the
1821  // Map's communicator) are perfectly valid. In that case, if the
1822  // input GID list is nonempty, we fill the output arrays with
1823  // invalid values, and return IDNotPresent to notify the caller.
1824  // It's perfectly valid to give getRemoteIndexList GIDs that the
1825  // Map doesn't own. SubmapImport test 2 needs this functionality.
1826  if (getGlobalNumElements () == 0) {
1827  if (GIDs.size () == 0) {
1828  return AllIDsPresent; // trivially
1829  } else {
1830  for (size_type k = 0; k < PIDs.size (); ++k) {
1831  PIDs[k] = OrdinalTraits<int>::invalid ();
1832  }
1833  for (size_type k = 0; k < LIDs.size (); ++k) {
1834  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
1835  }
1836  return IDNotPresent;
1837  }
1838  }
1839 
1840  // getRemoteIndexList must be called collectively, and Directory
1841  // initialization is collective too, so it's OK to initialize the
1842  // Directory on demand.
1843  setupDirectory ();
1844  return directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
1845  }
1846 
1847  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1848  LookupStatus
1850  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
1851  const Teuchos::ArrayView<int> & PIDs) const
1852  {
1853  if (getGlobalNumElements () == 0) {
1854  if (GIDs.size () == 0) {
1855  return AllIDsPresent; // trivially
1856  } else {
1857  // The Map contains no indices, so all output PIDs are invalid.
1858  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
1860  }
1861  return IDNotPresent;
1862  }
1863  }
1864 
1865  // getRemoteIndexList must be called collectively, and Directory
1866  // initialization is collective too, so it's OK to initialize the
1867  // Directory on demand.
1868  setupDirectory ();
1869  return directory_->getDirectoryEntries (*this, GIDs, PIDs);
1870  }
1871 
1872  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1873  Teuchos::RCP<const Teuchos::Comm<int> >
1875  return comm_;
1876  }
1877 
1878  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1879  Teuchos::RCP<Node>
1881  return node_;
1882  }
1883 
1884  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1886  using Teuchos::as;
1887  using Teuchos::outArg;
1888  using Teuchos::REDUCE_MIN;
1889  using Teuchos::reduceAll;
1890 
1891  bool global = false;
1892  if (comm_->getSize () > 1) {
1893  // The communicator has more than one process, but that doesn't
1894  // necessarily mean the Map is distributed.
1895  int localRep = 0;
1896  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
1897  // The number of local elements on this process equals the
1898  // number of global elements.
1899  //
1900  // NOTE (mfh 22 Nov 2011) Does this still work if there were
1901  // duplicates in the global ID list on input (the third Map
1902  // constructor), so that the number of local elements (which
1903  // are not duplicated) on this process could be less than the
1904  // number of global elements, even if this process owns all
1905  // the elements?
1906  localRep = 1;
1907  }
1908  int allLocalRep;
1909  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
1910  if (allLocalRep != 1) {
1911  // At least one process does not own all the elements.
1912  // This makes the Map a distributed Map.
1913  global = true;
1914  }
1915  }
1916  // If the communicator has only one process, then the Map is not
1917  // distributed.
1918  return global;
1919  }
1920 
1921  namespace Details {
1922 
1923  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1924  bool
1927  {
1928  using Teuchos::ArrayView;
1929  typedef GlobalOrdinal GO; // a handy abbreviation
1930  typedef typename ArrayView<const GO>::size_type size_type;
1931 
1932  bool fitted = true;
1933  if (&map1 == &map2) {
1934  fitted = true;
1935  }
1936  else if (map1.isContiguous () && map2.isContiguous () &&
1937  map1.getMinGlobalIndex () == map2.getMinGlobalIndex () &&
1938  map1.getMaxGlobalIndex () <= map2.getMaxGlobalIndex ()) {
1939  // Special case where both Maps are contiguous.
1940  fitted = true;
1941  }
1942  else {
1943  ArrayView<const GO> inds_map2 = map2.getNodeElementList ();
1944  const size_type numInds_map1 =
1945  static_cast<size_type> (map1.getNodeNumElements ());
1946 
1947  if (map1.isContiguous ()) {
1948  // Avoid calling getNodeElementList() on the always one-to-one
1949  // Map, if it is contiguous (a common case). When called on a
1950  // contiguous Map, getNodeElementList() causes allocation of
1951  // an array that sticks around, even though the array isn't
1952  // needed. (The Map is contiguous, so you can compute the
1953  // entries; you don't need to store them.)
1954  if (numInds_map1 > inds_map2.size ()) {
1955  // There are fewer indices in map1 on this process than in
1956  // map2. This case might be impossible.
1957  fitted = false;
1958  }
1959  else {
1960  // Do all the map1 indices match the initial map2 indices?
1961  const GO minInd_map1 = map1.getMinGlobalIndex ();
1962  for (size_type k = 0; k < numInds_map1; ++k) {
1963  const GO inds_map1_k = static_cast<GO> (k) + minInd_map1;
1964  if (inds_map1_k != inds_map2[k]) {
1965  fitted = false;
1966  break;
1967  }
1968  }
1969  }
1970  }
1971  else { // map1 is not contiguous.
1972  // Get index lists from both Maps, and compare their indices.
1973  ArrayView<const GO> inds_map1 = map1.getNodeElementList ();
1974  if (numInds_map1 > inds_map2.size ()) {
1975  // There are fewer indices in map1 on this process than in
1976  // map2. This case might be impossible.
1977  fitted = false;
1978  }
1979  else {
1980  // Do all the map1 indices match those in map2?
1981  for (size_type k = 0; k < numInds_map1; ++k) {
1982  if (inds_map1[k] != inds_map2[k]) {
1983  fitted = false;
1984  break;
1985  }
1986  }
1987  }
1988  }
1989  }
1990  return fitted;
1991  }
1992 
1993  } // namespace Details
1994 } // namespace Tpetra
1995 
1996 template <class LocalOrdinal, class GlobalOrdinal>
1997 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
1998 Tpetra::createLocalMap (const size_t numElements,
1999  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2000 {
2001  typedef LocalOrdinal LO;
2002  typedef GlobalOrdinal GO;
2003  typedef typename ::Tpetra::Map<LO, GO>::node_type NT;
2004  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2005 }
2006 
2007 template <class LocalOrdinal, class GlobalOrdinal>
2008 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2009 Tpetra::createUniformContigMap (const global_size_t numElements,
2010  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2011 {
2012  typedef LocalOrdinal LO;
2013  typedef GlobalOrdinal GO;
2014  typedef typename ::Tpetra::Map<LO, GO>::node_type NT;
2015  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2016 }
2017 
2018 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2019 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2020 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2021  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2022  const Teuchos::RCP<Node>& node)
2023 {
2024  using Teuchos::rcp;
2026  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2027 
2028  if (node.is_null ()) {
2029  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2030  }
2031  else {
2032  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed, node));
2033  }
2034 }
2035 
2036 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2037 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2038 Tpetra::createLocalMapWithNode (const size_t numElements,
2039  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2040  const Teuchos::RCP<Node>& node)
2041 {
2042  using Tpetra::global_size_t;
2043  using Teuchos::rcp;
2045  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2046  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2047 
2048  if (node.is_null ()) {
2049  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2050  }
2051  else {
2052  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated, node));
2053  }
2054 }
2055 
2056 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2057 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2059  const size_t localNumElements,
2060  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2061  const Teuchos::RCP<Node>& node)
2062 {
2063  using Teuchos::rcp;
2065  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2066 
2067  return rcp (new map_type (numElements, localNumElements, indexBase, comm, node));
2068 }
2069 
2070 template <class LocalOrdinal, class GlobalOrdinal>
2071 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2073  const size_t localNumElements,
2074  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2075 {
2076  typedef LocalOrdinal LO;
2077  typedef GlobalOrdinal GO;
2079 
2080  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2081 }
2082 
2083 
2084 template <class LocalOrdinal, class GlobalOrdinal>
2085 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2086 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2087  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2088 {
2089  typedef LocalOrdinal LO;
2090  typedef GlobalOrdinal GO;
2092 
2093  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2094 }
2095 
2096 
2097 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2098 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2099 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2100  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2101  const Teuchos::RCP<Node>& node)
2102 {
2103  using Teuchos::rcp;
2105  typedef Tpetra::global_size_t GST;
2107  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2108  // shouldn't be zero, given that the index base is supposed to equal
2109  // the globally min global index?
2110  const GlobalOrdinal indexBase = 0;
2111 
2112  if (node.is_null ()) {
2113  return rcp (new map_type (INV, elementList, indexBase, comm));
2114  }
2115  else {
2116  return rcp (new map_type (INV, elementList, indexBase, comm, node));
2117  }
2118 }
2119 
2120 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2121 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2122 Tpetra::createWeightedContigMapWithNode (const int myWeight,
2123  const Tpetra::global_size_t numElements,
2124  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2125  const Teuchos::RCP<Node>& node)
2126 {
2127  Teuchos::RCP< Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > map;
2128  int sumOfWeights, elemsLeft, localNumElements;
2129  const int numImages = comm->getSize();
2130  const int myImageID = comm->getRank();
2131  Teuchos::reduceAll<int>(*comm,Teuchos::REDUCE_SUM,myWeight,Teuchos::outArg(sumOfWeights));
2132  const double myShare = ((double)myWeight) / ((double)sumOfWeights);
2133  localNumElements = (int)std::floor( myShare * ((double)numElements) );
2134  // std::cout << "numElements: " << numElements << " myWeight: " << myWeight << " sumOfWeights: " << sumOfWeights << " myShare: " << myShare << std::endl;
2135  Teuchos::reduceAll<int>(*comm,Teuchos::REDUCE_SUM,localNumElements,Teuchos::outArg(elemsLeft));
2136  elemsLeft = numElements - elemsLeft;
2137  // std::cout << "(before) localNumElements: " << localNumElements << " elemsLeft: " << elemsLeft << std::endl;
2138  // i think this is true. just test it for now.
2139  TEUCHOS_TEST_FOR_EXCEPT(elemsLeft < -numImages || numImages < elemsLeft);
2140  if (elemsLeft < 0) {
2141  // last elemsLeft nodes lose an element
2142  if (myImageID >= numImages-elemsLeft) --localNumElements;
2143  }
2144  else if (elemsLeft > 0) {
2145  // first elemsLeft nodes gain an element
2146  if (myImageID < elemsLeft) ++localNumElements;
2147  }
2148  // std::cout << "(after) localNumElements: " << localNumElements << std::endl;
2149  return createContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(numElements,localNumElements,comm,node);
2150 }
2151 
2152 
2153 template<class LO, class GO, class NT>
2154 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2155 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2156 {
2157  using Teuchos::Array;
2158  using Teuchos::ArrayView;
2159  using Teuchos::as;
2160  using Teuchos::rcp;
2161  typedef Tpetra::Map<LO, GO, NT> map_type;
2162  typedef global_size_t GST;
2164  const int myRank = M->getComm ()->getRank ();
2165 
2166  // Bypasses for special cases where either M is known to be
2167  // one-to-one, or the one-to-one version of M is easy to compute.
2168  // This is why we take M as an RCP, not as a const reference -- so
2169  // that we can return M itself if it is 1-to-1.
2170  if (! M->isDistributed ()) {
2171  // For a locally replicated Map, we assume that users want to push
2172  // all the GIDs to Process 0.
2173 
2174  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2175  // you think it should return, in this special case of a locally
2176  // replicated contiguous Map.
2177  const GST numGlobalEntries = M->getGlobalNumElements ();
2178  if (M->isContiguous ()) {
2179  const size_t numLocalEntries =
2180  (myRank == 0) ? as<size_t> (numGlobalEntries) : static_cast<size_t> (0);
2181  return rcp (new map_type (numGlobalEntries, numLocalEntries,
2182  M->getIndexBase (), M->getComm (),
2183  M->getNode ()));
2184  }
2185  else {
2186  ArrayView<const GO> myGids =
2187  (myRank == 0) ? M->getNodeElementList () : Teuchos::null;
2188  return rcp (new map_type (GINV, myGids (), M->getIndexBase (),
2189  M->getComm (), M->getNode ()));
2190 
2191  }
2192  }
2193  else if (M->isContiguous ()) {
2194  // Contiguous, distributed Maps are one-to-one by construction.
2195  // (Locally replicated Maps can be contiguous.)
2196  return M;
2197  }
2198  else {
2200  const size_t numMyElems = M->getNodeNumElements ();
2201  ArrayView<const GO> myElems = M->getNodeElementList ();
2202  Array<int> owner_procs_vec (numMyElems);
2203 
2204  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2205 
2206  Array<GO> myOwned_vec (numMyElems);
2207  size_t numMyOwnedElems = 0;
2208  for (size_t i = 0; i < numMyElems; ++i) {
2209  const GO GID = myElems[i];
2210  const int owner = owner_procs_vec[i];
2211 
2212  if (myRank == owner) {
2213  myOwned_vec[numMyOwnedElems++] = GID;
2214  }
2215  }
2216  myOwned_vec.resize (numMyOwnedElems);
2217 
2218  return rcp (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2219  M->getComm (), M->getNode ()));
2220  }
2221 }
2222 
2223 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2224 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2227 {
2228  using Teuchos::Array;
2229  using Teuchos::ArrayView;
2230  using Teuchos::rcp;
2231  typedef LocalOrdinal LO;
2232  typedef GlobalOrdinal GO;
2233  typedef Tpetra::Map<LO,GO,Node> map_type;
2234  int myID = M->getComm()->getRank();
2235 
2236  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2237  // Maps (which are 1-to-1 by construction).
2238 
2239  //Based off Epetra's one to one.
2240 
2242  directory.initialize (*M, tie_break);
2243  size_t numMyElems = M->getNodeNumElements ();
2244  ArrayView<const GO> myElems = M->getNodeElementList ();
2245  Array<int> owner_procs_vec (numMyElems);
2246 
2247  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2248 
2249  Array<GO> myOwned_vec (numMyElems);
2250  size_t numMyOwnedElems = 0;
2251  for (size_t i = 0; i < numMyElems; ++i) {
2252  GO GID = myElems[i];
2253  int owner = owner_procs_vec[i];
2254 
2255  if (myID == owner) {
2256  myOwned_vec[numMyOwnedElems++] = GID;
2257  }
2258  }
2259  myOwned_vec.resize (numMyOwnedElems);
2260 
2261  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2262  // valid for the new Map. Why can't we reuse it?
2263  const global_size_t GINV =
2265  return rcp (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2266  M->getComm (), M->getNode ()));
2267 }
2268 
2269 //
2270 // Explicit instantiation macro
2271 //
2272 // Must be expanded from within the Tpetra namespace!
2273 //
2274 
2276 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2277  \
2278  template class Map< LO , GO , NODE >; \
2279  \
2280  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2281  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2282  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2283  const Teuchos::RCP< NODE >& node); \
2284  \
2285  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2286  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2287  const size_t localNumElements, \
2288  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2289  const Teuchos::RCP< NODE > &node); \
2290  \
2291  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2292  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2293  const Teuchos::RCP<const Teuchos::Comm<int> > &comm, \
2294  const Teuchos::RCP<NODE> &node); \
2295  \
2296  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2297  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2298  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2299  const Teuchos::RCP< NODE > &node); \
2300  \
2301  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2302  createWeightedContigMapWithNode<LO,GO,NODE> (const int thisNodeWeight, \
2303  const global_size_t numElements, \
2304  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2305  const Teuchos::RCP< NODE >& node); \
2306  \
2307  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2308  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2309  \
2310  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2311  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2312  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2313  \
2314  template bool \
2315  Details::isLocallyFitted (const Tpetra::Map<LO, GO, NODE>& map1, \
2316  const Tpetra::Map<LO, GO, NODE>& map2);
2317 
2318 
2320 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2321  template Teuchos::RCP< const Map<LO,GO> > \
2322  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2323  \
2324  template Teuchos::RCP< const Map<LO,GO> > \
2325  createContigMap<LO,GO>( global_size_t, size_t, \
2326  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2327  \
2328  template Teuchos::RCP< const Map<LO,GO> > \
2329  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2330  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2331  \
2332  template Teuchos::RCP< const Map<LO,GO> > \
2333  createUniformContigMap<LO,GO>( const global_size_t, \
2334  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2335 
2336 #endif // TPETRA_MAP_DEF_HPP
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
Interface for breaking ties in ownership.
GlobalOrdinal getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createWeightedContigMapWithNode(const int thisNodeWeight, const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Non-member constructor for a contiguous Map with user-defined weights and a user-specified Kokkos Nod...
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
"Local" part of Map suitable for Kokkos kernels.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
Implementation details of Tpetra.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
size_t global_size_t
Global size_t object.
Traits class for "invalid" (flag) values of integer types that Tpetra uses as local ordinals or globa...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID is owned by only one process...
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=defaultArgNode< Node >())
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map with a user-spec...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
GlobalOrdinal getIndexBase() const
The index base for this Map.
node_type ::device_type device_type
The Kokkos device type over which to allocate Views and perform work.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
GlobalOrdinal getMinGlobalIndex() const
The minimum global index owned by the calling process.
Node node_type
The type of the Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map with the default...
bool isLocallyFitted(const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &map1, const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &map2)
Is map1 locally fitted to map2?
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Describes a parallel distribution of objects over processes.
Implement mapping from global ID to process ID and local ID.
Stand-alone utility functions and macros.
void initialize(const map_type &map)
Initialize the Directory with its Map.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a non-contiguous Map with the default Kokkos Node.
LocalGlobal
Enum for local versus global allocation of Map entries.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Non-member constructor for a non-contiguous Map with a user-specified Kokkos Node.
GlobalOrdinal getMaxGlobalIndex() const
The maximum global index owned by the calling process.
Map()
Default constructor (that does nothing).