Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
45 #define TPETRA_DETAILS_COPYOFFSETS_HPP
46 
51 
52 #include "TpetraCore_config.h"
53 #include "Kokkos_Core.hpp"
54 #include <limits>
55 #include <type_traits>
56 
57 namespace Tpetra {
58 namespace Details {
59 
60 //
61 // Implementation details for copyOffsets (see below).
62 // Users should skip over this anonymous namespace.
63 //
64 namespace { // (anonymous)
65 
// Implementation detail of copyOffsets (see below).
//
// Compile-time predicate answering: "Can a value of the input
// integer type T2 always be stored in the output integer type T1,
// so that the copy needs no overflow check?"  The member 'value' is
// true exactly when either
//
//   - T1 occupies more bytes than T2, or
//   - T1 and T2 occupy the same number of bytes, and T1 is unsigned
//     or both T1 and T2 are signed.
//
// Both T1 and T2 are assumed to be built-in integer types.  Note
// that an unsigned output paired with a signed input of no greater
// size counts as "fits"; negative input values are not considered
// by this test.
//
// The two trailing bool template parameters are not consulted when
// computing 'value'.
template<class T1, class T2,
         const bool T1_is_signed = std::is_signed<T1>::value,
         const bool T2_is_signed = std::is_signed<T2>::value>
struct OutputCanFitInput {
private:
  // Output type is strictly wider than the input type.
  static const bool outputIsWider = sizeof (T1) > sizeof (T2);
  // Output and input types have exactly the same width.
  static const bool sameWidth = sizeof (T1) == sizeof (T2);
  // Both the output and the input types are signed.
  static const bool bothSigned =
    std::is_signed<T1>::value && std::is_signed<T2>::value;
  // For equal widths: the signedness combination that is accepted.
  static const bool signednessOk =
    std::is_unsigned<T1>::value || bothSigned;

public:
  static const bool value = outputIsWider || (sameWidth && signednessOk);
};
83 
84  // Implementation detail of copyOffsets (see below).
85  //
86  // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
87  // Tpetra::Details::FixedHashTable uses this in its "copy"
88  // constructor for converting between different Device types. All
89  // the action happens in the partial specializations for different
90  // values of outputCanFitInput. "Output can fit input" means that
91  // casting the input's value type to the output's value type will
92  // never result in integer overflow.
93  template<class OutputViewType,
94  class InputViewType,
95  const bool outputCanFitInput =
96  OutputCanFitInput<typename OutputViewType::non_const_value_type,
97  typename InputViewType::non_const_value_type>::value>
98  class CopyOffsetsFunctor {};
99 
100  // Specialization for when overflow is possible.
101  template<class OutputViewType, class InputViewType>
102  class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
103  public:
104  typedef typename OutputViewType::execution_space execution_space;
105  typedef typename OutputViewType::size_type size_type;
106  typedef int value_type;
107 
108  typedef typename InputViewType::non_const_value_type input_value_type;
109  typedef typename OutputViewType::non_const_value_type output_value_type;
110 
111  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
112  dst_ (dst),
113  src_ (src),
114  // We know that output_value_type cannot fit all values of
115  // input_value_type, so an input_value_type can fit all values
116  // of output_value_type. This means we can convert from
117  // output_value_type to input_value_type. This is how we test
118  // whether a given input_value_type value can fit in an
119  // output_value_type.
120  minDstVal_ (static_cast<input_value_type> (std::numeric_limits<output_value_type>::min ())),
121  maxDstVal_ (static_cast<input_value_type> (std::numeric_limits<output_value_type>::max ()))
122  {
123  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
124  // a memory space, rather than an execution space, as the first
125  // argument of VerifyExecutionCanAccessMemorySpace.
126  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
127  typename OutputViewType::memory_space,
128  typename InputViewType::memory_space>::value,
129  "CopyOffsetsFunctor (implements copyOffsets): Output "
130  "View's space must be able to access the input View's "
131  "memory space.");
132  }
133 
134  KOKKOS_INLINE_FUNCTION void
135  operator () (const size_type& i, value_type& noOverflow) const {
136  const input_value_type src_i = src_(i);
137  if (src_i < minDstVal_ || src_i > maxDstVal_) {
138  noOverflow = 0;
139  }
140  dst_(i) = static_cast<output_value_type> (src_i);
141  }
142 
143  KOKKOS_INLINE_FUNCTION void init (value_type& noOverflow) const {
144  noOverflow = 1; // success (no overflow)
145  }
146 
147  KOKKOS_INLINE_FUNCTION void
148  join (volatile value_type& result,
149  const volatile value_type& current) const {
150  result = (result>0 && current>0)?1:0; // was there any overflow?
151  }
152 
153  private:
154  OutputViewType dst_;
155  InputViewType src_;
156  input_value_type minDstVal_;
157  input_value_type maxDstVal_;
158  };
159 
160  // Specialization for when overflow is impossible.
161  template<class OutputViewType, class InputViewType>
162  class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
163  public:
164  typedef typename OutputViewType::execution_space execution_space;
165  typedef typename OutputViewType::size_type size_type;
166  typedef int value_type;
167 
168  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
169  dst_ (dst),
170  src_ (src)
171  {
172  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
173  // a memory space, rather than an execution space, as the first
174  // argument of VerifyExecutionCanAccessMemorySpace.
175  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
176  typename OutputViewType::memory_space,
177  typename InputViewType::memory_space>::value,
178  "CopyOffsetsFunctor (implements copyOffsets): Output "
179  "View's space must be able to access the input View's "
180  "memory space.");
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  operator () (const size_type& i, value_type& /* noOverflow */) const {
185  // Overflow is impossible in this case, so there's no need to check.
186  dst_(i) = src_(i);
187  }
188 
189  KOKKOS_INLINE_FUNCTION void init (value_type& noOverflow) const {
190  noOverflow = 1; // success (no overflow)
191  }
192 
193  KOKKOS_INLINE_FUNCTION void
194  join (volatile value_type& result,
195  const volatile value_type& current) const {
196  result = (result>0 && current>0)?1:0; // was there any overflow?
197  }
198 
199  private:
200  OutputViewType dst_;
201  InputViewType src_;
202  };
203 
204  // Implementation detail of copyOffsets (see below).
205  //
206  // We specialize copyOffsets on two different conditions:
207  //
208  // 1. Are the two Views' layouts the same, and do the input and
209  // output Views have the same value type?
210  // 2. Can the output View's execution space access the input View's
211  // memory space?
212  //
213  // If (1) is true, that makes the implementation simple: just call
214  // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
215  // matter the device type). Otherwise, we need a custom copy
216  // functor. If (2) is true, then we can use CopyOffsetsFunctor
217  // directly. Otherwise, we have to copy the input View into the
218  // output View's memory space, before we can use the functor.
219  //
220  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use a
221  // memory space, rather than an execution space, as the first
222  // argument of VerifyExecutionCanAccessMemorySpace.
223  template<class OutputViewType,
224  class InputViewType,
225  const bool sameLayoutsSameOffsetTypes =
226  std::is_same<typename OutputViewType::array_layout,
227  typename InputViewType::array_layout>::value &&
228  std::is_same<typename OutputViewType::non_const_value_type,
229  typename InputViewType::non_const_value_type>::value,
230  const bool outputExecSpaceCanAccessInputMemSpace =
231  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
232  typename OutputViewType::memory_space,
233  typename InputViewType::memory_space>::value>
234  struct CopyOffsetsImpl {
235  static void run (const OutputViewType& dst, const InputViewType& src);
236  };
237 
238  // Specialization for sameLayoutsSameOffsetTypes = true:
239  //
240  // If both input and output Views have the same layout, and both
241  // input and output use the same type for offsets, then we don't
242  // need to check for overflow, and we can use Kokkos::deep_copy
243  // directly. It doesn't matter whether the output execution space
244  // can access the input memory space: Kokkos::deep_copy takes care
245  // of the details.
246  template<class OutputViewType,
247  class InputViewType,
248  const bool outputExecSpaceCanAccessInputMemSpace>
249  struct CopyOffsetsImpl<OutputViewType, InputViewType,
250  true, outputExecSpaceCanAccessInputMemSpace> {
251  static void run (const OutputViewType& dst, const InputViewType& src) {
252  static_assert (std::is_same<typename OutputViewType::non_const_value_type,
253  typename InputViewType::non_const_value_type>::value,
254  "CopyOffsetsImpl (implementation of copyOffsets): In order"
255  " to call this specialization, the input and output must "
256  "use the same offset type.");
257  static_assert (static_cast<int> (OutputViewType::rank) ==
258  static_cast<int> (InputViewType::rank),
259  "CopyOffsetsImpl (implementation of copyOffsets): In order"
260  " to call this specialization, src and dst must have the "
261  "same rank.");
262  static_assert (std::is_same<typename OutputViewType::array_layout,
263  typename InputViewType::array_layout>::value,
264  "CopyOffsetsImpl (implementation of copyOffsets): In order"
265  " to call this specialization, src and dst must have the "
266  "the same array_layout.");
267  Kokkos::deep_copy (dst, src);
268  }
269  };
270 
271  // Specializations for sameLayoutsSameOffsetTypes = false:
272  //
273  // If input and output don't have the same layout, or use different
274  // types for offsets, then we can't use Kokkos::deep_copy directly,
275  // and we may have to check for overflow.
276 
277  // Specialization for sameLayoutsSameOffsetTypes = false and
278  // outputExecSpaceCanAccessInputMemSpace = true:
279  //
280  // If the output execution space can access the input memory space,
281  // then we can use CopyOffsetsFunctor directly.
282  template<class OutputViewType,
283  class InputViewType>
284  struct CopyOffsetsImpl<OutputViewType, InputViewType,
285  false, true> {
286  static void run (const OutputViewType& dst, const InputViewType& src) {
287  static_assert (static_cast<int> (OutputViewType::rank) ==
288  static_cast<int> (InputViewType::rank),
289  "CopyOffsetsImpl (implementation of copyOffsets): "
290  "src and dst must have the same rank.");
291  constexpr bool sameLayoutsSameOffsetTypes =
292  std::is_same<typename OutputViewType::array_layout,
293  typename InputViewType::array_layout>::value &&
294  std::is_same<typename OutputViewType::non_const_value_type,
295  typename InputViewType::non_const_value_type>::value;
296  static_assert (! sameLayoutsSameOffsetTypes,
297  "CopyOffsetsImpl (implements copyOffsets): In order to "
298  "call this specialization, sameLayoutsSameOffsetTypes "
299  "must be false. That is, either the input and output "
300  "must have different array layouts, or their value types "
301  "must differ.");
302  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
303  // a memory space, rather than an execution space, as the first
304  // argument of VerifyExecutionCanAccessMemorySpace.
305  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
306  typename OutputViewType::memory_space,
307  typename InputViewType::memory_space>::value,
308  "CopyOffsetsImpl (implements copyOffsets): In order to "
309  "call this specialization, the output View's space must "
310  "be able to access the input View's memory space.");
311  typedef CopyOffsetsFunctor<OutputViewType, InputViewType> functor_type;
312  int noOverflow = 0; // output argument of the reduction
313  Kokkos::parallel_reduce (dst.dimension_0 (),
314  functor_type (dst, src),
315  noOverflow);
316  TEUCHOS_TEST_FOR_EXCEPTION
317  (noOverflow==0, std::runtime_error, "copyOffsets: One or more values in "
318  "src were too big (in the sense of integer overflow) to fit in dst.");
319  }
320  };
321 
322  // Specialization for sameLayoutsSameOffsetTypes = false and
323  // outputExecSpaceCanAccessInputMemSpace = false.
324  //
325  // If the output execution space canNOT access the input memory
326  // space, then we can't use CopyOffsetsFunctor directly. Instead,
327  // tell Kokkos to copy the input View's data into the output View's
328  // memory space _first_. Since the offset types are different for
329  // this specialization, we can't just call Kokkos::deep_copy
330  // directly between the input and output Views of offsets; that
331  // wouldn't compile.
332  //
333  // This case can and does come up in practice: If the output View's
334  // execution space is Cuda, it cannot currently access host memory
335  // (that's the opposite direction from what UVM allows).
336  // Furthermore, that case specifically requires overflow checking,
337  // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
338  // offset type than Kokkos' host spaces.
339  template<class OutputViewType, class InputViewType>
340  struct CopyOffsetsImpl<OutputViewType, InputViewType,
341  false, false> {
342  static void run (const OutputViewType& dst, const InputViewType& src) {
343  static_assert (static_cast<int> (OutputViewType::rank) ==
344  static_cast<int> (InputViewType::rank),
345  "CopyOffsetsImpl (implementation of copyOffsets): In order"
346  " to call this specialization, src and dst must have the "
347  "same rank.");
348  constexpr bool sameLayoutsSameOffsetTypes =
349  std::is_same<typename OutputViewType::array_layout,
350  typename InputViewType::array_layout>::value &&
351  std::is_same<typename OutputViewType::non_const_value_type,
352  typename InputViewType::non_const_value_type>::value;
353  static_assert (! sameLayoutsSameOffsetTypes,
354  "CopyOffsetsImpl (implements copyOffsets): In order to "
355  "call this specialization, sameLayoutsSameOffsetTypes "
356  "must be false. That is, either the input and output "
357  "must have different array layouts, or their value types "
358  "must differ.");
359 
360  typedef Kokkos::View<typename InputViewType::non_const_value_type*,
361  Kokkos::LayoutLeft,
362  typename OutputViewType::device_type>
363  output_space_copy_type;
364  using Kokkos::ViewAllocateWithoutInitializing;
365  output_space_copy_type
366  outputSpaceCopy (ViewAllocateWithoutInitializing ("outputSpace"),
367  src.dimension_0 ());
368  Kokkos::deep_copy (outputSpaceCopy, src);
369 
370  // The output View's execution space can access
371  // outputSpaceCopy's data, so we can run the functor now.
372  typedef CopyOffsetsFunctor<OutputViewType,
373  output_space_copy_type> functor_type;
374  int noOverflow = 0;
375  Kokkos::parallel_reduce (dst.dimension_0 (),
376  functor_type (dst, outputSpaceCopy),
377  noOverflow);
378  TEUCHOS_TEST_FOR_EXCEPTION
379  (noOverflow==0, std::runtime_error, "copyOffsets: One or more values "
380  "in src were too big (in the sense of integer overflow) to fit in "
381  "dst.");
382  }
383  };
384 } // namespace (anonymous)
385 
397 template<class OutputViewType, class InputViewType>
398 void
399 copyOffsets (const OutputViewType& dst, const InputViewType& src)
400 {
401  static_assert (Kokkos::Impl::is_view<OutputViewType>::value,
402  "OutputViewType (the type of dst) must be a Kokkos::View.");
403  static_assert (Kokkos::Impl::is_view<InputViewType>::value,
404  "InputViewType (the type of src) must be a Kokkos::View.");
405  static_assert (std::is_same<typename OutputViewType::value_type,
406  typename OutputViewType::non_const_value_type>::value,
407  "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
408  static_assert (static_cast<int> (OutputViewType::rank) == 1,
409  "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
410  static_assert (static_cast<int> (InputViewType::rank) == 1,
411  "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
412  static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
413  "The entries of dst must be built-in integers.");
414  static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
415  "The entries of src must be built-in integers.");
416 
417  TEUCHOS_TEST_FOR_EXCEPTION
418  (dst.dimension_0 () != src.dimension_0 (), std::invalid_argument,
419  "copyOffsets: dst.dimension_0() = " << dst.dimension_0 ()
420  << " != src.dimension_0() = " << src.dimension_0 () << ".");
421 
422  CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
423 }
424 
425 } // namespace Details
426 } // namespace Tpetra
427 
428 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Implementation details of Tpetra.