Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_MultiVectorDistObjectKernels_decl.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_DECL_HPP
50 #define TPETRA_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_DECL_HPP
51 
52 #include "Tpetra_ConfigDefs.hpp"
53 #if TPETRA_USE_KOKKOS_DISTOBJECT
54 
55 #include "Kokkos_Core.hpp"
56 
57 // Don't include Teuchos_ScalarTraits.hpp here because we want a different
58 // version for CPU versus GPU
59 
60 namespace Tpetra {
61 namespace Details {
62 
63  // Functors for implementing packAndPrepare and unpackAndCombine
64  // through parallel_for
65 
66  template <typename Scalar, typename LocalOrdinal, typename Device>
67  struct PackArraySingleColumnConstantStride {
68  typedef Device execution_space;
69  typedef typename execution_space::size_type size_type;
70 
71  Kokkos::View<const LocalOrdinal*, execution_space> exportLIDs;
72  Kokkos::View<const Scalar*, execution_space, Kokkos::MemoryUnmanaged> src;
73  Kokkos::View<Scalar*, execution_space> exports;
74 
75  KOKKOS_INLINE_FUNCTION
76  void operator()( const size_type k ) const {
77  exports[k] = src[exportLIDs[k]];
78  }
79 
80  void pack();
81  };
82 
83  template <typename Scalar, typename LocalOrdinal, typename Device>
84  struct PackArraySingleColumnOffset {
85  typedef Device execution_space;
86  typedef typename execution_space::size_type size_type;
87 
88  Kokkos::View<const LocalOrdinal*, execution_space> exportLIDs;
89  Kokkos::View<const Scalar*, execution_space, Kokkos::MemoryUnmanaged> src;
90  Kokkos::View<Scalar*, execution_space> exports;
91  size_t offset;
92 
93  KOKKOS_INLINE_FUNCTION
94  void operator()( const size_type k ) const {
95  exports[k] = src[exportLIDs[k] + offset];
96  }
97 
98  void pack();
99  };
100 
101  template <typename Scalar, typename LocalOrdinal, typename Device>
102  struct PackArrayMultiColumnConstantStride {
103  typedef Device execution_space;
104  typedef typename execution_space::size_type size_type;
105 
106  Kokkos::View<const LocalOrdinal*, execution_space> exportLIDs;
107  Kokkos::View<const Scalar*, execution_space, Kokkos::MemoryUnmanaged> src;
108  Kokkos::View<Scalar*, execution_space> exports;
109  size_t stride, numCols;
110 
111  KOKKOS_INLINE_FUNCTION
112  void operator()( const size_type k ) const {
113  const size_t localRow = exportLIDs[k]; // should use as<size_t>()
114  const size_t offset = k*numCols;
115  for (size_t j = 0; j < numCols; ++j)
116  exports[offset + j] = src[localRow + j*stride];
117  }
118 
119  void pack();
120  };
121 
122  template <typename Scalar, typename LocalOrdinal, typename Device>
123  struct PackArrayMultiColumnVariableStride {
124  typedef Device execution_space;
125  typedef typename execution_space::size_type size_type;
126 
127  Kokkos::View<const LocalOrdinal*, execution_space> exportLIDs;
128  Kokkos::View<const size_t*, execution_space> srcWhichVectors;
129  Kokkos::View<const Scalar*, execution_space, Kokkos::MemoryUnmanaged> src;
130  Kokkos::View<Scalar*, execution_space> exports;
131  size_t stride, numCols;
132 
133  KOKKOS_INLINE_FUNCTION
134  void operator()( const size_type k ) const {
135  const size_t localRow = exportLIDs[k]; // should use as<size_t>()
136  const size_t offset = k*numCols;
137  for (size_t j = 0; j < numCols; ++j)
138  exports[offset + j] = src[localRow + srcWhichVectors[j]*stride];
139  }
140 
141  void pack();
142  };
143 
144  struct InsertOp {
145  template <typename Scalar>
146  KOKKOS_INLINE_FUNCTION
147  void operator() (Scalar& dest, const Scalar& src) const {
148  //dest = src;
149  Kokkos::atomic_exchange(&dest, src);
150  }
151  };
152  struct AddOp {
153  template <typename Scalar>
154  KOKKOS_INLINE_FUNCTION
155  void operator() (Scalar& dest, const Scalar& src) const {
156  //dest += src;
157  Kokkos::atomic_fetch_add(&dest, src);
158  }
159  };
160  struct AbsMaxOp {
161  template <typename Scalar>
162  KOKKOS_INLINE_FUNCTION
163  Scalar max(const Scalar& a, const Scalar& b) const {
164  return a > b ? a : b ;
165  }
166 
167  template <typename Scalar>
168  KOKKOS_INLINE_FUNCTION
169  void operator() (Scalar& dest, const Scalar& src) const {
170  typedef Teuchos::ScalarTraits<Scalar> SCT;
171  //dest = max( SCT::magnitude(dest), SCT::magnitude(src) );
172  Kokkos::atomic_exchange(
173  &dest, max( SCT::magnitude(dest), SCT::magnitude(src) ) );
174  }
175  };
176 
177  template <typename Scalar, typename LocalOrdinal, typename Op, typename Device>
178  struct UnpackArrayMultiColumnConstantStride {
179  typedef Device execution_space;
180  typedef typename execution_space::size_type size_type;
181 
182  Kokkos::View<const LocalOrdinal*, execution_space> importLIDs;
183  Kokkos::View<const Scalar*, execution_space> imports;
184  Kokkos::View<Scalar*, execution_space, Kokkos::MemoryUnmanaged> dest;
185  size_t stride, numCols;
186  Op op;
187 
188  KOKKOS_INLINE_FUNCTION
189  void operator()( const size_type k ) const {
190  const size_t localRow = importLIDs[k]; // should use as<size_t>()
191  const size_t offset = k*numCols;
192  for (size_t j = 0; j < numCols; ++j)
193  op(dest[localRow + j*stride], imports[offset + j]);
194  }
195 
196  void unpack();
197  };
198 
199  template <typename Scalar, typename LocalOrdinal, typename Op, typename Device>
200  struct UnpackArrayMultiColumnVariableStride {
201  typedef Device execution_space;
202  typedef typename execution_space::size_type size_type;
203 
204  Kokkos::View<const LocalOrdinal*, execution_space> importLIDs;
205  Kokkos::View<const size_t*, execution_space> whichVectors;
206  Kokkos::View<const Scalar*, execution_space> imports;
207  Kokkos::View<Scalar*, execution_space, Kokkos::MemoryUnmanaged> dest;
208  size_t stride, numCols;
209  Op op;
210 
211  KOKKOS_INLINE_FUNCTION
212  void operator()( const size_type k ) const {
213  const size_t localRow = importLIDs[k]; // should use as<size_t>()
214  const size_t offset = k*numCols;
215  for (size_t j = 0; j < numCols; ++j)
216  op(dest[localRow + whichVectors[j]*stride], imports[offset + j]);
217  }
218 
219  void unpack();
220  };
221 
222  template <typename Scalar, typename LocalOrdinal, typename Device>
223  struct PermuteArrayMultiColumnConstantStride {
224  typedef Device execution_space;
225  typedef typename execution_space::size_type size_type;
226 
227  Kokkos::View<const LocalOrdinal*, execution_space> permuteToLIDs;
228  Kokkos::View<const LocalOrdinal*, execution_space> permuteFromLIDs;
229  Kokkos::View<const Scalar*, execution_space, Kokkos::MemoryUnmanaged> src;
230  Kokkos::View<Scalar*, execution_space, Kokkos::MemoryUnmanaged> dest;
231  size_t src_stride, dest_stride, numCols;
232 
233  KOKKOS_INLINE_FUNCTION
234  void operator()( const size_type k ) const {
235  const size_t toRow = permuteToLIDs[k]; // should use as<size_t>()
236  const size_t fromRow = permuteFromLIDs[k]; // should use as<size_t>()
237  for (size_t j = 0; j < numCols; ++j)
238  dest[toRow + j*dest_stride] = src[fromRow + j*src_stride];
239  }
240 
241  void permute();
242  };
243 
244  template <typename Scalar, typename LocalOrdinal, typename Device>
245  struct PermuteArrayMultiColumnVariableStride {
246  typedef Device execution_space;
247  typedef typename execution_space::size_type size_type;
248 
249  Kokkos::View<const LocalOrdinal*, execution_space> permuteToLIDs;
250  Kokkos::View<const LocalOrdinal*, execution_space> permuteFromLIDs;
251  Kokkos::View<const size_t*, execution_space> src_whichVectors;
252  Kokkos::View<const size_t*, execution_space> dest_whichVectors;
253  Kokkos::View<const Scalar*, execution_space, Kokkos::MemoryUnmanaged> src;
254  Kokkos::View<Scalar*, execution_space, Kokkos::MemoryUnmanaged> dest;
255  size_t src_stride, dest_stride, numCols;
256 
257  KOKKOS_INLINE_FUNCTION
258  void operator()( const size_type k ) const {
259  const size_t toRow = permuteToLIDs[k]; // should use as<size_t>()
260  const size_t fromRow = permuteFromLIDs[k]; // should use as<size_t>()
261  for (size_t j = 0; j < numCols; ++j)
262  dest[toRow + dest_whichVectors[j]*dest_stride] =
263  src[fromRow + src_whichVectors[j]*src_stride];
264  }
265 
266  void permute();
267  };
268 
269 } // Details namespace
270 } // Tpetra namespace
271 
272 #endif // TPETRA_USE_KOKKOS_DISTOBJECT
273 
274 #endif // TPETRA_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_DECL_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Implementation details of Tpetra.