Kokkos Core Kernels Package  Version of the Day
Kokkos_ExecPolicy.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_EXECPOLICY_HPP
45 #define KOKKOS_EXECPOLICY_HPP
46 
47 #include <Kokkos_Core_fwd.hpp>
48 #include <impl/Kokkos_Traits.hpp>
49 #include <impl/Kokkos_StaticAssert.hpp>
50 #include <impl/Kokkos_Error.hpp>
51 #include <impl/Kokkos_Tags.hpp>
52 #include <impl/Kokkos_AnalyzePolicy.hpp>
53 #include <Kokkos_Concepts.hpp>
54 #include <iostream>
55 
56 //----------------------------------------------------------------------------
57 
58 namespace Kokkos {
59 
81 template<class ... Properties>
83  : public Impl::PolicyTraits<Properties ... >
84 {
85 private:
86  typedef Impl::PolicyTraits<Properties ... > traits;
87 
88  typename traits::execution_space m_space ;
89  typename traits::index_type m_begin ;
90  typename traits::index_type m_end ;
91  typename traits::index_type m_granularity ;
92  typename traits::index_type m_granularity_mask ;
93 
94 public:
97  typedef typename traits::index_type member_type ;
98 
99  KOKKOS_INLINE_FUNCTION const typename traits::execution_space & space() const { return m_space ; }
100  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
101  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
102 
103  //TODO: find a better workaround for Clangs weird instantiation order
104  // This thing is here because of an instantiation error, where the RangePolicy is inserted into FunctorValue Traits, which
105  // tries decltype on the operator. It tries to do this even though the first argument of parallel for clearly doesn't match.
106  void operator()(const int&) const {}
107 
108  RangePolicy(const RangePolicy&) = default;
109  RangePolicy(RangePolicy&&) = default;
110 
111  inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
112 
114  inline
115  RangePolicy( const typename traits::execution_space & work_space
116  , const member_type work_begin
117  , const member_type work_end
118  )
119  : m_space( work_space )
120  , m_begin( work_begin < work_end ? work_begin : 0 )
121  , m_end( work_begin < work_end ? work_end : 0 )
122  , m_granularity(0)
123  , m_granularity_mask(0)
124  {
125  set_auto_chunk_size();
126  }
127 
129  inline
130  RangePolicy( const member_type work_begin
131  , const member_type work_end
132  )
133  : RangePolicy( typename traits::execution_space()
134  , work_begin , work_end )
135  {}
136 
137 public:
139  inline member_type chunk_size() const {
140  return m_granularity;
141  }
142 
144  inline RangePolicy set_chunk_size(int chunk_size_) const {
145  RangePolicy p = *this;
146  p.m_granularity = chunk_size_;
147  p.m_granularity_mask = p.m_granularity - 1;
148  return p;
149  }
150 
151 private:
153  inline void set_auto_chunk_size() {
154 
155  typename traits::index_type concurrency = traits::execution_space::concurrency();
156  if( concurrency==0 ) concurrency=1;
157 
158  if(m_granularity > 0) {
159  if(!Impl::is_integral_power_of_two( m_granularity ))
160  Kokkos::abort("RangePolicy blocking granularity must be power of two" );
161  }
162 
163  member_type new_chunk_size = 1;
164  while(new_chunk_size*100*concurrency < m_end-m_begin)
165  new_chunk_size *= 2;
166  if(new_chunk_size < 128) {
167  new_chunk_size = 1;
168  while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) )
169  new_chunk_size*=2;
170  }
171  m_granularity = new_chunk_size;
172  m_granularity_mask = m_granularity - 1;
173  }
174 
175 public:
180  struct WorkRange {
181  typedef typename RangePolicy::work_tag work_tag ;
182  typedef typename RangePolicy::member_type member_type ;
183 
184  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
185  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
186 
191  KOKKOS_INLINE_FUNCTION
192  WorkRange( const RangePolicy & range
193  , const int part_rank
194  , const int part_size
195  )
196  : m_begin(0), m_end(0)
197  {
198  if ( part_size ) {
199 
200  // Split evenly among partitions, then round up to the granularity.
201  const member_type work_part =
202  ( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
203  + range.m_granularity_mask ) & ~member_type(range.m_granularity_mask);
204 
205  m_begin = range.begin() + work_part * part_rank ;
206  m_end = m_begin + work_part ;
207 
208  if ( range.end() < m_begin ) m_begin = range.end() ;
209  if ( range.end() < m_end ) m_end = range.end() ;
210  }
211  }
212 
213  private:
214  member_type m_begin ;
215  member_type m_end ;
216  WorkRange();
217  WorkRange & operator = ( const WorkRange & );
218  };
219 };
220 
221 } // namespace Kokkos
222 
223 //----------------------------------------------------------------------------
224 //----------------------------------------------------------------------------
225 
226 namespace Kokkos {
227 
228 namespace Impl {
229 
230 template< class ExecSpace, class ... Properties>
231 class TeamPolicyInternal: public Impl::PolicyTraits<Properties ... > {
232 private:
233  typedef Impl::PolicyTraits<Properties ... > traits;
234 
235 public:
236 
237  //----------------------------------------
248  template< class FunctorType >
249  static int team_size_max( const FunctorType & );
250 
261  template< class FunctorType >
262  static int team_size_recommended( const FunctorType & );
263 
264  template< class FunctorType >
265  static int team_size_recommended( const FunctorType & , const int&);
266  //----------------------------------------
268  TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 );
269 
270  TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 );
271 
273  TeamPolicyInternal( int league_size_request , int team_size_request , int vector_length_request = 1 );
274 
275  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 );
276 
277 /* TeamPolicyInternal( int league_size_request , int team_size_request );
278 
279  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
280 
286  KOKKOS_INLINE_FUNCTION int league_size() const ;
287 
293  KOKKOS_INLINE_FUNCTION int team_size() const ;
294 
295  inline typename traits::index_type chunk_size() const ;
296 
297  inline TeamPolicyInternal set_chunk_size(int chunk_size) const ;
298 
302  struct member_type {
303 
305  KOKKOS_INLINE_FUNCTION
306  typename traits::execution_space::scratch_memory_space team_shmem() const ;
307 
309  KOKKOS_INLINE_FUNCTION int league_rank() const ;
310 
312  KOKKOS_INLINE_FUNCTION int league_size() const ;
313 
315  KOKKOS_INLINE_FUNCTION int team_rank() const ;
316 
318  KOKKOS_INLINE_FUNCTION int team_size() const ;
319 
321  KOKKOS_INLINE_FUNCTION void team_barrier() const ;
322 
324  template< class JoinOp >
325  KOKKOS_INLINE_FUNCTION
326  typename JoinOp::value_type team_reduce( const typename JoinOp::value_type
327  , const JoinOp & ) const ;
328 
334  template< typename Type >
335  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const ;
336 
346  template< typename Type >
347  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ;
348  };
349 };
350 
/** \brief  Integer wrapper whose type marks the value as a per-team quantity.
 *
 *  Accepted by TeamPolicy::set_scratch_size. The constructor is only declared
 *  here.
 */
struct PerTeamValue
{
  int value ;

  PerTeamValue( int arg );
};
355 
/** \brief  Integer wrapper whose type marks the value as a per-thread quantity.
 *
 *  Accepted by TeamPolicy::set_scratch_size. The constructor is only declared
 *  here.
 */
struct PerThreadValue
{
  int value ;

  PerThreadValue( int arg );
};
360 
361 }
362 
363 Impl::PerTeamValue PerTeam(const int& arg);
364 Impl::PerThreadValue PerThread(const int& arg);
365 
388 template< class ... Properties>
389 class TeamPolicy: public
390  Impl::TeamPolicyInternal<
391  typename Impl::PolicyTraits<Properties ... >::execution_space,
392  Properties ...> {
393  typedef Impl::TeamPolicyInternal<
394  typename Impl::PolicyTraits<Properties ... >::execution_space,
395  Properties ...> internal_policy;
396 
397  typedef Impl::PolicyTraits<Properties ... > traits;
398 
399 public:
401 
402  TeamPolicy& operator = (const TeamPolicy&) = default;
403 
405  TeamPolicy( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 )
406  : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {}
407 
408  TeamPolicy( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 )
409  : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(), vector_length_request) {}
410 
412  TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 )
413  : internal_policy(league_size_request,team_size_request, vector_length_request) {}
414 
415  TeamPolicy( int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 )
416  : internal_policy(league_size_request,Kokkos::AUTO(), vector_length_request) {}
417 
418 /* TeamPolicy( int league_size_request , int team_size_request )
419  : internal_policy(league_size_request,team_size_request) {}
420 
421  TeamPolicy( int league_size_request , const Kokkos::AUTO_t & )
422  : internal_policy(league_size_request,Kokkos::AUTO()) {}*/
423 
424 private:
425  TeamPolicy(const internal_policy& p):internal_policy(p) {}
426 public:
427 
428  inline TeamPolicy set_chunk_size(int chunk) const {
429  return TeamPolicy(internal_policy::set_chunk_size(chunk));
430  };
431 
432  inline TeamPolicy set_scratch_size(const int& level, const Impl::PerTeamValue& per_team) const {
433  return TeamPolicy(internal_policy::set_scratch_size(level,per_team));
434  };
435  inline TeamPolicy set_scratch_size(const int& level, const Impl::PerThreadValue& per_thread) const {
436  return TeamPolicy(internal_policy::set_scratch_size(level,per_thread));
437  };
438  inline TeamPolicy set_scratch_size(const int& level, const Impl::PerTeamValue& per_team, const Impl::PerThreadValue& per_thread) const {
439  return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
440  };
441  inline TeamPolicy set_scratch_size(const int& level, const Impl::PerThreadValue& per_thread, const Impl::PerTeamValue& per_team) const {
442  return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
443  };
444 
445 };
446 
447 namespace Impl {
448 
449 template<typename iType, class TeamMemberType>
450 struct TeamThreadRangeBoundariesStruct {
451 private:
452 
453  KOKKOS_INLINE_FUNCTION static
454  iType ibegin( const iType & arg_begin
455  , const iType & arg_end
456  , const iType & arg_rank
457  , const iType & arg_size
458  )
459  {
460  return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
461  }
462 
463  KOKKOS_INLINE_FUNCTION static
464  iType iend( const iType & arg_begin
465  , const iType & arg_end
466  , const iType & arg_rank
467  , const iType & arg_size
468  )
469  {
470  const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );
471  return end_ < arg_end ? end_ : arg_end ;
472  }
473 
474 public:
475 
476  typedef iType index_type;
477  const iType start;
478  const iType end;
479  enum {increment = 1};
480  const TeamMemberType& thread;
481 
482  KOKKOS_INLINE_FUNCTION
483  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
484  , const iType& arg_end
485  )
486  : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
487  , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
488  , thread( arg_thread )
489  {}
490 
491  KOKKOS_INLINE_FUNCTION
492  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
493  , const iType& arg_begin
494  , const iType& arg_end
495  )
496  : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
497  , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
498  , thread( arg_thread )
499  {}
500 };
501 
502 template<typename iType, class TeamMemberType>
503 struct ThreadVectorRangeBoundariesStruct {
504  typedef iType index_type;
505  enum {start = 0};
506  const iType end;
507  enum {increment = 1};
508 
509  KOKKOS_INLINE_FUNCTION
510  ThreadVectorRangeBoundariesStruct ( const TeamMemberType& thread, const iType& count ) : end( count ) {}
511 };
512 
513 template<class TeamMemberType>
514 struct ThreadSingleStruct {
515  const TeamMemberType& team_member;
516  KOKKOS_INLINE_FUNCTION
517  ThreadSingleStruct( const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
518 };
519 
520 template<class TeamMemberType>
521 struct VectorSingleStruct {
522  const TeamMemberType& team_member;
523  KOKKOS_INLINE_FUNCTION
524  VectorSingleStruct( const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
525 };
526 
527 } // namespace Impl
528 
535 template<typename iType, class TeamMemberType>
536 KOKKOS_INLINE_FUNCTION
537 Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
538 TeamThreadRange( const TeamMemberType&, const iType& count );
539 
546 template<typename iType1, typename iType2, class TeamMemberType>
547 KOKKOS_INLINE_FUNCTION
548 Impl::TeamThreadRangeBoundariesStruct<typename std::common_type<iType1, iType2>::type, TeamMemberType>
549 TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end );
550 
557 template<typename iType, class TeamMemberType>
558 KOKKOS_INLINE_FUNCTION
559 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
560 ThreadVectorRange( const TeamMemberType&, const iType& count );
561 
562 } // namespace Kokkos
563 
564 #endif /* #define KOKKOS_EXECPOLICY_HPP */
565 
566 //----------------------------------------------------------------------------
567 //----------------------------------------------------------------------------
member_type chunk_size() const
return chunk_size
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy execution_policy
Tag this class as an execution policy.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
TeamPolicy(const typename traits::execution_space &, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
RangePolicy set_chunk_size(int chunk_size_) const
set chunk_size to a discrete value
Execution policy for work over a range of an integral type.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.