Kokkos_CudaSpace.hpp
/*
//@HEADER
// ************************************************************************
//
//                        Kokkos v. 2.0
//              Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Core_fwd.hpp>

#if defined( KOKKOS_HAVE_CUDA )

#include <iosfwd>
#include <typeinfo>
#include <string>

#include <Kokkos_HostSpace.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>

/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Memory management for CUDA device (global) memory. */
class CudaSpace {
public:

  //! Tag this class as a Kokkos memory space.
  typedef CudaSpace     memory_space ;
  typedef Kokkos::Cuda  execution_space ;

  typedef unsigned int  size_type ;

  /*--------------------------------*/

  CudaSpace();
  CudaSpace( CudaSpace && rhs ) = default ;
  CudaSpace( const CudaSpace & rhs ) = default ;
  CudaSpace & operator = ( CudaSpace && rhs ) = default ;
  CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
  ~CudaSpace() = default ;

  /**\brief  Allocate untracked memory in this space. */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in this space. */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /*--------------------------------*/
  /**\brief  Error reporting for an attempted host access of CudaSpace memory. */
  static void access_error();
  static void access_error( const void * const );

private:

  int m_device ;

  // friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
};
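
/* Example (illustrative sketch, not part of this header): raw, untracked
 * allocation and deallocation through CudaSpace, using only the interface
 * declared above.  Assumes Kokkos::initialize() has already been called with
 * CUDA enabled; error handling is omitted.
 *
 *   Kokkos::CudaSpace space ;
 *
 *   // 1024 doubles in CUDA device global memory.
 *   void * const ptr = space.allocate( 1024 * sizeof(double) );
 *
 *   // ... pass 'ptr' to kernels running in the Kokkos::Cuda space ...
 *
 *   // The caller must pass back the original allocation size.
 *   space.deallocate( ptr , 1024 * sizeof(double) );
 */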

namespace Impl {

//! Initialize the lock arrays used by Cuda atomic operations and scratch memory.
void init_lock_arrays_cuda_space();

/** \brief  Retrieve the pointer to the device lock array used for
 *          arbitrary-sized atomic operations; pass deallocate = true
 *          to free the array instead.
 */
int* atomic_lock_array_cuda_space_ptr(bool deallocate = false);

/** \brief  Retrieve the pointer to the device scratch-memory lock array;
 *          pass deallocate = true to free the array instead.
 */
int* scratch_lock_array_cuda_space_ptr(bool deallocate = false);

/** \brief  Retrieve the pointer to the device thread-id lock array;
 *          pass deallocate = true to free the array instead.
 */
int* threadid_lock_array_cuda_space_ptr(bool deallocate = false);

} // namespace Impl
} // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Memory management for CUDA unified virtual memory (UVM),
 *          which is addressable from both the host and the device.
 */
class CudaUVMSpace {
public:

  //! Tag this class as a Kokkos memory space.
  typedef CudaUVMSpace  memory_space ;
  typedef Cuda          execution_space ;
  typedef unsigned int  size_type ;

  /** \brief  Query whether UVM allocation is available on the current device. */
  static bool available();

  /*--------------------------------*/

  CudaUVMSpace();
  CudaUVMSpace( CudaUVMSpace && rhs ) = default ;
  CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
  CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ;
  CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
  ~CudaUVMSpace() = default ;

  /**\brief  Allocate untracked memory in this space. */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in this space. */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /*--------------------------------*/

private:
  int m_device ;
};

} // namespace Kokkos
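
/* Example (illustrative sketch): data allocated in CudaUVMSpace is
 * addressable from host code as well as from Cuda kernels.  Host access is
 * only safe when no kernel is concurrently touching the allocation, so a
 * fence is required between device and host use.  Assumes a CUDA build with
 * device-lambda support enabled.
 *
 *   Kokkos::View< double * , Kokkos::CudaUVMSpace > a( "A" , 100 );
 *
 *   a(0) = 1.0 ;  // direct host write through UVM
 *
 *   Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >( 0 , 100 ) ,
 *     KOKKOS_LAMBDA( const int i ) { a(i) = 2.0 * i ; } );
 *
 *   Kokkos::fence();  // wait for the kernel before reading on the host
 *
 *   const double x = a(42);
 */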

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Memory management for page-locked (pinned) host memory that is
 *          directly accessible to the CUDA device.
 */
class CudaHostPinnedSpace {
public:

  //! Tag this class as a Kokkos memory space.
  typedef HostSpace::execution_space  execution_space ;
  typedef CudaHostPinnedSpace         memory_space ;
  typedef unsigned int                size_type ;

  /*--------------------------------*/

  CudaHostPinnedSpace();
  CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ;
  CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
  CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ;
  CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
  ~CudaHostPinnedSpace() = default ;

  /**\brief  Allocate untracked memory in this space. */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in this space. */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /*--------------------------------*/
};

} // namespace Kokkos
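
/* Example (illustrative sketch): pinned host memory is typically used as a
 * staging buffer, since transfers between pinned host memory and the device
 * can run asynchronously and at full bus bandwidth.  The View names below
 * are arbitrary.
 *
 *   Kokkos::View< double * , Kokkos::CudaHostPinnedSpace > staging( "staging" , 100 );
 *   Kokkos::View< double * , Kokkos::CudaSpace >           device ( "device"  , 100 );
 *
 *   for ( int i = 0 ; i < 100 ; ++i ) staging(i) = double(i);  // fill on host
 *
 *   Kokkos::deep_copy( device , staging );  // pinned-to-device transfer
 */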

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

/** \brief  Access relationship between DstMemorySpace and SrcMemorySpace. */

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum { assignable = false };
  enum { accessible = true  };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum { assignable = true  };
  enum { accessible = true  };
  enum { deepcopy   = true  };
};


template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace > {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
  enum { assignable = true  };
  enum { accessible = true  };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true  }; // CudaSpace::execution_space
  enum { deepcopy   = true  };
};


template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace > {
  // CudaUVMSpace::execution_space == Cuda
  // Cuda cannot access HostSpace
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
  // CudaUVMSpace::execution_space == CudaSpace::execution_space
  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
  enum { assignable = false };

  // CudaUVMSpace::execution_space can access CudaSpace
  enum { accessible = true  };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true  }; // CudaUVMSpace::execution_space
  enum { deepcopy   = true  };
};


template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace > {
  // Can access CudaHostPinnedSpace from Cuda
  // Cannot access HostSpace from Cuda
  enum { assignable = false }; // Cannot access from Cuda
  enum { accessible = false }; // CudaHostPinnedSpace::execution_space
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
  enum { assignable = false }; // Cannot access from Host
  enum { accessible = false };
  enum { deepcopy   = true  };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
  enum { assignable = false }; // different execution_space
  enum { accessible = true  }; // same accessibility
  enum { deepcopy   = true  };
};

}} // namespace Kokkos::Impl
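
/* Example (illustrative sketch): the access matrix above can be queried at
 * compile time, e.g. to guard a template with a static_assert.  Both
 * assertions below follow directly from the specializations in this header.
 *
 *   static_assert( Kokkos::Impl::MemorySpaceAccess<
 *                    Kokkos::CudaSpace , Kokkos::CudaUVMSpace >::accessible
 *                , "Cuda kernels must be able to access CudaUVMSpace" );
 *
 *   static_assert( ! Kokkos::Impl::MemorySpaceAccess<
 *                      Kokkos::HostSpace , Kokkos::CudaSpace >::accessible
 *                , "Host code must not dereference CudaSpace pointers" );
 */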

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

void DeepCopyAsyncCuda( void * dst , const void * src , size_t n );

template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template< class ExecutionSpace > struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace > struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};


template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace > struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};


template< class ExecutionSpace > struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace > struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace > struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace > struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};


template< class ExecutionSpace > struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace > struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

} // namespace Impl
} // namespace Kokkos
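
/* Example (illustrative sketch): these Impl specializations back the public
 * Kokkos::deep_copy() interface, which is the intended way to move data
 * between CudaSpace and HostSpace views.
 *
 *   Kokkos::View< double * , Kokkos::CudaSpace > dev( "dev" , 100 );
 *   auto host = Kokkos::create_mirror_view( dev );   // HostSpace mirror
 *
 *   for ( int i = 0 ; i < 100 ; ++i ) host(i) = double(i);
 *
 *   Kokkos::deep_copy( dev , host );   // HostSpace -> CudaSpace
 *   Kokkos::deep_copy( host , dev );   // CudaSpace -> HostSpace
 */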

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** Running in CudaSpace attempting to access HostSpace: error. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify( void )
  { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
  { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
};

/** Running in CudaSpace accessing CudaUVMSpace: ok. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};

/** Running in CudaSpace accessing CudaHostPinnedSpace: ok. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};

/** Running in CudaSpace attempting to access an unknown space: error. */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
  typename enable_if< ! is_same< Kokkos::CudaSpace , OtherSpace >::value , Kokkos::CudaSpace >::type ,
  OtherSpace >
{
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify( void )
  { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
  { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
};

//----------------------------------------------------------------------------
/** Running in HostSpace attempting to access CudaSpace: error. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
  enum { value = false };
  inline static void verify( void ) { CudaSpace::access_error(); }
  inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};

/** Running in HostSpace accessing CudaUVMSpace: ok. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };
  inline static void verify( void ) { }
  inline static void verify( const void * ) { }
};

/** Running in HostSpace accessing CudaHostPinnedSpace: ok. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) {}
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};

} // namespace Impl
} // namespace Kokkos
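
/* Example (illustrative sketch): the trait can also be used directly in a
 * static_assert to document an access constraint at compile time; the value
 * below follows from the specialization in this header.
 *
 *   static_assert( ! Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
 *                      Kokkos::CudaSpace , Kokkos::HostSpace >::value
 *                , "Cuda kernels must not dereference HostSpace pointers" );
 */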

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static ::cudaTextureObject_t
  attach_texture_object( const unsigned sizeof_alias
                       , void * const   alloc_ptr
                       , const size_t   alloc_size );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t    m_tex_obj ;
  const Kokkos::CudaSpace  m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord( const Kokkos::CudaSpace        & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
                                          , const std::string       & arg_label
                                          , const size_t              arg_alloc_size );

  /**\brief  Allocate tracked memory in the space. */
  static
  void * allocate_tracked( const Kokkos::CudaSpace & arg_space
                         , const std::string       & arg_label
                         , const size_t              arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space. */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space. */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
  {
    static_assert( ( std::is_same< AliasType , int >::value ||
                     std::is_same< AliasType , ::int2 >::value ||
                     std::is_same< AliasType , ::int4 >::value )
                 , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

    if ( m_tex_obj == 0 ) {
      m_tex_obj = attach_texture_object( sizeof(AliasType)
                                       , (void*) RecordBase::m_alloc_ptr
                                       , RecordBase::m_alloc_size );
    }

    return m_tex_obj ;
  }

  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
  {
    // The texture object is attached to the entire allocation range.
    return ptr - reinterpret_cast< AliasType * >( RecordBase::m_alloc_ptr );
  }

  static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};


template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t       m_tex_obj ;
  const Kokkos::CudaUVMSpace  m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord( const Kokkos::CudaUVMSpace     & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
                                          , const std::string          & arg_label
                                          , const size_t                 arg_alloc_size
                                          );

  /**\brief  Allocate tracked memory in the space. */
  static
  void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
                         , const std::string          & arg_label
                         , const size_t                 arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space. */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space. */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
  {
    static_assert( ( std::is_same< AliasType , int >::value ||
                     std::is_same< AliasType , ::int2 >::value ||
                     std::is_same< AliasType , ::int4 >::value )
                 , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

    if ( m_tex_obj == 0 ) {
      m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
        attach_texture_object( sizeof(AliasType)
                             , (void*) RecordBase::m_alloc_ptr
                             , RecordBase::m_alloc_size );
    }

    return m_tex_obj ;
  }

  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
  {
    // The texture object is attached to the entire allocation range.
    return ptr - reinterpret_cast< AliasType * >( RecordBase::m_alloc_ptr );
  }

  static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};

template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  const Kokkos::CudaHostPinnedSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_space() {}

  SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
                        , const std::string                 & arg_label
                        , const size_t                        arg_alloc_size
                        , const RecordBase::function_type     arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
                                          , const std::string                 & arg_label
                                          , const size_t                        arg_alloc_size
                                          );

  /**\brief  Allocate tracked memory in the space. */
  static
  void * allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
                         , const std::string                 & arg_label
                         , const size_t                        arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space. */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space. */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};

} // namespace Impl
} // namespace Kokkos
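
/* Example (illustrative sketch): the tracked allocation interface pairs a
 * label with each allocation for use in profiling and debugging output
 * (see print_records).  Assumes Kokkos has been initialized; "my_buffer"
 * is an arbitrary label.
 *
 *   using Record = Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
 *
 *   void * ptr = Record::allocate_tracked( Kokkos::CudaSpace() , "my_buffer"
 *                                        , 1024 * sizeof(double) );
 *
 *   ptr = Record::reallocate_tracked( ptr , 2048 * sizeof(double) );
 *
 *   Record::deallocate_tracked( ptr );
 */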

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDASPACE_HPP */