Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_CudaSpace.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_CUDASPACE_HPP
23#define KOKKOS_CUDASPACE_HPP
24
25#include <Kokkos_Macros.hpp>
26#if defined(KOKKOS_ENABLE_CUDA)
27
28#include <Kokkos_Core_fwd.hpp>
29
30#include <iosfwd>
31#include <typeinfo>
32#include <string>
33#include <memory>
34
35#include <Kokkos_HostSpace.hpp>
36#include <impl/Kokkos_SharedAlloc.hpp>
37
38#include <impl/Kokkos_Profiling_Interface.hpp>
39
40#include <Cuda/Kokkos_Cuda_abort.hpp>
41
// C-linkage getter/setter pair for the "pin UVM to host" debug flag. They are
// only declared when KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST is defined.
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
extern "C" {
bool kokkos_impl_cuda_pin_uvm_to_host();
void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
}
#endif
46
47/*--------------------------------------------------------------------------*/
48
49namespace Kokkos {
50namespace Impl {
51
// Trait identifying CUDA memory spaces. The primary template answers "no";
// a memory space opts in through a specialization deriving from
// std::true_type.
template <typename T>
struct is_cuda_type_space : std::false_type {};
54
55} // namespace Impl
56
59class CudaSpace {
60 public:
62 using memory_space = CudaSpace;
63 using execution_space = Kokkos::Cuda;
64 using device_type = Kokkos::Device<execution_space, memory_space>;
65
66 using size_type = unsigned int;
67
68 /*--------------------------------*/
69
70 CudaSpace();
71 CudaSpace(CudaSpace&& rhs) = default;
72 CudaSpace(const CudaSpace& rhs) = default;
73 CudaSpace& operator=(CudaSpace&& rhs) = default;
74 CudaSpace& operator=(const CudaSpace& rhs) = default;
75 ~CudaSpace() = default;
76
78 void* allocate(const Cuda& exec_space, const size_t arg_alloc_size) const;
79 void* allocate(const Cuda& exec_space, const char* arg_label,
80 const size_t arg_alloc_size,
81 const size_t arg_logical_size = 0) const;
82 void* allocate(const size_t arg_alloc_size) const;
83 void* allocate(const char* arg_label, const size_t arg_alloc_size,
84 const size_t arg_logical_size = 0) const;
85
87 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
88 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
89 const size_t arg_alloc_size,
90 const size_t arg_logical_size = 0) const;
91
92 private:
93 template <class, class, class, class>
95 void* impl_allocate(const Cuda& exec_space, const char* arg_label,
96 const size_t arg_alloc_size,
97 const size_t arg_logical_size = 0,
98 const Kokkos::Tools::SpaceHandle =
99 Kokkos::Tools::make_space_handle(name())) const;
100 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
101 const size_t arg_logical_size = 0,
102 const Kokkos::Tools::SpaceHandle =
103 Kokkos::Tools::make_space_handle(name())) const;
104 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
105 const size_t arg_alloc_size,
106 const size_t arg_logical_size = 0,
107 const Kokkos::Tools::SpaceHandle =
108 Kokkos::Tools::make_space_handle(name())) const;
109
110 public:
112 static constexpr const char* name() { return m_name; }
113
114 private:
115 int m_device;
116
117 static constexpr const char* m_name = "Cuda";
118 friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
119};
120
121template <>
122struct Impl::is_cuda_type_space<CudaSpace> : public std::true_type {};
123
124} // namespace Kokkos
125
126/*--------------------------------------------------------------------------*/
127/*--------------------------------------------------------------------------*/
128
129namespace Kokkos {
130
134class CudaUVMSpace {
135 public:
137 using memory_space = CudaUVMSpace;
138 using execution_space = Cuda;
139 using device_type = Kokkos::Device<execution_space, memory_space>;
140 using size_type = unsigned int;
141
142#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
144 KOKKOS_DEPRECATED static bool available();
145#endif
146
147 /*--------------------------------*/
148
149 /*--------------------------------*/
150
151 CudaUVMSpace();
152 CudaUVMSpace(CudaUVMSpace&& rhs) = default;
153 CudaUVMSpace(const CudaUVMSpace& rhs) = default;
154 CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
155 CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
156 ~CudaUVMSpace() = default;
157
159 void* allocate(const size_t arg_alloc_size) const;
160 void* allocate(const char* arg_label, const size_t arg_alloc_size,
161 const size_t arg_logical_size = 0) const;
162
164 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
165 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
166 const size_t arg_alloc_size,
167 const size_t arg_logical_size = 0) const;
168
169 private:
170 template <class, class, class, class>
172 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
173 const size_t arg_logical_size = 0,
174 const Kokkos::Tools::SpaceHandle =
175 Kokkos::Tools::make_space_handle(name())) const;
176 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
177 const size_t arg_alloc_size,
178 const size_t arg_logical_size = 0,
179 const Kokkos::Tools::SpaceHandle =
180 Kokkos::Tools::make_space_handle(name())) const;
181
182 public:
184 static constexpr const char* name() { return m_name; }
185
186#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
187 static bool cuda_pin_uvm_to_host();
188 static void cuda_set_pin_uvm_to_host(bool val);
189#endif
190 /*--------------------------------*/
191
192 private:
193 int m_device;
194
195#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
196 static bool kokkos_impl_cuda_pin_uvm_to_host_v;
197#endif
198 static constexpr const char* m_name = "CudaUVM";
199};
200
201template <>
202struct Impl::is_cuda_type_space<CudaUVMSpace> : public std::true_type {};
203
204} // namespace Kokkos
205
206/*--------------------------------------------------------------------------*/
207/*--------------------------------------------------------------------------*/
208
209namespace Kokkos {
210
214class CudaHostPinnedSpace {
215 public:
217
218 using execution_space = HostSpace::execution_space;
219 using memory_space = CudaHostPinnedSpace;
220 using device_type = Kokkos::Device<execution_space, memory_space>;
221 using size_type = unsigned int;
222
223 /*--------------------------------*/
224
225 CudaHostPinnedSpace();
226 CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) = default;
227 CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
228 CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
229 CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
230 ~CudaHostPinnedSpace() = default;
231
233 void* allocate(const size_t arg_alloc_size) const;
234 void* allocate(const char* arg_label, const size_t arg_alloc_size,
235 const size_t arg_logical_size = 0) const;
236
238 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
239 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
240 const size_t arg_alloc_size,
241 const size_t arg_logical_size = 0) const;
242
243 private:
244 template <class, class, class, class>
246 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
247 const size_t arg_logical_size = 0,
248 const Kokkos::Tools::SpaceHandle =
249 Kokkos::Tools::make_space_handle(name())) const;
250 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
251 const size_t arg_alloc_size,
252 const size_t arg_logical_size = 0,
253 const Kokkos::Tools::SpaceHandle =
254 Kokkos::Tools::make_space_handle(name())) const;
255
256 public:
258 static constexpr const char* name() { return m_name; }
259
260 private:
261 static constexpr const char* m_name = "CudaHostPinned";
262
263 /*--------------------------------*/
264};
265
266template <>
267struct Impl::is_cuda_type_space<CudaHostPinnedSpace> : public std::true_type {};
268
269} // namespace Kokkos
270
271/*--------------------------------------------------------------------------*/
272/*--------------------------------------------------------------------------*/
273
274namespace Kokkos {
275namespace Impl {
276
277cudaStream_t cuda_get_deep_copy_stream();
278
279const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
280 bool initialize = true);
281
282static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
283 Kokkos::CudaSpace>::assignable,
284 "");
285static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaUVMSpace,
286 Kokkos::CudaUVMSpace>::assignable,
287 "");
288static_assert(
289 Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
290 Kokkos::CudaHostPinnedSpace>::assignable,
291 "");
292
293//----------------------------------------
294
295template <>
296struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
297 enum : bool { assignable = false };
298 enum : bool { accessible = false };
299 enum : bool { deepcopy = true };
300};
301
302template <>
303struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
304 // HostSpace::execution_space != CudaUVMSpace::execution_space
305 enum : bool { assignable = false };
306 enum : bool { accessible = true };
307 enum : bool { deepcopy = true };
308};
309
310template <>
311struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
312 // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
313 enum : bool { assignable = true };
314 enum : bool { accessible = true };
315 enum : bool { deepcopy = true };
316};
317
318//----------------------------------------
319
320template <>
321struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
322 enum : bool { assignable = false };
323 enum : bool { accessible = false };
324 enum : bool { deepcopy = true };
325};
326
327template <>
328struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
329 // CudaSpace::execution_space == CudaUVMSpace::execution_space
330 enum : bool { assignable = true };
331 enum : bool { accessible = true };
332 enum : bool { deepcopy = true };
333};
334
335template <>
336struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
337 // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
338 enum : bool { assignable = false };
339 enum : bool { accessible = true }; // CudaSpace::execution_space
340 enum : bool { deepcopy = true };
341};
342
343//----------------------------------------
344// CudaUVMSpace::execution_space == Cuda
345// CudaUVMSpace accessible to both Cuda and Host
346
347template <>
348struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
349 enum : bool { assignable = false };
350 enum : bool { accessible = false }; // Cuda cannot access HostSpace
351 enum : bool { deepcopy = true };
352};
353
354template <>
355struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
356 // CudaUVMSpace::execution_space == CudaSpace::execution_space
357 // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
358 enum : bool { assignable = false };
359
360 // CudaUVMSpace::execution_space can access CudaSpace
361 enum : bool { accessible = true };
362 enum : bool { deepcopy = true };
363};
364
365template <>
366struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
367 // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
368 enum : bool { assignable = false };
369 enum : bool { accessible = true }; // CudaUVMSpace::execution_space
370 enum : bool { deepcopy = true };
371};
372
373//----------------------------------------
374// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
375// CudaHostPinnedSpace accessible to both Cuda and Host
376
377template <>
378struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
379 enum : bool { assignable = false }; // Cannot access from Cuda
380 enum : bool { accessible = true }; // CudaHostPinnedSpace::execution_space
381 enum : bool { deepcopy = true };
382};
383
384template <>
385struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
386 enum : bool { assignable = false }; // Cannot access from Host
387 enum : bool { accessible = false };
388 enum : bool { deepcopy = true };
389};
390
391template <>
392struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
393 enum : bool { assignable = false }; // different execution_space
394 enum : bool { accessible = true }; // same accessibility
395 enum : bool { deepcopy = true };
396};
397
398//----------------------------------------
399
400} // namespace Impl
401} // namespace Kokkos
402
403/*--------------------------------------------------------------------------*/
404/*--------------------------------------------------------------------------*/
405
406namespace Kokkos {
407namespace Impl {
408
409void DeepCopyCuda(void* dst, const void* src, size_t n);
410void DeepCopyAsyncCuda(const Cuda& instance, void* dst, const void* src,
411 size_t n);
412void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);
413
414template <class MemSpace>
415struct DeepCopy<MemSpace, HostSpace, Cuda,
416 std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
417 DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
418 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
419 DeepCopyAsyncCuda(instance, dst, src, n);
420 }
421};
422
423template <class MemSpace>
424struct DeepCopy<HostSpace, MemSpace, Cuda,
425 std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
426 DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
427 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
428 DeepCopyAsyncCuda(instance, dst, src, n);
429 }
430};
431
432template <class MemSpace1, class MemSpace2>
433struct DeepCopy<MemSpace1, MemSpace2, Cuda,
434 std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
435 is_cuda_type_space<MemSpace2>::value>> {
436 DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
437 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
438 DeepCopyAsyncCuda(instance, dst, src, n);
439 }
440};
441
442template <class MemSpace1, class MemSpace2, class ExecutionSpace>
443struct DeepCopy<MemSpace1, MemSpace2, ExecutionSpace,
444 std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
445 is_cuda_type_space<MemSpace2>::value &&
446 !std::is_same<ExecutionSpace, Cuda>::value>> {
447 inline DeepCopy(void* dst, const void* src, size_t n) {
448 DeepCopyCuda(dst, src, n);
449 }
450
451 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
452 size_t n) {
453 exec.fence(fence_string());
454 DeepCopyAsyncCuda(dst, src, n);
455 }
456
457 private:
458 static const std::string& fence_string() {
459 static const std::string string =
460 std::string("Kokkos::Impl::DeepCopy<") + MemSpace1::name() + "Space, " +
461 MemSpace2::name() +
462 "Space, ExecutionSpace>::DeepCopy: fence before copy";
463 return string;
464 }
465};
466
467template <class MemSpace, class ExecutionSpace>
468struct DeepCopy<MemSpace, HostSpace, ExecutionSpace,
469 std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
470 !std::is_same<ExecutionSpace, Cuda>::value>> {
471 inline DeepCopy(void* dst, const void* src, size_t n) {
472 DeepCopyCuda(dst, src, n);
473 }
474
475 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
476 size_t n) {
477 exec.fence(fence_string());
478 DeepCopyAsyncCuda(dst, src, n);
479 }
480
481 private:
482 static const std::string& fence_string() {
483 static const std::string string =
484 std::string("Kokkos::Impl::DeepCopy<") + MemSpace::name() +
485 "Space, HostSpace, ExecutionSpace>::DeepCopy: fence before copy";
486 return string;
487 }
488};
489
490template <class MemSpace, class ExecutionSpace>
491struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
492 std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
493 !std::is_same<ExecutionSpace, Cuda>::value>> {
494 inline DeepCopy(void* dst, const void* src, size_t n) {
495 DeepCopyCuda(dst, src, n);
496 }
497
498 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
499 size_t n) {
500 exec.fence(fence_string());
501 DeepCopyAsyncCuda(dst, src, n);
502 }
503
504 private:
505 static const std::string& fence_string() {
506 static const std::string string =
507 std::string("Kokkos::Impl::DeepCopy<HostSpace, ") + MemSpace::name() +
508 "Space, ExecutionSpace>::DeepCopy: fence before copy";
509 return string;
510 }
511};
512
513} // namespace Impl
514} // namespace Kokkos
515
516//----------------------------------------------------------------------------
517//----------------------------------------------------------------------------
518
519namespace Kokkos {
520namespace Impl {
521
522template <>
523class SharedAllocationRecord<Kokkos::CudaSpace, void>
524 : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
525 private:
526 friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
527 friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
528 friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
529
530 using RecordBase = SharedAllocationRecord<void, void>;
531 using base_t =
532 HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
533
534 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
535 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
536
537 static ::cudaTextureObject_t attach_texture_object(
538 const unsigned sizeof_alias, void* const alloc_ptr,
539 const size_t alloc_size);
540
541#ifdef KOKKOS_ENABLE_DEBUG
542 static RecordBase s_root_record;
543#endif
544
545 ::cudaTextureObject_t m_tex_obj = 0;
546 const Kokkos::CudaSpace m_space;
547
548 protected:
549 ~SharedAllocationRecord();
550 SharedAllocationRecord() = default;
551
552 // This constructor does not forward to the one without exec_space arg
553 // in order to work around https://github.com/kokkos/kokkos/issues/5258
554 // This constructor is templated so I can't just put it into the cpp file
555 // like the other constructor.
556 template <typename ExecutionSpace>
557 SharedAllocationRecord(
558 const ExecutionSpace& /*exec_space*/, const Kokkos::CudaSpace& arg_space,
559 const std::string& arg_label, const size_t arg_alloc_size,
560 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
561 : base_t(
562#ifdef KOKKOS_ENABLE_DEBUG
563 &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record,
564#endif
565 Impl::checked_allocation_with_header(arg_space, arg_label,
566 arg_alloc_size),
567 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
568 arg_label),
569 m_tex_obj(0),
570 m_space(arg_space) {
571
572 SharedAllocationHeader header;
573
574 this->base_t::_fill_host_accessible_header_info(header, arg_label);
575
576 // Copy to device memory
577 // workaround for issue with NVCC and MSVC
578 // https://github.com/kokkos/kokkos/issues/5258
579 deep_copy_header_no_exec(RecordBase::m_alloc_ptr, &header);
580 }
581
582 SharedAllocationRecord(
583 const Kokkos::Cuda& exec_space, const Kokkos::CudaSpace& arg_space,
584 const std::string& arg_label, const size_t arg_alloc_size,
585 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
586
587 SharedAllocationRecord(
588 const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
589 const size_t arg_alloc_size,
590 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
591
592 // helper function to work around MSVC+NVCC issue
593 // https://github.com/kokkos/kokkos/issues/5258
594 static void deep_copy_header_no_exec(void*, const void*);
595
596 public:
597 template <typename AliasType>
598 inline ::cudaTextureObject_t attach_texture_object() {
599 static_assert((std::is_same<AliasType, int>::value ||
600 std::is_same<AliasType, ::int2>::value ||
601 std::is_same<AliasType, ::int4>::value),
602 "Cuda texture fetch only supported for alias types of int, "
603 "::int2, or ::int4");
604
605 if (m_tex_obj == 0) {
606 m_tex_obj = attach_texture_object(sizeof(AliasType),
607 (void*)RecordBase::m_alloc_ptr,
608 RecordBase::m_alloc_size);
609 }
610
611 return m_tex_obj;
612 }
613
614 template <typename AliasType>
615 inline int attach_texture_object_offset(const AliasType* const ptr) {
616 // Texture object is attached to the entire allocation range
617 return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
618 }
619};
620
621template <>
622class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
623 : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
624 private:
625 friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
626
627 using base_t = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
628 using RecordBase = SharedAllocationRecord<void, void>;
629
630 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
631 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
632
633 static RecordBase s_root_record;
634
635 ::cudaTextureObject_t m_tex_obj = 0;
636 const Kokkos::CudaUVMSpace m_space;
637
638 protected:
639 ~SharedAllocationRecord();
640 SharedAllocationRecord() = default;
641
642 // This constructor does not forward to the one without exec_space arg
643 // in order to work around https://github.com/kokkos/kokkos/issues/5258
644 // This constructor is templated so I can't just put it into the cpp file
645 // like the other constructor.
646 template <typename ExecutionSpace>
647 SharedAllocationRecord(
648 const ExecutionSpace& /*exec_space*/,
649 const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
650 const size_t arg_alloc_size,
651 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
652 : base_t(
653#ifdef KOKKOS_ENABLE_DEBUG
654 &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record,
655#endif
656 Impl::checked_allocation_with_header(arg_space, arg_label,
657 arg_alloc_size),
658 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
659 arg_label),
660 m_tex_obj(0),
661 m_space(arg_space) {
662 this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
663 arg_label);
664 }
665
666 SharedAllocationRecord(
667 const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
668 const size_t arg_alloc_size,
669 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
670
671 public:
672 template <typename AliasType>
673 inline ::cudaTextureObject_t attach_texture_object() {
674 static_assert((std::is_same<AliasType, int>::value ||
675 std::is_same<AliasType, ::int2>::value ||
676 std::is_same<AliasType, ::int4>::value),
677 "Cuda texture fetch only supported for alias types of int, "
678 "::int2, or ::int4");
679
680 if (m_tex_obj == 0) {
681 m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
682 attach_texture_object(sizeof(AliasType),
683 (void*)RecordBase::m_alloc_ptr,
684 RecordBase::m_alloc_size);
685 }
686
687 return m_tex_obj;
688 }
689
690 template <typename AliasType>
691 inline int attach_texture_object_offset(const AliasType* const ptr) {
692 // Texture object is attached to the entire allocation range
693 return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
694 }
695};
696
697template <>
698class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
699 : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
700 private:
701 friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
702
703 using RecordBase = SharedAllocationRecord<void, void>;
704 using base_t = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
705
706 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
707 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
708
709 static RecordBase s_root_record;
710
711 const Kokkos::CudaHostPinnedSpace m_space;
712
713 protected:
714 ~SharedAllocationRecord();
715 SharedAllocationRecord() = default;
716
717 // This constructor does not forward to the one without exec_space arg
718 // in order to work around https://github.com/kokkos/kokkos/issues/5258
719 // This constructor is templated so I can't just put it into the cpp file
720 // like the other constructor.
721 template <typename ExecutionSpace>
722 SharedAllocationRecord(
723 const ExecutionSpace& /*exec_space*/,
724 const Kokkos::CudaHostPinnedSpace& arg_space,
725 const std::string& arg_label, const size_t arg_alloc_size,
726 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
727 : base_t(
728#ifdef KOKKOS_ENABLE_DEBUG
729 &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
730 void>::s_root_record,
731#endif
732 Impl::checked_allocation_with_header(arg_space, arg_label,
733 arg_alloc_size),
734 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
735 arg_label),
736 m_space(arg_space) {
737 this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
738 arg_label);
739 }
740
741 SharedAllocationRecord(
742 const Kokkos::CudaHostPinnedSpace& arg_space,
743 const std::string& arg_label, const size_t arg_alloc_size,
744 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
745};
746
747} // namespace Impl
748} // namespace Kokkos
749
750//----------------------------------------------------------------------------
751//----------------------------------------------------------------------------
752
753#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
754#endif /* #define KOKKOS_CUDASPACE_HPP */
LogicalMemorySpace is a space that is identical to another space, but differentiable by name and temp...
Memory management for host memory.
bool available()
Query if hwloc is available.
Access relationship between DstMemorySpace and SrcMemorySpace.