48 use_auto_team_size_(true),
53 fad_use_shared_memory_(false)
55#if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
56#if defined(KOKKOS_ENABLE_CUDA)
69 int roundDownToPowerOfTwo(
int in) {
79 const int& in_vector_size,
80 const int& in_fad_vector_size,
81 const bool force_override)
84 if ( force_override ) {
91 Kokkos::TeamPolicy<PHX::Device> policy(1, Kokkos::AUTO);
92 auto blank_functor = KOKKOS_LAMBDA (
const Kokkos::TeamPolicy<PHX::exec_space>::member_type) {};
94 int team_size_max = std::min(in_team_size, policy.team_size_max(blank_functor, Kokkos::ParallelForTag()));
95 team_size_=roundDownToPowerOfTwo(team_size_max);
97 int vec_size_max = policy.vector_length_max();
98 vector_size_ = roundDownToPowerOfTwo(std::min(vec_size_max, in_vector_size));
99 fad_vector_size_ = roundDownToPowerOfTwo(std::min(vec_size_max, in_fad_vector_size));
103 const bool& in_fad_use_shared_memory)
const bool use_shared_memory_
Singleton class for accessing kokkos hierarchical parallelism parameters.
int team_size_
User specified team size. If use_auto_team_size_ is true, the team size is set with Kokkos::AUTO().
static HP & inst()
Returns the singleton instance of this class.
int vector_size_
Default vector size for non-AD types.
void setUseSharedMemory(const bool &use_shared_memory, const bool &fad_use_shared_memory)
Tell kokkos kernels if they should use shared memory. This is very problem dependent.
bool fad_use_shared_memory_
Use shared memory kokkos kernels for fad types.
bool use_shared_memory_
Use shared memory kokkos kernels for non-fad types.
HP()
Private ctor.
int fad_vector_size_
FAD vector size.
void overrideSizes(const int &team_size, const int &vector_size, const int &fad_vector_size, const bool force_override_safety=false)