42#ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
43#define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
49#include "Kokkos_Core.hpp"
56template<
class BlockSpec ,
typename MatrixValue ,
typename VectorValue >
59 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
60 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
66 typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda >
block_vector_type ;
93 iBlock < blockCount ; iBlock += gridDim.x ) {
99 for ( ; iEntry < iEntryEnd ; ++iEntry ) {
100 const VectorValue *
const x = & m_x( 0 , m_A.
graph.entries(iEntry) );
101 const MatrixValue *
const a = & m_A.
values( 0 , iEntry );
106 if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.
block.dimension() ) {
107 m_y(threadIdx.x,iBlock) = y ;
117 Kokkos::Impl::cuda_internal_maximum_warp_count() * Kokkos::Impl::CudaTraits::WarpSize ;
122 std::min( row_count , Kokkos::Impl::cuda_internal_maximum_grid_count()[0] ) , 1 , 1 );
128 if ( thread_max < block.x * block.y ) {
129 std::ostringstream msg ;
130 msg <<
"Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
131 <<
" ERROR: block dimension = " << block.x * block.y
132 <<
" > " << thread_max <<
"== maximum Cuda threads per block" ;
133 throw std::runtime_error(msg.str());
136 Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>(
Multiply(A,x,y) );
CRS matrix of dense blocks.
const block_vector_type m_y
const block_vector_type m_x
static void apply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
Multiply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
BlockCrsMatrix< BlockSpec, MatrixValue, execution_space > matrix_type
__device__ void operator()(void) const
execution_space::size_type size_type
Kokkos::Cuda execution_space
Kokkos::View< VectorValue **,Kokkos::LayoutLeft, Kokkos::Cuda > block_vector_type
Top-level namespace for Stokhos classes and functions.