42#ifndef STOKHOS_CRSMATRIX_HPP
43#define STOKHOS_CRSMATRIX_HPP
48#include "Kokkos_Core.hpp"
49#include "Kokkos_StaticCrsGraph.hpp"
59 Dim3(
const size_t x_,
const size_t y_ = 1,
const size_t z_ = 1) :
60 x(x_),
y(y_),
z(z_) {}
68 const size_t threads_per_block_x_,
69 const size_t threads_per_block_y_ = 1,
70 const size_t threads_per_block_z_ = 1) :
71 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
78template <
typename ValueType,
typename Device,
79 typename Layout = Kokkos::LayoutRight>
84 typedef Kokkos::View< value_type[], Layout, execution_space >
values_type;
85#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
86 typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int >
graph_type;
88 typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , void, int >
graph_type;
102template <
typename MatrixValue,
105 typename InputVectorType,
106 typename OutputVectorType>
136 KOKKOS_INLINE_FUNCTION
144 for (
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
145 sum += m_A.
values(iEntry) * m_x( m_A.
graph.entries(iEntry) );
155 const size_t row_count = A.
graph.row_map.extent(0) - 1;
156 Kokkos::parallel_for( row_count,
Multiply(A,x,y) );
161template <
typename MatrixValue,
164 typename InputMultiVectorType,
165 typename OutputMultiVectorType,
166 typename OrdinalType >
168 InputMultiVectorType,
169 OutputMultiVectorType,
170 std::vector<OrdinalType>,
181 typedef typename output_multi_vector_type::value_type
scalar_type;
196 , m_col_indices( col_indices )
197 , m_num_vecs( col_indices.size() )
202 KOKKOS_INLINE_FUNCTION
213 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
214 sum += m_A.
values(iEntry) * m_x( m_A.
graph.entries(iEntry), iCol );
217 m_y( iRow, iCol ) = sum;
228 const size_t n = A.
graph.row_map.extent(0) - 1 ;
231 const size_t block_size = 20;
232 const size_t num_vecs = col.size();
233 std::vector<OrdinalType> block_col;
234 block_col.reserve(block_size);
235 for (
size_t block=0; block<num_vecs; block+=block_size) {
237 block+block_size <= num_vecs ? block_size : num_vecs-block;
238 block_col.resize(bs);
239 for (
size_t i=0; i<bs; ++i)
240 block_col[i] = col[block+i];
241 Kokkos::parallel_for( n ,
Multiply(A,x,y,block_col) );
252template <
typename MatrixValue,
255 typename InputMultiVectorType,
256 typename OutputMultiVectorType >
257class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
258 InputMultiVectorType,
259 OutputMultiVectorType,
264 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
265 typedef InputMultiVectorType input_multi_vector_type;
266 typedef OutputMultiVectorType output_multi_vector_type;
269 typedef typename execution_space::size_type size_type;
270 typedef typename output_multi_vector_type::value_type scalar_type;
272 const matrix_type m_A;
273 const input_multi_vector_type m_x;
274 output_multi_vector_type m_y;
275 const size_type m_num_row;
276 const size_type m_num_col;
278 static const size_type m_block_row_size = 32;
279 static const size_type m_block_col_size = 20;
281 Multiply(
const matrix_type& A,
282 const input_multi_vector_type& x,
283 output_multi_vector_type& y )
287 , m_num_row( A.graph.row_map.extent(0)-1 )
288 , m_num_col( m_y.extent(1) )
294 KOKKOS_INLINE_FUNCTION
295 void operator()(
const size_type iBlockRow )
const
298 const size_type num_row =
299 iBlockRow+m_block_row_size <= m_num_row ?
300 m_block_row_size : m_num_row-iBlockRow;
303 for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
305 const size_type num_col =
306 iBlockCol+m_block_col_size <= m_num_col ?
307 m_block_col_size : m_num_col-iBlockCol;
310 const size_type iRowEnd = iBlockRow + num_row;
311 for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
314 const size_type iEntryBegin = m_A.graph.row_map[iRow];
315 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
318 const size_type iColEnd = iBlockCol + num_col;
319 for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
322 scalar_type sum = 0.0;
323 for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
324 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
326 m_y( iRow, iCol ) = sum;
336 static void apply(
const matrix_type & A,
337 const input_multi_vector_type& x,
338 output_multi_vector_type& y )
341 const size_type num_row = A.graph.row_map.extent(0) - 1;
342 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
343 Kokkos::parallel_for( n , Multiply(A,x,y) );
348template <
typename MatrixValue,
351 typename InputMultiVectorType,
352 typename OutputMultiVectorType >
354 InputMultiVectorType,
355 OutputMultiVectorType,
366 typedef typename output_multi_vector_type::value_type
scalar_type;
379 , m_num_vecs( m_y.extent(1) )
384 KOKKOS_INLINE_FUNCTION
390 for (
size_type iCol=0; iCol<m_num_vecs; iCol++) {
394 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
395 sum += m_A.
values(iEntry) * m_x( m_A.
graph.entries(iEntry), iCol );
398 m_y( iRow, iCol ) = sum;
408 const size_t n = A.
graph.row_map.extent(0) - 1 ;
409 Kokkos::parallel_for( n ,
Multiply(A,x,y) );
432template <
typename MatrixValue,
435 typename InputViewType,
436 typename OutputViewType>
437class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
438 std::vector<InputViewType>,
439 std::vector<OutputViewType>,
444 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
445 typedef std::vector<InputViewType> input_multi_vector_type;
446 typedef std::vector<OutputViewType> output_multi_vector_type;
449 typedef typename execution_space::size_type size_type;
450 typedef typename OutputViewType::value_type scalar_type;
452 const matrix_type m_A;
453 const input_multi_vector_type m_x;
454 output_multi_vector_type m_y;
455 const size_type m_num_row;
456 const size_type m_num_col;
458 static const size_type m_block_row_size = 32;
459 static const size_type m_block_col_size = 20;
461 Multiply(
const matrix_type& A,
462 const input_multi_vector_type& x,
463 output_multi_vector_type& y )
467 , m_num_row( A.graph.row_map.extent(0)-1 )
468 , m_num_col( x.size() )
474 KOKKOS_INLINE_FUNCTION
475 void operator()(
const size_type iBlockRow )
const
478 const size_type num_row =
479 iBlockRow+m_block_row_size <= m_num_row ?
480 m_block_row_size : m_num_row-iBlockRow;
483 for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
485 const size_type num_col =
486 iBlockCol+m_block_col_size <= m_num_col ?
487 m_block_col_size : m_num_col-iBlockCol;
490 const size_type iRowEnd = iBlockRow + num_row;
491 for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
494 const size_type iEntryBegin = m_A.graph.row_map[iRow];
495 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
498 const size_type iColEnd = iBlockCol + num_col;
499 for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
503 for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
504 sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
506 m_y[iCol](iRow) = sum;
516 static void apply(
const matrix_type & A,
517 const input_multi_vector_type& x,
518 output_multi_vector_type& y )
521 const size_type num_row = A.graph.row_map.extent(0) - 1;
522 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
523 Kokkos::parallel_for( n , Multiply(A,x,y) );
528template <
typename MatrixValue,
531 typename InputViewType,
532 typename OutputViewType>
534 std::vector<InputViewType>,
535 std::vector<OutputViewType>,
559 , m_num_vecs( x.size() )
565 KOKKOS_INLINE_FUNCTION
571 for (
size_type iCol=0; iCol<m_num_vecs; iCol++) {
575 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
576 sum += m_A.
values(iEntry) * m_x[iCol]( m_A.
graph.entries(iEntry) );
579 m_y[iCol]( iRow) = sum;
589 const size_t n = A.
graph.row_map.extent(0) - 1 ;
590 Kokkos::parallel_for( n ,
Multiply(A,x,y) );
615template <
typename MatrixValue,
618 typename InputMultiVectorType,
619 typename OutputMultiVectorType,
620 typename OrdinalType>
622 const InputMultiVectorType& x,
623 OutputMultiVectorType& y,
624 const std::vector<OrdinalType>& col_indices,
629 typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
630 typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
632 for (
size_t i=0; i<col_indices.size(); ++i) {
633 InputVectorType x_view =
634 Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
635 OutputVectorType y_view =
636 Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
637 multiply_type::apply( A , x_view , y_view );
641template <
typename MatrixValue,
644 typename InputVectorType,
645 typename OutputVectorType>
647 const std::vector<InputVectorType>& x,
648 std::vector<OutputVectorType>& y,
653 for (
size_t i=0; i<x.size(); ++i) {
654 multiply_type::apply( A , x[i] , y[i] );
665template <
typename ValueType,
typename Layout,
typename Device>
675template <
typename ValueType,
typename Layout,
typename Device>
685template <
typename ValueType,
typename Layout,
typename DstDevice,
701template <
typename MatrixValue,
typename Layout,
typename Device >
710 std::ofstream file(filename.c_str());
712 file.setf(std::ios::scientific);
715 Kokkos::deep_copy(hA, A);
720 file <<
"%%MatrixMarket matrix coordinate real general" << std::endl;
721 file << nRow <<
" " << nRow <<
" " << hA.
values.extent(0) << std::endl;
726 for (
size_type entry=entryBegin; entry<entryEnd; ++entry) {
727 file << row+1 <<
" " << hA.
graph.entries(entry)+1 <<
" "
728 << std::setw(22) << hA.
values(entry) << std::endl;
Kokkos::DefaultExecutionSpace execution_space
Stokhos::DeviceConfig dev_config
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
Kokkos::View< value_type[], Layout, execution_space > values_type
Kokkos::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
CrsMatrix(Stokhos::DeviceConfig dev_config_)
static void write(const matrix_type &A, const std::string &filename)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
execution_space::size_type size_type
execution_space::size_type size_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type iRow) const
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
OutputViewType::value_type scalar_type
std::vector< InputViewType > input_multi_vector_type
const input_multi_vector_type m_x
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
output_multi_vector_type m_y
CrsMatrix< MatrixValue, Device, Layout > matrix_type
std::vector< OutputViewType > output_multi_vector_type
const size_type m_num_vecs
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Top-level namespace for Stokhos classes and functions.
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
size_t num_threads_per_block
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)