56#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
57#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
59#include "Kokkos_Core.hpp"
60#include "Kokkos_ArithTraits.hpp"
65namespace KokkosRefactor {
81template<
class IntegerType,
82 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
84 static KOKKOS_INLINE_FUNCTION
bool
85 test (
const IntegerType x,
86 const IntegerType exclusiveUpperBound);
90template<
class IntegerType>
92 static KOKKOS_INLINE_FUNCTION
bool
93 test (
const IntegerType x,
94 const IntegerType exclusiveUpperBound)
96 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
101template<
class IntegerType>
102struct OutOfBounds<IntegerType, false> {
103 static KOKKOS_INLINE_FUNCTION
bool
104 test (
const IntegerType x,
105 const IntegerType exclusiveUpperBound)
107 return x >= exclusiveUpperBound;
113template<
class IntegerType>
114KOKKOS_INLINE_FUNCTION
bool
115outOfBounds (
const IntegerType x,
const IntegerType exclusiveUpperBound)
125 template <
typename DstView,
typename SrcView,
typename IdxView,
126 typename Enabled =
void>
127 struct PackArraySingleColumn {
128 typedef typename DstView::execution_space execution_space;
129 typedef typename execution_space::size_type size_type;
136 PackArraySingleColumn (
const DstView& dst_,
140 dst(dst_), src(src_), idx(idx_), col(col_) {}
142 KOKKOS_INLINE_FUNCTION
void
143 operator() (
const size_type k)
const {
144 dst(k) = src(idx(k), col);
148 pack (
const DstView& dst,
153 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
155 (
"Tpetra::MultiVector pack one col",
156 range_type (0, idx.size ()),
157 PackArraySingleColumn (dst, src, idx, col));
161 template <
typename DstView,
164 typename SizeType =
typename DstView::execution_space::size_type,
165 typename Enabled =
void>
166 class PackArraySingleColumnWithBoundsCheck {
168 static_assert (Kokkos::is_view<DstView>::value,
169 "DstView must be a Kokkos::View.");
170 static_assert (Kokkos::is_view<SrcView>::value,
171 "SrcView must be a Kokkos::View.");
172 static_assert (Kokkos::is_view<IdxView>::value,
173 "IdxView must be a Kokkos::View.");
174 static_assert (
static_cast<int> (DstView::rank) == 1,
175 "DstView must be a rank-1 Kokkos::View.");
176 static_assert (
static_cast<int> (SrcView::rank) == 2,
177 "SrcView must be a rank-2 Kokkos::View.");
178 static_assert (
static_cast<int> (IdxView::rank) == 1,
179 "IdxView must be a rank-1 Kokkos::View.");
180 static_assert (std::is_integral<SizeType>::value,
181 "SizeType must be a built-in integer type.");
183 typedef SizeType size_type;
184 using value_type = size_t;
193 PackArraySingleColumnWithBoundsCheck (
const DstView& dst_,
196 const size_type col_) :
197 dst (dst_), src (src_), idx (idx_), col (col_) {}
199 KOKKOS_INLINE_FUNCTION
void
200 operator() (
const size_type k, value_type& lclErrCount)
const {
201 using index_type =
typename IdxView::non_const_value_type;
203 const index_type lclRow = idx(k);
204 if (lclRow <
static_cast<index_type
> (0) ||
205 lclRow >=
static_cast<index_type
> (src.extent (0))) {
209 dst(k) = src(lclRow, col);
213 KOKKOS_INLINE_FUNCTION
214 void init (value_type& initialErrorCount)
const {
215 initialErrorCount = 0;
218 KOKKOS_INLINE_FUNCTION
void
219 join (value_type& dstErrorCount,
220 const value_type& srcErrorCount)
const
222 dstErrorCount += srcErrorCount;
226 pack (
const DstView& dst,
231 typedef typename DstView::execution_space execution_space;
232 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
233 typedef typename IdxView::non_const_value_type index_type;
235 size_t errorCount = 0;
236 Kokkos::parallel_reduce
237 (
"Tpetra::MultiVector pack one col debug only",
238 range_type (0, idx.size ()),
239 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
242 if (errorCount != 0) {
246 auto idx_h = Kokkos::create_mirror_view (idx);
249 Kokkos::deep_copy (idx_h, idx);
251 std::vector<index_type> badIndices;
252 const size_type numInds = idx_h.extent (0);
253 for (size_type k = 0; k < numInds; ++k) {
254 if (idx_h(k) <
static_cast<index_type
> (0) ||
255 idx_h(k) >=
static_cast<index_type
> (src.extent (0))) {
256 badIndices.push_back (idx_h(k));
260 TEUCHOS_TEST_FOR_EXCEPTION
261 (errorCount != badIndices.size (), std::logic_error,
262 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
263 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
264 "uld never happen. Please report this to the Tpetra developers.");
266 std::ostringstream os;
267 os <<
"MultiVector single-column pack kernel had "
268 << badIndices.size () <<
" out-of bounds index/ices. "
270 for (
size_t k = 0; k < badIndices.size (); ++k) {
272 if (k + 1 < badIndices.size ()) {
277 throw std::runtime_error (os.str ());
283 template <
typename DstView,
typename SrcView,
typename IdxView>
285 pack_array_single_column (
const DstView& dst,
289 const bool debug =
true)
291 static_assert (Kokkos::is_view<DstView>::value,
292 "DstView must be a Kokkos::View.");
293 static_assert (Kokkos::is_view<SrcView>::value,
294 "SrcView must be a Kokkos::View.");
295 static_assert (Kokkos::is_view<IdxView>::value,
296 "IdxView must be a Kokkos::View.");
297 static_assert (
static_cast<int> (DstView::rank) == 1,
298 "DstView must be a rank-1 Kokkos::View.");
299 static_assert (
static_cast<int> (SrcView::rank) == 2,
300 "SrcView must be a rank-2 Kokkos::View.");
301 static_assert (
static_cast<int> (IdxView::rank) == 1,
302 "IdxView must be a rank-1 Kokkos::View.");
305 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
306 impl_type::pack (dst, src, idx, col);
309 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
310 impl_type::pack (dst, src, idx, col);
314 template <
typename DstView,
typename SrcView,
typename IdxView,
315 typename Enabled =
void>
316 struct PackArrayMultiColumn {
317 typedef typename DstView::execution_space execution_space;
318 typedef typename execution_space::size_type size_type;
325 PackArrayMultiColumn (
const DstView& dst_,
328 const size_t numCols_) :
329 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
331 KOKKOS_INLINE_FUNCTION
void
332 operator() (
const size_type k)
const {
333 const typename IdxView::value_type localRow = idx(k);
334 const size_t offset = k*numCols;
335 for (
size_t j = 0; j < numCols; ++j) {
336 dst(offset + j) = src(localRow, j);
340 static void pack(
const DstView& dst,
344 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
346 (
"Tpetra::MultiVector pack multicol const stride",
347 range_type (0, idx.size ()),
348 PackArrayMultiColumn (dst, src, idx, numCols));
352 template <
typename DstView,
355 typename SizeType =
typename DstView::execution_space::size_type,
356 typename Enabled =
void>
357 class PackArrayMultiColumnWithBoundsCheck {
359 using size_type = SizeType;
360 using value_type = size_t;
369 PackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
372 const size_type numCols_) :
373 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
375 KOKKOS_INLINE_FUNCTION
void
376 operator() (
const size_type k, value_type& lclErrorCount)
const {
377 typedef typename IdxView::non_const_value_type index_type;
379 const index_type lclRow = idx(k);
380 if (lclRow <
static_cast<index_type
> (0) ||
381 lclRow >=
static_cast<index_type
> (src.extent (0))) {
385 const size_type offset = k*numCols;
386 for (size_type j = 0; j < numCols; ++j) {
387 dst(offset + j) = src(lclRow, j);
392 KOKKOS_INLINE_FUNCTION
393 void init (value_type& initialErrorCount)
const {
394 initialErrorCount = 0;
397 KOKKOS_INLINE_FUNCTION
void
398 join (value_type& dstErrorCount,
399 const value_type& srcErrorCount)
const
401 dstErrorCount += srcErrorCount;
405 pack (
const DstView& dst,
408 const size_type numCols)
410 typedef typename DstView::execution_space execution_space;
411 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
412 typedef typename IdxView::non_const_value_type index_type;
414 size_t errorCount = 0;
415 Kokkos::parallel_reduce
416 (
"Tpetra::MultiVector pack multicol const stride debug only",
417 range_type (0, idx.size ()),
418 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
420 if (errorCount != 0) {
424 auto idx_h = Kokkos::create_mirror_view (idx);
427 Kokkos::deep_copy (idx_h, idx);
429 std::vector<index_type> badIndices;
430 const size_type numInds = idx_h.extent (0);
431 for (size_type k = 0; k < numInds; ++k) {
432 if (idx_h(k) <
static_cast<index_type
> (0) ||
433 idx_h(k) >=
static_cast<index_type
> (src.extent (0))) {
434 badIndices.push_back (idx_h(k));
438 TEUCHOS_TEST_FOR_EXCEPTION
439 (errorCount != badIndices.size (), std::logic_error,
440 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
441 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
442 "uld never happen. Please report this to the Tpetra developers.");
444 std::ostringstream os;
445 os <<
"Tpetra::MultiVector multiple-column pack kernel had "
446 << badIndices.size () <<
" out-of bounds index/ices (errorCount = "
447 << errorCount <<
"): [";
448 for (
size_t k = 0; k < badIndices.size (); ++k) {
450 if (k + 1 < badIndices.size ()) {
455 throw std::runtime_error (os.str ());
461 template <
typename DstView,
465 pack_array_multi_column (
const DstView& dst,
468 const size_t numCols,
469 const bool debug =
true)
471 static_assert (Kokkos::is_view<DstView>::value,
472 "DstView must be a Kokkos::View.");
473 static_assert (Kokkos::is_view<SrcView>::value,
474 "SrcView must be a Kokkos::View.");
475 static_assert (Kokkos::is_view<IdxView>::value,
476 "IdxView must be a Kokkos::View.");
477 static_assert (
static_cast<int> (DstView::rank) == 1,
478 "DstView must be a rank-1 Kokkos::View.");
479 static_assert (
static_cast<int> (SrcView::rank) == 2,
480 "SrcView must be a rank-2 Kokkos::View.");
481 static_assert (
static_cast<int> (IdxView::rank) == 1,
482 "IdxView must be a rank-1 Kokkos::View.");
485 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
486 SrcView, IdxView> impl_type;
487 impl_type::pack (dst, src, idx, numCols);
490 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
491 impl_type::pack (dst, src, idx, numCols);
495 template <
typename DstView,
typename SrcView,
typename IdxView,
496 typename ColView,
typename Enabled =
void>
497 struct PackArrayMultiColumnVariableStride {
498 typedef typename DstView::execution_space execution_space;
499 typedef typename execution_space::size_type size_type;
507 PackArrayMultiColumnVariableStride (
const DstView& dst_,
511 const size_t numCols_) :
512 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
514 KOKKOS_INLINE_FUNCTION
515 void operator() (
const size_type k)
const {
516 const typename IdxView::value_type localRow = idx(k);
517 const size_t offset = k*numCols;
518 for (
size_t j = 0; j < numCols; ++j) {
519 dst(offset + j) = src(localRow, col(j));
523 static void pack(
const DstView& dst,
528 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
530 (
"Tpetra::MultiVector pack multicol var stride",
531 range_type (0, idx.size ()),
532 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
536 template <
typename DstView,
540 typename SizeType =
typename DstView::execution_space::size_type,
541 typename Enabled =
void>
542 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
544 using size_type = SizeType;
545 using value_type = size_t;
555 PackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
559 const size_type numCols_) :
560 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
562 KOKKOS_INLINE_FUNCTION
void
563 operator() (
const size_type k, value_type& lclErrorCount)
const {
564 typedef typename IdxView::non_const_value_type row_index_type;
565 typedef typename ColView::non_const_value_type col_index_type;
567 const row_index_type lclRow = idx(k);
568 if (lclRow <
static_cast<row_index_type
> (0) ||
569 lclRow >=
static_cast<row_index_type
> (src.extent (0))) {
573 const size_type offset = k*numCols;
574 for (size_type j = 0; j < numCols; ++j) {
575 const col_index_type lclCol = col(j);
576 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
580 dst(offset + j) = src(lclRow, lclCol);
586 KOKKOS_INLINE_FUNCTION
void
587 init (value_type& initialErrorCount)
const {
588 initialErrorCount = 0;
591 KOKKOS_INLINE_FUNCTION
void
592 join (value_type& dstErrorCount,
593 const value_type& srcErrorCount)
const
595 dstErrorCount += srcErrorCount;
599 pack (
const DstView& dst,
603 const size_type numCols)
605 using execution_space =
typename DstView::execution_space;
606 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
607 using row_index_type =
typename IdxView::non_const_value_type;
608 using col_index_type =
typename ColView::non_const_value_type;
610 size_t errorCount = 0;
611 Kokkos::parallel_reduce
612 (
"Tpetra::MultiVector pack multicol var stride debug only",
613 range_type (0, idx.size ()),
614 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
617 if (errorCount != 0) {
618 constexpr size_t maxNumBadIndicesToPrint = 100;
620 std::ostringstream os;
621 os <<
"Tpetra::MultiVector multicolumn variable stride pack kernel "
622 "found " << errorCount
623 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
628 auto idx_h = Kokkos::create_mirror_view (idx);
631 Kokkos::deep_copy (idx_h, idx);
633 std::vector<row_index_type> badRows;
634 const size_type numRowInds = idx_h.extent (0);
635 for (size_type k = 0; k < numRowInds; ++k) {
636 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
637 badRows.push_back (idx_h(k));
641 if (badRows.size () != 0) {
642 os << badRows.size () <<
" out-of-bounds row ind"
643 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
644 if (badRows.size () <= maxNumBadIndicesToPrint) {
646 for (
size_t k = 0; k < badRows.size (); ++k) {
648 if (k + 1 < badRows.size ()) {
659 os <<
"No out-of-bounds row indices. ";
664 auto col_h = Kokkos::create_mirror_view (col);
667 Kokkos::deep_copy (col_h, col);
669 std::vector<col_index_type> badCols;
670 const size_type numColInds = col_h.extent (0);
671 for (size_type k = 0; k < numColInds; ++k) {
672 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
673 badCols.push_back (col_h(k));
677 if (badCols.size () != 0) {
678 os << badCols.size () <<
" out-of-bounds column ind"
679 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
680 if (badCols.size () <= maxNumBadIndicesToPrint) {
682 for (
size_t k = 0; k < badCols.size (); ++k) {
684 if (k + 1 < badCols.size ()) {
695 os <<
"No out-of-bounds column indices. ";
698 TEUCHOS_TEST_FOR_EXCEPTION
699 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
700 std::logic_error,
"Tpetra::MultiVector variable stride pack "
701 "kernel reports errorCount=" << errorCount <<
", but we failed "
702 "to find any bad rows or columns. This should never happen. "
703 "Please report this to the Tpetra developers.");
705 throw std::runtime_error (os.str ());
710 template <
typename DstView,
715 pack_array_multi_column_variable_stride (
const DstView& dst,
719 const size_t numCols,
720 const bool debug =
true)
722 static_assert (Kokkos::is_view<DstView>::value,
723 "DstView must be a Kokkos::View.");
724 static_assert (Kokkos::is_view<SrcView>::value,
725 "SrcView must be a Kokkos::View.");
726 static_assert (Kokkos::is_view<IdxView>::value,
727 "IdxView must be a Kokkos::View.");
728 static_assert (Kokkos::is_view<ColView>::value,
729 "ColView must be a Kokkos::View.");
730 static_assert (
static_cast<int> (DstView::rank) == 1,
731 "DstView must be a rank-1 Kokkos::View.");
732 static_assert (
static_cast<int> (SrcView::rank) == 2,
733 "SrcView must be a rank-2 Kokkos::View.");
734 static_assert (
static_cast<int> (IdxView::rank) == 1,
735 "IdxView must be a rank-1 Kokkos::View.");
736 static_assert (
static_cast<int> (ColView::rank) == 1,
737 "ColView must be a rank-1 Kokkos::View.");
740 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
741 SrcView, IdxView, ColView> impl_type;
742 impl_type::pack (dst, src, idx, col, numCols);
745 typedef PackArrayMultiColumnVariableStride<DstView,
746 SrcView, IdxView, ColView> impl_type;
747 impl_type::pack (dst, src, idx, col, numCols);
753 struct atomic_tag {};
754 struct nonatomic_tag {};
758 KOKKOS_INLINE_FUNCTION
759 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
760 Kokkos::atomic_add (&dest, src);
764 KOKKOS_INLINE_FUNCTION
765 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
776 KOKKOS_INLINE_FUNCTION
777 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
782 KOKKOS_INLINE_FUNCTION
783 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
789 template <
class Scalar>
793 KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper
const& rhs) {
794 auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
795 auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
796 value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
800 KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper
const& rhs)
const {
801 AbsMaxHelper ret = *
this;
807 template <
typename SC>
808 KOKKOS_INLINE_FUNCTION
809 void operator() (atomic_tag, SC& dst,
const SC& src)
const {
810 Kokkos::atomic_add(
reinterpret_cast<AbsMaxHelper<SC>*
>(&dst), AbsMaxHelper<SC>{src});
813 template <
typename SC>
814 KOKKOS_INLINE_FUNCTION
815 void operator() (nonatomic_tag, SC& dst,
const SC& src)
const {
816 auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
817 auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
818 dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
822 template <
typename ExecutionSpace,
827 typename Enabled =
void>
828 class UnpackArrayMultiColumn {
830 static_assert (Kokkos::is_view<DstView>::value,
831 "DstView must be a Kokkos::View.");
832 static_assert (Kokkos::is_view<SrcView>::value,
833 "SrcView must be a Kokkos::View.");
834 static_assert (Kokkos::is_view<IdxView>::value,
835 "IdxView must be a Kokkos::View.");
836 static_assert (
static_cast<int> (DstView::rank) == 2,
837 "DstView must be a rank-2 Kokkos::View.");
838 static_assert (
static_cast<int> (SrcView::rank) == 1,
839 "SrcView must be a rank-1 Kokkos::View.");
840 static_assert (
static_cast<int> (IdxView::rank) == 1,
841 "IdxView must be a rank-1 Kokkos::View.");
844 typedef typename ExecutionSpace::execution_space execution_space;
845 typedef typename execution_space::size_type size_type;
855 UnpackArrayMultiColumn (
const ExecutionSpace& ,
860 const size_t numCols_) :
868 template<
class TagType>
869 KOKKOS_INLINE_FUNCTION
void
870 operator() (TagType tag,
const size_type k)
const
873 (std::is_same<TagType, atomic_tag>::value ||
874 std::is_same<TagType, nonatomic_tag>::value,
875 "TagType must be atomic_tag or nonatomic_tag.");
877 const typename IdxView::value_type localRow = idx(k);
878 const size_t offset = k*numCols;
879 for (
size_t j = 0; j < numCols; ++j) {
880 op (tag, dst(localRow, j), src(offset+j));
885 unpack (
const ExecutionSpace& execSpace,
890 const size_t numCols,
891 const bool use_atomic_updates)
893 if (use_atomic_updates) {
895 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
897 (
"Tpetra::MultiVector unpack const stride atomic",
898 range_type (0, idx.size ()),
899 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
903 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
905 (
"Tpetra::MultiVector unpack const stride nonatomic",
906 range_type (0, idx.size ()),
907 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
912 template <
typename ExecutionSpace,
917 typename SizeType =
typename ExecutionSpace::execution_space::size_type,
918 typename Enabled =
void>
919 class UnpackArrayMultiColumnWithBoundsCheck {
921 static_assert (Kokkos::is_view<DstView>::value,
922 "DstView must be a Kokkos::View.");
923 static_assert (Kokkos::is_view<SrcView>::value,
924 "SrcView must be a Kokkos::View.");
925 static_assert (Kokkos::is_view<IdxView>::value,
926 "IdxView must be a Kokkos::View.");
927 static_assert (
static_cast<int> (DstView::rank) == 2,
928 "DstView must be a rank-2 Kokkos::View.");
929 static_assert (
static_cast<int> (SrcView::rank) == 1,
930 "SrcView must be a rank-1 Kokkos::View.");
931 static_assert (
static_cast<int> (IdxView::rank) == 1,
932 "IdxView must be a rank-1 Kokkos::View.");
933 static_assert (std::is_integral<SizeType>::value,
934 "SizeType must be a built-in integer type.");
937 using execution_space =
typename ExecutionSpace::execution_space;
938 using size_type = SizeType;
939 using value_type = size_t;
949 UnpackArrayMultiColumnWithBoundsCheck (
const ExecutionSpace& ,
954 const size_type numCols_) :
962 template<
class TagType>
963 KOKKOS_INLINE_FUNCTION
void
964 operator() (TagType tag,
966 size_t& lclErrCount)
const
969 (std::is_same<TagType, atomic_tag>::value ||
970 std::is_same<TagType, nonatomic_tag>::value,
971 "TagType must be atomic_tag or nonatomic_tag.");
972 using index_type =
typename IdxView::non_const_value_type;
974 const index_type lclRow = idx(k);
975 if (lclRow <
static_cast<index_type
> (0) ||
976 lclRow >=
static_cast<index_type
> (dst.extent (0))) {
980 const size_type offset = k*numCols;
981 for (size_type j = 0; j < numCols; ++j) {
982 op (tag, dst(lclRow,j), src(offset+j));
987 template<
class TagType>
988 KOKKOS_INLINE_FUNCTION
void
989 init (TagType,
size_t& initialErrorCount)
const {
990 initialErrorCount = 0;
993 template<
class TagType>
994 KOKKOS_INLINE_FUNCTION
void
996 size_t& dstErrorCount,
997 const size_t& srcErrorCount)
const
999 dstErrorCount += srcErrorCount;
1003 unpack (
const ExecutionSpace& execSpace,
1008 const size_type numCols,
1009 const bool use_atomic_updates)
1011 using index_type =
typename IdxView::non_const_value_type;
1013 size_t errorCount = 0;
1014 if (use_atomic_updates) {
1016 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1017 Kokkos::parallel_reduce
1018 (
"Tpetra::MultiVector unpack multicol const stride atomic debug only",
1019 range_type (0, idx.size ()),
1020 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1026 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1027 Kokkos::parallel_reduce
1028 (
"Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1029 range_type (0, idx.size ()),
1030 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1035 if (errorCount != 0) {
1039 auto idx_h = Kokkos::create_mirror_view (idx);
1042 Kokkos::deep_copy (idx_h, idx);
1044 std::vector<index_type> badIndices;
1045 const size_type numInds = idx_h.extent (0);
1046 for (size_type k = 0; k < numInds; ++k) {
1047 if (idx_h(k) <
static_cast<index_type
> (0) ||
1048 idx_h(k) >=
static_cast<index_type
> (dst.extent (0))) {
1049 badIndices.push_back (idx_h(k));
1053 if (errorCount != badIndices.size ()) {
1054 std::ostringstream os;
1055 os <<
"MultiVector unpack kernel: errorCount = " << errorCount
1056 <<
" != badIndices.size() = " << badIndices.size ()
1057 <<
". This should never happen. "
1058 "Please report this to the Tpetra developers.";
1059 throw std::logic_error (os.str ());
1062 std::ostringstream os;
1063 os <<
"MultiVector unpack kernel had " << badIndices.size ()
1064 <<
" out-of bounds index/ices. Here they are: [";
1065 for (
size_t k = 0; k < badIndices.size (); ++k) {
1066 os << badIndices[k];
1067 if (k + 1 < badIndices.size ()) {
1072 throw std::runtime_error (os.str ());
1077 template <
typename ExecutionSpace,
1083 unpack_array_multi_column (
const ExecutionSpace& execSpace,
1088 const size_t numCols,
1089 const bool use_atomic_updates,
1092 static_assert (Kokkos::is_view<DstView>::value,
1093 "DstView must be a Kokkos::View.");
1094 static_assert (Kokkos::is_view<SrcView>::value,
1095 "SrcView must be a Kokkos::View.");
1096 static_assert (Kokkos::is_view<IdxView>::value,
1097 "IdxView must be a Kokkos::View.");
1098 static_assert (
static_cast<int> (DstView::rank) == 2,
1099 "DstView must be a rank-2 Kokkos::View.");
1100 static_assert (
static_cast<int> (SrcView::rank) == 1,
1101 "SrcView must be a rank-1 Kokkos::View.");
1102 static_assert (
static_cast<int> (IdxView::rank) == 1,
1103 "IdxView must be a rank-1 Kokkos::View.");
1106 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1107 DstView, SrcView, IdxView, Op> impl_type;
1108 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1109 use_atomic_updates);
1112 typedef UnpackArrayMultiColumn<ExecutionSpace,
1113 DstView, SrcView, IdxView, Op> impl_type;
1114 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1115 use_atomic_updates);
1119 template <
typename ExecutionSpace,
1125 typename Enabled =
void>
1126 class UnpackArrayMultiColumnVariableStride {
1128 static_assert (Kokkos::is_view<DstView>::value,
1129 "DstView must be a Kokkos::View.");
1130 static_assert (Kokkos::is_view<SrcView>::value,
1131 "SrcView must be a Kokkos::View.");
1132 static_assert (Kokkos::is_view<IdxView>::value,
1133 "IdxView must be a Kokkos::View.");
1134 static_assert (Kokkos::is_view<ColView>::value,
1135 "ColView must be a Kokkos::View.");
1136 static_assert (
static_cast<int> (DstView::rank) == 2,
1137 "DstView must be a rank-2 Kokkos::View.");
1138 static_assert (
static_cast<int> (SrcView::rank) == 1,
1139 "SrcView must be a rank-1 Kokkos::View.");
1140 static_assert (
static_cast<int> (IdxView::rank) == 1,
1141 "IdxView must be a rank-1 Kokkos::View.");
1142 static_assert (
static_cast<int> (ColView::rank) == 1,
1143 "ColView must be a rank-1 Kokkos::View.");
1146 using execution_space =
typename ExecutionSpace::execution_space;
1147 using size_type =
typename execution_space::size_type;
1158 UnpackArrayMultiColumnVariableStride (
const ExecutionSpace& ,
1159 const DstView& dst_,
1160 const SrcView& src_,
1161 const IdxView& idx_,
1162 const ColView& col_,
1164 const size_t numCols_) :
1173 template<
class TagType>
1174 KOKKOS_INLINE_FUNCTION
void
1175 operator() (TagType tag,
const size_type k)
const
1178 (std::is_same<TagType, atomic_tag>::value ||
1179 std::is_same<TagType, nonatomic_tag>::value,
1180 "TagType must be atomic_tag or nonatomic_tag.");
1182 const typename IdxView::value_type localRow = idx(k);
1183 const size_t offset = k*numCols;
1184 for (
size_t j = 0; j < numCols; ++j) {
1185 op (tag, dst(localRow, col(j)), src(offset+j));
1190 unpack (
const ExecutionSpace& execSpace,
1196 const size_t numCols,
1197 const bool use_atomic_updates)
1199 if (use_atomic_updates) {
1201 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1202 Kokkos::parallel_for
1203 (
"Tpetra::MultiVector unpack var stride atomic",
1204 range_type (0, idx.size ()),
1205 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1206 idx, col, op, numCols));
1210 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1211 Kokkos::parallel_for
1212 (
"Tpetra::MultiVector unpack var stride nonatomic",
1213 range_type (0, idx.size ()),
1214 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1215 idx, col, op, numCols));
1220 template <
typename ExecutionSpace,
1226 typename SizeType =
typename ExecutionSpace::execution_space::size_type,
1227 typename Enabled =
void>
1228 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1230 static_assert (Kokkos::is_view<DstView>::value,
1231 "DstView must be a Kokkos::View.");
1232 static_assert (Kokkos::is_view<SrcView>::value,
1233 "SrcView must be a Kokkos::View.");
1234 static_assert (Kokkos::is_view<IdxView>::value,
1235 "IdxView must be a Kokkos::View.");
1236 static_assert (Kokkos::is_view<ColView>::value,
1237 "ColView must be a Kokkos::View.");
1238 static_assert (
static_cast<int> (DstView::rank) == 2,
1239 "DstView must be a rank-2 Kokkos::View.");
1240 static_assert (
static_cast<int> (SrcView::rank) == 1,
1241 "SrcView must be a rank-1 Kokkos::View.");
1242 static_assert (
static_cast<int> (IdxView::rank) == 1,
1243 "IdxView must be a rank-1 Kokkos::View.");
1244 static_assert (
static_cast<int> (ColView::rank) == 1,
1245 "ColView must be a rank-1 Kokkos::View.");
1246 static_assert (std::is_integral<SizeType>::value,
1247 "SizeType must be a built-in integer type.");
1250 using execution_space =
typename ExecutionSpace::execution_space;
1251 using size_type = SizeType;
1252 using value_type = size_t;
1263 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1264 (
const ExecutionSpace& ,
1265 const DstView& dst_,
1266 const SrcView& src_,
1267 const IdxView& idx_,
1268 const ColView& col_,
1270 const size_t numCols_) :
1279 template<
class TagType>
1280 KOKKOS_INLINE_FUNCTION
void
1281 operator() (TagType tag,
1283 value_type& lclErrorCount)
const
1286 (std::is_same<TagType, atomic_tag>::value ||
1287 std::is_same<TagType, nonatomic_tag>::value,
1288 "TagType must be atomic_tag or nonatomic_tag.");
1289 using row_index_type =
typename IdxView::non_const_value_type;
1290 using col_index_type =
typename ColView::non_const_value_type;
1292 const row_index_type lclRow = idx(k);
1293 if (lclRow <
static_cast<row_index_type
> (0) ||
1294 lclRow >=
static_cast<row_index_type
> (dst.extent (0))) {
1298 const size_type offset = k * numCols;
1299 for (size_type j = 0; j < numCols; ++j) {
1300 const col_index_type lclCol = col(j);
1301 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1305 op (tag, dst(lclRow, col(j)), src(offset+j));
1311 KOKKOS_INLINE_FUNCTION
void
1312 init (value_type& initialErrorCount)
const {
1313 initialErrorCount = 0;
1316 KOKKOS_INLINE_FUNCTION
void
1317 join (value_type& dstErrorCount,
1318 const value_type& srcErrorCount)
const
1320 dstErrorCount += srcErrorCount;
1324 unpack (
const ExecutionSpace& execSpace,
1330 const size_type numCols,
1331 const bool use_atomic_updates)
1333 using row_index_type =
typename IdxView::non_const_value_type;
1334 using col_index_type =
typename ColView::non_const_value_type;
1336 size_t errorCount = 0;
1337 if (use_atomic_updates) {
1339 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1340 Kokkos::parallel_reduce
1341 (
"Tpetra::MultiVector unpack var stride atomic debug only",
1342 range_type (0, idx.size ()),
1343 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1344 (execSpace, dst, src, idx, col, op, numCols),
1349 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1350 Kokkos::parallel_reduce
1351 (
"Tpetra::MultiVector unpack var stride nonatomic debug only",
1352 range_type (0, idx.size ()),
1353 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1354 (execSpace, dst, src, idx, col, op, numCols),
1358 if (errorCount != 0) {
1359 constexpr size_t maxNumBadIndicesToPrint = 100;
1361 std::ostringstream os;
1362 os <<
"Tpetra::MultiVector multicolumn variable stride unpack kernel "
1363 "found " << errorCount
1364 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
1370 auto idx_h = Kokkos::create_mirror_view (idx);
1373 Kokkos::deep_copy (idx_h, idx);
1375 std::vector<row_index_type> badRows;
1376 const size_type numRowInds = idx_h.extent (0);
1377 for (size_type k = 0; k < numRowInds; ++k) {
1378 if (idx_h(k) <
static_cast<row_index_type
> (0) ||
1379 idx_h(k) >=
static_cast<row_index_type
> (dst.extent (0))) {
1380 badRows.push_back (idx_h(k));
1384 if (badRows.size () != 0) {
1385 os << badRows.size () <<
" out-of-bounds row ind"
1386 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
1387 if (badRows.size () <= maxNumBadIndicesToPrint) {
1389 for (
size_t k = 0; k < badRows.size (); ++k) {
1391 if (k + 1 < badRows.size ()) {
1402 os <<
"No out-of-bounds row indices. ";
1407 auto col_h = Kokkos::create_mirror_view (col);
1410 Kokkos::deep_copy (col_h, col);
1412 std::vector<col_index_type> badCols;
1413 const size_type numColInds = col_h.extent (0);
1414 for (size_type k = 0; k < numColInds; ++k) {
1415 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1416 badCols.push_back (col_h(k));
1420 if (badCols.size () != 0) {
1421 os << badCols.size () <<
" out-of-bounds column ind"
1422 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
1423 if (badCols.size () <= maxNumBadIndicesToPrint) {
1424 for (
size_t k = 0; k < badCols.size (); ++k) {
1427 if (k + 1 < badCols.size ()) {
1438 os <<
"No out-of-bounds column indices. ";
1441 TEUCHOS_TEST_FOR_EXCEPTION
1442 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1443 std::logic_error,
"Tpetra::MultiVector variable stride unpack "
1444 "kernel reports errorCount=" << errorCount <<
", but we failed "
1445 "to find any bad rows or columns. This should never happen. "
1446 "Please report this to the Tpetra developers.");
1448 throw std::runtime_error (os.str ());
1453 template <
typename ExecutionSpace,
1460 unpack_array_multi_column_variable_stride (
const ExecutionSpace& execSpace,
1466 const size_t numCols,
1467 const bool use_atomic_updates,
1470 static_assert (Kokkos::is_view<DstView>::value,
1471 "DstView must be a Kokkos::View.");
1472 static_assert (Kokkos::is_view<SrcView>::value,
1473 "SrcView must be a Kokkos::View.");
1474 static_assert (Kokkos::is_view<IdxView>::value,
1475 "IdxView must be a Kokkos::View.");
1476 static_assert (Kokkos::is_view<ColView>::value,
1477 "ColView must be a Kokkos::View.");
1478 static_assert (
static_cast<int> (DstView::rank) == 2,
1479 "DstView must be a rank-2 Kokkos::View.");
1480 static_assert (
static_cast<int> (SrcView::rank) == 1,
1481 "SrcView must be a rank-1 Kokkos::View.");
1482 static_assert (
static_cast<int> (IdxView::rank) == 1,
1483 "IdxView must be a rank-1 Kokkos::View.");
1484 static_assert (
static_cast<int> (ColView::rank) == 1,
1485 "ColView must be a rank-1 Kokkos::View.");
1489 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1490 DstView, SrcView, IdxView, ColView, Op>;
1491 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1492 use_atomic_updates);
1495 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1496 DstView, SrcView, IdxView, ColView, Op>;
1497 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1498 use_atomic_updates);
1502 template <
typename DstView,
typename SrcView,
1503 typename DstIdxView,
typename SrcIdxView,
typename Op,
1504 typename Enabled =
void>
1505 struct PermuteArrayMultiColumn {
1506 typedef typename DstView::execution_space execution_space;
1507 typedef typename execution_space::size_type size_type;
1516 PermuteArrayMultiColumn (
const DstView& dst_,
1517 const SrcView& src_,
1518 const DstIdxView& dst_idx_,
1519 const SrcIdxView& src_idx_,
1520 const size_t numCols_,
1522 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1523 numCols(numCols_), op(op_) {}
1525 KOKKOS_INLINE_FUNCTION
void
1526 operator() (
const size_type k)
const {
1527 const typename DstIdxView::value_type toRow = dst_idx(k);
1528 const typename SrcIdxView::value_type fromRow = src_idx(k);
1530 for (
size_t j = 0; j < numCols; ++j) {
1531 op(tag, dst(toRow, j), src(fromRow, j));
1536 permute (
const DstView& dst,
1538 const DstIdxView& dst_idx,
1539 const SrcIdxView& src_idx,
1540 const size_t numCols,
1544 Kokkos::RangePolicy<execution_space, size_type>;
1546 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1547 Kokkos::parallel_for
1548 (
"Tpetra::MultiVector permute multicol const stride",
1550 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1556 template <
typename DstView,
typename SrcView,
1557 typename DstIdxView,
typename SrcIdxView,
typename Op>
1558 void permute_array_multi_column(
const DstView& dst,
1560 const DstIdxView& dst_idx,
1561 const SrcIdxView& src_idx,
1564 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1565 dst, src, dst_idx, src_idx, numCols, op);
1568 template <
typename DstView,
typename SrcView,
1569 typename DstIdxView,
typename SrcIdxView,
1570 typename DstColView,
typename SrcColView,
typename Op,
1571 typename Enabled =
void>
1572 struct PermuteArrayMultiColumnVariableStride {
1573 typedef typename DstView::execution_space execution_space;
1574 typedef typename execution_space::size_type size_type;
1585 PermuteArrayMultiColumnVariableStride(
const DstView& dst_,
1586 const SrcView& src_,
1587 const DstIdxView& dst_idx_,
1588 const SrcIdxView& src_idx_,
1589 const DstColView& dst_col_,
1590 const SrcColView& src_col_,
1591 const size_t numCols_,
1593 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1594 dst_col(dst_col_), src_col(src_col_),
1595 numCols(numCols_), op(op_) {}
1597 KOKKOS_INLINE_FUNCTION
void
1598 operator() (
const size_type k)
const {
1599 const typename DstIdxView::value_type toRow = dst_idx(k);
1600 const typename SrcIdxView::value_type fromRow = src_idx(k);
1601 const nonatomic_tag tag;
1602 for (
size_t j = 0; j < numCols; ++j) {
1603 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1608 permute (
const DstView& dst,
1610 const DstIdxView& dst_idx,
1611 const SrcIdxView& src_idx,
1612 const DstColView& dst_col,
1613 const SrcColView& src_col,
1614 const size_t numCols,
1617 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1618 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1619 Kokkos::parallel_for
1620 (
"Tpetra::MultiVector permute multicol var stride",
1622 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1623 dst_col, src_col, numCols, op));
1629 template <
typename DstView,
typename SrcView,
1630 typename DstIdxView,
typename SrcIdxView,
1631 typename DstColView,
typename SrcColView,
typename Op>
1632 void permute_array_multi_column_variable_stride(
const DstView& dst,
1634 const DstIdxView& dst_idx,
1635 const SrcIdxView& src_idx,
1636 const DstColView& dst_col,
1637 const SrcColView& src_col,
1640 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1641 DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1642 dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...