40#ifndef TPETRA_MULTIVECTOR_DEF_HPP
41#define TPETRA_MULTIVECTOR_DEF_HPP
53#include "Tpetra_Vector.hpp"
65#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
66# include "Teuchos_SerialDenseMatrix.hpp"
68#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
69#include "KokkosCompat_View.hpp"
70#include "KokkosBlas.hpp"
71#include "KokkosKernels_Utils.hpp"
72#include "Kokkos_Random.hpp"
73#include "Kokkos_ArithTraits.hpp"
77#ifdef HAVE_TPETRA_INST_FLOAT128
80 template<
class Generator>
81 struct rand<Generator, __float128> {
82 static KOKKOS_INLINE_FUNCTION __float128 max ()
84 return static_cast<__float128
> (1.0);
86 static KOKKOS_INLINE_FUNCTION __float128
91 const __float128 scalingFactor =
92 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
93 static_cast<__float128
> (2.0);
94 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
95 const __float128 lowerOrderTerm =
96 static_cast<__float128
> (gen.drand ()) * scalingFactor;
97 return higherOrderTerm + lowerOrderTerm;
99 static KOKKOS_INLINE_FUNCTION __float128
100 draw (Generator& gen,
const __float128& range)
103 const __float128 scalingFactor =
104 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
105 static_cast<__float128
> (2.0);
106 const __float128 higherOrderTerm =
107 static_cast<__float128
> (gen.drand (range));
108 const __float128 lowerOrderTerm =
109 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
110 return higherOrderTerm + lowerOrderTerm;
112 static KOKKOS_INLINE_FUNCTION __float128
113 draw (Generator& gen,
const __float128& start,
const __float128& end)
116 const __float128 scalingFactor =
117 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
118 static_cast<__float128
> (2.0);
119 const __float128 higherOrderTerm =
120 static_cast<__float128
> (gen.drand (start, end));
121 const __float128 lowerOrderTerm =
122 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
123 return higherOrderTerm + lowerOrderTerm;
145 template<
class ST,
class LO,
class GO,
class NT>
146 typename Tpetra::MultiVector<ST, LO, GO, NT>::wrapped_dual_view_type
147 allocDualView (
const size_t lclNumRows,
148 const size_t numCols,
149 const bool zeroOut =
true,
150 const bool allowPadding =
false)
152 using ::Tpetra::Details::Behavior;
153 using Kokkos::AllowPadding;
154 using Kokkos::view_alloc;
155 using Kokkos::WithoutInitializing;
157 typedef typename Tpetra::MultiVector<ST, LO, GO, NT>::wrapped_dual_view_type wrapped_dual_view_type;
158 typedef typename dual_view_type::t_dev dev_view_type;
164 const std::string label (
"MV::DualView");
165 const bool debug = Behavior::debug ();
175 dev_view_type d_view;
178 d_view = dev_view_type (view_alloc (label, AllowPadding),
179 lclNumRows, numCols);
182 d_view = dev_view_type (view_alloc (label),
183 lclNumRows, numCols);
188 d_view = dev_view_type (view_alloc (label,
191 lclNumRows, numCols);
194 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
195 lclNumRows, numCols);
206 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
207 KokkosBlas::fill (d_view, nan);
211 TEUCHOS_TEST_FOR_EXCEPTION
212 (
static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
213 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
214 "allocDualView: d_view's dimensions actual dimensions do not match "
215 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
216 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
217 <<
". Please report this bug to the Tpetra developers.");
220 return wrapped_dual_view_type(d_view);
227 template<
class T,
class ExecSpace>
228 struct MakeUnmanagedView {
238 typedef typename std::conditional<
239 Kokkos::SpaceAccessibility<
240 typename ExecSpace::memory_space,
241 Kokkos::HostSpace>::accessible,
242 typename ExecSpace::device_type,
243 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
244 typedef Kokkos::LayoutLeft array_layout;
245 typedef Kokkos::View<T*, array_layout, host_exec_space,
246 Kokkos::MemoryUnmanaged> view_type;
248 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
250 const size_t numEnt =
static_cast<size_t> (x_in.size ());
254 return view_type (x_in.getRawPtr (), numEnt);
260 template<
class WrappedDualViewType>
262 takeSubview (
const WrappedDualViewType& X,
263 const std::pair<size_t, size_t>& rowRng,
264 const Kokkos::Impl::ALL_t& colRng)
268 return WrappedDualViewType(X,rowRng,colRng);
271 template<
class WrappedDualViewType>
273 takeSubview (
const WrappedDualViewType& X,
274 const Kokkos::Impl::ALL_t& rowRng,
275 const std::pair<size_t, size_t>& colRng)
277 using DualViewType =
typename WrappedDualViewType::DVT;
280 if (X.extent (0) == 0 && X.extent (1) != 0) {
281 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
284 return WrappedDualViewType(X,rowRng,colRng);
288 template<
class WrappedDualViewType>
290 takeSubview (
const WrappedDualViewType& X,
291 const std::pair<size_t, size_t>& rowRng,
292 const std::pair<size_t, size_t>& colRng)
294 using DualViewType =
typename WrappedDualViewType::DVT;
304 if (X.extent (0) == 0 && X.extent (1) != 0) {
305 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
308 return WrappedDualViewType(X,rowRng,colRng);
312 template<
class WrappedOrNotDualViewType>
314 getDualViewStride (
const WrappedOrNotDualViewType& dv)
320 size_t strides[WrappedOrNotDualViewType::t_dev::Rank+1];
322 const size_t LDA = strides[1];
323 const size_t numRows = dv.extent (0);
326 return (numRows == 0) ? size_t (1) : numRows;
333 template<
class ViewType>
335 getViewStride (
const ViewType& view)
337 const size_t LDA = view.stride (1);
338 const size_t numRows = view.extent (0);
341 return (numRows == 0) ? size_t (1) : numRows;
348 template <
class impl_scalar_type,
class buffer_device_type>
351 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
354 if (! imports.need_sync_device ()) {
359 size_t localLengthThreshold =
361 return imports.extent(0) <= localLengthThreshold;
366 template <
class SC,
class LO,
class GO,
class NT>
368 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
370 if (! X.need_sync_device ()) {
375 size_t localLengthThreshold =
377 return X.getLocalLength () <= localLengthThreshold;
381 template <
class SC,
class LO,
class GO,
class NT>
383 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
385 const ::Tpetra::Details::EWhichNorm whichNorm)
388 using ::Tpetra::Details::normImpl;
390 using val_type =
typename MV::impl_scalar_type;
391 using mag_type =
typename MV::mag_type;
392 using dual_view_type =
typename MV::dual_view_type;
395 auto comm = map.is_null () ? nullptr : map->getComm ().getRawPtr ();
396 auto whichVecs = getMultiVectorWhichVectors (X);
400 const bool runOnHost = runKernelOnHost (X);
402 using view_type =
typename dual_view_type::t_host;
403 using array_layout =
typename view_type::array_layout;
404 using device_type =
typename view_type::device_type;
407 normImpl<val_type, array_layout, device_type,
408 mag_type> (norms, X_lcl, whichNorm, whichVecs,
409 isConstantStride, isDistributed, comm);
412 using view_type =
typename dual_view_type::t_dev;
413 using array_layout =
typename view_type::array_layout;
414 using device_type =
typename view_type::device_type;
417 normImpl<val_type, array_layout, device_type,
418 mag_type> (norms, X_lcl, whichNorm, whichVecs,
419 isConstantStride, isDistributed, comm);
428 template <
typename DstView,
typename SrcView>
429 struct AddAssignFunctor {
432 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
434 KOKKOS_INLINE_FUNCTION
void
435 operator () (
const size_t k)
const { tgt(k) += src(k); }
442 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
446 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
449 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
455 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
457 MultiVector (
const Teuchos::RCP<const map_type>& map,
458 const size_t numVecs,
459 const bool zeroOut) :
465 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
468 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
471 const Teuchos::DataAccess copyOrView) :
473 view_ (source.view_),
474 whichVectors_ (source.whichVectors_)
477 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
478 "const Teuchos::DataAccess): ";
482 if (copyOrView == Teuchos::Copy) {
486 this->
view_ = cpy.view_;
489 else if (copyOrView == Teuchos::View) {
492 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
493 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
494 "invalid value " << copyOrView <<
". Valid values include "
495 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
496 << Teuchos::View <<
".");
500 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
502 MultiVector (
const Teuchos::RCP<const map_type>& map,
505 view_ (wrapped_dual_view_type(view))
507 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
508 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
509 map->getLocalNumElements ();
510 const size_t lclNumRows_view = view.extent (0);
511 const size_t LDA = getDualViewStride (
view_);
513 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
514 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
515 std::invalid_argument,
"Kokkos::DualView does not match Map. "
516 "map->getLocalNumElements() = " << lclNumRows_map
517 <<
", view.extent(0) = " << lclNumRows_view
518 <<
", and getStride() = " << LDA <<
".");
520 using ::Tpetra::Details::Behavior;
521 const bool debug = Behavior::debug ();
523 using ::Tpetra::Details::checkGlobalDualViewValidity;
524 std::ostringstream gblErrStrm;
525 const bool verbose = Behavior::verbose ();
526 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
527 const bool gblValid =
528 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
530 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
531 (! gblValid, std::runtime_error, gblErrStrm.str ());
536 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
538 MultiVector (
const Teuchos::RCP<const map_type>& map,
539 const wrapped_dual_view_type& view) :
543 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
544 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
545 map->getLocalNumElements ();
546 const size_t lclNumRows_view = view.extent (0);
547 const size_t LDA = getDualViewStride (view);
549 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
550 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
551 std::invalid_argument,
"Kokkos::DualView does not match Map. "
552 "map->getLocalNumElements() = " << lclNumRows_map
553 <<
", view.extent(0) = " << lclNumRows_view
554 <<
", and getStride() = " << LDA <<
".");
556 using ::Tpetra::Details::Behavior;
557 const bool debug = Behavior::debug ();
559 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
560 std::ostringstream gblErrStrm;
561 const bool verbose = Behavior::verbose ();
562 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
563 const bool gblValid =
564 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
567 (! gblValid, std::runtime_error, gblErrStrm.str ());
573 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
575 MultiVector (
const Teuchos::RCP<const map_type>& map,
576 const typename dual_view_type::t_dev& d_view) :
579 using Teuchos::ArrayRCP;
581 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
585 const size_t LDA = getViewStride (d_view);
586 const size_t lclNumRows = map->getLocalNumElements ();
587 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
588 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
589 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
590 <<
", View's column stride = " << LDA
591 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
593 auto h_view = Kokkos::create_mirror_view (d_view);
595 view_ = wrapped_dual_view_type(dual_view);
597 using ::Tpetra::Details::Behavior;
598 const bool debug = Behavior::debug ();
600 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
601 std::ostringstream gblErrStrm;
602 const bool verbose = Behavior::verbose ();
603 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
604 const bool gblValid =
605 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
607 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
608 (! gblValid, std::runtime_error, gblErrStrm.str ());
613 dual_view.modify_device ();
616 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
618 MultiVector (
const Teuchos::RCP<const map_type>& map,
622 view_ (wrapped_dual_view_type(view,origView))
624 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
626 const size_t LDA = getDualViewStride (origView);
627 const size_t lclNumRows = this->getLocalLength ();
628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
629 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
630 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
631 <<
". This may also mean that the input origView's first dimension (number "
632 "of rows = " << origView.extent (0) <<
") does not not match the number "
633 "of entries in the Map on the calling process.");
635 using ::Tpetra::Details::Behavior;
636 const bool debug = Behavior::debug ();
638 using ::Tpetra::Details::checkGlobalDualViewValidity;
639 std::ostringstream gblErrStrm;
640 const bool verbose = Behavior::verbose ();
641 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
642 const bool gblValid_0 =
643 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
645 const bool gblValid_1 =
646 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
648 const bool gblValid = gblValid_0 && gblValid_1;
649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
650 (! gblValid, std::runtime_error, gblErrStrm.str ());
655 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
657 MultiVector (
const Teuchos::RCP<const map_type>& map,
659 const Teuchos::ArrayView<const size_t>& whichVectors) :
662 whichVectors_ (whichVectors.begin (), whichVectors.end ())
665 using Kokkos::subview;
666 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
668 using ::Tpetra::Details::Behavior;
669 const bool debug = Behavior::debug ();
671 using ::Tpetra::Details::checkGlobalDualViewValidity;
672 std::ostringstream gblErrStrm;
673 const bool verbose = Behavior::verbose ();
674 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
675 const bool gblValid =
676 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
678 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
679 (! gblValid, std::runtime_error, gblErrStrm.str ());
682 const size_t lclNumRows = map.is_null () ? size_t (0) :
683 map->getLocalNumElements ();
690 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
691 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
692 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
693 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
694 if (whichVectors.size () != 0) {
695 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
696 view.extent (1) != 0 && view.extent (1) == 0,
697 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
698 " = " << whichVectors.size () <<
" > 0.");
699 size_t maxColInd = 0;
700 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
701 for (size_type k = 0; k < whichVectors.size (); ++k) {
702 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
703 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
704 std::invalid_argument,
"whichVectors[" << k <<
"] = "
705 "Teuchos::OrdinalTraits<size_t>::invalid().");
706 maxColInd = std::max (maxColInd, whichVectors[k]);
708 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
709 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
710 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
711 <<
" <= max(whichVectors) = " << maxColInd <<
".");
716 const size_t LDA = getDualViewStride (view);
717 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
718 (LDA < lclNumRows, std::invalid_argument,
719 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
721 if (whichVectors.size () == 1) {
731 const std::pair<size_t, size_t> colRng (whichVectors[0],
732 whichVectors[0] + 1);
733 view_ = takeSubview (view_, ALL (), colRng);
735 whichVectors_.clear ();
739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
741 MultiVector (
const Teuchos::RCP<const map_type>& map,
742 const wrapped_dual_view_type& view,
743 const Teuchos::ArrayView<const size_t>& whichVectors) :
746 whichVectors_ (whichVectors.begin (), whichVectors.end ())
749 using Kokkos::subview;
750 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
752 using ::Tpetra::Details::Behavior;
753 const bool debug = Behavior::debug ();
755 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
756 std::ostringstream gblErrStrm;
757 const bool verbose = Behavior::verbose ();
758 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
759 const bool gblValid =
760 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
762 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
763 (! gblValid, std::runtime_error, gblErrStrm.str ());
766 const size_t lclNumRows = map.is_null () ? size_t (0) :
767 map->getLocalNumElements ();
774 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
775 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
776 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
777 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
778 if (whichVectors.size () != 0) {
779 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
780 view.extent (1) != 0 && view.extent (1) == 0,
781 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
782 " = " << whichVectors.size () <<
" > 0.");
783 size_t maxColInd = 0;
784 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
785 for (size_type k = 0; k < whichVectors.size (); ++k) {
786 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
787 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
788 std::invalid_argument,
"whichVectors[" << k <<
"] = "
789 "Teuchos::OrdinalTraits<size_t>::invalid().");
790 maxColInd = std::max (maxColInd, whichVectors[k]);
792 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
793 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
794 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
795 <<
" <= max(whichVectors) = " << maxColInd <<
".");
800 const size_t LDA = getDualViewStride (view);
801 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
802 (LDA < lclNumRows, std::invalid_argument,
803 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
805 if (whichVectors.size () == 1) {
815 const std::pair<size_t, size_t> colRng (whichVectors[0],
816 whichVectors[0] + 1);
824 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
826 MultiVector (
const Teuchos::RCP<const map_type>& map,
829 const Teuchos::ArrayView<const size_t>& whichVectors) :
831 view_ (wrapped_dual_view_type(view,origView)),
832 whichVectors_ (whichVectors.begin (), whichVectors.end ())
835 using Kokkos::subview;
836 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
838 using ::Tpetra::Details::Behavior;
839 const bool debug = Behavior::debug ();
841 using ::Tpetra::Details::checkGlobalDualViewValidity;
842 std::ostringstream gblErrStrm;
843 const bool verbose = Behavior::verbose ();
844 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
845 const bool gblValid_0 =
846 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
848 const bool gblValid_1 =
849 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
851 const bool gblValid = gblValid_0 && gblValid_1;
852 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
853 (! gblValid, std::runtime_error, gblErrStrm.str ());
856 const size_t lclNumRows = this->getLocalLength ();
863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
864 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
865 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
866 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
867 if (whichVectors.size () != 0) {
868 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
869 view.extent (1) != 0 && view.extent (1) == 0,
870 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
871 " = " << whichVectors.size () <<
" > 0.");
872 size_t maxColInd = 0;
873 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
874 for (size_type k = 0; k < whichVectors.size (); ++k) {
875 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
876 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
877 std::invalid_argument,
"whichVectors[" << k <<
"] = "
878 "Teuchos::OrdinalTraits<size_t>::invalid().");
879 maxColInd = std::max (maxColInd, whichVectors[k]);
881 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
882 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
883 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
884 <<
" <= max(whichVectors) = " << maxColInd <<
".");
889 const size_t LDA = getDualViewStride (origView);
890 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
891 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
892 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
893 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
894 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
896 if (whichVectors.size () == 1) {
905 const std::pair<size_t, size_t> colRng (whichVectors[0],
906 whichVectors[0] + 1);
907 view_ = takeSubview (view_, ALL (), colRng);
910 whichVectors_.clear ();
914 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
916 MultiVector (
const Teuchos::RCP<const map_type>& map,
917 const Teuchos::ArrayView<const Scalar>& data,
919 const size_t numVecs) :
922 typedef LocalOrdinal LO;
923 typedef GlobalOrdinal GO;
924 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
930 const size_t lclNumRows =
931 map.is_null () ? size_t (0) : map->getLocalNumElements ();
932 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
933 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
934 "map->getLocalNumElements() = " << lclNumRows <<
".");
936 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
938 (
static_cast<size_t> (data.size ()) < minNumEntries,
939 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
940 "entries, given the input Map and number of vectors in the MultiVector."
941 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
942 "map->getLocalNumElements () = " << minNumEntries <<
".");
945 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
957 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
958 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
959 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
964 const size_t outStride =
965 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
966 if (LDA == outStride) {
973 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
975 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
977 for (
size_t j = 0; j < numVecs; ++j) {
978 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
979 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
986 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
988 MultiVector (
const Teuchos::RCP<const map_type>& map,
989 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
990 const size_t numVecs) :
994 typedef LocalOrdinal LO;
995 typedef GlobalOrdinal GO;
996 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
999 const size_t lclNumRows =
1000 map.is_null () ? size_t (0) : map->getLocalNumElements ();
1001 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1002 (numVecs < 1 || numVecs !=
static_cast<size_t> (ArrayOfPtrs.size ()),
1003 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
1004 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
1005 for (
size_t j = 0; j < numVecs; ++j) {
1006 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
1007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1008 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
1009 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
1010 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
1014 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
1019 using array_layout =
typename decltype (X_out)::array_layout;
1020 using input_col_view_type =
typename Kokkos::View<
const IST*,
1023 Kokkos::MemoryUnmanaged>;
1025 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
1026 for (
size_t j = 0; j < numVecs; ++j) {
1027 Teuchos::ArrayView<const IST> X_j_av =
1028 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
1029 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1030 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1036 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1039 return whichVectors_.empty ();
1042 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1047 if (this->getMap ().is_null ()) {
1048 return static_cast<size_t> (0);
1050 return this->getMap ()->getLocalNumElements ();
1054 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1059 if (this->getMap ().is_null ()) {
1060 return static_cast<size_t> (0);
1062 return this->getMap ()->getGlobalNumElements ();
1066 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1071 return isConstantStride () ? getDualViewStride (view_) : size_t (0);
1074 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1080 auto thisData = view_.getDualView().h_view.data();
1081 auto otherData = other.
view_.getDualView().h_view.data();
1082 size_t thisSpan = view_.getDualView().h_view.span();
1083 size_t otherSpan = other.
view_.getDualView().h_view.span();
1084 return (otherData <= thisData && thisData < otherData + otherSpan)
1085 || (thisData <= otherData && otherData < thisData + thisSpan);
1088 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1097 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1098 if (src ==
nullptr) {
1112 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1116 return this->getNumVectors ();
1119 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1124 const size_t numSameIDs,
1125 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1126 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1129 using ::Tpetra::Details::Behavior;
1130 using ::Tpetra::Details::getDualViewCopyFromArrayView;
1131 using ::Tpetra::Details::ProfilingRegion;
1133 using KokkosRefactor::Details::permute_array_multi_column;
1134 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1135 using Kokkos::Compat::create_const_view;
1137 const char tfecfFuncName[] =
"copyAndPermute: ";
1138 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
1140 const bool verbose = Behavior::verbose ();
1141 std::unique_ptr<std::string> prefix;
1143 auto map = this->getMap ();
1144 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1145 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1146 std::ostringstream os;
1147 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1148 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1149 os <<
"Start" << endl;
1150 std::cerr << os.str ();
1153 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1154 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1155 std::logic_error,
"permuteToLIDs.extent(0) = "
1156 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1157 << permuteFromLIDs.extent (0) <<
".");
1160 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1161 const size_t numCols = this->getNumVectors ();
1165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1166 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1167 std::logic_error,
"Input MultiVector needs sync to both host "
1169 const bool copyOnHost = runKernelOnHost(sourceMV);
1171 std::ostringstream os;
1172 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1173 std::cerr << os.str ();
1177 std::ostringstream os;
1178 os << *prefix <<
"Copy" << endl;
1179 std::cerr << os.str ();
1204 if (numSameIDs > 0) {
1205 const std::pair<size_t, size_t> rows (0, numSameIDs);
1207 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1208 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1210 for (
size_t j = 0; j < numCols; ++j) {
1211 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1212 const size_t srcCol =
1213 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1215 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1216 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1220 Kokkos::RangePolicy<typename Node::execution_space, size_t>;
1221 range_t rp(0,numSameIDs);
1222 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1224 Kokkos::parallel_for(rp, aaf);
1229 Kokkos::deep_copy (tgt_j, src_j);
1234 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1235 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1237 for (
size_t j = 0; j < numCols; ++j) {
1238 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1239 const size_t srcCol =
1240 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1242 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1243 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1247 Kokkos::RangePolicy<typename Node::execution_space, size_t>;
1248 range_t rp(0,numSameIDs);
1249 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1251 Kokkos::parallel_for(rp, aaf);
1256 Kokkos::deep_copy (tgt_j, src_j);
1273 if (permuteFromLIDs.extent (0) == 0 ||
1274 permuteToLIDs.extent (0) == 0) {
1276 std::ostringstream os;
1277 os << *prefix <<
"No permutations. Done!" << endl;
1278 std::cerr << os.str ();
1284 std::ostringstream os;
1285 os << *prefix <<
"Permute" << endl;
1286 std::cerr << os.str ();
1291 const bool nonConstStride =
1292 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1295 std::ostringstream os;
1296 os << *prefix <<
"nonConstStride="
1297 << (nonConstStride ?
"true" :
"false") << endl;
1298 std::cerr << os.str ();
1305 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1306 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1307 if (nonConstStride) {
1308 if (this->whichVectors_.size () == 0) {
1309 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1310 tmpTgt.modify_host ();
1311 for (
size_t j = 0; j < numCols; ++j) {
1312 tmpTgt.h_view(j) = j;
1315 tmpTgt.sync_device ();
1317 tgtWhichVecs = tmpTgt;
1320 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1322 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1327 (
static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1328 this->getNumVectors (),
1329 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1330 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1331 this->getNumVectors () <<
".");
1333 if (sourceMV.whichVectors_.size () == 0) {
1334 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1335 tmpSrc.modify_host ();
1336 for (
size_t j = 0; j < numCols; ++j) {
1337 tmpSrc.h_view(j) = j;
1340 tmpSrc.sync_device ();
1342 srcWhichVecs = tmpSrc;
1345 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1346 sourceMV.whichVectors_ ();
1348 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1352 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1353 (
static_cast<size_t> (srcWhichVecs.extent (0)) !=
1354 sourceMV.getNumVectors (), std::logic_error,
1355 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1356 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1362 std::ostringstream os;
1363 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1364 std::cerr << os.str ();
1366 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1367 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1369 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1370 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1371 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1372 auto permuteFromLIDs_h =
1373 create_const_view (permuteFromLIDs.view_host ());
1376 std::ostringstream os;
1377 os << *prefix <<
"Permute on host" << endl;
1378 std::cerr << os.str ();
1380 if (nonConstStride) {
1383 auto tgtWhichVecs_h =
1384 create_const_view (tgtWhichVecs.view_host ());
1385 auto srcWhichVecs_h =
1386 create_const_view (srcWhichVecs.view_host ());
1388 using op_type = KokkosRefactor::Details::AddOp;
1389 permute_array_multi_column_variable_stride (tgt_h, src_h,
1393 srcWhichVecs_h, numCols,
1397 using op_type = KokkosRefactor::Details::InsertOp;
1398 permute_array_multi_column_variable_stride (tgt_h, src_h,
1402 srcWhichVecs_h, numCols,
1408 using op_type = KokkosRefactor::Details::AddOp;
1409 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1410 permuteFromLIDs_h, numCols, op_type());
1413 using op_type = KokkosRefactor::Details::InsertOp;
1414 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1415 permuteFromLIDs_h, numCols, op_type());
1421 std::ostringstream os;
1422 os << *prefix <<
"Get permute LIDs on device" << endl;
1423 std::cerr << os.str ();
1425 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1426 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1428 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1429 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1430 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1431 auto permuteFromLIDs_d =
1432 create_const_view (permuteFromLIDs.view_device ());
1435 std::ostringstream os;
1436 os << *prefix <<
"Permute on device" << endl;
1437 std::cerr << os.str ();
1439 if (nonConstStride) {
1442 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1443 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1445 using op_type = KokkosRefactor::Details::AddOp;
1446 permute_array_multi_column_variable_stride (tgt_d, src_d,
1450 srcWhichVecs_d, numCols,
1454 using op_type = KokkosRefactor::Details::InsertOp;
1455 permute_array_multi_column_variable_stride (tgt_d, src_d,
1459 srcWhichVecs_d, numCols,
1465 using op_type = KokkosRefactor::Details::AddOp;
1466 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1467 permuteFromLIDs_d, numCols, op_type());
1470 using op_type = KokkosRefactor::Details::InsertOp;
1471 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1472 permuteFromLIDs_d, numCols, op_type());
1478 std::ostringstream os;
1479 os << *prefix <<
"Done!" << endl;
1480 std::cerr << os.str ();
1484 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1489 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1490 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1491 Kokkos::DualView<size_t*, buffer_device_type> ,
1492 size_t& constantNumPackets)
1494 using ::Tpetra::Details::Behavior;
1495 using ::Tpetra::Details::ProfilingRegion;
1496 using ::Tpetra::Details::reallocDualViewIfNeeded;
1497 using Kokkos::Compat::create_const_view;
1498 using Kokkos::Compat::getKokkosViewDeepCopy;
1501 const char tfecfFuncName[] =
"packAndPrepare: ";
1502 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1510 const bool debugCheckIndices = Behavior::debug ();
1515 const bool printDebugOutput = Behavior::verbose ();
1517 std::unique_ptr<std::string> prefix;
1518 if (printDebugOutput) {
1519 auto map = this->getMap ();
1520 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1521 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1522 std::ostringstream os;
1523 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1524 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1525 os <<
"Start" << endl;
1526 std::cerr << os.str ();
1530 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1532 const size_t numCols = sourceMV.getNumVectors ();
1537 constantNumPackets = numCols;
1541 if (exportLIDs.extent (0) == 0) {
1542 if (printDebugOutput) {
1543 std::ostringstream os;
1544 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1545 std::cerr << os.str ();
1565 const size_t numExportLIDs = exportLIDs.extent (0);
1566 const size_t newExportsSize = numCols * numExportLIDs;
1567 if (printDebugOutput) {
1568 std::ostringstream os;
1569 os << *prefix <<
"realloc: "
1570 <<
"numExportLIDs: " << numExportLIDs
1571 <<
", exports.extent(0): " << exports.extent (0)
1572 <<
", newExportsSize: " << newExportsSize << std::endl;
1573 std::cerr << os.str ();
1575 reallocDualViewIfNeeded (exports, newExportsSize,
"exports");
1579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1580 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1581 std::logic_error,
"Input MultiVector needs sync to both host "
1583 const bool packOnHost = runKernelOnHost(sourceMV);
1584 if (printDebugOutput) {
1585 std::ostringstream os;
1586 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1587 std::cerr << os.str ();
1599 exports.clear_sync_state ();
1600 exports.modify_host ();
1608 exports.clear_sync_state ();
1609 exports.modify_device ();
1625 if (sourceMV.isConstantStride ()) {
1626 using KokkosRefactor::Details::pack_array_single_column;
1627 if (printDebugOutput) {
1628 std::ostringstream os;
1629 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1630 std::cerr << os.str ();
1633 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1634 pack_array_single_column (exports.view_host (),
1636 exportLIDs.view_host (),
1641 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1642 pack_array_single_column (exports.view_device (),
1644 exportLIDs.view_device (),
1650 using KokkosRefactor::Details::pack_array_single_column;
1651 if (printDebugOutput) {
1652 std::ostringstream os;
1653 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1654 std::cerr << os.str ();
1657 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1658 pack_array_single_column (exports.view_host (),
1660 exportLIDs.view_host (),
1661 sourceMV.whichVectors_[0],
1665 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1666 pack_array_single_column (exports.view_device (),
1668 exportLIDs.view_device (),
1669 sourceMV.whichVectors_[0],
1675 if (sourceMV.isConstantStride ()) {
1676 using KokkosRefactor::Details::pack_array_multi_column;
1677 if (printDebugOutput) {
1678 std::ostringstream os;
1679 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1680 std::cerr << os.str ();
1683 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1684 pack_array_multi_column (exports.view_host (),
1686 exportLIDs.view_host (),
1691 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1692 pack_array_multi_column (exports.view_device (),
1694 exportLIDs.view_device (),
1700 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1701 if (printDebugOutput) {
1702 std::ostringstream os;
1703 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1705 std::cerr << os.str ();
1711 using DV = Kokkos::DualView<IST*, device_type>;
1712 using HES =
typename DV::t_host::execution_space;
1713 using DES =
typename DV::t_dev::execution_space;
1714 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1716 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1717 pack_array_multi_column_variable_stride
1718 (exports.view_host (),
1720 exportLIDs.view_host (),
1721 getKokkosViewDeepCopy<HES> (whichVecs),
1726 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1727 pack_array_multi_column_variable_stride
1728 (exports.view_device (),
1730 exportLIDs.view_device (),
1731 getKokkosViewDeepCopy<DES> (whichVecs),
1738 if (printDebugOutput) {
1739 std::ostringstream os;
1740 os << *prefix <<
"Done!" << endl;
1741 std::cerr << os.str ();
1747 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1749 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1750 typename NO::device_type::memory_space>::value,
1755 const std::string* prefix,
1756 const bool areRemoteLIDsContiguous,
1765 std::ostringstream os;
1766 os << *prefix <<
"Realloc (if needed) imports_ from "
1767 << this->imports_.extent (0) <<
" to " << newSize << std::endl;
1768 std::cerr << os.str ();
1771 bool reallocated =
false;
1774 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1786 if ((dual_view_type::impl_dualview_is_single_device::value ||
1789 areRemoteLIDsContiguous &&
1791 (getNumVectors() == 1) &&
1794 size_t offset = getLocalLength () - newSize;
1795 reallocated = this->imports_.d_view.data() != view_.getDualView().d_view.data() + offset;
1797 typedef std::pair<size_t, size_t> range_type;
1798 this->imports_ = DV(view_.getDualView(),
1799 range_type (offset, getLocalLength () ),
1803 std::ostringstream os;
1804 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1805 std::cerr << os.str ();
1811 using ::Tpetra::Details::reallocDualViewIfNeeded;
1815 std::ostringstream os;
1816 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1817 std::cerr << os.str ();
1819 this->imports_ = this->unaliased_imports_;
1824 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1826 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1827 typename NO::device_type::memory_space>::value,
1832 const std::string* prefix,
1833 const bool areRemoteLIDsContiguous,
1839 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1844 const std::string* prefix,
1845 const bool areRemoteLIDsContiguous,
1848 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1851 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1855 return (this->imports_.d_view.data() + this->imports_.d_view.extent(0) ==
1856 view_.getDualView().d_view.data() + view_.getDualView().d_view.extent(0));
1860 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1864 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1865 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1866 Kokkos::DualView<size_t*, buffer_device_type> ,
1867 const size_t constantNumPackets,
1870 using ::Tpetra::Details::Behavior;
1871 using ::Tpetra::Details::ProfilingRegion;
1872 using KokkosRefactor::Details::unpack_array_multi_column;
1873 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1874 using Kokkos::Compat::getKokkosViewDeepCopy;
1876 const char longFuncName[] =
"Tpetra::MultiVector::unpackAndCombine";
1877 const char tfecfFuncName[] =
"unpackAndCombine: ";
1878 ProfilingRegion regionUAC (longFuncName);
1886 const bool debugCheckIndices = Behavior::debug ();
1888 const bool printDebugOutput = Behavior::verbose ();
1889 std::unique_ptr<std::string> prefix;
1890 if (printDebugOutput) {
1891 auto map = this->getMap ();
1892 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1893 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1894 std::ostringstream os;
1895 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1896 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1897 os <<
"Start" << endl;
1898 std::cerr << os.str ();
1902 if (importLIDs.extent (0) == 0) {
1903 if (printDebugOutput) {
1904 std::ostringstream os;
1905 os << *prefix <<
"No imports. Done!" << endl;
1906 std::cerr << os.str ();
1912 if (importsAreAliased()) {
1913 if (printDebugOutput) {
1914 std::ostringstream os;
1915 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1916 std::cerr << os.str ();
1922 const size_t numVecs = getNumVectors ();
1923 if (debugCheckIndices) {
1924 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1925 (
static_cast<size_t> (imports.extent (0)) !=
1926 numVecs * importLIDs.extent (0),
1928 "imports.extent(0) = " << imports.extent (0)
1929 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1930 <<
" * " << importLIDs.extent (0) <<
" = "
1931 << numVecs * importLIDs.extent (0) <<
".");
1933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1934 (constantNumPackets ==
static_cast<size_t> (0), std::runtime_error,
1935 "constantNumPackets input argument must be nonzero.");
1937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1938 (
static_cast<size_t> (numVecs) !=
1939 static_cast<size_t> (constantNumPackets),
1940 std::runtime_error,
"constantNumPackets must equal numVecs.");
1948 const bool unpackOnHost = runKernelOnHost(imports);
1950 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1953 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1956 if (printDebugOutput) {
1957 std::ostringstream os;
1958 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1960 std::cerr << os.str ();
1965 auto imports_d = imports.view_device ();
1966 auto imports_h = imports.view_host ();
1967 auto importLIDs_d = importLIDs.view_device ();
1968 auto importLIDs_h = importLIDs.view_host ();
1970 Kokkos::DualView<size_t*, device_type> whichVecs;
1971 if (! isConstantStride ()) {
1972 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1973 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1975 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1977 whichVecs.modify_host ();
1979 Kokkos::deep_copy (whichVecs.view_host (), whichVecsIn);
1982 whichVecs.modify_device ();
1984 Kokkos::deep_copy (whichVecs.view_device (), whichVecsIn);
1987 auto whichVecs_d = whichVecs.view_device ();
1988 auto whichVecs_h = whichVecs.view_host ();
1998 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1999 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2000 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2003 const bool use_atomic_updates = unpackOnHost ?
2004 host_exec_space().concurrency () != 1 :
2005 dev_exec_space().concurrency () != 1;
2007 if (printDebugOutput) {
2008 std::ostringstream os;
2010 std::cerr << os.str ();
2017 using op_type = KokkosRefactor::Details::InsertOp;
2018 if (isConstantStride ()) {
2020 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2021 unpack_array_multi_column (host_exec_space (),
2022 X_h, imports_h, importLIDs_h,
2023 op_type (), numVecs,
2029 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2030 unpack_array_multi_column (dev_exec_space (),
2031 X_d, imports_d, importLIDs_d,
2032 op_type (), numVecs,
2039 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2040 unpack_array_multi_column_variable_stride (host_exec_space (),
2050 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2051 unpack_array_multi_column_variable_stride (dev_exec_space (),
2063 using op_type = KokkosRefactor::Details::AddOp;
2064 if (isConstantStride ()) {
2066 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2067 unpack_array_multi_column (host_exec_space (),
2068 X_h, imports_h, importLIDs_h,
2069 op_type (), numVecs,
2074 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2075 unpack_array_multi_column (dev_exec_space (),
2076 X_d, imports_d, importLIDs_d,
2077 op_type (), numVecs,
2084 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2085 unpack_array_multi_column_variable_stride (host_exec_space (),
2095 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2096 unpack_array_multi_column_variable_stride (dev_exec_space (),
2108 using op_type = KokkosRefactor::Details::AbsMaxOp;
2109 if (isConstantStride ()) {
2111 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2112 unpack_array_multi_column (host_exec_space (),
2113 X_h, imports_h, importLIDs_h,
2114 op_type (), numVecs,
2119 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2120 unpack_array_multi_column (dev_exec_space (),
2121 X_d, imports_d, importLIDs_d,
2122 op_type (), numVecs,
2129 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2130 unpack_array_multi_column_variable_stride (host_exec_space (),
2140 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2141 unpack_array_multi_column_variable_stride (dev_exec_space (),
2153 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2154 (
true, std::logic_error,
"Invalid CombineMode");
2158 if (printDebugOutput) {
2159 std::ostringstream os;
2160 os << *prefix <<
"Nothing to unpack" << endl;
2161 std::cerr << os.str ();
2165 if (printDebugOutput) {
2166 std::ostringstream os;
2167 os << *prefix <<
"Done!" << endl;
2168 std::cerr << os.str ();
2172 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2177 if (isConstantStride ()) {
2178 return static_cast<size_t> (view_.extent (1));
2180 return static_cast<size_t> (whichVectors_.size ());
2188 gblDotImpl (
const RV& dotsOut,
2189 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2190 const bool distributed)
2192 using Teuchos::REDUCE_MAX;
2193 using Teuchos::REDUCE_SUM;
2194 using Teuchos::reduceAll;
2195 typedef typename RV::non_const_value_type dot_type;
2197 const size_t numVecs = dotsOut.extent (0);
2212 if (distributed && ! comm.is_null ()) {
2215 const int nv =
static_cast<int> (numVecs);
2218 if (commIsInterComm) {
2222 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2224 Kokkos::deep_copy (lclDots, dotsOut);
2225 const dot_type*
const lclSum = lclDots.data ();
2226 dot_type*
const gblSum = dotsOut.data ();
2227 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2230 dot_type*
const inout = dotsOut.data ();
2231 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2237 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2241 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2243 using ::Tpetra::Details::Behavior;
2244 using Kokkos::subview;
2245 using Teuchos::Comm;
2246 using Teuchos::null;
2249 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2250 typedef typename dual_view_type::t_dev_const XMV;
2251 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2255 const size_t numVecs = this->getNumVectors ();
2259 const size_t lclNumRows = this->getLocalLength ();
2260 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2261 const bool debug = Behavior::debug ();
2264 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2265 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2266 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2267 "compatible with the input MultiVector A. We only test for this "
2278 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2279 lclNumRows != A.getLocalLength (), std::runtime_error,
2280 "MultiVectors do not have the same local length. "
2281 "this->getLocalLength() = " << lclNumRows <<
" != "
2282 "A.getLocalLength() = " << A.getLocalLength () <<
".");
2283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2284 numVecs != A.getNumVectors (), std::runtime_error,
2285 "MultiVectors must have the same number of columns (vectors). "
2286 "this->getNumVectors() = " << numVecs <<
" != "
2287 "A.getNumVectors() = " << A.getNumVectors () <<
".");
2288 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2289 numDots != numVecs, std::runtime_error,
2290 "The output array 'dots' must have the same number of entries as the "
2291 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2292 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2294 const std::pair<size_t, size_t> colRng (0, numVecs);
2295 RV dotsOut = subview (dots, colRng);
2296 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2297 this->getMap ()->getComm ();
2299 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2300 auto A_view = A.getLocalViewDevice(Access::ReadOnly);
2302 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2303 this->whichVectors_.getRawPtr (),
2304 A.whichVectors_.getRawPtr (),
2305 this->isConstantStride (), A.isConstantStride ());
2306 gblDotImpl (dotsOut, comm, this->isDistributed ());
2310 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2315 using ::Tpetra::Details::ProfilingRegion;
2317 using dot_type =
typename MV::dot_type;
2318 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2321 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2322 map.is_null () ? Teuchos::null : map->getComm ();
2323 if (comm.is_null ()) {
2324 return Kokkos::ArithTraits<dot_type>::zero ();
2327 using LO = LocalOrdinal;
2331 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2333 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2334 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2335 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2342 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2344 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2345 lclDot = KokkosBlas::dot (x_1d, y_1d);
2348 using Teuchos::outArg;
2349 using Teuchos::REDUCE_SUM;
2350 using Teuchos::reduceAll;
2351 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2363 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2367 const Teuchos::ArrayView<dot_type>& dots)
const
2369 const char tfecfFuncName[] =
"dot: ";
2372 const size_t numVecs = this->getNumVectors ();
2373 const size_t lclNumRows = this->getLocalLength ();
2374 const size_t numDots =
static_cast<size_t> (dots.size ());
2384 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2386 "MultiVectors do not have the same local length. "
2387 "this->getLocalLength() = " << lclNumRows <<
" != "
2389 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2391 "MultiVectors must have the same number of columns (vectors). "
2392 "this->getNumVectors() = " << numVecs <<
" != "
2394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2395 (numDots != numVecs, std::runtime_error,
2396 "The output array 'dots' must have the same number of entries as the "
2397 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2398 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2401 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2405 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2409 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2412 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2415 using ::Tpetra::Details::NORM_TWO;
2416 using ::Tpetra::Details::ProfilingRegion;
2417 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2420 MV& X =
const_cast<MV&
> (*this);
2421 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2424 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2427 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2429 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2430 this->norm2 (norms_av);
2434 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2437 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2440 using ::Tpetra::Details::NORM_ONE;
2441 using ::Tpetra::Details::ProfilingRegion;
2442 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2445 MV& X =
const_cast<MV&
> (*this);
2446 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2449 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2452 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2454 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2455 this->norm1 (norms_av);
2458 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2461 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2464 using ::Tpetra::Details::NORM_INF;
2465 using ::Tpetra::Details::ProfilingRegion;
2466 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2469 MV& X =
const_cast<MV&
> (*this);
2470 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2473 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2476 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2478 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2479 this->normInf (norms_av);
2482 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2485 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2489 using Kokkos::subview;
2490 using Teuchos::Comm;
2492 using Teuchos::reduceAll;
2493 using Teuchos::REDUCE_SUM;
2494 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2496 const size_t lclNumRows = this->getLocalLength ();
2497 const size_t numVecs = this->getNumVectors ();
2498 const size_t numMeans =
static_cast<size_t> (means.size ());
2500 TEUCHOS_TEST_FOR_EXCEPTION(
2501 numMeans != numVecs, std::runtime_error,
2502 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2503 <<
" != this->getNumVectors() = " << numVecs <<
".");
2505 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2506 const std::pair<size_t, size_t> colRng (0, numVecs);
2511 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2513 typename local_view_type::HostMirror::array_layout,
2515 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2516 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2518 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2519 this->getMap ()->getComm ();
2522 const bool useHostVersion = this->need_sync_device ();
2523 if (useHostVersion) {
2525 auto X_lcl = subview (getLocalViewHost(Access::ReadOnly),
2526 rowRng, Kokkos::ALL ());
2528 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2529 if (isConstantStride ()) {
2530 KokkosBlas::sum (lclSums, X_lcl);
2533 for (
size_t j = 0; j < numVecs; ++j) {
2534 const size_t col = whichVectors_[j];
2535 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2542 if (! comm.is_null () && this->isDistributed ()) {
2543 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2544 lclSums.data (), meansOut.data ());
2548 Kokkos::deep_copy (meansOut, lclSums);
2553 auto X_lcl = subview (this->getLocalViewDevice(Access::ReadOnly),
2554 rowRng, Kokkos::ALL ());
2557 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2558 if (isConstantStride ()) {
2559 KokkosBlas::sum (lclSums, X_lcl);
2562 for (
size_t j = 0; j < numVecs; ++j) {
2563 const size_t col = whichVectors_[j];
2564 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2572 if (! comm.is_null () && this->isDistributed ()) {
2573 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2574 lclSums.data (), meansOut.data ());
2578 Kokkos::deep_copy (meansOut, lclSums);
2586 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2587 for (
size_t k = 0; k < numMeans; ++k) {
2588 meansOut(k) = meansOut(k) * OneOverN;
2593 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2599 typedef Kokkos::Details::ArithTraits<IST> ATS;
2600 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2601 typedef typename pool_type::generator_type generator_type;
2603 const IST max = Kokkos::rand<generator_type, IST>::max ();
2604 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2606 this->randomize (
static_cast<Scalar
> (min),
static_cast<Scalar
> (max));
2610 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2613 randomize (
const Scalar& minVal,
const Scalar& maxVal)
2616 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2627 const uint64_t myRank =
2628 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2629 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2630 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2632 pool_type rand_pool (seed);
2633 const IST max =
static_cast<IST
> (maxVal);
2634 const IST min =
static_cast<IST
> (minVal);
2636 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2638 if (isConstantStride ()) {
2639 Kokkos::fill_random (thisView, rand_pool, min, max);
2642 const size_t numVecs = getNumVectors ();
2643 for (
size_t k = 0; k < numVecs; ++k) {
2644 const size_t col = whichVectors_[k];
2645 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2646 Kokkos::fill_random (X_k, rand_pool, min, max);
2651 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2656 using ::Tpetra::Details::ProfilingRegion;
2657 using ::Tpetra::Details::Blas::fill;
2658 using DES =
typename dual_view_type::t_dev::execution_space;
2659 using HES =
typename dual_view_type::t_host::execution_space;
2660 using LO = LocalOrdinal;
2661 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2666 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2667 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2673 const bool runOnHost = runKernelOnHost(*
this);
2675 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2676 if (this->isConstantStride ()) {
2677 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2680 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2681 this->whichVectors_.getRawPtr ());
2685 auto X = this->getLocalViewHost(Access::OverwriteAll);
2686 if (this->isConstantStride ()) {
2687 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2690 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2691 this->whichVectors_.getRawPtr ());
2697 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2700 replaceMap (
const Teuchos::RCP<const map_type>& newMap)
2702 using Teuchos::ArrayRCP;
2703 using Teuchos::Comm;
2706 using LO = LocalOrdinal;
2707 using GO = GlobalOrdinal;
2713 TEUCHOS_TEST_FOR_EXCEPTION(
2714 ! this->isConstantStride (), std::logic_error,
2715 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2716 "if the MultiVector is a column view of another MultiVector (that is, if "
2717 "isConstantStride() == false).");
2747 if (this->getMap ().is_null ()) {
2752 TEUCHOS_TEST_FOR_EXCEPTION(
2753 newMap.is_null (), std::invalid_argument,
2754 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2755 "This probably means that the input Map is incorrect.");
2759 const size_t newNumRows = newMap->getLocalNumElements ();
2760 const size_t origNumRows = view_.extent (0);
2761 const size_t numCols = this->getNumVectors ();
2763 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2764 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2767 else if (newMap.is_null ()) {
2770 const size_t newNumRows =
static_cast<size_t> (0);
2771 const size_t numCols = this->getNumVectors ();
2772 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2775 this->map_ = newMap;
2778 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2781 scale (
const Scalar& alpha)
2786 const IST theAlpha =
static_cast<IST
> (alpha);
2787 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2790 const size_t lclNumRows = getLocalLength ();
2791 const size_t numVecs = getNumVectors ();
2792 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2793 const std::pair<size_t, size_t> colRng (0, numVecs);
2801 const bool useHostVersion = need_sync_device ();
2802 if (useHostVersion) {
2803 auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2804 if (isConstantStride ()) {
2805 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2808 for (
size_t k = 0; k < numVecs; ++k) {
2809 const size_t Y_col = whichVectors_[k];
2810 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2811 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2816 auto Y_lcl = Kokkos::subview (getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2817 if (isConstantStride ()) {
2818 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2821 for (
size_t k = 0; k < numVecs; ++k) {
2822 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2823 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2824 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2831 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2834 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2836 const size_t numVecs = this->getNumVectors ();
2837 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2838 TEUCHOS_TEST_FOR_EXCEPTION(
2839 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2840 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2844 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2845 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2846 k_alphas.modify_host ();
2847 for (
size_t i = 0; i < numAlphas; ++i) {
2850 k_alphas.sync_device ();
2852 this->scale (k_alphas.view_device ());
2855 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2858 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2861 using Kokkos::subview;
2863 const size_t lclNumRows = this->getLocalLength ();
2864 const size_t numVecs = this->getNumVectors ();
2865 TEUCHOS_TEST_FOR_EXCEPTION(
2866 static_cast<size_t> (alphas.extent (0)) != numVecs,
2867 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2868 "alphas.extent(0) = " << alphas.extent (0)
2869 <<
" != this->getNumVectors () = " << numVecs <<
".");
2870 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2871 const std::pair<size_t, size_t> colRng (0, numVecs);
2881 const bool useHostVersion = this->need_sync_device ();
2882 if (useHostVersion) {
2885 auto alphas_h = Kokkos::create_mirror_view (alphas);
2887 Kokkos::deep_copy (alphas_h, alphas);
2889 auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2890 if (isConstantStride ()) {
2891 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2894 for (
size_t k = 0; k < numVecs; ++k) {
2895 const size_t Y_col = this->isConstantStride () ? k :
2896 this->whichVectors_[k];
2897 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2900 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2905 auto Y_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2906 if (isConstantStride ()) {
2907 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2914 auto alphas_h = Kokkos::create_mirror_view (alphas);
2916 Kokkos::deep_copy (alphas_h, alphas);
2918 for (
size_t k = 0; k < numVecs; ++k) {
2919 const size_t Y_col = this->isConstantStride () ? k :
2920 this->whichVectors_[k];
2921 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2922 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2928 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2931 scale (
const Scalar& alpha,
2935 using Kokkos::subview;
2936 const char tfecfFuncName[] =
"scale: ";
2938 const size_t lclNumRows = getLocalLength ();
2939 const size_t numVecs = getNumVectors ();
2941 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2943 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2945 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2947 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2951 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2952 const std::pair<size_t, size_t> colRng (0, numVecs);
2954 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2956 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2957 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
2960 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2964 for (
size_t k = 0; k < numVecs; ++k) {
2965 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2967 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2968 auto X_k = subview (X_lcl, ALL (), X_col);
2970 KokkosBlas::scal (Y_k, theAlpha, X_k);
2977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2982 const char tfecfFuncName[] =
"reciprocal: ";
2984 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2986 "MultiVectors do not have the same local length. "
2987 "this->getLocalLength() = " << getLocalLength ()
2989 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2990 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2991 ": MultiVectors do not have the same number of columns (vectors). "
2992 "this->getNumVectors() = " << getNumVectors ()
2993 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2995 const size_t numVecs = getNumVectors ();
2997 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3001 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3005 using Kokkos::subview;
3006 for (
size_t k = 0; k < numVecs; ++k) {
3007 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3008 auto vector_k = subview (this_view_dev, ALL (), this_col);
3009 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3010 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3011 KokkosBlas::reciprocal (vector_k, vector_Ak);
3016 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3021 const char tfecfFuncName[] =
"abs";
3023 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3025 ": MultiVectors do not have the same local length. "
3026 "this->getLocalLength() = " << getLocalLength ()
3028 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3029 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3030 ": MultiVectors do not have the same number of columns (vectors). "
3031 "this->getNumVectors() = " << getNumVectors ()
3032 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3033 const size_t numVecs = getNumVectors ();
3035 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3039 KokkosBlas::abs (this_view_dev, A_view_dev);
3043 using Kokkos::subview;
3045 for (
size_t k=0; k < numVecs; ++k) {
3046 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3047 auto vector_k = subview (this_view_dev, ALL (), this_col);
3048 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3049 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3050 KokkosBlas::abs (vector_k, vector_Ak);
3055 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3058 update (
const Scalar& alpha,
3062 const char tfecfFuncName[] =
"update: ";
3063 using Kokkos::subview;
3068 const size_t lclNumRows = getLocalLength ();
3069 const size_t numVecs = getNumVectors ();
3072 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3074 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3076 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3078 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3084 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3085 const std::pair<size_t, size_t> colRng (0, numVecs);
3087 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3088 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3090 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3094 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3098 for (
size_t k = 0; k < numVecs; ++k) {
3099 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3101 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3102 auto X_k = subview (X_lcl, ALL (), X_col);
3104 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3109 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3112 update (
const Scalar& alpha,
3116 const Scalar& gamma)
3119 using Kokkos::subview;
3121 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3125 const size_t lclNumRows = this->getLocalLength ();
3126 const size_t numVecs = getNumVectors ();
3129 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3131 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3132 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3134 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3136 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3137 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3139 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3141 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3142 "but this MultiVector has " << numVecs <<
" column(s).");
3143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3145 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3146 "but this MultiVector has " << numVecs <<
" column(s).");
3153 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3154 const std::pair<size_t, size_t> colRng (0, numVecs);
3159 auto C_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
3164 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3169 for (
size_t k = 0; k < numVecs; ++k) {
3170 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3173 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3174 theBeta, subview (B_lcl, rowRng, B_col),
3175 theGamma, subview (C_lcl, rowRng, this_col));
3180 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3181 Teuchos::ArrayRCP<const Scalar>
3187 const char tfecfFuncName[] =
"getData: ";
3194 auto hostView = getLocalViewHost(Access::ReadOnly);
3195 const size_t col = isConstantStride () ? j : whichVectors_[j];
3196 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3197 Teuchos::ArrayRCP<const IST> dataAsArcp =
3198 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3200 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3201 (
static_cast<size_t> (hostView_j.extent (0)) <
3202 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3203 "hostView_j.extent(0) = " << hostView_j.extent (0)
3204 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3205 "Please report this bug to the Tpetra developers.");
3207 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3210 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3211 Teuchos::ArrayRCP<Scalar>
3216 using Kokkos::subview;
3218 const char tfecfFuncName[] =
"getDataNonConst: ";
3220 auto hostView = getLocalViewHost(Access::ReadWrite);
3221 const size_t col = isConstantStride () ? j : whichVectors_[j];
3222 auto hostView_j = subview (hostView, ALL (), col);
3223 Teuchos::ArrayRCP<IST> dataAsArcp =
3224 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3227 (
static_cast<size_t> (hostView_j.extent (0)) <
3228 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3229 "hostView_j.extent(0) = " << hostView_j.extent (0)
3230 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3231 "Please report this bug to the Tpetra developers.");
3233 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3236 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3237 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3239 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3246 bool contiguous =
true;
3247 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3248 for (
size_t j = 1; j < numCopyVecs; ++j) {
3249 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3254 if (contiguous && numCopyVecs > 0) {
3255 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3258 RCP<const MV> X_sub = this->subView (cols);
3259 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3265 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3266 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3268 subCopy (
const Teuchos::Range1D &colRng)
const
3273 RCP<const MV> X_sub = this->subView (colRng);
3274 RCP<MV> Y (
new MV (this->getMap (),
static_cast<size_t> (colRng.size ()),
false));
3279 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3283 return view_.origExtent(0);
3286 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3290 return view_.origExtent(1);
3293 template <
class Scalar,
class LO,
class GO,
class Node>
3296 const Teuchos::RCP<const map_type>& subMap,
3297 const local_ordinal_type rowOffset) :
3301 using Kokkos::subview;
3302 using Teuchos::outArg;
3305 using Teuchos::reduceAll;
3306 using Teuchos::REDUCE_MIN;
3309 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3310 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3313 std::unique_ptr<std::ostringstream> errStrm;
3320 const auto comm = subMap->getComm ();
3321 TEUCHOS_ASSERT( ! comm.is_null () );
3322 const int myRank = comm->getRank ();
3324 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3326 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3328 std::ostringstream os;
3329 os <<
"Proc " << myRank <<
": " << prefix
3330 <<
"X: {lclNumRows: " << lclNumRowsBefore
3332 <<
", numCols: " << numCols <<
"}, "
3333 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3334 std::cerr << os.str ();
3339 const bool tooManyElts =
3342 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3343 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3344 << newNumRows <<
") + rowOffset (=" << rowOffset
3348 TEUCHOS_TEST_FOR_EXCEPTION
3349 (! debug && tooManyElts, std::invalid_argument,
3350 prefix << errStrm->str () << suffix);
3354 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3356 std::ostringstream gblErrStrm;
3357 const std::string myErrStr =
3358 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3360 TEUCHOS_TEST_FOR_EXCEPTION
3361 (
true, std::invalid_argument, gblErrStrm.str ());
3365 using range_type = std::pair<LO, LO>;
3366 const range_type origRowRng
3367 (rowOffset,
static_cast<LO
> (X.
view_.origExtent (0)));
3368 const range_type rowRng
3369 (rowOffset, rowOffset + newNumRows);
3371 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3378 if (newView.extent (0) == 0 &&
3379 newView.extent (1) != X.
view_.extent (1)) {
3381 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3385 MV (subMap, newView) :
3389 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3390 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3391 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3393 if (errStrm.get () ==
nullptr) {
3394 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3396 *errStrm <<
" Proc " << myRank <<
3397 ": subMap.getLocalNumElements(): " << newNumRows <<
3398 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3399 ", X.getNumVectors(): " << numCols <<
3400 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3402 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3404 std::ostringstream gblErrStrm;
3406 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3407 "dimensions on one or more processes:" << endl;
3409 const std::string myErrStr =
3410 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3412 gblErrStrm << suffix << endl;
3413 TEUCHOS_TEST_FOR_EXCEPTION
3414 (
true, std::invalid_argument, gblErrStrm.str ());
3419 std::ostringstream os;
3420 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3421 std::cerr << os.str ();
3427 std::ostringstream os;
3428 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3429 std::cerr << os.str ();
3433 template <
class Scalar,
class LO,
class GO,
class Node>
3436 const map_type& subMap,
3437 const size_t rowOffset) :
3438 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3442 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3443 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3445 offsetView (
const Teuchos::RCP<const map_type>& subMap,
3446 const size_t offset)
const
3449 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3452 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3453 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3456 const size_t offset)
3459 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3462 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3463 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3465 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3467 using Teuchos::Array;
3471 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3472 TEUCHOS_TEST_FOR_EXCEPTION(
3473 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3474 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3475 "contain at least one entry, but cols.size() = " << cols.size ()
3480 bool contiguous =
true;
3481 for (
size_t j = 1; j < numViewCols; ++j) {
3482 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3488 if (numViewCols == 0) {
3490 return rcp (
new MV (this->getMap (), numViewCols));
3493 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3497 if (isConstantStride ()) {
3498 return rcp (
new MV (this->getMap (), view_, cols));
3501 Array<size_t> newcols (cols.size ());
3502 for (
size_t j = 0; j < numViewCols; ++j) {
3503 newcols[j] = whichVectors_[cols[j]];
3505 return rcp (
new MV (this->getMap (), view_, newcols ()));
3510 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3511 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3513 subView (
const Teuchos::Range1D& colRng)
const
3515 using ::Tpetra::Details::Behavior;
3517 using Kokkos::subview;
3518 using Teuchos::Array;
3522 const char tfecfFuncName[] =
"subView(Range1D): ";
3524 const size_t lclNumRows = this->getLocalLength ();
3525 const size_t numVecs = this->getNumVectors ();
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3530 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3531 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3533 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3534 numVecs != 0 && colRng.size () != 0 &&
3535 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3536 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3537 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3538 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3539 "[0, " << numVecs <<
"].");
3541 RCP<const MV> X_ret;
3551 const std::pair<size_t, size_t> rows (0, lclNumRows);
3552 if (colRng.size () == 0) {
3553 const std::pair<size_t, size_t> cols (0, 0);
3554 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3555 X_ret = rcp (
new MV (this->getMap (), X_sub));
3559 if (isConstantStride ()) {
3560 const std::pair<size_t, size_t> cols (colRng.lbound (),
3561 colRng.ubound () + 1);
3562 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3563 X_ret = rcp (
new MV (this->getMap (), X_sub));
3566 if (
static_cast<size_t> (colRng.size ()) ==
static_cast<size_t> (1)) {
3569 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3570 whichVectors_[0] + colRng.ubound () + 1);
3571 wrapped_dual_view_type X_sub = takeSubview (view_, ALL (), col);
3572 X_ret = rcp (
new MV (this->getMap (), X_sub));
3575 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3576 whichVectors_.begin () + colRng.ubound () + 1);
3577 X_ret = rcp (
new MV (this->getMap (), view_, which));
3582 const bool debug = Behavior::debug ();
3584 using Teuchos::Comm;
3585 using Teuchos::outArg;
3586 using Teuchos::REDUCE_MIN;
3587 using Teuchos::reduceAll;
3589 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3590 Teuchos::null : this->getMap ()->getComm ();
3591 if (! comm.is_null ()) {
3595 if (X_ret.is_null ()) {
3598 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3599 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3600 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3601 "MultiVector; the return value of this method) is null on some MPI "
3602 "process in this MultiVector's communicator. This should never "
3603 "happen. Please report this bug to the Tpetra developers.");
3604 if (! X_ret.is_null () &&
3605 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3608 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3609 outArg (gblSuccess));
3610 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3611 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3612 "colRng.size(), on at least one MPI process in this MultiVector's "
3613 "communicator. This should never happen. "
3614 "Please report this bug to the Tpetra developers.");
3621 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3622 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3627 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3631 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3632 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3637 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3641 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3647 using Kokkos::subview;
3648 typedef std::pair<size_t, size_t> range_type;
3649 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3653 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3654 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3656 static_cast<size_t> (j) :
3658 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3669 const size_t newSize = X.
imports_.extent (0) / numCols;
3670 const size_t offset = jj*newSize;
3672 newImports.d_view = subview (X.
imports_.d_view,
3673 range_type (offset, offset+newSize));
3674 newImports.h_view = subview (X.
imports_.h_view,
3675 range_type (offset, offset+newSize));
3679 const size_t newSize = X.
exports_.extent (0) / numCols;
3680 const size_t offset = jj*newSize;
3682 newExports.d_view = subview (X.
exports_.d_view,
3683 range_type (offset, offset+newSize));
3684 newExports.h_view = subview (X.
exports_.h_view,
3685 range_type (offset, offset+newSize));
3696 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3697 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3702 return Teuchos::rcp (
new V (*
this, j));
3706 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3707 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3712 return Teuchos::rcp (
new V (*
this, j));
3716 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3719 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3722 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3724 Kokkos::MemoryUnmanaged>;
3725 const char tfecfFuncName[] =
"get1dCopy: ";
3727 const size_t numRows = this->getLocalLength ();
3728 const size_t numCols = this->getNumVectors ();
3730 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3731 (LDA < numRows, std::runtime_error,
3732 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3733 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3734 (numRows >
size_t (0) && numCols >
size_t (0) &&
3735 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3737 "A.size() = " << A.size () <<
", but its size must be at least "
3738 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3740 const std::pair<size_t, size_t> rowRange (0, numRows);
3741 const std::pair<size_t, size_t> colRange (0, numCols);
3743 input_view_type A_view_orig (
reinterpret_cast<IST*
> (A.getRawPtr ()),
3745 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3758 if (this->need_sync_host() && this->need_sync_device()) {
3761 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3764 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3765 if (this->isConstantStride ()) {
3767 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3769 Kokkos::deep_copy (A_view, srcView_host);
3771 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3773 Kokkos::deep_copy (A_view, srcView_device);
3777 for (
size_t j = 0; j < numCols; ++j) {
3778 const size_t srcCol = this->whichVectors_[j];
3779 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3782 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3783 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3785 Kokkos::deep_copy (dstColView, srcColView_host);
3787 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3788 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3790 Kokkos::deep_copy (dstColView, srcColView_device);
3798 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3801 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3804 const char tfecfFuncName[] =
"get2dCopy: ";
3805 const size_t numRows = this->getLocalLength ();
3806 const size_t numCols = this->getNumVectors ();
3808 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3809 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3810 std::runtime_error,
"Input array of pointers must contain as many "
3811 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3812 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3814 if (numRows != 0 && numCols != 0) {
3816 for (
size_t j = 0; j < numCols; ++j) {
3817 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3818 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3819 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3820 "the input array of arrays is not long enough to fit all entries in "
3821 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3822 <<
" < getLocalLength() = " << numRows <<
".");
3826 for (
size_t j = 0; j < numCols; ++j) {
3827 Teuchos::RCP<const V> X_j = this->getVector (j);
3828 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3829 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3835 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3836 Teuchos::ArrayRCP<const Scalar>
3840 if (getLocalLength () == 0 || getNumVectors () == 0) {
3841 return Teuchos::null;
3843 TEUCHOS_TEST_FOR_EXCEPTION(
3844 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3845 "get1dView: This MultiVector does not have constant stride, so it is "
3846 "not possible to view its data as a single array. You may check "
3847 "whether a MultiVector has constant stride by calling "
3848 "isConstantStride().");
3852 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3853 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3854 Kokkos::Compat::persistingView (X_lcl);
3855 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3859 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3860 Teuchos::ArrayRCP<Scalar>
3864 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3865 return Teuchos::null;
3868 TEUCHOS_TEST_FOR_EXCEPTION
3869 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3870 "get1dViewNonConst: This MultiVector does not have constant stride, "
3871 "so it is not possible to view its data as a single array. You may "
3872 "check whether a MultiVector has constant stride by calling "
3873 "isConstantStride().");
3874 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3875 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3876 Kokkos::Compat::persistingView (X_lcl);
3877 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3882 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3886 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3892 const size_t myNumRows = this->getLocalLength ();
3893 const size_t numCols = this->getNumVectors ();
3894 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3896 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3897 for (
size_t j = 0; j < numCols; ++j) {
3898 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3899 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3900 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3901 Kokkos::Compat::persistingView (X_lcl_j);
3902 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3912 return view_.getHostView(s);
3915 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3920 return view_.getHostView(s);
3923 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3928 return view_.getHostView(s);
3931 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3936 return view_.getDeviceView(s);
3939 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3944 return view_.getDeviceView(s);
3947 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3952 return view_.getDeviceView(s);
3955 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3956 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3963 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3969 const size_t myNumRows = this->getLocalLength ();
3970 const size_t numCols = this->getNumVectors ();
3971 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3973 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3974 for (
size_t j = 0; j < numCols; ++j) {
3975 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3976 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3977 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3978 Kokkos::Compat::persistingView (X_lcl_j);
3979 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
3984 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3988 Teuchos::ETransp transB,
3989 const Scalar& alpha,
3994 using ::Tpetra::Details::ProfilingRegion;
3995 using Teuchos::CONJ_TRANS;
3996 using Teuchos::NO_TRANS;
3997 using Teuchos::TRANS;
4000 using Teuchos::rcpFromRef;
4002 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4004 using STS = Teuchos::ScalarTraits<Scalar>;
4006 const char tfecfFuncName[] =
"multiply: ";
4007 ProfilingRegion region (
"Tpetra::MV::multiply");
4039 const bool C_is_local = ! this->isDistributed ();
4049 auto myMap = this->getMap ();
4050 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4051 using Teuchos::REDUCE_MIN;
4052 using Teuchos::reduceAll;
4053 using Teuchos::outArg;
4055 auto comm = myMap->getComm ();
4056 const size_t A_nrows =
4058 const size_t A_ncols =
4060 const size_t B_nrows =
4062 const size_t B_ncols =
4065 const bool lclBad = this->getLocalLength () != A_nrows ||
4066 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4068 const int myRank = comm->getRank ();
4069 std::ostringstream errStrm;
4070 if (this->getLocalLength () != A_nrows) {
4071 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4072 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
4073 <<
"." << std::endl;
4075 if (this->getNumVectors () != B_ncols) {
4076 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4077 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
4078 <<
"." << std::endl;
4080 if (A_ncols != B_nrows) {
4081 errStrm <<
"Proc " << myRank <<
": A_ncols="
4082 << A_ncols <<
" != B_nrows=" << B_nrows
4083 <<
"." << std::endl;
4086 const int lclGood = lclBad ? 0 : 1;
4088 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4090 std::ostringstream os;
4093 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4094 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4095 "least one process in this object's communicator." << std::endl
4097 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4099 << (transA == Teuchos::TRANS ?
"^T" :
4100 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4101 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4103 << (transA == Teuchos::TRANS ?
"^T" :
4104 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4105 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4106 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
4116 const bool Case1 = C_is_local && A_is_local && B_is_local;
4118 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4119 transA != NO_TRANS &&
4122 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4127 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4128 "Multiplication of op(A) and op(B) into *this is not a "
4129 "supported use case.");
4131 if (beta != STS::zero () && Case2) {
4137 const int myRank = this->getMap ()->getComm ()->getRank ();
4139 beta_local = ATS::zero ();
4148 if (! isConstantStride ()) {
4149 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4151 C_tmp = rcp (
this,
false);
4154 RCP<const MV> A_tmp;
4156 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4158 A_tmp = rcpFromRef (A);
4161 RCP<const MV> B_tmp;
4163 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4165 B_tmp = rcpFromRef (B);
4168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4169 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4170 ! A_tmp->isConstantStride (), std::logic_error,
4171 "Failed to make temporary constant-stride copies of MultiVectors.");
4174 const LO A_lclNumRows = A_tmp->getLocalLength ();
4175 const LO A_numVecs = A_tmp->getNumVectors ();
4176 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4177 auto A_sub = Kokkos::subview (A_lcl,
4178 std::make_pair (LO (0), A_lclNumRows),
4179 std::make_pair (LO (0), A_numVecs));
4182 const LO B_lclNumRows = B_tmp->getLocalLength ();
4183 const LO B_numVecs = B_tmp->getNumVectors ();
4184 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4185 auto B_sub = Kokkos::subview (B_lcl,
4186 std::make_pair (LO (0), B_lclNumRows),
4187 std::make_pair (LO (0), B_numVecs));
4189 const LO C_lclNumRows = C_tmp->getLocalLength ();
4190 const LO C_numVecs = C_tmp->getNumVectors ();
4192 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4193 auto C_sub = Kokkos::subview (C_lcl,
4194 std::make_pair (LO (0), C_lclNumRows),
4195 std::make_pair (LO (0), C_numVecs));
4196 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4197 (transA == Teuchos::TRANS ?
'T' :
'C'));
4198 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4199 (transB == Teuchos::TRANS ?
'T' :
'C'));
4202 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4204 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4208 if (! isConstantStride ()) {
4213 A_tmp = Teuchos::null;
4214 B_tmp = Teuchos::null;
4222 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4231 using Kokkos::subview;
4232 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4234 const size_t lclNumRows = this->getLocalLength ();
4235 const size_t numVecs = this->getNumVectors ();
4237 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4239 std::runtime_error,
"MultiVectors do not have the same local length.");
4240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4241 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4242 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4245 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4255 KokkosBlas::mult (scalarThis,
4258 subview (A_view, ALL (), 0),
4262 for (
size_t j = 0; j < numVecs; ++j) {
4263 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4265 KokkosBlas::mult (scalarThis,
4266 subview (this_view, ALL (), C_col),
4268 subview (A_view, ALL (), 0),
4269 subview (B_view, ALL (), B_col));
4274 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4279 using ::Tpetra::Details::allReduceView;
4280 using ::Tpetra::Details::ProfilingRegion;
4281 ProfilingRegion region (
"Tpetra::MV::reduce");
4283 const auto map = this->getMap ();
4284 if (map.get () ==
nullptr) {
4287 const auto comm = map->getComm ();
4288 if (comm.get () ==
nullptr) {
4294 const bool changed_on_device = this->need_sync_host ();
4295 if (changed_on_device) {
4300 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4301 allReduceView (X_lcl, X_lcl, *comm);
4304 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4305 allReduceView (X_lcl, X_lcl, *comm);
4309 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4317 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4318 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4319 TEUCHOS_TEST_FOR_EXCEPTION(
4320 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4322 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4323 <<
" is invalid. The range of valid row indices on this process "
4324 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4325 <<
", " << maxLocalIndex <<
"].");
4326 TEUCHOS_TEST_FOR_EXCEPTION(
4327 vectorIndexOutOfRange(col),
4329 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4330 <<
" of the multivector is invalid.");
4333 auto view = this->getLocalViewHost(Access::ReadWrite);
4334 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4335 view (lclRow, colInd) = ScalarValue;
4339 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4348 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4349 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4350 TEUCHOS_TEST_FOR_EXCEPTION(
4351 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4353 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4354 <<
" is invalid. The range of valid row indices on this process "
4355 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4356 <<
", " << maxLocalIndex <<
"].");
4357 TEUCHOS_TEST_FOR_EXCEPTION(
4358 vectorIndexOutOfRange(col),
4360 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4361 <<
" of the multivector is invalid.");
4364 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4366 auto view = this->getLocalViewHost(Access::ReadWrite);
4368 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4371 view(lclRow, colInd) += value;
4376 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4385 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4388 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4389 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4390 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4392 "Global row index " << gblRow <<
" is not present on this process "
4393 << this->getMap ()->getComm ()->getRank () <<
".");
4394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4395 (this->vectorIndexOutOfRange (col), std::runtime_error,
4396 "Vector index " << col <<
" of the MultiVector is invalid.");
4399 this->replaceLocalValue (lclRow, col, ScalarValue);
4402 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4412 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4415 TEUCHOS_TEST_FOR_EXCEPTION(
4416 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4418 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4419 <<
" is not present on this process "
4420 << this->getMap ()->getComm ()->getRank () <<
".");
4421 TEUCHOS_TEST_FOR_EXCEPTION(
4422 vectorIndexOutOfRange(col),
4424 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4425 <<
" of the multivector is invalid.");
4428 this->sumIntoLocalValue (lclRow, col, value, atomic);
4431 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4433 Teuchos::ArrayRCP<T>
4439 typename dual_view_type::array_layout,
4441 const size_t col = isConstantStride () ? j : whichVectors_[j];
4442 col_dual_view_type X_col =
4443 Kokkos::subview (view_, Kokkos::ALL (), col);
4444 return Kokkos::Compat::persistingView (X_col.d_view);
4447 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4451 return view_.need_sync_host ();
4454 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4458 return view_.need_sync_device ();
4461 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4466 using Teuchos::TypeNameTraits;
4468 std::ostringstream out;
4469 out <<
"\"" << className <<
"\": {";
4470 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4471 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4472 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4473 <<
", Node" << Node::name ()
4475 if (this->getObjectLabel () !=
"") {
4476 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4478 out <<
", numRows: " << this->getGlobalLength ();
4479 if (className !=
"Tpetra::Vector") {
4480 out <<
", numCols: " << this->getNumVectors ()
4481 <<
", isConstantStride: " << this->isConstantStride ();
4483 if (this->isConstantStride ()) {
4484 out <<
", columnStride: " << this->getStride ();
4491 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4496 return this->descriptionImpl (
"Tpetra::MultiVector");
4501 template<
typename ViewType>
4502 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4505 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4506 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4507 static_assert(ViewType::rank == 2,
4508 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4511 out <<
"Values("<<prefix<<
"): " << std::endl
4513 const size_t numRows = v.extent(0);
4514 const size_t numCols = v.extent(1);
4516 for (
size_t i = 0; i < numRows; ++i) {
4518 if (i + 1 < numRows) {
4524 for (
size_t i = 0; i < numRows; ++i) {
4525 for (
size_t j = 0; j < numCols; ++j) {
4527 if (j + 1 < numCols) {
4531 if (i + 1 < numRows) {
4540 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4545 typedef LocalOrdinal LO;
4548 if (vl <= Teuchos::VERB_LOW) {
4549 return std::string ();
4551 auto map = this->getMap ();
4552 if (map.is_null ()) {
4553 return std::string ();
4555 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4556 auto outp = Teuchos::getFancyOStream (outStringP);
4557 Teuchos::FancyOStream& out = *outp;
4558 auto comm = map->getComm ();
4559 const int myRank = comm->getRank ();
4560 const int numProcs = comm->getSize ();
4562 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4563 Teuchos::OSTab tab1 (out);
4566 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4567 out <<
"Local number of rows: " << lclNumRows << endl;
4569 if (vl > Teuchos::VERB_MEDIUM) {
4572 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4573 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4575 if (this->isConstantStride ()) {
4576 out <<
"Column stride: " << this->getStride () << endl;
4579 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4587 auto X_dev = view_.getDeviceCopy();
4588 auto X_host = view_.getHostCopy();
4590 if(X_dev.data() == X_host.data()) {
4592 Details::print_vector(out,
"unified",X_host);
4595 Details::print_vector(out,
"host",X_host);
4596 Details::print_vector(out,
"dev",X_dev);
4601 return outStringP->str ();
4604 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4608 const std::string& className,
4609 const Teuchos::EVerbosityLevel verbLevel)
const
4611 using Teuchos::TypeNameTraits;
4612 using Teuchos::VERB_DEFAULT;
4613 using Teuchos::VERB_NONE;
4614 using Teuchos::VERB_LOW;
4616 const Teuchos::EVerbosityLevel vl =
4617 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4619 if (vl == VERB_NONE) {
4626 auto map = this->getMap ();
4627 if (map.is_null ()) {
4630 auto comm = map->getComm ();
4631 if (comm.is_null ()) {
4635 const int myRank = comm->getRank ();
4644 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4648 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4649 out <<
"\"" << className <<
"\":" << endl;
4650 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4652 out <<
"Template parameters:" << endl;
4653 Teuchos::OSTab tab2 (out);
4654 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4655 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4656 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4657 <<
"Node: " << Node::name () << endl;
4659 if (this->getObjectLabel () !=
"") {
4660 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4662 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4663 if (className !=
"Tpetra::Vector") {
4664 out <<
"Number of columns: " << this->getNumVectors () << endl;
4671 if (vl > VERB_LOW) {
4672 const std::string lclStr = this->localDescribeToString (vl);
4677 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4680 describe (Teuchos::FancyOStream &out,
4681 const Teuchos::EVerbosityLevel verbLevel)
const
4683 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4686 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4691 replaceMap (newMap);
4694 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4699 using ::Tpetra::Details::localDeepCopy;
4700 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4702 TEUCHOS_TEST_FOR_EXCEPTION
4704 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4705 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4706 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4707 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4708 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4710 TEUCHOS_TEST_FOR_EXCEPTION
4711 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4712 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4713 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4714 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4729 if (src_last_updated_on_host) {
4730 localDeepCopy (this->getLocalViewHost(Access::ReadWrite),
4732 this->isConstantStride (),
4734 this->whichVectors_ (),
4738 localDeepCopy (this->getLocalViewDevice(Access::ReadWrite),
4740 this->isConstantStride (),
4742 this->whichVectors_ (),
4747 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4749 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4756 this->getNumVectors(),
4762 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4767 using ::Tpetra::Details::PackTraits;
4769 const size_t l1 = this->getLocalLength();
4771 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4778 template <
class ST,
class LO,
class GO,
class NT>
4781 std::swap(mv.
map_, this->map_);
4782 std::swap(mv.
view_, this->view_);
4786#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4787 template <
class ST,
class LO,
class GO,
class NT>
4790 const Teuchos::SerialDenseMatrix<int, ST>& src)
4792 using ::Tpetra::Details::localDeepCopy;
4794 using IST =
typename MV::impl_scalar_type;
4795 using input_view_type =
4796 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4797 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4798 using pair_type = std::pair<LO, LO>;
4800 const LO numRows =
static_cast<LO
> (src.numRows ());
4801 const LO numCols =
static_cast<LO
> (src.numCols ());
4803 TEUCHOS_TEST_FOR_EXCEPTION
4806 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4807 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4809 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4811 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4812 input_view_type src_orig (src_raw, src.stride (), numCols);
4813 input_view_type src_view =
4814 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4816 constexpr bool src_isConstantStride =
true;
4817 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4821 src_isConstantStride,
4822 getMultiVectorWhichVectors (dst),
4826 template <
class ST,
class LO,
class GO,
class NT>
4828 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4831 using ::Tpetra::Details::localDeepCopy;
4833 using IST =
typename MV::impl_scalar_type;
4834 using output_view_type =
4835 Kokkos::View<IST**, Kokkos::LayoutLeft,
4836 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4837 using pair_type = std::pair<LO, LO>;
4839 const LO numRows =
static_cast<LO
> (dst.numRows ());
4840 const LO numCols =
static_cast<LO
> (dst.numCols ());
4842 TEUCHOS_TEST_FOR_EXCEPTION
4845 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4846 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4848 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4850 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4851 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4853 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4855 constexpr bool dst_isConstantStride =
true;
4856 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4859 localDeepCopy (dst_view,
4861 dst_isConstantStride,
4864 getMultiVectorWhichVectors (src));
4868 template <
class Scalar,
class LO,
class GO,
class NT>
4869 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4874 return Teuchos::rcp (
new MV (map, numVectors));
4877 template <
class ST,
class LO,
class GO,
class NT>
4895#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4896# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4897 template class MultiVector< SCALAR , LO , GO , NODE >; \
4898 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4899 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4900 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4901 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4904# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4905 template class MultiVector< SCALAR , LO , GO , NODE >; \
4906 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4911#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4913 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4914 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::isInterComm.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Stand-alone utility functions and macros.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
Base class for distributed Tpetra objects that support data redistribution.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
size_t getStride() const
Stride between columns in the multivector.
virtual size_t constantNumberOfPackets() const
Number of packets to send per LID.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
virtual std::string description() const
A simple one-line description of this object.
void reduce()
Sum values of a locally replicated multivector across all processes.
void randomize()
Set all values in the multivector to pseudorandom numbers.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Map and their communicator.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using local (row) index.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
size_t getLocalLength() const
Local number of rows on the calling process.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using global row index.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool areRemoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with the given verbosity level to a FancyOStream.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
size_t getNumVectors() const
Number of columns in the multivector.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using local row index.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
virtual bool checkSizes(const SrcDistObject &sourceObj)
Whether data redistribution between sourceObj and this object is legal.
Teuchos::RCP< MultiVector< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another MultiVector with the same entries, but converted to a different Scalar type T.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using global row index.
bool aliases(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &other) const
Whether this multivector's memory might alias other. This is conservative: if either this or other is...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
int local_ordinal_type
Default value of Scalar template parameter.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
@ INSERT
Insert new values that don't currently exist.