47#ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP
48#define MUELU_AGGREGATES_KOKKOS_DEF_HPP
50#include <Xpetra_Map.hpp>
51#include <Xpetra_Vector.hpp>
52#include <Xpetra_MultiVectorFactory.hpp>
53#include <Xpetra_VectorFactory.hpp>
55#include "MueLu_LWGraph_kokkos.hpp"
61 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
65 numGlobalAggregates_ = 0;
67 vertex2AggId_ = LOVectorFactory::Build(graph.GetImportMap());
70 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
73 isRoot_ = Kokkos::View<bool*, device_type>(Kokkos::ViewAllocateWithoutInitializing(
"roots"), graph.GetImportMap()->getLocalNumElements());
74 Kokkos::deep_copy(isRoot_,
false);
77 aggregatesIncludeGhosts_ =
true;
80 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
82 Aggregates_kokkos(
const RCP<const Map>& map) {
84 numGlobalAggregates_ = 0;
86 vertex2AggId_ = LOVectorFactory::Build(map);
89 procWinner_ = LOVectorFactory::Build(map);
92 isRoot_ = Kokkos::View<bool*,device_type>(Kokkos::ViewAllocateWithoutInitializing(
"roots"), map->getLocalNumElements());
93 Kokkos::deep_copy(isRoot_,
false);
96 aggregatesIncludeGhosts_ =
true;
99 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
102 if (aggregateSizes_.size() && !forceRecompute) {
103 return aggregateSizes_;
109 int myPID = GetMap()->getComm()->getRank();
111 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
112 auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
114 typename AppendTrait<
decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
115 Kokkos::parallel_for(
"MueLu:Aggregates:ComputeAggregateSizes:for",
range_type(0,procWinner.size()),
116 KOKKOS_LAMBDA(
const LO i) {
117 if (procWinner(i, 0) == myPID)
118 aggregateSizesAtomic(vertex2AggId(i, 0))++;
121 aggregateSizes_ = aggregateSizes;
123 return aggregateSizes;
128 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
131 using row_map_type =
typename local_graph_type::row_map_type;
132 using entries_type =
typename local_graph_type::entries_type;
133 using size_type =
typename local_graph_type::size_type;
135 auto numAggregates = numAggregates_;
137 if (
static_cast<LO
>(graph_.numRows()) == numAggregates)
140 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
141 auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
142 auto sizes = ComputeAggregateSizes();
145 typename row_map_type::non_const_type
rows(
"Agg_rows", numAggregates+1);
148 Kokkos::parallel_scan(
"MueLu:Aggregates:GetGraph:compute_rows",
range_type(0, numAggregates),
149 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
155 decltype(
rows) offsets(Kokkos::ViewAllocateWithoutInitializing(
"Agg_offsets"), numAggregates+1);
156 Kokkos::deep_copy(offsets,
rows);
158 int myPID = GetMap()->getComm()->getRank();
162 Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(
rows, numAggregates);
163 typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
164 Kokkos::deep_copy(numNNZ_host, numNNZ_device);
165 numNNZ = numNNZ_host();
167 typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing(
"Agg_cols"), numNNZ);
169 Kokkos::parallel_reduce(
"MueLu:Aggregates:GetGraph:compute_cols",
range_type(0, procWinner.size()),
170 KOKKOS_LAMBDA(
const LO i,
size_t& nnz) {
171 if (procWinner(i, 0) == myPID) {
172 typedef typename std::remove_reference<
decltype( offsets(0) ) >::type atomic_incr_type;
173 auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
179 "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ <<
" != " << realnnz <<
" = realnnz");
186 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
189 LO numAggs = GetNumAggregates();
190 LO numNodes = vertex2AggId_->getLocalLength();
191 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
192 typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(
true);
193 LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
195 aggPtr =
LO_view(
"aggPtr",numAggs+1);
196 aggNodes =
LO_view(
"aggNodes",numNodes);
197 LO_view aggCurr(
"agg curr",numAggs+1);
200 Kokkos::parallel_scan(
"MueLu:Aggregates:ComputeNodesInAggregate:scan",
range_type(0,numAggs+1),
201 KOKKOS_LAMBDA(
const LO aggIdx, LO& aggOffset,
bool final_pass) {
204 count = aggSizes(aggIdx);
206 aggPtr(aggIdx) = aggOffset;
207 aggCurr(aggIdx) = aggOffset;
209 aggCurr(numAggs) = 0;
215 LO numUnaggregated = 0;
216 Kokkos::parallel_reduce(
"MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize",
range_type(0,numNodes),
217 KOKKOS_LAMBDA(
const LO nodeIdx, LO & count) {
218 if(vertex2AggId(nodeIdx,0)==INVALID)
221 unaggregated =
LO_view(
"unaggregated",numUnaggregated);
224 Kokkos::parallel_for(
"MueLu:Aggregates:ComputeNodesInAggregate:for",
range_type(0,numNodes),
225 KOKKOS_LAMBDA(
const LO nodeIdx) {
226 LO aggIdx = vertex2AggId(nodeIdx,0);
227 if(aggIdx != INVALID) {
229 aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx;
232 unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx;
238 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
244 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
249 if (numGlobalAggregates_ == -1) out0 <<
"Global number of aggregates: not computed " << std::endl;
250 else out0 <<
"Global number of aggregates: " << numGlobalAggregates_ << std::endl;
254 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
257 if (numGlobalAggregates_ != -1) {
258 LO nAggregates = GetNumAggregates();
259 GO nGlobalAggregates;
260 MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
261 SetNumGlobalAggregates(nGlobalAggregates);
263 return numGlobalAggregates_;
266 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
267 const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>> >
269 return vertex2AggId_->getMap();
#define MUELU_UNAGGREGATED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Kokkos::View< local_ordinal_type *, device_type > LO_view
typename LWGraph_kokkos::local_graph_type local_graph_type
Kokkos::View< LocalOrdinal *, device_type > aggregates_sizes_type
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
virtual std::string description() const
Return a simple one-line description of this object.
Exception throws to report errors in the internal logical of the program.
Lightweight MueLu representation of a compressed row storage graph.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.