46#ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
50#include <Teuchos_Comm.hpp>
51#include <Teuchos_CommHelpers.hpp>
53#include <Xpetra_MapFactory.hpp>
54#include <Xpetra_Map.hpp>
55#include <Xpetra_CrsGraphFactory.hpp>
56#include <Xpetra_CrsGraph.hpp>
61#include "MueLu_LWGraph_kokkos.hpp"
62#include "MueLu_Aggregates_kokkos.hpp"
63#include "MueLu_IndexManager_kokkos.hpp"
// BuildAggregates (name taken from the Monitor label below).
// NOTE(review): the signature line and several interior lines of this
// definition are not visible in this excerpt; comments describe only what the
// visible lines show.
//
// Visible behavior: fetches the index manager and the device views of the
// vertex-to-aggregate and proc-winner vectors from `aggregates`, launches a
// parallel reduction over all local fine nodes, and finally subtracts the
// number of aggregated nodes from `numNonAggregatedNodes`.
68 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 Kokkos::View<unsigned*, device_type>& aggStat,
73 LO& numNonAggregatedNodes)
const {
74 Monitor m(*
this,
"BuildAggregates");
// Debug output stream: when the environment variable
// MUELU_STRUCTUREDALGORITHM_DEBUG is set, `out` wraps std::cout and is
// configured via setShowAllFrontMatter(false).setShowProcRank(true).
// `dbg` is only tested for non-null in the visible lines; its value is not read.
76 RCP<Teuchos::FancyOStream> out;
77 if(
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
78 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
79 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
// Presumably the `else` branch (its opening line is not visible here):
// `out` becomes a black-hole stream.
81 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
// Geometry information and the writable device views of the aggregate data.
84 RCP<IndexManager_kokkos> geoData = aggregates.GetIndexManager();
85 const LO numLocalFineNodes= geoData->getNumLocalFineNodes();
86 const LO numCoarseNodes = geoData->getNumCoarseNodes();
87 LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
88 LOVectorView procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
90 *out <<
"Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
// Declared without an initializer; presumably it receives the result of the
// parallel_reduce below (the result argument is not visible) -- TODO confirm.
91 LO numAggregatedNodes;
// Argument of a call whose opening line is not visible: the rank of this
// process in the graph's communicator.
93 graph.GetComm()->getRank(),
// One reduction iteration per local fine node, range [0, numLocalFineNodes).
97 Kokkos::parallel_reduce(
"StructuredAggregation: fill aggregates data",
98 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
102 *out <<
"numCoarseNodes= " << numCoarseNodes
103 <<
", numAggregatedNodes= " << numAggregatedNodes << std::endl;
// Report back to the caller how many nodes remain non-aggregated.
104 numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
// Builds a CrsGraph (written to `myGraph`) describing the prolongator sparsity
// pattern; the Monitor label used below is "BuildGraphP".
// NOTE(review): the signature line, the functor constructions, and several
// closing braces are not visible in this excerpt; comments describe only what
// the visible lines show.
109 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
112 RCP<CrsGraph>& myGraph)
const {
113 Monitor m(*
this,
"BuildGraphP");
// Debug output stream: std::cout when MUELU_STRUCTUREDALGORITHM_DEBUG is set.
// `dbg` is only tested for non-null in the visible lines.
115 RCP<Teuchos::FancyOStream> out;
116 if(
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
117 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
118 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
// Presumably the `else` branch (its opening line is not visible here):
// `out` becomes a black-hole stream.
120 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
// Number of coarse nodes each non-coarse fine node is connected to:
// 1 for interpolation order 0, 2^numDimensions for interpolation order 1.
// It stays 0 for any other order in the visible lines.
124 int numInterpolationPoints = 0;
125 if(geoData->getInterpolationOrder() == 0) {
126 numInterpolationPoints = 1;
127 }
else if(geoData->getInterpolationOrder() == 1) {
129 numInterpolationPoints = 1 << geoData->getNumDimensions();
131 *out <<
"numInterpolationPoints=" << numInterpolationPoints << std::endl;
// Non-zero count: one entry per coarse node plus numInterpolationPoints
// entries per remaining fine node, replicated for every dof of a node.
133 const LO numLocalFineNodes = geoData->getNumLocalFineNodes();
134 const LO numCoarseNodes = geoData->getNumCoarseNodes();
135 const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints
136 *(numLocalFineNodes - numCoarseNodes));
139 entries_type colIndex(
"Prolongator graph, colIndices", numNnzEntries);
141 *out <<
"Compute prolongatorGraph data" << std::endl;
// Interpolation order 0: a single parallel_for over the fine nodes. The lines
// that construct the functor are only partly visible (three arguments below).
142 if(geoData->getInterpolationOrder() == 0) {
146 geoData->getCoarseningRates(),
147 geoData->getCoarseningEndRates(),
148 geoData->getLocalFineNodesPerDir(),
// NOTE(review): the kernel label reads "loca"; probably "local" was intended.
151 Kokkos::parallel_for(
"Structured Aggregation: compute loca graph data",
152 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
// Interpolation order 1: a parallel_scan that computes the row pointers,
// followed by a parallel_for launched under the graph-data label.
154 }
else if(geoData->getInterpolationOrder() == 1) {
161 numInterpolationPoints,
163 geoData->getCoarseningRates(),
164 geoData->getLocalFineNodesPerDir(),
// The scan range has one more entry than there are local fine nodes.
// NOTE(review): this range is node-based, while the row-pointer functor in
// this file divides its index by dofsPerNode_; confirm the behavior for
// dofsPerNode > 1.
166 Kokkos::parallel_scan(
"Structured Aggregation: compute rowPtr for prolongator graph",
167 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes + 1),
171 geoData->getNumDimensions(),
174 numInterpolationPoints,
175 geoData->getCoarseningRates(),
176 geoData->getCoarseningEndRates(),
177 geoData->getLocalFineNodesPerDir(),
178 geoData->getCoarseNodesPerDir(),
181 Kokkos::parallel_for(
"Structured Aggregation: compute loca graph data",
182 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
// NOTE(review): `domainMap` is declared but not used in the visible lines.
189 RCP<Map> colMap, domainMap;
190 *out <<
"Compute domain and column maps of the CrsGraph" << std::endl;
// The column map uses the library, index base and communicator of the input
// graph's domain map; one argument line of this call is not visible here.
191 colMap = MapFactory::Build(graph.GetDomainMap()->lib(),
192 Teuchos::OrdinalTraits<GO>::invalid(),
194 graph.GetDomainMap()->getIndexBase(),
195 graph.GetDomainMap()->getComm());
// Assemble the output graph from the local graph and the maps. `colMap` is
// passed twice (third and fourth argument) and the graph's domain map twice
// (second and fifth argument).
198 myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap,
199 colMap, graph.GetDomainMap());
// Constructor fragment whose initializer list matches the fillAggregatesFunctor
// members (geoData_, myRank_, aggStat_, vertex2AggID_, procWinner_).
// NOTE(review): the declarator line and most parameter lines are not visible
// in this excerpt.
204 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
208 Kokkos::View<unsigned*, device_type> aggStat,
// geoData_ is initialized from the dereferenced geoData handle; the remaining
// members are initialized directly from the same-named parameters. The body
// is empty.
211 geoData_(*geoData), myRank_(myRank), aggStat_(aggStat),
212 vertex2AggID_(vertex2AggID), procWinner_(procWinner) {}
// Reduction body executed once per fine node `nodeIdx`: determines the coarse
// node the fine node is attached to, records it in vertex2AggID_, records this
// rank in procWinner_, and increments the per-thread counter
// lNumAggregatedNodes.
// NOTE(review): the declarator line, the declarations of `rem` and `rate`,
// and the closing braces are not visible in this excerpt.
214 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
215 KOKKOS_INLINE_FUNCTION
220 LO coarseNodeCoarseLID;
221 LO nodeFineTuple[3], coarseIdx[3];
222 auto coarseRate = geoData_.getCoarseningRates();
223 auto endRate = geoData_.getCoarseningEndRates();
224 auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
// Convert the local fine node index into a per-direction tuple.
226 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// Per direction: start from the integer quotient by the coarsening rate, then
// move up to the next coarse index when the remainder exceeds half of the
// applicable rate. The applicable rate is endRate(dim) for nodes within the
// last endRate(dim) fine nodes of the direction, coarseRate(dim) otherwise.
// Because of the strict '>', a remainder equal to rate/2 stays on the lower
// coarse index.
228 for(
int dim = 0; dim < 3; ++dim) {
229 coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
230 rem = nodeFineTuple[dim] % coarseRate(dim);
231 rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
232 if(rem > (rate / 2)) {++coarseIdx[dim];}
// Convert the coarse tuple into a coarse local index (output argument).
235 geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
236 coarseNodeCoarseLID);
// Record the aggregate id and the owning rank in column 0 of each view.
238 vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
239 procWinner_(nodeIdx, 0) = myRank_;
// Contribution of this node to the reduction result.
241 ++lNumAggregatedNodes;
// Constructor fragment whose initializer list matches the
// computeGraphDataConstantFunctor members.
// NOTE(review): the declarator line, several parameter lines, the start of the
// initializer list and the constructor body are not visible in this excerpt.
245 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
249 const LO NumGhostedNodes,
250 const LO dofsPerNode,
// Each member is initialized directly from the corresponding parameter.
256 numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode),
257 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
258 rowPtr_(rowPtr), colIndex_(colIndex) {
// Kernel body executed once per fine node `nodeIdx`: each dof row of the node
// gets exactly one column entry, pointing at the matching dof of the coarse
// node that the fine node is attached to.
// NOTE(review): the declarator line, the `else` line of the rate selection
// and the closing braces are not visible in this excerpt.
262 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
263 KOKKOS_INLINE_FUNCTION
266 LO nodeFineTuple[3] = {0, 0, 0};
267 LO nodeCoarseTuple[3] = {0, 0, 0};
// Convert the local fine node index into a per-direction tuple.
270 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// Per direction: integer quotient by the coarsening rate, moved up by one when
// the remainder exceeds half of the applicable rate (coarseRate_ in the
// interior, endRate_ within the last endRate_(dim) fine nodes).
274 LO rem, rate, coarseNodeCoarseLID;
275 for(
int dim = 0; dim < 3; ++dim) {
276 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
277 rem = nodeFineTuple[dim] % coarseRate_(dim);
278 if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) {
279 rate = coarseRate_(dim);
// Presumably the `else` branch (its opening line is not visible here).
281 rate = endRate_(dim);
283 if(rem > (rate / 2)) {++nodeCoarseTuple[dim];}
// Convert the coarse tuple into a coarse local index (output argument).
287 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
288 coarseNodeCoarseLID);
// With one entry per row, the row pointer at position row+1 equals row+1 and
// the column-index slot of a row equals the row index itself.
291 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
292 rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1;
293 colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof;
// Constructor fragment whose initializer list matches the
// computeGraphRowPtrFunctor members.
// NOTE(review): the declarator line and several parameter lines are not
// visible in this excerpt.
298 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
301 const LO dofsPerNode,
302 const int numInterpolationPoints,
303 const LO numLocalRows,
// geoData_ is initialized from the dereferenced geoData handle; the remaining
// members are initialized directly from the same-named parameters. The body
// is empty.
307 geoData_(*geoData), dofsPerNode_(dofsPerNode),
308 numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows),
309 coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {}
// Scan body executed once per row index `rowIdx`: stores the running total
// `update` as the row pointer of the row, then adds the number of entries of
// that row to `update`.
// NOTE(review): the declarator line, any guard around the rowPtr_ store, and
// the closing braces are not visible in this excerpt.
311 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
312 KOKKOS_INLINE_FUNCTION
// Presumably executed only on the final scan pass (the guarding line is not
// visible) -- TODO confirm.
321 rowPtr_(rowIdx) = update;
// Only indices below numLocalRows_ contribute entries.
323 if (rowIdx < numLocalRows_) {
324 LO nodeIdx = rowIdx / dofsPerNode_;
325 bool allCoarse =
true;
326 LO nodeFineTuple[3] = {0, 0, 0};
327 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// allCoarse stays true only if, in every direction, the node index is a
// multiple of the coarsening rate or is the last fine node of that direction.
328 for(
int dim = 0; dim < 3; ++dim) {
329 const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
332 allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
// Such rows hold a single entry; all other rows hold numInterpolationPoints_.
334 update += (allCoarse ? 1 : numInterpolationPoints_);
// Constructor fragment whose initializer list matches the
// computeGraphDataLinearFunctor members.
// NOTE(review): the declarator line, several parameter lines and the
// constructor body are not visible in this excerpt.
338 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 const int numDimensions,
342 const LO numGhostedNodes,
343 const LO dofsPerNode,
344 const int numInterpolationPoints,
// geoData_ is initialized from the dereferenced geoData handle; the remaining
// members are initialized directly from the same-named parameters.
351 geoData_(*geoData), numDimensions_(numDimensions),
352 numGhostedNodes_(numGhostedNodes),
353 dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints),
354 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
355 ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) {
// Kernel body executed once per fine node `nodeIdx`: fills the column indices
// of the node's rows, using the row pointers already stored in rowPtr_. A row
// receives either a single entry or up to eight entries (the corners of the
// enclosing coarse cell).
// NOTE(review): the declarator line, the branch lines that select between the
// single-entry and the multi-entry case, and the closing braces are not
// visible in this excerpt.
359 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
360 KOKKOS_INLINE_FUNCTION
363 LO nodeFineTuple[3] = {0, 0, 0};
364 LO nodeCoarseTuple[3] = {0, 0, 0};
// Convert the local fine node index into a per-direction tuple.
367 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
369 LO coarseNodeCoarseLID;
370 bool allCoarse =
false;
// Coarse tuple: integer quotient by the coarsening rate in each direction.
371 for(
int dim = 0; dim < 3; ++dim) {
372 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
// A row pointer increment of exactly one marks a single-entry row.
// NOTE(review): rowPtr_ is indexed with nodeIdx here, but with
// nodeIdx*dofsPerNode_ + dof in the stores below; confirm the behavior for
// dofsPerNode_ > 1.
374 if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse =
true;}
376 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
377 coarseNodeCoarseLID);
// Single-entry case (presumably guarded by allCoarse; the guard line is not
// visible): each dof row points at the matching dof of the coarse node.
381 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
382 colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof;
// Multi-entry case: if the coarse index is the last ghosted node of a
// direction, step back by one so that index+1 below is still within
// ghostedNodesPerDir_.
386 for(
int dim = 0; dim < numDimensions_; ++dim) {
387 if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; }
// Write the coarse local indices of the cell corners straight into colIndex_
// (output argument of getCoarseTuple2CoarseLID): offsets 0-1 always, 2-3 when
// numDimensions_ > 1, 4-7 when numDimensions_ > 2.
// NOTE(review): unlike the single-entry case, these stores apply no
// "*dofsPerNode_ + dof" scaling to the coarse index; confirm the behavior for
// dofsPerNode_ > 1.
392 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
393 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0));
394 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1));
395 if(numDimensions_ > 1) {
396 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2));
397 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3));
398 if(numDimensions_ > 2) {
399 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4));
400 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5));
401 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6));
402 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7));
decltype(std::declval< LOVector >().getDeviceLocalView(Xpetra::Access::ReadWrite)) LOVectorView
typename Kokkos::View< const int[3], device_type > constIntTupleView
typename local_graph_type::row_map_type::non_const_type non_const_row_map_type
typename LWGraph_kokkos::local_graph_type local_graph_type
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Build aggregates object.
typename Kokkos::View< const LO[3], device_type > constLOTupleView
typename local_graph_type::entries_type entries_type
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Build a CrsGraph instead of aggregates.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, device_type > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)