MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_REFMAXWELL_DEF_HPP
47#define MUELU_REFMAXWELL_DEF_HPP
48
49#include <sstream>
50
51#include "MueLu_ConfigDefs.hpp"
52
53#include "Xpetra_Map.hpp"
54#include "Xpetra_MatrixMatrix.hpp"
55#include "Xpetra_TripleMatrixMultiply.hpp"
56#include "Xpetra_CrsMatrixUtils.hpp"
57#include "Xpetra_MatrixUtils.hpp"
58
60
61#include "MueLu_AmalgamationFactory.hpp"
62#include "MueLu_RAPFactory.hpp"
63#include "MueLu_ThresholdAFilterFactory.hpp"
64#include "MueLu_TransPFactory.hpp"
65#include "MueLu_SmootherFactory.hpp"
66
67#include "MueLu_CoalesceDropFactory.hpp"
68#include "MueLu_CoarseMapFactory.hpp"
69#include "MueLu_CoordinatesTransferFactory.hpp"
70#include "MueLu_UncoupledAggregationFactory.hpp"
71#include "MueLu_TentativePFactory.hpp"
72#include "MueLu_SaPFactory.hpp"
73#include "MueLu_AggregationExportFactory.hpp"
74#include "MueLu_Utilities.hpp"
75#include "MueLu_Maxwell_Utils.hpp"
76
77#include "MueLu_AmalgamationFactory_kokkos.hpp"
78#include "MueLu_CoalesceDropFactory_kokkos.hpp"
79#include "MueLu_CoarseMapFactory_kokkos.hpp"
80#include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
81#include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
82#include "MueLu_TentativePFactory_kokkos.hpp"
83#include "MueLu_SaPFactory_kokkos.hpp"
84#include "MueLu_Utilities_kokkos.hpp"
85#include <Kokkos_Core.hpp>
86#include <KokkosSparse_CrsMatrix.hpp>
87
88#include "MueLu_ZoltanInterface.hpp"
89#include "MueLu_Zoltan2Interface.hpp"
90#include "MueLu_RepartitionHeuristicFactory.hpp"
91#include "MueLu_RepartitionFactory.hpp"
92#include "MueLu_RebalanceAcFactory.hpp"
93#include "MueLu_RebalanceTransferFactory.hpp"
94
96
99
100#ifdef HAVE_MUELU_CUDA
101#include "cuda_profiler_api.h"
102#endif
103
104// Stratimikos
105#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
107#endif
108
109
110namespace MueLu {
111
112 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
113 Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const {
114 return SM_Matrix_->getDomainMap();
115 }
116
117
118 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
119 Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const {
120 return SM_Matrix_->getRangeMap();
121 }
122
123
124 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
126
127 if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
128 Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
129 if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
130 newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
131 if(list.isSublist("refmaxwell: 22list"))
132 newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
133 list = newList;
134 }
135
136 parameterList_ = list;
137 std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
139 std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
140 if (outputFilename != "")
142 if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
143 VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
144
145 if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
146 GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
147 disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
148 mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
149 use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
150 dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
151 enable_reuse_ = list.get("refmaxwell: enable reuse", MasterList::getDefault<bool>("refmaxwell: enable reuse"));
152 implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
153 fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
154 skipFirstLevel_ = list.get("refmaxwell: skip first (1,1) level", MasterList::getDefault<bool>("refmaxwell: skip first (1,1) level"));
155 syncTimers_ = list.get("sync timers", false);
156 numItersH_ = list.get("refmaxwell: num iters H", 1);
157 numIters22_ = list.get("refmaxwell: num iters 22", 1);
158 applyBCsToAnodal_ = list.get("refmaxwell: apply BCs to Anodal", false);
159 applyBCsToH_ = list.get("refmaxwell: apply BCs to H", true);
160 applyBCsTo22_ = list.get("refmaxwell: apply BCs to 22", true);
161
162 if(list.isSublist("refmaxwell: 11list"))
163 precList11_ = list.sublist("refmaxwell: 11list");
164 if(!precList11_.isType<std::string>("Preconditioner Type") &&
165 !precList11_.isType<std::string>("smoother: type") &&
166 !precList11_.isType<std::string>("smoother: pre type") &&
167 !precList11_.isType<std::string>("smoother: post type")) {
168 precList11_.set("smoother: type", "CHEBYSHEV");
169 precList11_.sublist("smoother: params").set("chebyshev: degree",2);
170 precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
171 precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
172 }
173
174 if(list.isSublist("refmaxwell: 22list"))
175 precList22_ = list.sublist("refmaxwell: 22list");
176 if(!precList22_.isType<std::string>("Preconditioner Type") &&
177 !precList22_.isType<std::string>("smoother: type") &&
178 !precList22_.isType<std::string>("smoother: pre type") &&
179 !precList22_.isType<std::string>("smoother: post type")) {
180 precList22_.set("smoother: type", "CHEBYSHEV");
181 precList22_.sublist("smoother: params").set("chebyshev: degree",2);
182 precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
183 precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
184 }
185
186 if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
187 list.set("smoother: type", "CHEBYSHEV");
188 list.sublist("smoother: params").set("chebyshev: degree",2);
189 list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
190 list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
191 }
192
193 if(list.isSublist("smoother: params")) {
194 smootherList_ = list.sublist("smoother: params");
195 }
196
197 if (enable_reuse_ &&
198 !precList11_.isType<std::string>("Preconditioner Type") &&
199 !precList11_.isParameter("reuse: type"))
200 precList11_.set("reuse: type", "full");
201 if (enable_reuse_ &&
202 !precList22_.isType<std::string>("Preconditioner Type") &&
203 !precList22_.isParameter("reuse: type"))
204 precList22_.set("reuse: type", "full");
205
206# ifdef HAVE_MUELU_SERIAL
207 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosSerialWrapperNode).name())
208 useKokkos_ = false;
209# endif
210# ifdef HAVE_MUELU_OPENMP
211 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosOpenMPWrapperNode).name())
212 useKokkos_ = true;
213# endif
214# ifdef HAVE_MUELU_CUDA
215 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosCudaWrapperNode).name())
216 useKokkos_ = true;
217# endif
218# ifdef HAVE_MUELU_HIP
219 if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosHIPWrapperNode).name())
220 useKokkos_ = true;
221# endif
222 useKokkos_ = list.get("use kokkos refactor",useKokkos_);
223 }
224
225
226
227 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
229
230#ifdef HAVE_MUELU_CUDA
231 if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
232#endif
233
234 std::string timerLabel;
235 if (reuse)
236 timerLabel = "MueLu RefMaxwell: compute (reuse)";
237 else
238 timerLabel = "MueLu RefMaxwell: compute";
239 RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
240
242 // Remove explicit zeros from matrices
243 Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(parameterList_,D0_Matrix_,SM_Matrix_,M1_Matrix_,Ms_Matrix_);
244
245 if (IsPrint(Statistics2)) {
246 RCP<ParameterList> params = rcp(new ParameterList());;
247 params->set("printLoadBalancingInfo", true);
248 params->set("printCommInfo", true);
249 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
250 }
251
253 // Detect Dirichlet boundary conditions
254 if (!reuse) {
255 magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
256 Maxwell_Utils<SC,LO,GO,NO>::detectBoundaryConditionsSM(SM_Matrix_,D0_Matrix_,rowSumTol,
257 useKokkos_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_,
258 BCedges_,BCnodes_,BCrows_,BCcols_,BCdomain_,
259 allEdgesBoundary_,allNodesBoundary_);
260 if (IsPrint(Statistics2)) {
261 GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
262 }
263 }
264
265 if (allEdgesBoundary_) {
266 // All edges have been detected as boundary edges.
267 // Do not attempt to construct sub-hierarchies, but just set up a single level preconditioner.
268 GetOStream(Warnings0) << "All edges are detected as boundary edges!" << std::endl;
269 mode_ = "none";
270 setFineLevelSmoother();
271 return;
272 }
273
275 // build nullspace if necessary
276
277 if(Nullspace_ != null) {
278 // no need to do anything - nullspace is built
279 TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
280 }
281 else if(Nullspace_ == null && Coords_ != null) {
282 RCP<MultiVector> CoordsSC;
283 if (useKokkos_)
285 else
287 Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
288 D0_Matrix_->apply(*CoordsSC,*Nullspace_);
289
290 bool normalize = parameterList_.get<bool>("refmaxwell: normalize nullspace", MasterList::getDefault<bool>("refmaxwell: normalize nullspace"));
291
292 coordinateType minLen, maxLen, meanLen;
293 if (IsPrint(Statistics2) || normalize){
294 // compute edge lengths
295 ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
296 for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
297 localNullspace[i] = Nullspace_->getData(i);
298 coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
299 coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
300 coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
301 for (size_t j=0; j < Nullspace_->getMap()->getLocalNumElements(); j++) {
302 Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
303 for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
304 lenSC += localNullspace[i][j]*localNullspace[i][j];
305 coordinateType len = Teuchos::as<coordinateType>(Teuchos::ScalarTraits<Scalar>::real(Teuchos::ScalarTraits<Scalar>::squareroot(lenSC)));
306 localMinLen = std::min(localMinLen, len);
307 localMaxLen = std::max(localMaxLen, len);
308 localMeanLen += len;
309 }
310
311 RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
312 MueLu_minAll(comm, localMinLen, minLen);
313 MueLu_sumAll(comm, localMeanLen, meanLen);
314 MueLu_maxAll(comm, localMaxLen, maxLen);
315 meanLen /= Nullspace_->getMap()->getGlobalNumElements();
316 }
317
318 if (IsPrint(Statistics2)) {
319 GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
320 }
321
322 if (normalize) {
323 // normalize the nullspace
324 GetOStream(Runtime0) << "RefMaxwell::compute(): normalizing nullspace" << std::endl;
325
326 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
327
328 Array<Scalar> normsSC(Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
329 Nullspace_->scale(normsSC());
330 }
331 }
332 else {
333 GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
334 }
335
336 if (!reuse && skipFirstLevel_) {
337 // Nuke the BC edges in nullspace
338 if (useKokkos_)
339 Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
340 else
341 Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
342 dump(*Nullspace_, "nullspace.m");
343 }
344
346 // build special prolongator for (1,1)-block
347
348 if(P11_.is_null()) {
349 if (skipFirstLevel_) {
350 // Form A_nodal = D0* Ms D0 (aka TMT_agg)
351 std::string label("D0*Ms*D0");
352 A_nodal_Matrix_ = Maxwell_Utils<SC,LO,GO,NO>::PtAPWrapper(Ms_Matrix_,D0_Matrix_,parameterList_,label);
353
354 if (applyBCsToAnodal_) {
355 // Apply boundary conditions to A_nodal
356 if (useKokkos_)
357 Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
358 else
359 Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_);
360 }
361 dump(*A_nodal_Matrix_, "A_nodal.m");
362 }
363
364 // build special prolongator
365 GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
366 buildProlongator();
367 }
368
369#ifdef HAVE_MPI
370 bool doRebalancing = false;
371 doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
372 int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
373 int numProcsAH, numProcsA22;
374 int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
375 if (numProcs == 1)
376 doRebalancing = false;
377#endif
378 {
379 // build coarse grid operator for (1,1)-block
380 formCoarseMatrix();
381
382 if (!reuse) {
383#ifdef HAVE_MPI
384 if (doRebalancing) {
385
386 {
387 // decide on number of ranks for coarse (1, 1) problem
388
389 Level level;
390 level.SetFactoryManager(null);
391 level.SetLevelID(0);
392 level.Set("A",AH_);
393
394 auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
395 ParameterList repartheurParams;
396 repartheurParams.set("repartition: start level", 0);
397 // Setting min == target on purpose.
398 int defaultTargetRows = 10000;
399 repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
400 repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
401 repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
402 repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
403 repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
404 repartheurFactory->SetParameterList(repartheurParams);
405
406 level.Request("number of partitions", repartheurFactory.get());
407 repartheurFactory->Build(level);
408 numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
409 numProcsAH = std::min(numProcsAH,numProcs);
410 }
411
412 {
413 // decide on number of ranks for (2, 2) problem
414
415 Level level;
416 level.SetFactoryManager(null);
417 level.SetLevelID(0);
418
419 level.Set("Map",D0_Matrix_->getDomainMap());
420
421 auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
422 ParameterList repartheurParams;
423 repartheurParams.set("repartition: start level", 0);
424 repartheurParams.set("repartition: use map", true);
425 // Setting min == target on purpose.
426 int defaultTargetRows = 10000;
427 repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
428 repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
429 repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
430 repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
431 // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
432 repartheurFactory->SetParameterList(repartheurParams);
433
434 level.Request("number of partitions", repartheurFactory.get());
435 repartheurFactory->Build(level);
436 numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
437 numProcsA22 = std::min(numProcsA22,numProcs);
438 }
439
440 if (rebalanceStriding >= 1) {
441 TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
442 TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
443 if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
444 GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
445 << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
446 rebalanceStriding = -1;
447 }
448 }
449
450 // }
451
452 if ((numProcsAH < 0) || (numProcsA22 < 0) || (numProcsAH + numProcsA22 > numProcs)) {
453 GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling rebalancing of subsolves, since partition heuristic resulted "
454 << "in undesirable number of partitions: " << numProcsAH << ", " << numProcsA22 << std::endl;
455 doRebalancing = false;
456 }
457
458 // check again, as we could have changed the value above
459 if (doRebalancing) {
460 // rebalance AH
461 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
462
463 Level fineLevel, coarseLevel;
464 fineLevel.SetFactoryManager(null);
465 coarseLevel.SetFactoryManager(null);
466 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
467 fineLevel.SetLevelID(0);
468 coarseLevel.SetLevelID(1);
469 coarseLevel.Set("A",AH_);
470 coarseLevel.Set("P",P11_);
471 coarseLevel.Set("Coordinates",CoordsH_);
472 if (!NullspaceH_.is_null())
473 coarseLevel.Set("Nullspace",NullspaceH_);
474 coarseLevel.Set("number of partitions", numProcsAH);
475 coarseLevel.Set("repartition: heuristic target rows per process", 1000);
476
477 coarseLevel.setlib(AH_->getDomainMap()->lib());
478 fineLevel.setlib(AH_->getDomainMap()->lib());
479 coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
480 fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
481
482 std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
483 RCP<Factory> partitioner;
484 if (partName == "zoltan") {
485#ifdef HAVE_MUELU_ZOLTAN
486 partitioner = rcp(new ZoltanInterface());
487 // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
488 // partitioner->SetFactory("number of partitions", repartheurFactory);
489#else
490 throw Exceptions::RuntimeError("Zoltan interface is not available");
491#endif
492 } else if (partName == "zoltan2") {
493#ifdef HAVE_MUELU_ZOLTAN2
494 partitioner = rcp(new Zoltan2Interface());
495 ParameterList partParams;
496 RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
497 partParams.set("ParameterList", partpartParams);
498 partitioner->SetParameterList(partParams);
499 // partitioner->SetFactory("number of partitions", repartheurFactory);
500#else
501 throw Exceptions::RuntimeError("Zoltan2 interface is not available");
502#endif
503 }
504
505 auto repartFactory = rcp(new RepartitionFactory());
506 ParameterList repartParams;
507 repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
508 repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
509 if (rebalanceStriding >= 1) {
510 bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
511 if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
512 acceptPart = false;
513 repartParams.set("repartition: remap accept partition", acceptPart);
514 }
515 repartFactory->SetParameterList(repartParams);
516 // repartFactory->SetFactory("number of partitions", repartheurFactory);
517 repartFactory->SetFactory("Partition", partitioner);
518
519 auto newP = rcp(new RebalanceTransferFactory());
520 ParameterList newPparams;
521 newPparams.set("type", "Interpolation");
522 newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
523 newPparams.set("repartition: use subcommunicators", true);
524 newPparams.set("repartition: rebalance Nullspace", !NullspaceH_.is_null());
525 newP->SetFactory("Coordinates", NoFactory::getRCP());
526 if (!NullspaceH_.is_null())
527 newP->SetFactory("Nullspace", NoFactory::getRCP());
528 newP->SetParameterList(newPparams);
529 newP->SetFactory("Importer", repartFactory);
530
531 auto newA = rcp(new RebalanceAcFactory());
532 ParameterList rebAcParams;
533 rebAcParams.set("repartition: use subcommunicators", true);
534 newA->SetParameterList(rebAcParams);
535 newA->SetFactory("Importer", repartFactory);
536
537 coarseLevel.Request("P", newP.get());
538 coarseLevel.Request("Importer", repartFactory.get());
539 coarseLevel.Request("A", newA.get());
540 coarseLevel.Request("Coordinates", newP.get());
541 if (!NullspaceH_.is_null())
542 coarseLevel.Request("Nullspace", newP.get());
543 repartFactory->Build(coarseLevel);
544
545 if (!precList11_.get<bool>("repartition: rebalance P and R", false))
546 ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
547 P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
548 AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
549 CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
550 if (!NullspaceH_.is_null())
551 NullspaceH_ = coarseLevel.Get< RCP<MultiVector> >("Nullspace", newP.get());
552
553 AH_AP_reuse_data_ = Teuchos::null;
554 AH_RAP_reuse_data_ = Teuchos::null;
555
556 if (!disable_addon_ && enable_reuse_) {
557 // Rebalance the addon for next setup
558 RCP<const Import> ImporterH = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
559 RCP<const Map> targetMap = ImporterH->getTargetMap();
560 ParameterList XpetraList;
561 XpetraList.set("Restrict Communicator",true);
562 Addon_Matrix_ = MatrixFactory::Build(Addon_Matrix_, *ImporterH, *ImporterH, targetMap, targetMap, rcp(&XpetraList,false));
563 }
564 }
565 }
566#endif // HAVE_MPI
567
568 // This should be taken out again as soon as
569 // CoalesceDropFactory_kokkos supports BlockSize > 1 and
570 // drop tol != 0.0
571 if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
572 GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
573 << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
574 precList11_.set("aggregation: drop tol", 0.0);
575 }
576 dump(*P11_, "P11.m");
577
578 if (!implicitTranspose_) {
579 if (useKokkos_)
580 R11_ = Utilities_kokkos::Transpose(*P11_);
581 else
582 R11_ = Utilities::Transpose(*P11_);
583 dump(*R11_, "R11.m");
584 }
585 }
586
588 if (!AH_.is_null()) {
589 // set fixed block size for vector nodal matrix
590 size_t dim = Nullspace_->getNumVectors();
591 AH_->SetFixedBlockSize(dim);
592 AH_->setObjectLabel("RefMaxwell coarse (1,1)");
593 dump(*AH_, "AH.m");
594 int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
595 if (IsPrint(Statistics2)) {
596 RCP<ParameterList> params = rcp(new ParameterList());;
597 params->set("printLoadBalancingInfo", true);
598 params->set("printCommInfo", true);
599 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*AH_, "AH", params);
600 }
601#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
602 if (precList11_.isType<std::string>("Preconditioner Type")) {
603 // build a Stratimikos preconditioner
604 if (precList11_.get<std::string>("Preconditioner Type") == "MueLu") {
605 ParameterList& userParamList = precList11_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
606 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
607 }
608 thyraPrecOpH_ = rcp(new XpetraThyraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>(AH_, rcp(&precList11_, false)));
609 } else
610#endif
611 {
612 // build a MueLu hierarchy
613
614 if (!reuse) {
615 dumpCoords(*CoordsH_, "coordsH.m");
616 if (!NullspaceH_.is_null())
617 dump(*NullspaceH_, "NullspaceH.m");
618 ParameterList& userParamList = precList11_.sublist("user data");
619 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
620 if (!NullspaceH_.is_null())
621 userParamList.set<RCP<MultiVector> >("Nullspace", NullspaceH_);
622 HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
623 } else {
624 RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
625 level0->Set("A", AH_);
626 HierarchyH_->SetupRe();
627 }
628 }
629 SetProcRankVerbose(oldRank);
630 }
632
633 }
634
635 if(!reuse && applyBCsTo22_) {
636 GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
637
638 D0_Matrix_->resumeFill();
639 Scalar replaceWith;
640 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
641 replaceWith= Teuchos::ScalarTraits<SC>::eps();
642 else
643 replaceWith = Teuchos::ScalarTraits<SC>::zero();
644 if (useKokkos_) {
645 Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
646 } else {
647 Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
648 }
649 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
650 }
651
653 // Build A22 = D0* SM D0 and hierarchy for A22
654 if (!allNodesBoundary_) {
655 GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
656
657 if (!reuse) { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
658 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
659
660 Level fineLevel, coarseLevel;
661 fineLevel.SetFactoryManager(null);
662 coarseLevel.SetFactoryManager(null);
663 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
664 fineLevel.SetLevelID(0);
665 coarseLevel.SetLevelID(1);
666 fineLevel.Set("A",SM_Matrix_);
667 coarseLevel.Set("P",D0_Matrix_);
668 coarseLevel.Set("Coordinates",Coords_);
669
670 coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
671 fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
672 coarseLevel.setObjectLabel("RefMaxwell (2,2)");
673 fineLevel.setObjectLabel("RefMaxwell (2,2)");
674
675 RCP<RAPFactory> rapFact = rcp(new RAPFactory());
676 ParameterList rapList = *(rapFact->GetValidParameterList());
677 rapList.set("transpose: use implicit", true);
678 rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
679 rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
680 rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
681 rapFact->SetParameterList(rapList);
682
683 if (!A22_AP_reuse_data_.is_null()) {
684 coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
685 coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
686 }
687 if (!A22_RAP_reuse_data_.is_null()) {
688 coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
689 coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
690 }
691
692#ifdef HAVE_MPI
693 if (doRebalancing) {
694
695 coarseLevel.Set("number of partitions", numProcsA22);
696 coarseLevel.Set("repartition: heuristic target rows per process", 1000);
697
698 std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
699 RCP<Factory> partitioner;
700 if (partName == "zoltan") {
701#ifdef HAVE_MUELU_ZOLTAN
702 partitioner = rcp(new ZoltanInterface());
703 partitioner->SetFactory("A", rapFact);
704 // partitioner->SetFactory("number of partitions", repartheurFactory);
705 // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
706#else
707 throw Exceptions::RuntimeError("Zoltan interface is not available");
708#endif
709 } else if (partName == "zoltan2") {
710#ifdef HAVE_MUELU_ZOLTAN2
711 partitioner = rcp(new Zoltan2Interface());
712 ParameterList partParams;
713 RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
714 partParams.set("ParameterList", partpartParams);
715 partitioner->SetParameterList(partParams);
716 partitioner->SetFactory("A", rapFact);
717 // partitioner->SetFactory("number of partitions", repartheurFactory);
718#else
719 throw Exceptions::RuntimeError("Zoltan2 interface is not available");
720#endif
721 }
722
723 auto repartFactory = rcp(new RepartitionFactory());
724 ParameterList repartParams;
725 repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
726 repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
727 if (rebalanceStriding >= 1) {
728 bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
729 if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
730 acceptPart = false;
731 if (acceptPart)
732 TEUCHOS_ASSERT(AH_.is_null());
733 repartParams.set("repartition: remap accept partition", acceptPart);
734 } else
735 repartParams.set("repartition: remap accept partition", AH_.is_null());
736 repartFactory->SetParameterList(repartParams);
737 repartFactory->SetFactory("A", rapFact);
738 // repartFactory->SetFactory("number of partitions", repartheurFactory);
739 repartFactory->SetFactory("Partition", partitioner);
740
741 auto newP = rcp(new RebalanceTransferFactory());
742 ParameterList newPparams;
743 newPparams.set("type", "Interpolation");
744 newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
745 newPparams.set("repartition: use subcommunicators", true);
746 newPparams.set("repartition: rebalance Nullspace", false);
747 newP->SetFactory("Coordinates", NoFactory::getRCP());
748 newP->SetParameterList(newPparams);
749 newP->SetFactory("Importer", repartFactory);
750
751 auto newA = rcp(new RebalanceAcFactory());
752 ParameterList rebAcParams;
753 rebAcParams.set("repartition: use subcommunicators", true);
754 newA->SetParameterList(rebAcParams);
755 newA->SetFactory("A", rapFact);
756 newA->SetFactory("Importer", repartFactory);
757
758 coarseLevel.Request("P", newP.get());
759 coarseLevel.Request("Importer", repartFactory.get());
760 coarseLevel.Request("A", newA.get());
761 coarseLevel.Request("Coordinates", newP.get());
762 rapFact->Build(fineLevel,coarseLevel);
763 repartFactory->Build(coarseLevel);
764
765 if (!precList22_.get<bool>("repartition: rebalance P and R", false))
766 Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
767 D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
768 A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
769 Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
770
771 } else
772#endif // HAVE_MPI
773 {
774 coarseLevel.Request("A", rapFact.get());
775 if (enable_reuse_) {
776 coarseLevel.Request("AP reuse data", rapFact.get());
777 coarseLevel.Request("RAP reuse data", rapFact.get());
778 }
779
780 A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
781
782 if (enable_reuse_) {
783 if (coarseLevel.IsAvailable("AP reuse data", rapFact.get()))
784 A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
785 if (coarseLevel.IsAvailable("RAP reuse data", rapFact.get()))
786 A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
787 }
788 }
789 } else {
790 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
791 if (Importer22_.is_null()) {
792 RCP<Matrix> temp;
793 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
794 if (!implicitTranspose_)
795 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,A22_,GetOStream(Runtime0),true,true);
796 else
797 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,A22_,GetOStream(Runtime0),true,true);
798 } else {
799 // we replaced domain map and importer on D0, reverse that
800 RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
801 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(D0origDomainMap_, D0origImporter_);
802
803 RCP<Matrix> temp, temp2;
804 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
805 if (!implicitTranspose_)
806 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,temp2,GetOStream(Runtime0),true,true);
807 else
808 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
809
810 // and back again
811 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), D0importer);
812
813 ParameterList XpetraList;
814 XpetraList.set("Restrict Communicator",true);
815 XpetraList.set("Timer Label","MueLu::RebalanceA22");
816 RCP<const Map> targetMap = Importer22_->getTargetMap();
817 A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
818 }
819 }
820
821 if (!implicitTranspose_ && !reuse) {
822 if (useKokkos_)
823 D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
824 else
825 D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
826 }
827
829 if (!A22_.is_null()) {
830 dump(*A22_, "A22.m");
831 A22_->setObjectLabel("RefMaxwell (2,2)");
832 int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
833 if (IsPrint(Statistics2)) {
834 RCP<ParameterList> params = rcp(new ParameterList());;
835 params->set("printLoadBalancingInfo", true);
836 params->set("printCommInfo", true);
837 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A22_, "A22", params);
838 }
839#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
840 if (precList22_.isType<std::string>("Preconditioner Type")) {
841 // build a Stratimikos preconditioner
842 if (precList22_.get<std::string>("Preconditioner Type") == "MueLu") {
843 ParameterList& userParamList = precList22_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
844 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
845 }
846 thyraPrecOp22_ = rcp(new XpetraThyraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A22_, rcp(&precList22_, false)));
847 } else
848#endif
849 {
850 // build a MueLu hierarchy
851 if (!reuse) {
852 ParameterList& userParamList = precList22_.sublist("user data");
853 userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
854 // If we detected no boundary conditions, the (2,2) problem is singular.
855 // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
856 std::string coarseType = "";
857 if (precList22_.isParameter("coarse: type")) {
858 coarseType = precList22_.get<std::string>("coarse: type");
859 // Transform string to "Abcde" notation
860 std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
861 std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
862 }
863 if (BCedges_ == 0 &&
864 (coarseType == "" ||
865 coarseType == "Klu" ||
866 coarseType == "Klu2") &&
867 (!precList22_.isSublist("coarse: params") ||
868 !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
869 precList22_.sublist("coarse: params").set("fix nullspace",true);
870 Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
871 } else {
872 RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
873 level0->Set("A", A22_);
874 Hierarchy22_->SetupRe();
875 }
876 }
877 SetProcRankVerbose(oldRank);
878 }
880
881 }
882
883 if(!reuse && !allNodesBoundary_ && applyBCsTo22_) {
884 GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
885
886 D0_Matrix_->resumeFill();
887 Scalar replaceWith;
888 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
889 replaceWith= Teuchos::ScalarTraits<SC>::eps();
890 else
891 replaceWith = Teuchos::ScalarTraits<SC>::zero();
892 if (useKokkos_) {
893 Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
894 } else {
895 Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
896 }
897 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
898 dump(*D0_Matrix_, "D0_nuked.m");
899 }
900
901 setFineLevelSmoother();
902
903 if (!reuse) {
904 if (!ImporterH_.is_null()) {
905 RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
906 rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
907 }
908
909 if (!Importer22_.is_null()) {
910 if (enable_reuse_) {
911 D0origDomainMap_ = D0_Matrix_->getDomainMap();
912 D0origImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
913 }
914 RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
915 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
916 }
917
918#ifdef HAVE_MUELU_TPETRA
919 if ((!D0_T_Matrix_.is_null()) &&
920 (!R11_.is_null()) &&
921 (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
922 (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
923 (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
924 (R11_->getColMap()->lib() == Xpetra::UseTpetra))
925 D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
926 else
927#endif
928 D0_T_R11_colMapsMatch_ = false;
929 if (D0_T_R11_colMapsMatch_)
930 GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
931
932 // Allocate temporary MultiVectors for solve
933 allocateMemory(1);
934
935 if (parameterList_.isSublist("matvec params"))
936 {
937 RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
938 Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_Matrix_, matvecParams);
940 if (!D0_T_Matrix_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_T_Matrix_, matvecParams);
941 if (!R11_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*R11_, matvecParams);
942 if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
943 if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
944 }
945 if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
946 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
947 ImporterH_->setDistributorParameters(importerParams);
948 }
949 if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
950 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
951 Importer22_->setDistributorParameters(importerParams);
952 }
953 }
954
955 describe(GetOStream(Runtime0));
956
957#ifdef HAVE_MUELU_CUDA
958 if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
959#endif
960 }
961
962
963 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
965
966 Level level;
967 RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
968 level.SetFactoryManager(factoryHandler);
969 level.SetLevelID(0);
970 level.setObjectLabel("RefMaxwell (1,1)");
971 level.Set("A",SM_Matrix_);
972 level.setlib(SM_Matrix_->getDomainMap()->lib());
973 // For Hiptmair
974 level.Set("NodeMatrix", A22_);
975 level.Set("D0", D0_Matrix_);
976
977 if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
978 std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
979 std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
980
981 ParameterList preSmootherList, postSmootherList;
982 if (parameterList_.isSublist("smoother: pre params"))
983 preSmootherList = parameterList_.sublist("smoother: pre params");
984 if (parameterList_.isSublist("smoother: post params"))
985 postSmootherList = parameterList_.sublist("smoother: post params");
986
987 RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
988 RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
989 RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
990
991 level.Request("PreSmoother",smootherFact.get());
992 level.Request("PostSmoother",smootherFact.get());
993 if (enable_reuse_) {
994 ParameterList smootherFactoryParams;
995 smootherFactoryParams.set("keep smoother data", true);
996 smootherFact->SetParameterList(smootherFactoryParams);
997 level.Request("PreSmoother data", smootherFact.get());
998 level.Request("PostSmoother data", smootherFact.get());
999 if (!PreSmootherData_.is_null())
1000 level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1001 if (!PostSmootherData_.is_null())
1002 level.Set("PostSmoother data", PostSmootherData_, smootherFact.get());
1003 }
1004 smootherFact->Build(level);
1005 PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1006 PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",smootherFact.get());
1007 if (enable_reuse_) {
1008 PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1009 PostSmootherData_ = level.Get<RCP<SmootherPrototype> >("PostSmoother data",smootherFact.get());
1010 }
1011 } else {
1012 std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
1013
1014 RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
1015 RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(smootherPrototype));
1016 level.Request("PreSmoother",smootherFact.get());
1017 if (enable_reuse_) {
1018 ParameterList smootherFactoryParams;
1019 smootherFactoryParams.set("keep smoother data", true);
1020 smootherFact->SetParameterList(smootherFactoryParams);
1021 level.Request("PreSmoother data", smootherFact.get());
1022 if (!PreSmootherData_.is_null())
1023 level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1024 }
1025 smootherFact->Build(level);
1026 PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1027 PostSmoother_ = PreSmoother_;
1028 if (enable_reuse_)
1029 PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1030 }
1031
1032 }
1033
1034
1035 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1037
1038 RCP<Teuchos::TimeMonitor> tmAlloc = getTimer("MueLu RefMaxwell: Allocate MVs");
1039
1040 if (!R11_.is_null())
1041 P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1042 else
1043 P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1044 P11res_->setObjectLabel("P11res");
1045 if (D0_T_R11_colMapsMatch_) {
1046 D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1047 D0TR11Tmp_->setObjectLabel("D0TR11Tmp");
1048 }
1049 if (!ImporterH_.is_null()) {
1050 P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1051 P11resTmp_->setObjectLabel("P11resTmp");
1052 P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1053 } else
1054 P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1055 P11x_->setObjectLabel("P11x");
1056 if (!D0_T_Matrix_.is_null())
1057 D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1058 else
1059 D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1060 D0res_->setObjectLabel("D0res");
1061 if (!Importer22_.is_null()) {
1062 D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1063 D0resTmp_->setObjectLabel("D0resTmp");
1064 D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1065 } else
1066 D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1067 D0x_->setObjectLabel("D0x");
1068 if (!AH_.is_null()) {
1069 if (!ImporterH_.is_null() && !implicitTranspose_)
1070 P11resSubComm_ = MultiVectorFactory::Build(P11resTmp_, Teuchos::View);
1071 else
1072 P11resSubComm_ = MultiVectorFactory::Build(P11res_, Teuchos::View);
1073 P11resSubComm_->replaceMap(AH_->getRangeMap());
1074 P11resSubComm_->setObjectLabel("P11resSubComm");
1075
1076 P11xSubComm_ = MultiVectorFactory::Build(P11x_, Teuchos::View);
1077 P11xSubComm_->replaceMap(AH_->getDomainMap());
1078 P11xSubComm_->setObjectLabel("P11xSubComm");
1079 }
1080 if (!A22_.is_null()) {
1081 if (!Importer22_.is_null() && !implicitTranspose_)
1082 D0resSubComm_ = MultiVectorFactory::Build(D0resTmp_, Teuchos::View);
1083 else
1084 D0resSubComm_ = MultiVectorFactory::Build(D0res_, Teuchos::View);
1085 D0resSubComm_->replaceMap(A22_->getRangeMap());
1086 D0resSubComm_->setObjectLabel("D0resSubComm");
1087
1088 D0xSubComm_ = MultiVectorFactory::Build(D0x_, Teuchos::View);
1089 D0xSubComm_->replaceMap(A22_->getDomainMap());
1090 D0xSubComm_->setObjectLabel("D0xSubComm");
1091 }
1092 residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1093 residual_->setObjectLabel("residual");
1094 }
1095
1096
1097 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1098 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1099 if (dump_matrices_) {
1100 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1101 Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1102 }
1103 }
1104
1105
1106 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1107 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1108 if (dump_matrices_) {
1109 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1110 Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1111 }
1112 }
1113
1114
1115 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1117 if (dump_matrices_) {
1118 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1119 Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1120 }
1121 }
1122
1123
1124 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1125 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Teuchos::ArrayRCP<bool>& v, std::string name) const {
1126 if (dump_matrices_) {
1127 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1128 std::ofstream out(name);
1129 for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1130 out << v[i] << "\n";
1131 }
1132 }
1133
1134 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1135 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Kokkos::View<bool*, typename Node::device_type>& v, std::string name) const {
1136 if (dump_matrices_) {
1137 GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1138 std::ofstream out(name);
1139 auto vH = Kokkos::create_mirror_view (v);
1140 Kokkos::deep_copy(vH , v);
1141 for (size_t i = 0; i < vH.size(); i++)
1142 out << vH[i] << "\n";
1143 }
1144 }
1145
1146 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1147 Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getTimer(std::string name, RCP<const Teuchos::Comm<int> > comm) const {
1148 if (IsPrint(Timings)) {
1149 if (!syncTimers_)
1150 return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name)));
1151 else {
1152 if (comm.is_null())
1153 return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1154 else
1155 return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), comm.ptr()));
1156 }
1157 } else
1158 return Teuchos::null;
1159 }
1160
1161
1162 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1164 // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1165 //
1166 // The old implementation used
1167 // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1168 // yet the paper gives
1169 // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1170 // where phi_k is the k-th nullspace vector.
1171 //
1172 // The graph of D0 contains the incidence from nodes to edges.
1173 // The nodal prolongator P maps aggregates to nodes.
1174
1175 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1176 const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1177 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1178 size_t dim = Nullspace_->getNumVectors();
1179 size_t numLocalRows = SM_Matrix_->getLocalNumRows();
1180
1181 RCP<Matrix> P_nodal;
1182 RCP<CrsMatrix> P_nodal_imported;
1183 RCP<MultiVector> Nullspace_nodal;
1184 if (skipFirstLevel_) {
1185 // build prolongator: algorithm 1 in the reference paper
1186 // First, build nodal unsmoothed prolongator using the matrix A_nodal
1187 bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1188 if (read_P_from_file) {
1189 // This permits to read in an ML prolongator, so that we get the same hierarchy.
1190 // (ML and MueLu typically produce different aggregates.)
1191 std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1192 std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1193 std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1194 std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1195 RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1196 RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1197 P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1198 CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1199 } else {
1200 Level fineLevel, coarseLevel;
1201 fineLevel.SetFactoryManager(null);
1202 coarseLevel.SetFactoryManager(null);
1203 coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1204 fineLevel.SetLevelID(0);
1205 coarseLevel.SetLevelID(1);
1206 fineLevel.Set("A",A_nodal_Matrix_);
1207 fineLevel.Set("Coordinates",Coords_);
1208 fineLevel.Set("DofsPerNode",1);
1209 coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1210 fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1211 coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1212 fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1213
1214 LocalOrdinal NSdim = 1;
1215 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1216 nullSpace->putScalar(SC_ONE);
1217 fineLevel.Set("Nullspace",nullSpace);
1218
1219 RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1220 if (useKokkos_) {
1221 amalgFact = rcp(new AmalgamationFactory_kokkos());
1222 dropFact = rcp(new CoalesceDropFactory_kokkos());
1223 UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1224 coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1225 TentativePFact = rcp(new TentativePFactory_kokkos());
1226 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1227 SaPFact = rcp(new SaPFactory_kokkos());
1228 Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1229 } else
1230 {
1231 amalgFact = rcp(new AmalgamationFactory());
1232 dropFact = rcp(new CoalesceDropFactory());
1233 UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1234 coarseMapFact = rcp(new CoarseMapFactory());
1235 TentativePFact = rcp(new TentativePFactory());
1236 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1237 SaPFact = rcp(new SaPFactory());
1238 Tfact = rcp(new CoordinatesTransferFactory());
1239 }
1240 dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1241 double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1242 std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1243 std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1244 dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1245 dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1246 if (!useKokkos_)
1247 dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1248
1249 UncoupledAggFact->SetFactory("Graph", dropFact);
1250 int minAggSize = parameterList_.get("aggregation: min agg size",2);
1251 UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1252 int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1253 UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1254
1255 coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1256
1257 TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1258 TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1259 TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1260
1261 Tfact->SetFactory("Aggregates", UncoupledAggFact);
1262 Tfact->SetFactory("CoarseMap", coarseMapFact);
1263
1264 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1265 SaPFact->SetFactory("P", TentativePFact);
1266 coarseLevel.Request("P", SaPFact.get());
1267 } else
1268 coarseLevel.Request("P",TentativePFact.get());
1269 coarseLevel.Request("Nullspace",TentativePFact.get());
1270 coarseLevel.Request("Coordinates",Tfact.get());
1271
1272 RCP<AggregationExportFactory> aggExport;
1273 if (parameterList_.get("aggregation: export visualization data",false)) {
1274 aggExport = rcp(new AggregationExportFactory());
1275 ParameterList aggExportParams;
1276 aggExportParams.set("aggregation: output filename", "aggs.vtk");
1277 aggExportParams.set("aggregation: output file: agg style", "Jacks");
1278 aggExport->SetParameterList(aggExportParams);
1279
1280 aggExport->SetFactory("Aggregates", UncoupledAggFact);
1281 aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1282 fineLevel.Request("Aggregates",UncoupledAggFact.get());
1283 fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1284 }
1285
1286 if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1287 coarseLevel.Get("P",P_nodal,SaPFact.get());
1288 else
1289 coarseLevel.Get("P",P_nodal,TentativePFact.get());
1290 coarseLevel.Get("Nullspace",Nullspace_nodal,TentativePFact.get());
1291 coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1292
1293
1294 if (parameterList_.get("aggregation: export visualization data",false))
1295 aggExport->Build(fineLevel, coarseLevel);
1296 }
1297 dump(*P_nodal, "P_nodal.m");
1298 dump(*Nullspace_nodal, "Nullspace_nodal.m");
1299
1300
1301 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1302
1303 // Import off-rank rows of P_nodal into P_nodal_imported
1304 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1305 if (numProcs > 1) {
1306 RCP<CrsMatrixWrap> P_nodal_temp;
1307 RCP<const Map> targetMap = D0Crs->getColMap();
1308 P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1309 RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1310 P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1311 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1312 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1313 P_nodal_imported = P_nodal_temp->getCrsMatrix();
1314 dump(*P_nodal_temp, "P_nodal_imported.m");
1315 } else
1316 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1317
1318 }
1319
1320 if (useKokkos_) {
1321
1322 using ATS = Kokkos::ArithTraits<SC>;
1323 using impl_Scalar = typename ATS::val_type;
1324 using impl_ATS = Kokkos::ArithTraits<impl_Scalar>;
1325 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1326
1327 typedef typename Matrix::local_matrix_type KCRS;
1328 typedef typename KCRS::StaticCrsGraphType graph_t;
1329 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1330 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1331 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1332
1333 const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
1334 const impl_Scalar impl_SC_ONE = impl_ATS::one();
1335 const impl_Scalar impl_half = impl_SC_ONE / (impl_SC_ONE + impl_SC_ONE);
1336
1337
1338 // Which algorithm should we use for the construction of the special prolongator?
1339 // Option "mat-mat":
1340 // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1341 std::string defaultAlgo = "mat-mat";
1342 std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1343
1344 if (skipFirstLevel_) {
1345 // Get data out of P_nodal_imported and D0.
1346
1347 if (algo == "mat-mat") {
1348 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1349 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1350
1351#ifdef HAVE_MUELU_DEBUG
1352 TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1353#endif
1354
1355 // Get data out of D0*P.
1356 auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1357
1358 // Create the matrix object
1359 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1360 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1361
1362 size_t nnzEstimate = dim*localD0P.graph.entries.size();
1363 lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1364 lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1365 scalar_view_t P11vals("P11_vals",nnzEstimate);
1366
1367 // adjust rowpointer
1368 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1369 KOKKOS_LAMBDA(const size_t i) {
1370 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1371 });
1372
1373 // adjust column indices
1374 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1375 KOKKOS_LAMBDA(const size_t jj) {
1376 for (size_t k = 0; k < dim; k++) {
1377 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1378 P11vals(dim*jj+k) = impl_SC_ZERO;
1379 }
1380 });
1381
1382 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1383
1384 // enter values
1385 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1386 // The matrix D0 has too many entries per row.
1387 // Therefore we need to check whether its entries are actually non-zero.
1388 // This is the case for the matrices built by MiniEM.
1389 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1390
1391 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1392
1393 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1394 auto localP = P_nodal_imported->getLocalMatrixDevice();
1395 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1396 KOKKOS_LAMBDA(const size_t i) {
1397 for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1398 LO l = localD0.graph.entries(ll);
1399 impl_Scalar p = localD0.values(ll);
1400 if (impl_ATS::magnitude(p) < tol)
1401 continue;
1402 for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1403 LO j = localP.graph.entries(jj);
1404 impl_Scalar v = localP.values(jj);
1405 for (size_t k = 0; k < dim; k++) {
1406 LO jNew = dim*j+k;
1407 impl_Scalar n = localNullspace(i,k);
1408 size_t m;
1409 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1410 if (P11colind(m) == jNew)
1411 break;
1412#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1413 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1414#endif
1415 P11vals(m) += impl_half * v * n;
1416 }
1417 }
1418 }
1419 });
1420
1421 } else {
1422 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1423 auto localP = P_nodal_imported->getLocalMatrixDevice();
1424 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1425 KOKKOS_LAMBDA(const size_t i) {
1426 for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1427 LO l = localD0.graph.entries(ll);
1428 for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1429 LO j = localP.graph.entries(jj);
1430 impl_Scalar v = localP.values(jj);
1431 for (size_t k = 0; k < dim; k++) {
1432 LO jNew = dim*j+k;
1433 impl_Scalar n = localNullspace(i,k);
1434 size_t m;
1435 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1436 if (P11colind(m) == jNew)
1437 break;
1438#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1439 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1440#endif
1441 P11vals(m) += impl_half * v * n;
1442 }
1443 }
1444 }
1445 });
1446 }
1447
1448 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1449 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1450 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1451 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1452
1453 } else
1454 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1455
1456 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1457
1458 auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1459 auto localNullspaceH = NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1460 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1461 KOKKOS_LAMBDA(const size_t i) {
1462 impl_Scalar val = localNullspace_nodal(i,0);
1463 for (size_t j = 0; j < dim; j++)
1464 localNullspaceH(dim*i+j, j) = val;
1465 });
1466
1467 } else { // !skipFirstLevel_
1468 // Get data out of P_nodal_imported and D0.
1469 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1470
1471 CoordsH_ = Coords_;
1472
1473 if (algo == "mat-mat") {
1474
1475 // Create the matrix object
1476 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1477 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1478
1479 size_t nnzEstimate = dim*localD0.graph.entries.size();
1480 lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1481 lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1482 scalar_view_t P11vals("P11_vals",nnzEstimate);
1483
1484 // adjust rowpointer
1485 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1486 KOKKOS_LAMBDA(const size_t i) {
1487 P11rowptr(i) = dim*localD0.graph.row_map(i);
1488 });
1489
1490 // adjust column indices
1491 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1492 KOKKOS_LAMBDA(const size_t jj) {
1493 for (size_t k = 0; k < dim; k++) {
1494 P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1495 P11vals(dim*jj+k) = impl_SC_ZERO;
1496 }
1497 });
1498
1499 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1500
1501 // enter values
1502 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1503 // The matrix D0 has too many entries per row.
1504 // Therefore we need to check whether its entries are actually non-zero.
1505 // This is the case for the matrices built by MiniEM.
1506 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1507
1508 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1509
1510 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1511 KOKKOS_LAMBDA(const size_t i) {
1512 for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1513 LO j = localD0.graph.entries(jj);
1514 impl_Scalar p = localD0.values(jj);
1515 if (impl_ATS::magnitude(p) < tol)
1516 continue;
1517 for (size_t k = 0; k < dim; k++) {
1518 LO jNew = dim*j+k;
1519 impl_Scalar n = localNullspace(i,k);
1520 size_t m;
1521 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1522 if (P11colind(m) == jNew)
1523 break;
1524#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1525 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1526#endif
1527 P11vals(m) += impl_half * n;
1528 }
1529 }
1530 });
1531
1532 } else {
1533 Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1534 KOKKOS_LAMBDA(const size_t i) {
1535 for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1536 LO j = localD0.graph.entries(jj);
1537 for (size_t k = 0; k < dim; k++) {
1538 LO jNew = dim*j+k;
1539 impl_Scalar n = localNullspace(i,k);
1540 size_t m;
1541 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1542 if (P11colind(m) == jNew)
1543 break;
1544#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1545 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1546#endif
1547 P11vals(m) += impl_half * n;
1548 }
1549 }
1550 });
1551 }
1552
1553 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1554 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1555 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1556 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1557 } else
1558 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1559
1560 }
1561 } else
1562 {
1563 // get nullspace vectors
1564 ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1565 ArrayRCP<ArrayView<const SC> > nullspace(dim);
1566 for(size_t i=0; i<dim; i++) {
1567 nullspaceRCP[i] = Nullspace_->getData(i);
1568 nullspace[i] = nullspaceRCP[i]();
1569 }
1570
1571 // Get data out of P_nodal_imported and D0.
1572 ArrayRCP<size_t> P11rowptr_RCP;
1573 ArrayRCP<LO> P11colind_RCP;
1574 ArrayRCP<SC> P11vals_RCP;
1575
1576
1577 // Which algorithm should we use for the construction of the special prolongator?
1578 // Option "mat-mat":
1579 // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1580 // Option "gustavson":
1581 // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1582 // More efficient, but only available for serial node.
1583 std::string defaultAlgo = "mat-mat";
1584 std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1585
1586 if (skipFirstLevel_) {
1587
1588 if (algo == "mat-mat") {
1589 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1590 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1591
1592
1593 ArrayRCP<const size_t> D0rowptr_RCP;
1594 ArrayRCP<const LO> D0colind_RCP;
1595 ArrayRCP<const SC> D0vals_RCP;
1596 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1597 // For efficiency
1598 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1599 // slower than Teuchos::ArrayView::operator[].
1600 ArrayView<const size_t> D0rowptr;
1601 ArrayView<const LO> D0colind;
1602 ArrayView<const SC> D0vals;
1603 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1604
1605 // Get data out of P_nodal_imported and D0.
1606 ArrayRCP<const size_t> Prowptr_RCP;
1607 ArrayRCP<const LO> Pcolind_RCP;
1608 ArrayRCP<const SC> Pvals_RCP;
1609 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1610 ArrayView<const size_t> Prowptr;
1611 ArrayView<const LO> Pcolind;
1612 ArrayView<const SC> Pvals;
1613 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1614
1615 // Get data out of D0*P.
1616 ArrayRCP<const size_t> D0Prowptr_RCP;
1617 ArrayRCP<const LO> D0Pcolind_RCP;
1618 ArrayRCP<const SC> D0Pvals_RCP;
1619 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1620
1621 // For efficiency
1622 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1623 // slower than Teuchos::ArrayView::operator[].
1624 ArrayView<const size_t> D0Prowptr;
1625 ArrayView<const LO> D0Pcolind;
1626 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1627
1628 // Create the matrix object
1629 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1630 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1631 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1632 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1633 size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1634 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1635
1636 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1637 ArrayView<LO> P11colind = P11colind_RCP();
1638 ArrayView<SC> P11vals = P11vals_RCP();
1639
1640 // adjust rowpointer
1641 for (size_t i = 0; i < numLocalRows+1; i++) {
1642 P11rowptr[i] = dim*D0Prowptr[i];
1643 }
1644
1645 // adjust column indices
1646 for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1647 for (size_t k = 0; k < dim; k++) {
1648 P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1649 P11vals[dim*jj+k] = SC_ZERO;
1650 }
1651
1652 RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1653 RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1654 // enter values
1655 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1656 // The matrix D0 has too many entries per row.
1657 // Therefore we need to check whether its entries are actually non-zero.
1658 // This is the case for the matrices built by MiniEM.
1659 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1660
1661 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1662 for (size_t i = 0; i < numLocalRows; i++) {
1663 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1664 LO l = D0colind[ll];
1665 SC p = D0vals[ll];
1666 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1667 continue;
1668 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1669 LO j = Pcolind[jj];
1670 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1671 SC v = Pvals[jj];
1672 for (size_t k = 0; k < dim; k++) {
1673 LO jNew = dim*j+k;
1674 SC n = nullspace[k][i];
1675 size_t m;
1676 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1677 if (P11colind[m] == jNew)
1678 break;
1679#ifdef HAVE_MUELU_DEBUG
1680 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1681#endif
1682 P11vals[m] += half * v * n;
1683 }
1684 }
1685 }
1686 }
1687 } else {
1688 // enter values
1689 for (size_t i = 0; i < numLocalRows; i++) {
1690 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1691 LO l = D0colind[ll];
1692 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1693 LO j = Pcolind[jj];
1694 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1695 SC v = Pvals[jj];
1696 for (size_t k = 0; k < dim; k++) {
1697 LO jNew = dim*j+k;
1698 SC n = nullspace[k][i];
1699 size_t m;
1700 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1701 if (P11colind[m] == jNew)
1702 break;
1703#ifdef HAVE_MUELU_DEBUG
1704 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1705#endif
1706 P11vals[m] += half * v * n;
1707 }
1708 }
1709 }
1710 }
1711 }
1712
1713 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1714 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1715
1716 } else if (algo == "gustavson") {
1717 ArrayRCP<const size_t> D0rowptr_RCP;
1718 ArrayRCP<const LO> D0colind_RCP;
1719 ArrayRCP<const SC> D0vals_RCP;
1720 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1721 // For efficiency
1722 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1723 // slower than Teuchos::ArrayView::operator[].
1724 ArrayView<const size_t> D0rowptr;
1725 ArrayView<const LO> D0colind;
1726 ArrayView<const SC> D0vals;
1727 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1728
1729 // Get data out of P_nodal_imported and D0.
1730 ArrayRCP<const size_t> Prowptr_RCP;
1731 ArrayRCP<const LO> Pcolind_RCP;
1732 ArrayRCP<const SC> Pvals_RCP;
1733 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1734 ArrayView<const size_t> Prowptr;
1735 ArrayView<const LO> Pcolind;
1736 ArrayView<const SC> Pvals;
1737 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1738
1739 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1740 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1741 Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1742 // This is ad-hoc and should maybe be replaced with some better heuristics.
1743 size_t nnz_alloc = dim*D0vals_RCP.size();
1744
1745 // Create the matrix object
1746 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1747 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1748 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1749 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1750 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1751
1752 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1753 ArrayView<LO> P11colind = P11colind_RCP();
1754 ArrayView<SC> P11vals = P11vals_RCP();
1755
1756 size_t nnz;
1757 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1758 // The matrix D0 has too many entries per row.
1759 // Therefore we need to check whether its entries are actually non-zero.
1760 // This is the case for the matrices built by MiniEM.
1761 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1762
1763 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1764 nnz = 0;
1765 size_t nnz_old = 0;
1766 for (size_t i = 0; i < numLocalRows; i++) {
1767 P11rowptr[i] = nnz;
1768 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1769 LO l = D0colind[ll];
1770 SC p = D0vals[ll];
1771 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1772 continue;
1773 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1774 LO j = Pcolind[jj];
1775 SC v = Pvals[jj];
1776 for (size_t k = 0; k < dim; k++) {
1777 LO jNew = dim*j+k;
1778 SC n = nullspace[k][i];
1779 // do we already have an entry for (i, jNew)?
1780 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1781 P11_status[jNew] = nnz;
1782 P11colind[nnz] = jNew;
1783 P11vals[nnz] = half * v * n;
1784 // or should it be
1785 // P11vals[nnz] = half * n;
1786 nnz++;
1787 } else {
1788 P11vals[P11_status[jNew]] += half * v * n;
1789 // or should it be
1790 // P11vals[P11_status[jNew]] += half * n;
1791 }
1792 }
1793 }
1794 }
1795 nnz_old = nnz;
1796 }
1797 P11rowptr[numLocalRows] = nnz;
1798 } else {
1799 nnz = 0;
1800 size_t nnz_old = 0;
1801 for (size_t i = 0; i < numLocalRows; i++) {
1802 P11rowptr[i] = nnz;
1803 for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1804 LO l = D0colind[ll];
1805 for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1806 LO j = Pcolind[jj];
1807 SC v = Pvals[jj];
1808 for (size_t k = 0; k < dim; k++) {
1809 LO jNew = dim*j+k;
1810 SC n = nullspace[k][i];
1811 // do we already have an entry for (i, jNew)?
1812 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1813 P11_status[jNew] = nnz;
1814 P11colind[nnz] = jNew;
1815 P11vals[nnz] = half * v * n;
1816 // or should it be
1817 // P11vals[nnz] = half * n;
1818 nnz++;
1819 } else {
1820 P11vals[P11_status[jNew]] += half * v * n;
1821 // or should it be
1822 // P11vals[P11_status[jNew]] += half * n;
1823 }
1824 }
1825 }
1826 }
1827 nnz_old = nnz;
1828 }
1829 P11rowptr[numLocalRows] = nnz;
1830 }
1831
1832 if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1833 // Downward resize
1834 // - Cannot resize for Epetra, as it checks for same pointers
1835 // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1836 P11vals_RCP.resize(nnz);
1837 P11colind_RCP.resize(nnz);
1838 }
1839
1840 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1841 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1842 } else
1843 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1844
1845 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1846
1847 ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1848 ArrayView<const Scalar> ns_view = ns_rcp();
1849 for (size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1850 Scalar val = ns_view[i];
1851 for (size_t j = 0; j < dim; j++)
1852 NullspaceH_->replaceLocalValue(dim*i+j, j, val);
1853 }
1854
1855
1856 } else { // !skipFirstLevel_
1857 ArrayRCP<const size_t> D0rowptr_RCP;
1858 ArrayRCP<const LO> D0colind_RCP;
1859 ArrayRCP<const SC> D0vals_RCP;
1860 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1861 // For efficiency
1862 // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1863 // slower than Teuchos::ArrayView::operator[].
1864 ArrayView<const size_t> D0rowptr;
1865 ArrayView<const LO> D0colind;
1866 ArrayView<const SC> D0vals;
1867 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1868
1869 CoordsH_ = Coords_;
1870 if (algo == "mat-mat") {
1871
1872 // Create the matrix object
1873 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1874 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1875 P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1876 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1877 size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1878 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1879
1880 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1881 ArrayView<LO> P11colind = P11colind_RCP();
1882 ArrayView<SC> P11vals = P11vals_RCP();
1883
1884 // adjust rowpointer
1885 for (size_t i = 0; i < numLocalRows+1; i++) {
1886 P11rowptr[i] = dim*D0rowptr[i];
1887 }
1888
1889 // adjust column indices
1890 for (size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
1891 for (size_t k = 0; k < dim; k++) {
1892 P11colind[dim*jj+k] = dim*D0colind[jj]+k;
1893 P11vals[dim*jj+k] = SC_ZERO;
1894 }
1895
1896 // enter values
1897 if (D0_Matrix_->getLocalMaxNumRowEntries()>2) {
1898 // The matrix D0 has too many entries per row.
1899 // Therefore we need to check whether its entries are actually non-zero.
1900 // This is the case for the matrices built by MiniEM.
1901 GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1902
1903 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1904 for (size_t i = 0; i < numLocalRows; i++) {
1905 for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1906 LO j = D0colind[jj];
1907 SC p = D0vals[jj];
1908 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1909 continue;
1910 for (size_t k = 0; k < dim; k++) {
1911 LO jNew = dim*j+k;
1912 SC n = nullspace[k][i];
1913 size_t m;
1914 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1915 if (P11colind[m] == jNew)
1916 break;
1917#ifdef HAVE_MUELU_DEBUG
1918 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1919#endif
1920 P11vals[m] += half * n;
1921 }
1922 }
1923 }
1924 } else {
1925 // enter values
1926 for (size_t i = 0; i < numLocalRows; i++) {
1927 for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
1928 LO j = D0colind[jj];
1929
1930 for (size_t k = 0; k < dim; k++) {
1931 LO jNew = dim*j+k;
1932 SC n = nullspace[k][i];
1933 size_t m;
1934 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1935 if (P11colind[m] == jNew)
1936 break;
1937#ifdef HAVE_MUELU_DEBUG
1938 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1939#endif
1940 P11vals[m] += half * n;
1941 }
1942 }
1943 }
1944 }
1945
1946 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1947 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1948
1949 } else
1950 TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1951 }
1952 }
1953 }
1954
1955 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1957 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
1958
1959 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1960
1961 // coarse matrix for P11* (M1 + D1* M2 D1) P11
1962 RCP<Matrix> temp;
1963 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*P11_,false,temp,GetOStream(Runtime0),true,true);
1964 if (ImporterH_.is_null())
1965 AH_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,AH_,GetOStream(Runtime0),true,true);
1966 else {
1967
1968 RCP<Matrix> temp2;
1969 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
1970
1971 RCP<const Map> map = ImporterH_->getTargetMap()->removeEmptyProcesses();
1972 temp2->removeEmptyProcessesInPlace(map);
1973 if (!temp2.is_null() && temp2->getRowMap().is_null())
1974 temp2 = Teuchos::null;
1975 AH_ = temp2;
1976 }
1977
1978 if (!disable_addon_) {
1979
1980 RCP<Matrix> addon;
1981
1982 if (!AH_.is_null() && Addon_Matrix_.is_null()) {
1983 // construct addon
1984 RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
1985 // catch a failure
1986 TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1987 "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
1988 "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
1989
1990 // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
1991 RCP<Matrix> Zaux, Z;
1992
1993 // construct Zaux = M1 P11
1994 Zaux = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,Zaux,GetOStream(Runtime0),true,true);
1995 // construct Z = D0* M1 P11 = D0* Zaux
1996 Z = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,Z,GetOStream(Runtime0),true,true);
1997
1998 // construct Z* M0inv Z
1999 if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
2000 // We assume that if M0inv has at most one entry per row then
2001 // these are all diagonal entries.
2002 RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
2003 M0inv_Matrix_->getLocalDiagCopy(*diag);
2004 {
2005 ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
2006 for (size_t j=0; j < diag->getMap()->getLocalNumElements(); j++) {
2007 diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
2008 }
2009 }
2010 if (Z->getRowMap()->isSameAs(*(diag->getMap())))
2011 Z->leftScale(*diag);
2012 else {
2013 RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2014 RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2015 diag2->doImport(*diag,*importer,Xpetra::INSERT);
2016 Z->leftScale(*diag2);
2017 }
2018 addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,addon,GetOStream(Runtime0),true,true);
2019 } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2020 RCP<Matrix> C2;
2021 // construct C2 = M0inv Z
2022 C2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,C2,GetOStream(Runtime0),true,true);
2023 // construct Matrix2 = Z* M0inv Z = Z* C2
2024 addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,addon,GetOStream(Runtime0),true,true);
2025 } else {
2026 addon = MatrixFactory::Build(Z->getDomainMap());
2027 // construct Matrix2 = Z* M0inv Z
2028 Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2029 MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *addon, true, true);
2030 }
2031 // Should we keep the addon for next setup?
2032 if (enable_reuse_)
2033 Addon_Matrix_ = addon;
2034 } else
2035 addon = Addon_Matrix_;
2036
2037 if (!AH_.is_null()) {
2038 // add matrices together
2039 RCP<Matrix> newAH;
2040 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*AH_,false,one,*addon,false,one,newAH,GetOStream(Runtime0));
2041 newAH->fillComplete();
2042 AH_ = newAH;
2043 }
2044 }
2045
2046 if (!AH_.is_null() && !skipFirstLevel_) {
2047 ArrayRCP<bool> AHBCrows;
2048 AHBCrows.resize(AH_->getRowMap()->getLocalNumElements());
2049 size_t dim = Nullspace_->getNumVectors();
2050 if (useKokkos_)
2051 for (size_t i = 0; i < BCdomainKokkos_.size(); i++)
2052 for (size_t k = 0; k < dim; k++)
2053 AHBCrows[i*dim+k] = BCdomainKokkos_(i);
2054 else
2055 for (size_t i = 0; i < static_cast<size_t>(BCdomain_.size()); i++)
2056 for (size_t k = 0; k < dim; k++)
2057 AHBCrows[i*dim+k] = BCdomain_[i];
2058 magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
2059 if (rowSumTol > 0.)
2060 Utilities::ApplyRowSumCriterion(*AH_, rowSumTol, AHBCrows);
2061 if (applyBCsToH_)
2062 Utilities::ApplyOAZToMatrixRows(AH_, AHBCrows);
2063 }
2064
2065 if (!AH_.is_null()) {
2066 // If we already applied BCs to A_nodal, we likely do not need
2067 // to fix up AH.
2068 // If we did not apply BCs to A_nodal, we now need to correct
2069 // the zero diagonals of AH, since we did nuke the nullspace.
2070
2071 bool fixZeroDiagonal = !applyBCsToAnodal_;
2072 if (precList11_.isParameter("rap: fix zero diagonals"))
2073 fixZeroDiagonal = precList11_.get<bool>("rap: fix zero diagonals");
2074
2075 if (fixZeroDiagonal) {
2076 magnitudeType threshold = 1e-16;
2077 Scalar replacement = 1.0;
2078 if (precList11_.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2079 threshold = precList11_.get<magnitudeType>("rap: fix zero diagonals threshold");
2080 else if (precList11_.isType<double>("rap: fix zero diagonals threshold"))
2081 threshold = Teuchos::as<magnitudeType>(precList11_.get<double>("rap: fix zero diagonals threshold"));
2082 if (precList11_.isType<double>("rap: fix zero diagonals replacement"))
2083 replacement = Teuchos::as<Scalar>(precList11_.get<double>("rap: fix zero diagonals replacement"));
2084 Xpetra::MatrixUtils<SC,LO,GO,NO>::CheckRepairMainDiagonal(AH_, true, GetOStream(Warnings1), threshold, replacement);
2085 }
2086
2087 // Set block size
2088 size_t dim = Nullspace_->getNumVectors();
2089 AH_->SetFixedBlockSize(dim);
2090 }
2091 }
2092
2093
2094 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2095 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::resetMatrix(RCP<Matrix> SM_Matrix_new, bool ComputePrec) {
2096 bool reuse = !SM_Matrix_.is_null();
2097 SM_Matrix_ = SM_Matrix_new;
2098 dump(*SM_Matrix_, "SM.m");
2099 if (ComputePrec)
2100 compute(reuse);
2101 }
2102
2103
2104 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2105 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2106
2107 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2108
2109 { // compute residual
2110
2111 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2112 Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2113 }
2114
2115 { // restrict residual to sub-hierarchies
2116
2117 if (implicitTranspose_) {
2118 {
2119 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2120 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2121 }
2122 if (!allNodesBoundary_) {
2123 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2124 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2125 }
2126 } else {
2127#ifdef MUELU_HAVE_TPETRA
2128 if (D0_T_R11_colMapsMatch_) {
2129 // Column maps of D0_T and R11 match, and we're running Tpetra
2130 {
2131 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2132 D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2133 }
2134 if (!allNodesBoundary_) {
2135 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2136 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2137 }
2138 {
2139 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2140 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2141 }
2142 } else
2143#endif
2144 {
2145 {
2146 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2147 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2148 }
2149 if (!allNodesBoundary_) {
2150 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2151 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2152 }
2153 }
2154 }
2155 }
2156
2157 {
2158 RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2159
2160 // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2161
2162 if (!ImporterH_.is_null() && !implicitTranspose_) {
2163 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2164 P11resTmp_->beginImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2165 }
2166 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_) {
2167 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2168 D0resTmp_->beginImport(*D0res_, *Importer22_, Xpetra::INSERT);
2169 }
2170
2171 // iterate on coarse (1, 1) block
2172 if (!AH_.is_null()) {
2173 if (!ImporterH_.is_null() && !implicitTranspose_)
2174 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2175
2176 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2177
2178#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2179 if (!thyraPrecOpH_.is_null()) {
2180 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2181 thyraPrecOpH_->apply(*P11resSubComm_, *P11xSubComm_, Teuchos::NO_TRANS, one, zero);
2182 }
2183 else
2184#endif
2185 HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2186 }
2187
2188 // iterate on (2, 2) block
2189 if (!A22_.is_null()) {
2190 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2191 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2192
2193 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2194
2195#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2196 if (!thyraPrecOp22_.is_null()) {
2197 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2198 thyraPrecOp22_->apply(*D0resSubComm_, *D0xSubComm_, Teuchos::NO_TRANS, one, zero);
2199 }
2200 else
2201#endif
2202 Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2203 }
2204
2205 if (AH_.is_null() && !ImporterH_.is_null() && !implicitTranspose_)
2206 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2207 if (A22_.is_null() && !allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2208 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2209 }
2210
2211 if (fuseProlongationAndUpdate_) {
2212 { // prolongate (1,1) block
2213 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2214 P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2215 }
2216
2217 if (!allNodesBoundary_) { // prolongate (2,2) block
2218 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2219 D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2220 }
2221 } else {
2222 { // prolongate (1,1) block
2223 RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2224 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2225 }
2226
2227 if (!allNodesBoundary_) { // prolongate (2,2) block
2228 RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2229 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2230 }
2231
2232 { // update current solution
2233 RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2234 X.update(one, *residual_, one);
2235 }
2236 }
2237
2238 }
2239
2240
2241 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2242 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2243
2244 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2245
2246 { // compute residual
2247 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2248 Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
2249 if (implicitTranspose_)
2250 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2251 else
2252 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2253 }
2254
2255 { // solve coarse (1,1) block
2256 if (!ImporterH_.is_null() && !implicitTranspose_) {
2257 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2258 P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2259 }
2260 if (!AH_.is_null()) {
2261 RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2262 HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2263 }
2264 }
2265
2266 { // update current solution
2267 RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2268 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2269 X.update(one, *residual_, one);
2270 }
2271
2272 }
2273
2274
2275 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2276 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2277
2278 if (allNodesBoundary_)
2279 return;
2280
2281 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2282
2283 { // compute residual
2284 RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2285 Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2286 if (implicitTranspose_)
2287 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2288 else
2289 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2290 }
2291
2292 { // solve (2,2) block
2293 if (!Importer22_.is_null() && !implicitTranspose_) {
2294 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2295 D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2296 }
2297 if (!A22_.is_null()) {
2298 RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2299 Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2300 }
2301 }
2302
2303 { //update current solution
2304 RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2305 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2306 X.update(one, *residual_, one);
2307 }
2308
2309 }
2310
2311
2312 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2313 void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2314 Teuchos::ETransp /* mode */,
2315 Scalar /* alpha */,
2316 Scalar /* beta */) const {
2317
2318 RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2319
2320 // make sure that we have enough temporary memory
2321 if (!allEdgesBoundary_ && X.getNumVectors() != P11res_->getNumVectors())
2322 allocateMemory(X.getNumVectors());
2323
2324 { // apply pre-smoothing
2325
2326 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2327
2328 PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2329 }
2330
2331 // do solve for the 2x2 block system
2332 if(mode_=="additive")
2333 applyInverseAdditive(RHS,X);
2334 else if(mode_=="121") {
2335 solveH(RHS,X);
2336 solve22(RHS,X);
2337 solveH(RHS,X);
2338 } else if(mode_=="212") {
2339 solve22(RHS,X);
2340 solveH(RHS,X);
2341 solve22(RHS,X);
2342 } else if(mode_=="1")
2343 solveH(RHS,X);
2344 else if(mode_=="2")
2345 solve22(RHS,X);
2346 else if(mode_=="7") {
2347 solveH(RHS,X);
2348 { // apply pre-smoothing
2349
2350 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2351
2352 PreSmoother_->Apply(X, RHS, false);
2353 }
2354 solve22(RHS,X);
2355 { // apply post-smoothing
2356
2357 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2358
2359 PostSmoother_->Apply(X, RHS, false);
2360 }
2361 solveH(RHS,X);
2362 } else if(mode_=="none") {
2363 // do nothing
2364 }
2365 else
2366 applyInverseAdditive(RHS,X);
2367
2368 { // apply post-smoothing
2369
2370 RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2371
2372 PostSmoother_->Apply(X, RHS, false);
2373 }
2374
2375 }
2376
2377
2378 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2380 return false;
2381 }
2382
2383
2384 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2386 initialize(const Teuchos::RCP<Matrix> & D0_Matrix,
2387 const Teuchos::RCP<Matrix> & Ms_Matrix,
2388 const Teuchos::RCP<Matrix> & M0inv_Matrix,
2389 const Teuchos::RCP<Matrix> & M1_Matrix,
2390 const Teuchos::RCP<MultiVector> & Nullspace,
2391 const Teuchos::RCP<RealValuedMultiVector> & Coords,
2392 Teuchos::ParameterList& List)
2393 {
2394 // some pre-conditions
2395 TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2396 TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2397 TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2398
2399#ifdef HAVE_MUELU_DEBUG
2400 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2401 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2402 TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2403
2404 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2405 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2406 TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2407
2408 TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2409
2410 if (!M0inv_Matrix) {
2411 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2412 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2413 TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2414 }
2415#endif
2416
2417 HierarchyH_ = Teuchos::null;
2418 Hierarchy22_ = Teuchos::null;
2419 PreSmoother_ = Teuchos::null;
2420 PostSmoother_ = Teuchos::null;
2421 disable_addon_ = false;
2422 mode_ = "additive";
2423
2424 // set parameters
2425 setParameters(List);
2426
2427 if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2428 // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2429 // Fortunately, D0 is quite sparse.
2430 // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2431
2432 RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2433 RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2434 ArrayRCP<const size_t> D0rowptr_RCP;
2435 ArrayRCP<const LO> D0colind_RCP;
2436 ArrayRCP<const SC> D0vals_RCP;
2437 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2438 D0colind_RCP,
2439 D0vals_RCP);
2440
2441 ArrayRCP<size_t> D0copyrowptr_RCP;
2442 ArrayRCP<LO> D0copycolind_RCP;
2443 ArrayRCP<SC> D0copyvals_RCP;
2444 D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2445 D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2446 D0copycolind_RCP.deepCopy(D0colind_RCP());
2447 D0copyvals_RCP.deepCopy(D0vals_RCP());
2448 D0copyCrs->setAllValues(D0copyrowptr_RCP,
2449 D0copycolind_RCP,
2450 D0copyvals_RCP);
2451 D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2452 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2453 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2454 D0_Matrix_ = D0copy;
2455 } else
2456 D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2457
2458
2459 M0inv_Matrix_ = M0inv_Matrix;
2460 Ms_Matrix_ = Ms_Matrix;
2461 M1_Matrix_ = M1_Matrix;
2462 Coords_ = Coords;
2463 Nullspace_ = Nullspace;
2464
2465 dump(*D0_Matrix_, "D0_clean.m");
2466 dump(*Ms_Matrix_, "Ms.m");
2467 dump(*M1_Matrix_, "M1.m");
2468 if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2469 if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2470
2471 }
2472
2473
2474 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2476 describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2477
2478 std::ostringstream oss;
2479
2480 RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2481
2482#ifdef HAVE_MPI
2483 int root;
2484 if (!AH_.is_null())
2485 root = comm->getRank();
2486 else
2487 root = -1;
2488
2489 int actualRoot;
2490 reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2491 root = actualRoot;
2492#endif
2493
2494
2495 oss << "\n--------------------------------------------------------------------------------\n" <<
2496 "--- RefMaxwell Summary ---\n"
2497 "--------------------------------------------------------------------------------" << std::endl;
2498 oss << std::endl;
2499
2500 GlobalOrdinal numRows;
2501 GlobalOrdinal nnz;
2502
2503 SM_Matrix_->getRowMap()->getComm()->barrier();
2504
2505 numRows = SM_Matrix_->getGlobalNumRows();
2506 nnz = SM_Matrix_->getGlobalNumEntries();
2507
2508 Xpetra::global_size_t tt = numRows;
2509 int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2510 tt = nnz;
2511 int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2512
2513 oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2514 oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2515
2516 if (!A22_.is_null()) {
2517 // ToDo: make sure that this is printed correctly
2518 numRows = A22_->getGlobalNumRows();
2519 nnz = A22_->getGlobalNumEntries();
2520
2521 oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2522 }
2523
2524 oss << std::endl;
2525
2526 {
2527 if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2528 oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2529 else {
2530 oss << "Smoother pre : "
2531 << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2532 oss << "Smoother post : "
2533 << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2534 }
2535 }
2536 oss << std::endl;
2537
2538 std::string outstr = oss.str();
2539
2540#ifdef HAVE_MPI
2541 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2542 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2543
2544 int strLength = outstr.size();
2545 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2546 if (comm->getRank() != root)
2547 outstr.resize(strLength);
2548 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2549#endif
2550
2551 out << outstr;
2552
2553 if (!HierarchyH_.is_null())
2554 HierarchyH_->describe(out, GetVerbLevel());
2555
2556 if (!Hierarchy22_.is_null())
2557 Hierarchy22_->describe(out, GetVerbLevel());
2558
2559 if (IsPrint(Statistics2)) {
2560 // Print the grid of processors
2561 std::ostringstream oss2;
2562
2563 oss2 << "Sub-solver distribution over ranks" << std::endl;
2564 oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2565
2566 int numProcs = comm->getSize();
2567#ifdef HAVE_MPI
2568 RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2569 TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2570 RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2571#endif
2572
2573 char status = 0;
2574 if (!AH_.is_null())
2575 status += 1;
2576 if (!A22_.is_null())
2577 status += 2;
2578 std::vector<char> states(numProcs, 0);
2579#ifdef HAVE_MPI
2580 MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2581#else
2582 states.push_back(status);
2583#endif
2584
2585 int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2586 for (int proc = 0; proc < numProcs; proc += rowWidth) {
2587 for (int j = 0; j < rowWidth; j++)
2588 if (proc + j < numProcs)
2589 if (states[proc+j] == 0)
2590 oss2 << ".";
2591 else if (states[proc+j] == 1)
2592 oss2 << "1";
2593 else if (states[proc+j] == 2)
2594 oss2 << "2";
2595 else
2596 oss2 << "B";
2597 else
2598 oss2 << " ";
2599
2600 oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2601 }
2602 oss2 << std::endl;
2603 GetOStream(Statistics2) << oss2.str();
2604 }
2605
2606
2607 }
2608
2609
2610} // namespace
2611
2612#define MUELU_REFMAXWELL_SHORT
2613#endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
#define MueLu_maxAll(rcpComm, in, out)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_minAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Factory to export aggregation info or visualize aggregates using VTK.
AmalgamationFactory_kokkos for subblocks of strided map based amalgamation data.
AmalgamationFactory for subblocks of strided map based amalgamation data.
Factory for creating a graph based on a given matrix.
Factory for creating a graph based on a given matrix.
Factory for generating coarse level map. Used by TentativePFactory.
Class for transferring coordinates from a finer level to a coarser one.
Exception throws to report errors in the internal logical of the program.
This class specifies the default factory that should generate some data on a Level if the data does n...
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void setlib(Xpetra::UnderlyingLib lib2)
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
static void removeExplicitZeros(Teuchos::ParameterList &parameterList, RCP< Matrix > &D0_Matrix, RCP< Matrix > &SM_Matrix, RCP< Matrix > &M1_Matrix, RCP< Matrix > &Ms_Matrix)
Remove explicit zeros.
static void detectBoundaryConditionsSM(RCP< Matrix > &SM_Matrix, RCP< Matrix > &D0_Matrix, magnitudeType rowSumTol, bool useKokkos_, Kokkos::View< bool *, typename Node::device_type > &BCrowsKokkos, Kokkos::View< bool *, typename Node::device_type > &BCcolsKokkos, Kokkos::View< bool *, typename Node::device_type > &BCdomainKokkos, int &BCedges, int &BCnodes, Teuchos::ArrayRCP< bool > &BCrows, Teuchos::ArrayRCP< bool > &BCcols, Teuchos::ArrayRCP< bool > &BCdomain, bool &allEdgesBoundary, bool &allNodesBoundary)
Detect Dirichlet boundary conditions.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > PtAPWrapper(RCP< Matrix > &A, RCP< Matrix > &P, Teuchos::ParameterList &params, std::string &label)
static void setMatvecParams(Matrix &A, RCP< ParameterList > matvecParams)
Sets matvec params on a matrix.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Factory for building coarse matrices.
Factory for building coarse matrices.
Applies permutation to grid transfer operators.
Preconditioner (wrapped as a Xpetra::Operator) for Maxwell's equations in curl-curl form.
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
void allocateMemory(int numVectors) const
allocate multivectors for solve
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
void buildProlongator()
Setup the prolongator for the (1,1)-block.
void setFineLevelSmoother()
Set the fine level smoother.
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
void compute(bool reuse=false)
Setup the preconditioner.
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void dump(const Matrix &A, std::string name) const
dump out matrix
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
Factory for building permutation matrix that can be be used to shuffle data (matrices,...
Factory for determing the number of partitions for rebalancing.
Factory for building Smoothed Aggregation prolongators.
Factory for building Smoothed Aggregation prolongators.
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
Factory for building tentative prolongator.
Class that encapsulates external library smoothers.
Factory for building uncoupled aggregates.
static void ApplyOAZToMatrixRows(RCP< Matrix > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
static RCP< MultiVector > RealValuedToScalarMultiVector(RCP< RealValuedMultiVector > X)
static void ZeroDirichletRows(RCP< Matrix > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows, SC replaceWith=Teuchos::ScalarTraits< SC >::zero())
static RCP< Matrix > Transpose(Matrix &Op, bool optimizeTranspose=false, const std::string &label=std::string())
Transpose a Xpetra::Matrix.
static void ZeroDirichletCols(RCP< Matrix > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletCols, SC replaceWith=Teuchos::ScalarTraits< SC >::zero())
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::coordinateType, LocalOrdinal, GlobalOrdinal, Node > > X)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
static void SetMueLuOFileStream(const std::string &filename)
Interface to Zoltan2 library.
Interface to Zoltan library.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Errors
Errors.
@ Runtime0
One-liner description of what is happening.
@ Runtime1
Description of what is happening (more verbose)
@ Warnings1
Additional warnings.
@ Timings
Print all timing information.
@ Statistics0
Print statistics that do not involve significant additional computation.
MsgType toVerbLevel(const std::string &verbLevelStr)
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix,...