Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_AdditiveSchwarzFilter_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
44#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
45
46#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
47#include "KokkosKernels_Sorting.hpp"
48#include "KokkosSparse_SortCrs.hpp"
49#include "Kokkos_Bitset.hpp"
50
51namespace Ifpack2
52{
53namespace Details
54{
55
56template<class MatrixType>
58AdditiveSchwarzFilter(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
59 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
60 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
61 bool filterSingletons)
62{
63 setup(A_unfiltered, perm, reverseperm, filterSingletons);
64}
65
66template<class MatrixType>
68setup(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
69 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
70 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
71 bool filterSingletons)
72{
73 using Teuchos::RCP;
74 using Teuchos::rcp;
75 using Teuchos::rcp_dynamic_cast;
76 //Check that A is an instance of allowed types, either:
77 // - Tpetra::CrsMatrix
78 // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_)
79 TEUCHOS_TEST_FOR_EXCEPTION(
80 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
81 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
82 std::invalid_argument,
83 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
84 A_unfiltered_ = A_unfiltered;
85 local_matrix_type A_local, A_halo;
86 bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
87 if(haveHalo)
88 {
89 auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
90 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
91 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
92 }
93 else
94 {
95 auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
96 A_local = A_crs->getLocalMatrixDevice();
97 //leave A_halo empty in this case
98 }
99 //Check that perm and reversePerm are the same length and match the number of local rows in A
100 TEUCHOS_TEST_FOR_EXCEPTION(
101 perm.size() != reverseperm.size(),
102 std::invalid_argument,
103 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
104 TEUCHOS_TEST_FOR_EXCEPTION(
105 (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(),
106 std::invalid_argument,
107 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
108 //First, compute the permutation tables (that exclude singletons, if requested)
109 FilterSingletons_ = filterSingletons;
110 local_ordinal_type numLocalRows;
111 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
112 if(FilterSingletons_)
113 {
114 //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size)
115 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows);
116 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows);
117 auto singletonsDevice = singletons_.view_device();
118 singletons_.modify_device();
119 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
120 singletonDiagonals_.modify_device();
121 local_ordinal_type numSingletons;
122 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
123 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
124 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass)
125 {
126 bool isSingleton = true;
127 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
128 if(i < A_local.numRows())
129 {
130 //row i is in original local matrix
131 size_type rowBegin = A_local.graph.row_map(i);
132 size_type rowEnd = A_local.graph.row_map(i + 1);
133 for(size_type j = rowBegin; j < rowEnd; j++)
134 {
135 local_ordinal_type entry = A_local.graph.entries(j);
136 if(entry < totalLocalRows && entry != i)
137 {
138 isSingleton = false;
139 break;
140 }
141 if(finalPass && entry == i)
142 {
143 //note: using a sum to compute the diagonal value, in case entries are not compressed.
144 singletonValue += A_local.values(j);
145 }
146 }
147 }
148 else
149 {
150 //row i is in halo
151 local_ordinal_type row = i - A_local.numRows();
152 size_type rowBegin = A_halo.graph.row_map(row);
153 size_type rowEnd = A_halo.graph.row_map(row + 1);
154 for(size_type j = rowBegin; j < rowEnd; j++)
155 {
156 local_ordinal_type entry = A_halo.graph.entries(j);
157 if(entry < totalLocalRows && entry != i)
158 {
159 isSingleton = false;
160 break;
161 }
162 if(finalPass && entry == i)
163 {
164 singletonValue += A_halo.values(j);
165 }
166 }
167 }
168 if(isSingleton)
169 {
170 if(finalPass)
171 {
172 isSingletonBitset.set(i);
173 singletonsDevice(lnumSingletons) = i;
174 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
175 }
176 lnumSingletons++;
177 }
178 }, numSingletons);
179 numSingletons_ = numSingletons;
180 //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix.
181 numLocalRows = totalLocalRows - numSingletons_;
182 //Using the list of singletons, create the reduced permutations.
183 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows);
184 perm_.modify_device();
185 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows);
186 reverseperm_.modify_device();
187 auto permView = perm_.view_device();
188 auto reversepermView = reverseperm_.view_device();
189 //Get the original inverse permutation on device
190 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows);
191 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
192 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
193 //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation.
194 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
195 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass)
196 {
197 local_ordinal_type origRow = origpermView(i);
198 if(!isSingletonBitset.test(origRow))
199 {
200 if(finalPass)
201 {
202 //mark the mapping in both directions between origRow and lindex (a row in filtered A)
203 reversepermView(lindex) = origRow;
204 permView(origRow) = lindex;
205 }
206 lindex++;
207 }
208 else
209 {
210 //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A.
211 if(finalPass)
212 permView(origRow) = local_ordinal_type(-1);
213 }
214 });
215 perm_.sync_host();
216 reverseperm_.sync_host();
217 }
218 else
219 {
220 //We will keep all the local rows, so use the permutation as is
221 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size());
222 perm_.modify_host();
223 auto permHost = perm_.view_host();
224 for(size_t i = 0; i < (size_t) perm.size(); i++)
225 permHost(i) = perm[i];
226 perm_.sync_device();
227 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size());
228 reverseperm_.modify_host();
229 auto reversepermHost = reverseperm_.view_host();
230 for(size_t i = 0; i < (size_t) reverseperm.size(); i++)
231 reversepermHost(i) = reverseperm[i];
232 reverseperm_.sync_device();
233 numSingletons_ = 0;
234 numLocalRows = totalLocalRows;
235 }
236 auto permView = perm_.view_device();
237 auto reversepermView = reverseperm_.view_device();
238 //Fill the local matrix of A_ (filtered and permuted)
239 //First, construct the rowmap by counting the entries in each row
240 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1);
241 Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
242 KOKKOS_LAMBDA(local_ordinal_type i)
243 {
244 if(i == numLocalRows)
245 {
246 localRowptrs(i) = 0;
247 return;
248 }
249 //Count the entries that will be in filtered row i.
250 //This means entries which correspond to local, non-singleton rows/columns.
251 local_ordinal_type numEntries = 0;
252 local_ordinal_type origRow = reversepermView(i);
253 if(origRow < A_local.numRows())
254 {
255 //row i is in original local matrix
256 size_type rowBegin = A_local.graph.row_map(origRow);
257 size_type rowEnd = A_local.graph.row_map(origRow + 1);
258 for(size_type j = rowBegin; j < rowEnd; j++)
259 {
260 local_ordinal_type entry = A_local.graph.entries(j);
261 if(entry < totalLocalRows && permView(entry) != -1)
262 numEntries++;
263 }
264 }
265 else
266 {
267 //row i is in halo
268 local_ordinal_type row = origRow - A_local.numRows();
269 size_type rowBegin = A_halo.graph.row_map(row);
270 size_type rowEnd = A_halo.graph.row_map(row + 1);
271 for(size_type j = rowBegin; j < rowEnd; j++)
272 {
273 local_ordinal_type entry = A_halo.graph.entries(j);
274 if(entry < totalLocalRows && permView(entry) != -1)
275 numEntries++;
276 }
277 }
278 localRowptrs(i) = numEntries;
279 });
280 //Prefix sum to finish computing the rowptrs
281 size_type numLocalEntries;
282 Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
283 KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass)
284 {
285 size_type numEnt = localRowptrs(i);
286 if(finalPass)
287 localRowptrs(i) = lnumEntries;
288 lnumEntries += numEnt;
289 }, numLocalEntries);
290 //Allocate and fill local entries and values
291 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries);
292 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries);
293 //Create the local matrix with the uninitialized entries/values, then fill it.
294 local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
295 fillLocalMatrix(localMatrix);
296 //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map)
297#ifdef HAVE_IFPACK2_DEBUG
298 TEUCHOS_TEST_FOR_EXCEPTION(
299 ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: "
300 "A's Map pairs are not fitted to each other on Process "
301 << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
302 "communicator. "
303 "This means that LocalFilter does not currently know how to work with A. "
304 "This will change soon. Please see discussion of Bug 5992.");
305#endif // HAVE_IFPACK2_DEBUG
306
307 // Build the local communicator (containing this process only).
308 RCP<const Teuchos::Comm<int> > localComm;
309#ifdef HAVE_MPI
310 localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
311#else
312 localComm = rcp (new Teuchos::SerialComm<int> ());
313#endif // HAVE_MPI
314 //All 4 maps are the same for a local square operator.
315 localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
316 //Create the inner filtered matrix.
317 auto crsParams = rcp(new Teuchos::ParameterList);
318 crsParams->template set<bool>("sorted", true);
319 //NOTE: this is the fastest way to construct A_ - it's created as fillComplete,
320 //and no communication needs to be done since localMap_ uses a local comm.
321 //It does need to copy the whole local matrix to host when DualViews are constructed
322 //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix.
323 //It also needs to compute local constants (maxNumRowEntries) but this should be a
324 //cheap operation relative to what this constructor already did.
325 A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
326}
327
328template<class MatrixType>
330
331template<class MatrixType>
333{
334 //Get the local matrix back from A_
335 auto localMatrix = A_->getLocalMatrixDevice();
336 //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_.
337 fillLocalMatrix(localMatrix);
338 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
339}
340
341template<class MatrixType>
342Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::crs_matrix_type>
344{
345 return A_;
346}
347
348template<class MatrixType>
350{
351 auto localRowptrs = localMatrix.graph.row_map;
352 auto localEntries = localMatrix.graph.entries;
353 auto localValues = localMatrix.values;
354 auto permView = perm_.view_device();
355 auto reversepermView = reverseperm_.view_device();
356 local_matrix_type A_local, A_halo;
357 bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
358 if(haveHalo)
359 {
360 auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
361 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
362 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
363 }
364 else
365 {
366 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
367 A_local = A_crs->getLocalMatrixDevice();
368 //leave A_halo empty in this case
369 }
370 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
371 //Fill entries and values
372 Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
373 KOKKOS_LAMBDA(local_ordinal_type i)
374 {
375 //Count the entries that will be in filtered row i.
376 //This means entries which correspond to local, non-singleton rows/columns.
377 size_type outRowStart = localRowptrs(i);
378 local_ordinal_type insertPos = 0;
379 local_ordinal_type origRow = reversepermView(i);
380 if(origRow < A_local.numRows())
381 {
382 //row i is in original local matrix
383 size_type rowBegin = A_local.graph.row_map(origRow);
384 size_type rowEnd = A_local.graph.row_map(origRow + 1);
385 for(size_type j = rowBegin; j < rowEnd; j++)
386 {
387 local_ordinal_type entry = A_local.graph.entries(j);
388 if(entry < totalLocalRows)
389 {
390 auto newCol = permView(entry);
391 if(newCol != -1)
392 {
393 localEntries(outRowStart + insertPos) = newCol;
394 localValues(outRowStart + insertPos) = A_local.values(j);
395 insertPos++;
396 }
397 }
398 }
399 }
400 else
401 {
402 //row i is in halo
403 local_ordinal_type row = origRow - A_local.numRows();
404 size_type rowBegin = A_halo.graph.row_map(row);
405 size_type rowEnd = A_halo.graph.row_map(row + 1);
406 for(size_type j = rowBegin; j < rowEnd; j++)
407 {
408 local_ordinal_type entry = A_halo.graph.entries(j);
409 if(entry < totalLocalRows)
410 {
411 auto newCol = permView(entry);
412 if(newCol != -1)
413 {
414 localEntries(outRowStart + insertPos) = newCol;
415 localValues(outRowStart + insertPos) = A_halo.values(j);
416 insertPos++;
417 }
418 }
419 }
420 }
421 });
422 //Sort the matrix, since the reordering can shuffle the entries into any order.
423 KokkosSparse::sort_crs_matrix(localMatrix);
424}
425
426template<class MatrixType>
427Teuchos::RCP<const Teuchos::Comm<int> > AdditiveSchwarzFilter<MatrixType>::getComm() const
428{
429 return localMap_->getComm();
430}
431
432template<class MatrixType>
433Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
435{
436 return localMap_;
437}
438
439template<class MatrixType>
440Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
442{
443 return localMap_;
444}
445
446template<class MatrixType>
447Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
449{
450 return localMap_;
451}
452
453template<class MatrixType>
454Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
456{
457 return localMap_;
458}
459
460template<class MatrixType>
461Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
462 typename MatrixType::global_ordinal_type,
463 typename MatrixType::node_type> >
465{
466 //NOTE BMK 6-22: this is to maintain compatibilty with LocalFilter.
467 //Situations like overlapping AdditiveSchwarz + BlockRelaxation
468 //require the importer of the original distributed graph, even though the
469 //BlockRelaxation is preconditioning a local matrix (A_).
470 return A_unfiltered_->getGraph();
471}
472
473template<class MatrixType>
475{
476 return A_->getGlobalNumRows();
477}
478
479template<class MatrixType>
481{
482 return A_->getGlobalNumCols();
483}
484
485template<class MatrixType>
487{
488 return A_->getLocalNumRows();
489}
490
491template<class MatrixType>
493{
494 return A_->getLocalNumCols();
495}
496
497template<class MatrixType>
498typename MatrixType::global_ordinal_type AdditiveSchwarzFilter<MatrixType>::getIndexBase() const
499{
500 return A_->getIndexBase();
501}
502
503template<class MatrixType>
505{
506 return getLocalNumEntries();
507}
508
509template<class MatrixType>
511{
512 return A_->getLocalNumEntries();
513}
514
515template<class MatrixType>
517getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
518{
519 return A_->getNumEntriesInGlobalRow(globalRow);
520}
521
522template<class MatrixType>
524getNumEntriesInLocalRow (local_ordinal_type localRow) const
525{
526 return A_->getNumEntriesInLocalRow(localRow);
527}
528
529template<class MatrixType>
531{
532 //Use A_unfiltered_ to get a valid upper bound for this.
533 //This lets us support this function without computing global constants on A_.
534 return A_unfiltered_->getGlobalMaxNumRowEntries();
535}
536
537template<class MatrixType>
539{
540 //Use A_unfiltered_ to get a valid upper bound for this
541 return A_unfiltered_->getLocalMaxNumRowEntries();
542}
543
544
545template<class MatrixType>
547{
548 return true;
549}
550
551
552template<class MatrixType>
554{
555 return true;
556}
557
558
559template<class MatrixType>
561{
562 return false;
563}
564
565
566template<class MatrixType>
568{
569 return true;
570}
571
572
573template<class MatrixType>
575 getGlobalRowCopy (global_ordinal_type globalRow,
576 nonconst_global_inds_host_view_type &globalInd,
577 nonconst_values_host_view_type &val,
578 size_t& numEntries) const
579{
580 throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
581}
582
583template<class MatrixType>
585getLocalRowCopy (local_ordinal_type LocalRow,
586 nonconst_local_inds_host_view_type &Indices,
587 nonconst_values_host_view_type &Values,
588 size_t& NumEntries) const
589
590{
591 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
592}
593
594template<class MatrixType>
595void AdditiveSchwarzFilter<MatrixType>::getGlobalRowView(global_ordinal_type /* GlobalRow */,
596 global_inds_host_view_type &/*indices*/,
597 values_host_view_type &/*values*/) const
598{
599 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
600}
601
602template<class MatrixType>
603void AdditiveSchwarzFilter<MatrixType>::getLocalRowView(local_ordinal_type LocalRow,
604 local_inds_host_view_type & indices,
605 values_host_view_type & values) const
606{
607 A_->getLocalRowView(LocalRow, indices, values);
608}
609
610template<class MatrixType>
612getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag) const
613{
614 // This is somewhat dubious as to how the maps match.
615 A_->getLocalDiagCopy(diag);
616}
617
618template<class MatrixType>
619void AdditiveSchwarzFilter<MatrixType>::leftScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
620{
621 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
622}
623
624template<class MatrixType>
625void AdditiveSchwarzFilter<MatrixType>::rightScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
626{
627 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
628}
629
630template<class MatrixType>
632apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
633 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
634 Teuchos::ETransp mode,
635 scalar_type alpha,
636 scalar_type beta) const
637{
638 A_->apply(X, Y, mode, alpha, beta);
639}
640
641template<class MatrixType>
644 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
645 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
646 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB) const
647{
648 //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews,
649 //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
650 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
651 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
652 local_ordinal_type numVecs = OverlappingB.getNumVectors();
653 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
654 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
655 auto singletons = singletons_.view_device();
656 auto singletonDiags = singletonDiagonals_.view_device();
657 auto revperm = reverseperm_.view_device();
658 //First, solve the singletons.
659 {
660 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
661 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
662 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
663 {
664 local_ordinal_type row = singletons(singletonIndex);
665 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
666 });
667 }
668 //Next, permute OverlappingB to ReducedReorderedB.
669 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
670 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
671 {
672 reducedB(row, col) = b(revperm(row), col);
673 });
674 //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction)
675 //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity.
676 //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB.
677 if(numSingletons_)
678 {
679 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false);
680 A_unfiltered_->apply(OverlappingY, singletonUpdates);
681 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
682 // auto revperm = reverseperm_.view_device();
683 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
684 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
685 {
686 local_ordinal_type origRow = revperm(row);
687 reducedB(row, col) -= updatesView(origRow, col);
688 });
689 }
690}
691
692template<class MatrixType>
694 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
695 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY) const
696{
697 //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews,
698 //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
699 TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
700 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
701 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
702 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
703 auto revperm = reverseperm_.view_device();
704 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
705 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
706 {
707 y(revperm(row), col) = reducedY(row, col);
708 });
709}
710
711template<class MatrixType>
713{
714 return true;
715}
716
717
718template<class MatrixType>
720{
721 return true;
722}
723
724template<class MatrixType>
726{
727 // Reordering doesn't change the Frobenius norm.
728 return A_->getFrobeniusNorm ();
729}
730
731template<class MatrixType>
732bool
734mapPairsAreFitted (const row_matrix_type& A)
735{
736 const map_type& rangeMap = * (A.getRangeMap ());
737 const map_type& rowMap = * (A.getRowMap ());
738 const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
739
740 const map_type& domainMap = * (A.getDomainMap ());
741 const map_type& columnMap = * (A.getColMap ());
742 const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
743
744 //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
745 //This means that it can return different values on different ranks. This can cause MPI to hang,
746 //even though it's supposed to terminate globally when any single rank does.
747 //
748 //This function doesn't need to be fast since it's debug-only code.
749 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
750 int globalSuccess;
751
752 Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
753
754 return globalSuccess == 1;
755}
756
757
758template<class MatrixType>
759bool
761mapPairIsFitted (const map_type& map1, const map_type& map2)
762{
763 return map1.isLocallyFitted (map2);
764}
765
766
767}} // namespace Ifpack2::Details
768
769#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
770 template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
771
772#endif
773
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74