Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_GaussSeidel.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ***********************************************************************
38//@HEADER
39*/
40
41#ifndef IFPACK2_GAUSS_SEIDEL_HPP
42#define IFPACK2_GAUSS_SEIDEL_HPP
43
44namespace Ifpack2
45{
46namespace Details
47{
48 template<typename Scalar, typename LO, typename GO, typename NT>
49 struct GaussSeidel
50 {
51 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
52 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
53 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
54 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
55 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
56 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
57 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
58 using mem_space_t = typename local_matrix_device_type::memory_space;
59 using rowmap_t = typename local_matrix_device_type::row_map_type::HostMirror;
60 using entries_t = typename local_matrix_device_type::index_type::HostMirror;
61 using values_t = typename local_matrix_device_type::values_type::HostMirror;
62 using const_rowmap_t = typename rowmap_t::const_type;
63 using const_entries_t = typename entries_t::const_type;
64 using const_values_t = typename values_t::const_type;
65 using Offset = typename rowmap_t::non_const_value_type;
66 using IST = typename crs_matrix_type::impl_scalar_type;
67 using KAT = Kokkos::ArithTraits<IST>;
68 //Type of view representing inverse diagonal blocks, and its HostMirror.
69 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
70 using InverseBlocksHost = typename InverseBlocks::HostMirror;
71
72 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
73 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
74 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
75
76 //Setup for CrsMatrix
77 GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
78 {
79 A = A_;
80 numRows = A_->getLocalNumRows();
81 haveRowMatrix = false;
82 inverseDiagVec = inverseDiagVec_;
83 applyRows = applyRows_;
84 blockSize = 1;
85 omega = omega_;
86 }
87
88 GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
89 {
90 A = A_;
91 numRows = A_->getLocalNumRows();
92 haveRowMatrix = true;
93 inverseDiagVec = inverseDiagVec_;
94 applyRows = applyRows_;
95 blockSize = 1;
96 omega = omega_;
97 //Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
98 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
99 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
100 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
101 size_t maxDegree = A_->getLocalMaxNumRowEntries();
102 nonconst_values_host_view_type rowValues("rowValues", maxDegree);
103 nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
104 size_t accum = 0;
105 for(LO i = 0; i <= numRows; i++)
106 {
107 rowMatrixRowmap(i) = accum;
108 if(i == numRows)
109 break;
110 size_t degree;
111 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
112 accum += degree;
113 size_t rowBegin = rowMatrixRowmap(i);
114 for(size_t j = 0; j < degree; j++)
115 {
116 rowMatrixEntries(rowBegin + j) = rowEntries[j];
117 rowMatrixValues(rowBegin + j) = rowValues[j];
118 }
119 }
120 }
121
122 GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
123 {
124 A = A_;
125 numRows = A_->getLocalNumRows();
126 haveRowMatrix = false;
127 //note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
128 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
129 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
130 applyRows = applyRows_;
131 omega = omega_;
132 blockSize = A_->getBlockSize();
133 }
134
135 template<bool useApplyRows, bool multipleRHS, bool omegaNotOne>
136 void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction)
137 {
138 //note: direction is either Forward or Backward (Symmetric is handled in apply())
139 LO numApplyRows = useApplyRows ? (LO) applyRows.size() : numRows;
140 //note: inverseDiagMV always has only one column
141 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
142 bool forward = direction == Tpetra::Forward;
143 if(multipleRHS)
144 {
145 LO numVecs = x.getNumVectors();
146 Teuchos::Array<IST> accum(numVecs);
147 auto xlcl = x.get2dViewNonConst();
148 auto blcl = b.get2dView();
149 for(LO i = 0; i < numApplyRows; i++)
150 {
151 LO row;
152 if(forward)
153 row = useApplyRows ? applyRows[i] : i;
154 else
155 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
156 for(LO k = 0; k < numVecs; k++)
157 {
158 accum[k] = KAT::zero();
159 }
160 Offset rowBegin = Arowmap(row);
161 Offset rowEnd = Arowmap(row + 1);
162 for(Offset j = rowBegin; j < rowEnd; j++)
163 {
164 LO col = Aentries(j);
165 IST val = Avalues(j);
166 for(LO k = 0; k < numVecs; k++)
167 {
168 accum[k] += val * IST(xlcl[k][col]);
169 }
170 }
171 //Update x
172 IST dinv = inverseDiag(row);
173 for(LO k = 0; k < numVecs; k++)
174 {
175 if(omegaNotOne)
176 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
177 else
178 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
179 }
180 }
181 }
182 else
183 {
184 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
185 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
186 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
187 for(LO i = 0; i < numApplyRows; i++)
188 {
189 LO row;
190 if(forward)
191 row = useApplyRows ? applyRows[i] : i;
192 else
193 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
194 IST accum = KAT::zero();
195 Offset rowBegin = Arowmap(row);
196 Offset rowEnd = Arowmap(row + 1);
197 for(Offset j = rowBegin; j < rowEnd; j++)
198 {
199 accum += Avalues(j) * xlcl(Aentries(j));
200 }
201 //Update x
202 IST dinv = dlcl(row);
203 if(omegaNotOne)
204 xlcl(row) += omega * dinv * (blcl(row) - accum);
205 else
206 xlcl(row) += dinv * (blcl(row) - accum);
207 }
208 }
209 }
210
211 void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction)
212 {
213 if(direction == Tpetra::Symmetric)
214 {
215 applyBlock(x, b, Tpetra::Forward);
216 applyBlock(x, b, Tpetra::Backward);
217 return;
218 }
219 auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
220 if(Abcrs.is_null())
221 throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
222 auto Avalues = Abcrs->getValuesHost();
223 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
224 auto Arowmap = AlclGraph.row_map;
225 auto Aentries = AlclGraph.entries;
226 //Number of scalars in Avalues per block entry.
227 Offset bs2 = blockSize * blockSize;
228 LO numVecs = x.getNumVectors();
229 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
230 Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
231 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
232 Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
233 bool useApplyRows = !applyRows.is_null();
234 bool forward = direction == Tpetra::Forward;
235 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
236 for(LO i = 0; i < numApplyRows; i++)
237 {
238 LO row;
239 if(forward)
240 row = useApplyRows ? applyRows[i] : i;
241 else
242 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
243 for(LO v = 0; v < numVecs; v++)
244 {
245 auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
246 for(LO k = 0; k < blockSize; k++)
247 {
248 accum(k, v) = KAT::zero();
249 }
250 }
251 Offset rowBegin = Arowmap(row);
252 Offset rowEnd = Arowmap(row + 1);
253 for(Offset j = rowBegin; j < rowEnd; j++)
254 {
255 LO col = Aentries(j);
256 const IST* blk = &Avalues(j * bs2);
257 for(LO v = 0; v < numVecs; v++)
258 {
259 auto xCol = x.getLocalBlockHost (col, v, Tpetra::Access::ReadOnly);
260 for(LO br = 0; br < blockSize; br++)
261 {
262 for(LO bc = 0; bc < blockSize; bc++)
263 {
264 IST Aval = blk[br * blockSize + bc];
265 accum(br, v) += Aval * xCol(bc);
266 }
267 }
268 }
269 }
270 //Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
271 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
272 Kokkos::deep_copy(dinv_accum, KAT::zero());
273 for(LO v = 0; v < numVecs; v++)
274 {
275 auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
276 for(LO br = 0; br < blockSize; br++)
277 {
278 accum(br, v) = bRow(br) - accum(br, v);
279 }
280 }
281 for(LO v = 0; v < numVecs; v++)
282 {
283 for(LO br = 0; br < blockSize; br++)
284 {
285 for(LO bc = 0; bc < blockSize; bc++)
286 {
287 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
288 }
289 }
290 }
291 //Update x
292 for(LO v = 0; v < numVecs; v++)
293 {
294 auto xRow = x.getLocalBlockHost (row, v, Tpetra::Access::ReadWrite);
295 for(LO k = 0; k < blockSize; k++)
296 {
297 xRow(k) += omega * dinv_accum(k, v);
298 }
299 }
300 }
301 }
302
303 //Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
304 void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction)
305 {
306 if(direction == Tpetra::Symmetric)
307 {
308 apply(x, b, Tpetra::Forward);
309 apply(x, b, Tpetra::Backward);
310 return;
311 }
312 const_values_t Avalues;
313 const_rowmap_t Arowmap;
314 const_entries_t Aentries;
315 if(haveRowMatrix)
316 {
317 Avalues = rowMatrixValues;
318 Arowmap = rowMatrixRowmap;
319 Aentries = rowMatrixEntries;
320 }
321 else
322 {
323 auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
324 if(Acrs.is_null())
325 throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
326 auto Alcl = Acrs->getLocalMatrixHost();
327 Avalues = Alcl.values;
328 Arowmap = Alcl.graph.row_map;
329 Aentries = Alcl.graph.entries;
330 }
331 if(applyRows.is_null())
332 {
333 if(x.getNumVectors() > 1)
334 this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
335 else
336 {
337 //Optimize for the all-rows, single vector, omega = 1 case
338 if(omega == KAT::one())
339 this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
340 else
341 this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
342 }
343 }
344 else
345 {
346 this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
347 }
348 }
349
350 Teuchos::RCP<const row_matrix_type> A;
351 //For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
352 bool haveRowMatrix;
353 values_t rowMatrixValues;
354 rowmap_t rowMatrixRowmap;
355 entries_t rowMatrixEntries;
356 LO numRows;
357 IST omega;
358 //If set up with BlockCrsMatrix, the block size. Otherwise 1.
359 LO blockSize;
360 //If using a non-block matrix, the inverse diagonal.
361 Teuchos::RCP<vector_type> inverseDiagVec;
362 //If using a block matrix, the inverses of all diagonal blocks.
363 InverseBlocksHost inverseBlockDiag;
364 //If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
365 Teuchos::ArrayRCP<LO> applyRows;
366 };
367}
368}
369
370#endif
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74