Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
53#ifndef AMESOS2_SUPERLU_DEF_HPP
54#define AMESOS2_SUPERLU_DEF_HPP
55
56#include <Teuchos_Tuple.hpp>
57#include <Teuchos_ParameterList.hpp>
58#include <Teuchos_StandardParameterEntryValidators.hpp>
59
62
63#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
64// TODO: This 'using namespace SLU' is not a good thing.
65// It was added because kernels does not namespace SuperLU but Amesos2 does.
66// Declaring the namespace SLU allows us to reuse that file without duplication.
67// We need to determine a uniform system to avoid this this but for now, at least
68// this issue is only present when TRSV is enabled.
69using namespace SLU;
70#include "KokkosSparse_sptrsv_superlu.hpp"
71#endif
72
73namespace Amesos2 {
74
75
76template <class Matrix, class Vector>
78 Teuchos::RCP<const Matrix> A,
79 Teuchos::RCP<Vector> X,
80 Teuchos::RCP<const Vector> B )
81 : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
82#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
83 , sptrsv_invert_diag_(true)
84 , sptrsv_invert_offdiag_ (false)
85 , sptrsv_u_in_csr_ (true)
86 , sptrsv_merge_supernodes_ (false)
87 , sptrsv_use_spmv_ (false)
88#endif
89 , is_contiguous_(true) // default is set by params
90 , use_triangular_solves_(false) // default is set by params
91 , use_metis_(false)
92 , symmetrize_metis_(true)
93{
94 // ilu_set_default_options is called later in set parameter list if required.
95 // This is not the ideal way, but the other option to always call
96 // ilu_set_default_options here and assuming it won't have any side effect
97 // in the TPL is more dangerous. It is not a good idea to rely on external
98 // libraries' internal "features".
99 SLU::set_default_options(&(data_.options));
100 // Override some default options
101 data_.options.PrintStat = SLU::NO;
102
103 SLU::StatInit(&(data_.stat));
104
105 Kokkos::resize(data_.perm_r, this->globalNumRows_);
106 Kokkos::resize(data_.perm_c, this->globalNumCols_);
107 Kokkos::resize(data_.etree, this->globalNumCols_);
108 Kokkos::resize(data_.R, this->globalNumRows_);
109 Kokkos::resize(data_.C, this->globalNumCols_);
110
111 data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
112 data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
113
114 data_.equed = 'N'; // No equilibration
115 data_.rowequ = false;
116 data_.colequ = false;
117 data_.A.Store = NULL;
118 data_.L.Store = NULL;
119 data_.U.Store = NULL;
120 data_.X.Store = NULL;
121 data_.B.Store = NULL;
122
123 ILU_Flag_=false; // default: turn off ILU
124}
125
126
127template <class Matrix, class Vector>
129{
130 /* Free Superlu data_types
131 * - Matrices
132 * - Vectors
133 * - Stat object
134 */
135 SLU::StatFree( &(data_.stat) ) ;
136
137 // Storage is initialized in numericFactorization_impl()
138 if ( data_.A.Store != NULL ){
139 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
140 }
141
142 // only root allocated these SuperMatrices.
143 if ( data_.L.Store != NULL ){ // will only be true for this->root_
144 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145 SLU::Destroy_CompCol_Matrix( &(data_.U) );
146 }
147}
148
149template <class Matrix, class Vector>
150std::string
152{
153 std::ostringstream oss;
154 oss << "SuperLU solver interface";
155 if (ILU_Flag_) {
156 oss << ", \"ILUTP\" : {";
157 oss << "drop tol = " << data_.options.ILU_DropTol;
158 oss << ", fill factor = " << data_.options.ILU_FillFactor;
159 oss << ", fill tol = " << data_.options.ILU_FillTol;
160 switch(data_.options.ILU_MILU) {
161 case SLU::SMILU_1 :
162 oss << ", MILU 1";
163 break;
164 case SLU::SMILU_2 :
165 oss << ", MILU 2";
166 break;
167 case SLU::SMILU_3 :
168 oss << ", MILU 3";
169 break;
170 case SLU::SILU :
171 default:
172 oss << ", regular ILU";
173 }
174 switch(data_.options.ILU_Norm) {
175 case SLU::ONE_NORM :
176 oss << ", 1-norm";
177 break;
178 case SLU::TWO_NORM :
179 oss << ", 2-norm";
180 break;
181 case SLU::INF_NORM :
182 default:
183 oss << ", infinity-norm";
184 }
185 oss << "}";
186 } else {
187 oss << ", direct solve";
188 }
189 return oss.str();
190 /*
191
192 // ILU parameters
193 if( parameterList->isParameter("RowPerm") ){
194 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
195 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
196 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
197 }
198
199
200 */
201}
202
203template<class Matrix, class Vector>
204int
206{
207 /*
208 * Get column permutation vector perm_c[], according to permc_spec:
209 * permc_spec = NATURAL: natural ordering
210 * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
211 * permc_spec = MMD_ATA: minimum degree on structure of A'*A
212 * permc_spec = COLAMD: approximate minimum degree column ordering
213 * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
214 */
215 int permc_spec = data_.options.ColPerm;
216 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
217#ifdef HAVE_AMESOS2_TIMERS
218 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
219#endif
220
221 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
222 }
223
224 return(0);
225}
226
227
228template <class Matrix, class Vector>
229int
231{
232 /*
233 * SuperLU performs symbolic factorization and numeric factorization
234 * together, but does leave some options for reusing symbolic
235 * structures that have been created on previous factorizations. If
236 * our Amesos2 user calls this function, that is an indication that
237 * the symbolic structure of the matrix is no longer valid, and
238 * SuperLU should do the factorization from scratch.
239 *
240 * This can be accomplished by setting the options.Fact flag to
241 * DOFACT, as well as setting our own internal flag to false.
242 */
243#ifdef HAVE_AMESOS2_TIMERS
244 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
245#endif
246 same_symbolic_ = false;
247 data_.options.Fact = SLU::DOFACT;
248
249 if (use_metis_) {
250#ifdef HAVE_AMESOS2_TIMERS
251 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
252 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
253#endif
254 size_t n = this->globalNumRows_;
255 size_t nz = this->globalNumNonZeros_;
256 host_size_type_array host_rowind_view_ = host_rows_view_;
257 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
258
259 /* symmetrize the matrix (A + A') if needed */
260 if (symmetrize_metis_) {
261 int new_nz = 0;
262 int *new_col_ptr;
263 int *new_row_ind;
264
265 // NOTE: both size_type and ordinal_type are defined as int
266 // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
267 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
268 &new_nz, &new_col_ptr, &new_row_ind);
269
270 // reallocate (so that we won't overwrite the original)
271 // and copy for output
272 nz = new_nz;
273 Kokkos::resize (host_rowind_view_, nz);
274 Kokkos::realloc(host_colptr_view_, n+1);
275 for (size_t i = 0; i <= n; i++) {
276 host_colptr_view_(i) = new_col_ptr[i];
277 }
278 for (size_t i = 0; i < nz; i++) {
279 host_rowind_view_(i) = new_row_ind[i];
280 }
281
282 // free
283 SLU::SUPERLU_FREE(new_col_ptr);
284 if (new_nz > 0) {
285 SLU::SUPERLU_FREE(new_row_ind);
286 }
287 }
288
289 // reorder will convert both graph and perm/iperm to the internal METIS integer type
290 using metis_int_array_type = host_ordinal_type_array;
291 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
292 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
293
294 // call METIS
295 size_t new_nnz = 0;
296 Amesos2::Util::reorder(
297 host_colptr_view_, host_rowind_view_,
298 metis_perm, metis_iperm, new_nnz, false);
299
300 for (size_t i = 0; i < n; i++) {
301 data_.perm_r(i) = metis_iperm(i);
302 data_.perm_c(i) = metis_iperm(i);
303 }
304
305 // SLU will use user-specified METIS ordering
306 data_.options.ColPerm = SLU::MY_PERMC;
307 data_.options.RowPerm = SLU::MY_PERMR;
308 // Turn off Equil not to mess up METIS ordering?
309 //data_.options.Equil = SLU::NO;
310 }
311 return(0);
312}
313
314
315template <class Matrix, class Vector>
316int
318{
319 using Teuchos::as;
320
321 // Cleanup old L and U matrices if we are not reusing a symbolic
322 // factorization. Stores and other data will be allocated in gstrf.
323 // Only rank 0 has valid pointers
324 if ( !same_symbolic_ && data_.L.Store != NULL ){
325 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
326 SLU::Destroy_CompCol_Matrix( &(data_.U) );
327 data_.L.Store = NULL;
328 data_.U.Store = NULL;
329 }
330
331 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
332
333 int info = 0;
334 if ( this->root_ ){
335
336#ifdef HAVE_AMESOS2_DEBUG
337 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
338 std::runtime_error,
339 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
340 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
341 std::runtime_error,
342 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
343#endif
344
345 if( data_.options.Equil == SLU::YES ){
346 magnitude_type rowcnd, colcnd, amax;
347 int info2 = 0;
348
349 // calculate row and column scalings
350 function_map::gsequ(&(data_.A), data_.R.data(),
351 data_.C.data(), &rowcnd, &colcnd,
352 &amax, &info2);
353 TEUCHOS_TEST_FOR_EXCEPTION
354 (info2 < 0, std::runtime_error,
355 "SuperLU's gsequ function returned with status " << info2 << " < 0."
356 " This means that argument " << (-info2) << " given to the function"
357 " had an illegal value.");
358 if (info2 > 0) {
359 if (info2 <= data_.A.nrow) {
360 TEUCHOS_TEST_FOR_EXCEPTION
361 (true, std::runtime_error, "SuperLU's gsequ function returned with "
362 "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
363 << ". This means that row " << info2 << " of A is exactly zero.");
364 }
365 else if (info2 > data_.A.ncol) {
366 TEUCHOS_TEST_FOR_EXCEPTION
367 (true, std::runtime_error, "SuperLU's gsequ function returned with "
368 "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
369 << ". This means that column " << (info2 - data_.A.nrow) << " of "
370 "A is exactly zero.");
371 }
372 else {
373 TEUCHOS_TEST_FOR_EXCEPTION
374 (true, std::runtime_error, "SuperLU's gsequ function returned "
375 "with info = " << info2 << " > 0, but its value is not in the "
376 "range permitted by the documentation. This should never happen "
377 "(it appears to be SuperLU's fault).");
378 }
379 }
380
381 // apply row and column scalings if necessary
382 function_map::laqgs(&(data_.A), data_.R.data(),
383 data_.C.data(), rowcnd, colcnd,
384 amax, &(data_.equed));
385
386 // check what types of equilibration was actually done
387 data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
388 data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
389 }
390
391 // Apply the column permutation computed in preOrdering. Place the
392 // column-permuted matrix in AC
393 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
394 data_.etree.data(), &(data_.AC));
395
396 { // Do factorization
397#ifdef HAVE_AMESOS2_TIMERS
398 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
399#endif
400
401#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
402 std::cout << "Superlu:: Before numeric factorization" << std::endl;
403 std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
404 std::cout << "rowind_ : " << rowind_.toString() << std::endl;
405 std::cout << "colptr_ : " << colptr_.toString() << std::endl;
406#endif
407
408 if(ILU_Flag_==false) {
409 function_map::gstrf(&(data_.options), &(data_.AC),
410 data_.relax, data_.panel_size, data_.etree.data(),
411 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
412 &(data_.L), &(data_.U),
413#ifdef HAVE_AMESOS2_SUPERLU5_API
414 &(data_.lu),
415#endif
416 &(data_.stat), &info);
417 }
418 else {
419 function_map::gsitrf(&(data_.options), &(data_.AC),
420 data_.relax, data_.panel_size, data_.etree.data(),
421 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
422 &(data_.L), &(data_.U),
423#ifdef HAVE_AMESOS2_SUPERLU5_API
424 &(data_.lu),
425#endif
426 &(data_.stat), &info);
427 }
428
429 if (data_.options.ConditionNumber == SLU::YES) {
430 char norm[1];
431 if (data_.options.Trans == SLU::NOTRANS) {
432 *(unsigned char *)norm = '1';
433 } else {
434 *(unsigned char *)norm = 'I';
435 }
436
437 data_.anorm = function_map::langs(norm, &(data_.A));
438 function_map::gscon(norm, &(data_.L), &(data_.U),
439 data_.anorm, &(data_.rcond),
440 &(data_.stat), &info);
441 }
442 }
443 // Cleanup. AC data will be alloc'd again for next factorization (if at all)
444 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
445
446 // Set the number of non-zero values in the L and U factors
447 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
448 ((SLU::NCformat*)data_.U.Store)->nnz) );
449 }
450
451 /* All processes should have the same error code */
452 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
453
454 global_size_type info_st = as<global_size_type>(info);
455 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
456 std::runtime_error,
457 "Factorization complete, but matrix is singular. Division by zero eminent");
458 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
459 std::runtime_error,
460 "Memory allocation failure in Superlu factorization");
461
462 data_.options.Fact = SLU::FACTORED;
463 same_symbolic_ = true;
464
465 if(use_triangular_solves_) {
466#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
467 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
468 if (this->getComm()->getRank()) {
469 std::cout << " > Metis : " << (use_metis_ ? "YES" : "NO") << std::endl;
470 std::cout << " > Equil : " << (data_.options.Equil == SLU::YES ? "YES" : "NO") << std::endl;
471 std::cout << " > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ? "YES" : "NO") << std::endl;
472 std::cout << " > Invert diag : " << sptrsv_invert_diag_ << std::endl;
473 std::cout << " > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
474 std::cout << " > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
475 std::cout << " > Merge : " << sptrsv_merge_supernodes_ << std::endl;
476 std::cout << " > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
477 }
478 //std::cout << myRank << " : siize(A) " << (data_.A.nrow) << " x " << (data_.A.ncol) << std::endl;
479 //std::cout << myRank << " : nnz(A) " << ((SLU::NCformat*)data_.A.Store)->nnz << std::endl;
480 //std::cout << myRank << " : nnz(L) " << ((SLU::SCformat*)data_.L.Store)->nnz << std::endl;
481 //std::cout << myRank << " : nnz(U) " << ((SLU::SCformat*)data_.U.Store)->nnz << std::endl;
482 #endif
483#endif
484#ifdef HAVE_AMESOS2_TIMERS
485 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
486 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
487#endif
488 triangular_solve_factor();
489 }
490
491 return(info);
492}
493
494
495template <class Matrix, class Vector>
496int
498 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
499{
500 using Teuchos::as;
501#ifdef HAVE_AMESOS2_TIMERS
502 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
503 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
504#endif
505
506 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
507 const size_t nrhs = X->getGlobalNumVectors();
508
509 bool bDidAssignX = false; // will be set below
510 bool bDidAssignB = false; // will be set below
511 { // Get values from RHS B
512#ifdef HAVE_AMESOS2_TIMERS
513 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
514 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
515#endif
516
517 // In general we may want to write directly to the x space without a copy.
518 // So we 'get' x which may be a direct view assignment to the MV.
519 const bool initialize_data = true;
520 const bool do_not_initialize_data = false;
521 if(use_triangular_solves_) { // to device
522#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
523 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
524 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
525 as<size_t>(ld_rhs),
526 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
527 this->rowIndexBase_);
528 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
529 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
530 as<size_t>(ld_rhs),
531 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
532 this->rowIndexBase_);
533#endif
534 }
535 else { // to host
536 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
537 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
538 as<size_t>(ld_rhs),
539 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
540 this->rowIndexBase_);
541 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
542 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
543 as<size_t>(ld_rhs),
544 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
545 this->rowIndexBase_);
546 }
547 }
548
549 // If equilibration was applied at numeric, then gssvx and gsisx are going to
550 // modify B, so we can't use the optimized assignment to B since we'll change
551 // the source test vector and then fail the subsequent cycle. We need a copy.
552 // If bDidAssignB is false, then we skip the copy since it was copied already.
553 if(bDidAssignB && data_.equed != 'N') {
554 if(use_triangular_solves_) { // to device
555#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
556 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
557 device_bValues_.extent(0), device_bValues_.extent(1));
558 Kokkos::deep_copy(copyB, device_bValues_);
559 device_bValues_ = copyB;
560#endif
561 }
562 else {
563 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
564 host_bValues_.extent(0), host_bValues_.extent(1));
565 Kokkos::deep_copy(copyB, host_bValues_);
566 host_bValues_ = copyB;
567 }
568 }
569
570 int ierr = 0; // returned error code
571
572 magnitude_type rpg, rcond;
573 if ( this->root_ ) {
574 // Note: the values of B and X (after solution) are adjusted
575 // appropriately within gssvx for row and column scalings.
576 { // Do solve!
577#ifdef HAVE_AMESOS2_TIMERS
578 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
579#endif
580
581 if(use_triangular_solves_) {
582 triangular_solve();
583 }
584 else {
585 Kokkos::resize(data_.ferr, nrhs);
586 Kokkos::resize(data_.berr, nrhs);
587
588 {
589 #ifdef HAVE_AMESOS2_TIMERS
590 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
591 #endif
592 SLU::Dtype_t dtype = type_map::dtype;
593 int i_ld_rhs = as<int>(ld_rhs);
594 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
595 convert_bValues_, host_bValues_, i_ld_rhs,
596 SLU::SLU_DN, dtype, SLU::SLU_GE);
597 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
598 convert_xValues_, host_xValues_, i_ld_rhs,
599 SLU::SLU_DN, dtype, SLU::SLU_GE);
600 }
601
602 if(ILU_Flag_==false) {
603 function_map::gssvx(&(data_.options), &(data_.A),
604 data_.perm_c.data(), data_.perm_r.data(),
605 data_.etree.data(), &(data_.equed), data_.R.data(),
606 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
607 &(data_.X), &rpg, &rcond, data_.ferr.data(),
608 data_.berr.data(),
609 #ifdef HAVE_AMESOS2_SUPERLU5_API
610 &(data_.lu),
611 #endif
612 &(data_.mem_usage), &(data_.stat), &ierr);
613 }
614 else {
615 function_map::gsisx(&(data_.options), &(data_.A),
616 data_.perm_c.data(), data_.perm_r.data(),
617 data_.etree.data(), &(data_.equed), data_.R.data(),
618 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
619 &(data_.X), &rpg, &rcond,
620 #ifdef HAVE_AMESOS2_SUPERLU5_API
621 &(data_.lu),
622 #endif
623 &(data_.mem_usage), &(data_.stat), &ierr);
624 }
625
626 // Cleanup X and B stores
627 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
628 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
629 data_.X.Store = NULL;
630 data_.B.Store = NULL;
631 }
632
633 } // do solve
634
635 // convert back to Kokkos views
636 {
637#ifdef HAVE_AMESOS2_TIMERS
638 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
639#endif
640 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
641 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
642 }
643
644 // Cleanup X and B stores
645 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
646 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
647 data_.X.Store = NULL;
648 data_.B.Store = NULL;
649 }
650
651 /* All processes should have the same error code */
652 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
653
654 global_size_type ierr_st = as<global_size_type>(ierr);
655 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
656 std::invalid_argument,
657 "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
658 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
659 std::runtime_error,
660 "Factorization complete, but U is exactly singular" );
661 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
662 std::runtime_error,
663 "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
664 "memory before allocation failure occured." );
665
666 /* Update X's global values */
667
668 // if bDidAssignX, then we solved straight to the adapter's X memory space without
669 // requiring additional memory allocation, so the x data is already in place.
670 if(!bDidAssignX) {
671#ifdef HAVE_AMESOS2_TIMERS
672 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
673#endif
674
675 if(use_triangular_solves_) { // to device
676#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
677 Util::put_1d_data_helper_kokkos_view<
678 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
679 as<size_t>(ld_rhs),
680 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
681 this->rowIndexBase_);
682#endif
683 }
684 else {
685 Util::put_1d_data_helper_kokkos_view<
686 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
687 as<size_t>(ld_rhs),
688 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
689 this->rowIndexBase_);
690 }
691 }
692
693 return(ierr);
694}
695
696
697template <class Matrix, class Vector>
698bool
700{
701 // The Superlu factorization routines can handle square as well as
702 // rectangular matrices, but Superlu can only apply the solve routines to
703 // square matrices, so we check the matrix for squareness.
704 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
705}
706
707
708template <class Matrix, class Vector>
709void
710Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
711{
712 using Teuchos::RCP;
713 using Teuchos::getIntegralValue;
714 using Teuchos::ParameterEntryValidator;
715
716 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
717
718 ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
719 if (ILU_Flag_) {
720 SLU::ilu_set_default_options(&(data_.options));
721 // Override some default options
722 data_.options.PrintStat = SLU::NO;
723 }
724
725 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
726 // The SuperLU transpose option can override the Amesos2 option
727 if( parameterList->isParameter("Trans") ){
728 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
729 parameterList->getEntry("Trans").setValidator(trans_validator);
730
731 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
732 }
733
734 if( parameterList->isParameter("IterRefine") ){
735 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
736 parameterList->getEntry("IterRefine").setValidator(refine_validator);
737
738 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
739 }
740
741 if( parameterList->isParameter("ColPerm") ){
742 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
743 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
744
745 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
746 }
747
748 data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
749
750 bool equil = parameterList->get<bool>("Equil", true);
751 data_.options.Equil = equil ? SLU::YES : SLU::NO;
752
753 bool condNum = parameterList->get<bool>("ConditionNumber", false);
754 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
755
756 bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
757 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
758
759 // ILU parameters
760 if( parameterList->isParameter("RowPerm") ){
761 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
762 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
763 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
764 }
765
766 /*if( parameterList->isParameter("ILU_DropRule") ) {
767 RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
768 parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
769 data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
770 }*/
771
772 data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
773
774 data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
775
776 if( parameterList->isParameter("ILU_Norm") ) {
777 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
778 parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
779 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
780 }
781
782 if( parameterList->isParameter("ILU_MILU") ) {
783 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
784 parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
785 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
786 }
787
788 data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
789
790 is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
791
792 use_metis_ = parameterList->get<bool>("UseMetis", false);
793 symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
794
795 use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
796 if(use_triangular_solves_) {
797#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
798 // specify whether to invert diagonal blocks
799 sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
800 // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
801 sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
802 // specify whether to store U in CSR format
803 sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
804 // specify whether to merge supernodes with similar sparsity structures
805 sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
806 // specify whether to use spmv for sptrsv
807 sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
808#else
809 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
810 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
811#endif
812 }
813}
814
815
816template <class Matrix, class Vector>
817Teuchos::RCP<const Teuchos::ParameterList>
819{
820 using std::string;
821 using Teuchos::tuple;
822 using Teuchos::ParameterList;
823 using Teuchos::EnhancedNumberValidator;
824 using Teuchos::setStringToIntegralParameter;
825 using Teuchos::stringToIntegralParameterEntryValidator;
826
827 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
828
829 if( is_null(valid_params) ){
830 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
831
832 setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
833 "Solve for the transpose system or not",
834 tuple<string>("TRANS","NOTRANS","CONJ"),
835 tuple<string>("Solve with transpose",
836 "Do not solve with transpose",
837 "Solve with the conjugate transpose"),
838 tuple<SLU::trans_t>(SLU::TRANS,
839 SLU::NOTRANS,
840 SLU::CONJ),
841 pl.getRawPtr());
842
843 setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
844 "Type of iterative refinement to use",
845 tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
846 tuple<string>("Do not use iterative refinement",
847 "Do single iterative refinement",
848 "Do double iterative refinement"),
849 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
850 SLU::SLU_SINGLE,
851 SLU::SLU_DOUBLE),
852 pl.getRawPtr());
853
854 // Note: MY_PERMC not yet supported
855 setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
856 "Specifies how to permute the columns of the "
857 "matrix for sparsity preservation",
858 tuple<string>("NATURAL", "MMD_AT_PLUS_A",
859 "MMD_ATA", "COLAMD"),
860 tuple<string>("Natural ordering",
861 "Minimum degree ordering on A^T + A",
862 "Minimum degree ordering on A^T A",
863 "Approximate minimum degree column ordering"),
864 tuple<SLU::colperm_t>(SLU::NATURAL,
865 SLU::MMD_AT_PLUS_A,
866 SLU::MMD_ATA,
867 SLU::COLAMD),
868 pl.getRawPtr());
869
870 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
871 = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
872 pl->set("DiagPivotThresh", 1.0,
873 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
874 diag_pivot_thresh_validator); // partial pivoting
875
876 pl->set("Equil", true, "Whether to equilibrate the system before solve");
877 pl->set("ConditionNumber", false, "Whether to approximate condition number");
878
879 pl->set("SymmetricMode", false,
880 "Specifies whether to use the symmetric mode. "
881 "Gives preference to diagonal pivots and uses "
882 "an (A^T + A)-based column permutation.");
883
884 // ILU parameters
885
886 setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
887 "Type of row permutation strategy to use",
888 tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
889 tuple<string>("Use natural ordering",
890 "Use weighted bipartite matching algorithm",
891 "Use the ordering given in perm_r input"),
892 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
893#if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
894 SLU::LargeDiag_MC64,
895#else
896 SLU::LargeDiag,
897#endif
898 SLU::MY_PERMR),
899 pl.getRawPtr());
900
901 /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
902 "Type of dropping strategy to use",
903 tuple<string>("DROP_BASIC","DROP_PROWS",
904 "DROP_COLUMN","DROP_AREA",
905 "DROP_DYNAMIC","DROP_INTERP"),
906 tuple<string>("ILUTP(t)","ILUTP(p,t)",
907 "Variant of ILUTP(p,t) for j-th column",
908 "Variant of ILUTP to control memory",
909 "Dynamically adjust threshold",
910 "Compute second dropping threshold by interpolation"),
911 tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
912 SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
913 pl.getRawPtr());*/
914
915 pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
916
917 pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
918
919 setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
920 "Type of norm to use",
921 tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
922 tuple<string>("1-norm","2-norm","inf-norm"),
923 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
924 pl.getRawPtr());
925
926 setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
927 "Type of modified ILU to use",
928 tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
929 tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
930 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
931 SLU::SMILU_3),
932 pl.getRawPtr());
933
934 pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
935
936 pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
937
938 pl->set("IsContiguous", true, "Whether GIDs contiguous");
939
940 // call METIS before SuperLU
941 pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
942 pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
943
944 pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
945#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
946 pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
947 pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
948 pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
949 pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
950 pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
951#endif
952
953 valid_params = pl;
954 }
955
956 return valid_params;
957}
958
959
960template <class Matrix, class Vector>
961bool
963{
964 using Teuchos::as;
965
966#ifdef HAVE_AMESOS2_TIMERS
967 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
968#endif
969
970 // SuperLU does not need the matrix at symbolicFactorization()
971 if( current_phase == SYMBFACT ) return false;
972
973 // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
974 if( data_.A.Store != NULL ){
975 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
976 data_.A.Store = NULL;
977 }
978
979 // Only the root image needs storage allocated
980 if( this->root_ ){
981 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
982 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
983 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
984 }
985
986 int nnz_ret = 0;
987 {
988#ifdef HAVE_AMESOS2_TIMERS
989 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
990#endif
991
992 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
993 std::runtime_error,
994 "Row and column maps have different indexbase ");
995
996 if ( is_contiguous_ == true ) {
998 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
999 host_size_type_array>::do_get(this->matrixA_.ptr(),
1000 host_nzvals_view_, host_rows_view_,
1001 host_col_ptr_view_, nnz_ret, ROOTED,
1002 ARBITRARY,
1003 this->rowIndexBase_);
1004 }
1005 else {
1007 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
1008 host_size_type_array>::do_get(this->matrixA_.ptr(),
1009 host_nzvals_view_, host_rows_view_,
1010 host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
1011 ARBITRARY,
1012 this->rowIndexBase_);
1013 }
1014 }
1015
1016 // Get the SLU data type for this type of matrix
1017 SLU::Dtype_t dtype = type_map::dtype;
1018
1019 if( this->root_ ){
1020 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
1021 std::runtime_error,
1022 "Did not get the expected number of non-zero vals");
1023
1024 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
1025 this->globalNumRows_, this->globalNumCols_,
1026 nnz_ret,
1027 convert_nzvals_, host_nzvals_view_,
1028 host_rows_view_.data(),
1029 host_col_ptr_view_.data(),
1030 SLU::SLU_NC, dtype, SLU::SLU_GE);
1031 }
1032
1033 return true;
1034}
1035
1036template <class Matrix, class Vector>
1037void
1039{
1040#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1041 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1042
1043 // convert etree to parents
1044 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1045 int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1046 Kokkos::resize(data_.parents, nsuper);
1047 for (int s = 0; s < nsuper; s++) {
1048 int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1049 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1050 data_.parents(s) = -1;
1051 } else {
1052 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1053 }
1054 }
1055
1056 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1057 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1058 if (data_.options.Equil == SLU::YES) {
1059 deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1060 deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1061 }
1062
1063 bool condition_flag = false;
1064 if (data_.options.ConditionNumber == SLU::YES) {
1065 using STM = Teuchos::ScalarTraits<magnitude_type>;
1066 const magnitude_type eps = STM::eps ();
1067
1068 SCformat *Lstore = (SCformat*)(data_.L.Store);
1069 int nsuper = 1 + Lstore->nsuper;
1070 int *nb = Lstore->sup_to_col;
1071 int max_cols = 0;
1072 for (int i = 0; i < nsuper; i++) {
1073 if (nb[i+1] - nb[i] > max_cols) {
1074 max_cols = nb[i+1] - nb[i];
1075 }
1076 }
1077
1078 // when rcond is small, it is ill-conditioned and flag is true
1079 const magnitude_type multiply_fact (10.0); // larger the value, more likely flag is true, and no invert
1080 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1081
1082#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1083 int n = data_.perm_r.extent(0);
1084 std::cout << this->getComm()->getRank()
1085 << " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
1086 << ", num super cols = " << nsuper << ", max super cols = " << max_cols
1087 << " -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1088 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ? " (okay)" : " (warn)") << std::endl;
1089#endif
1090 }
1091
1092 // Create handles for U and U^T solves
1093 if (sptrsv_use_spmv_ && !condition_flag) {
1094 device_khL_.create_sptrsv_handle(
1095 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1096 device_khU_.create_sptrsv_handle(
1097 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1098 } else {
1099 device_khL_.create_sptrsv_handle(
1100 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1101 device_khU_.create_sptrsv_handle(
1102 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1103 }
1104
1105 // specify whether to store U in CSR format
1106 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1107
1108 // specify whether to merge supernodes with similar sparsity structure
1109 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1110 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1111
1112 // invert only if flag is not on
1113 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1114 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1115
1116 // specify whether to invert diagonal blocks
1117 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1118 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1119
1120 // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1121 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1122 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1123
1124 // set etree
1125 device_khL_.set_sptrsv_etree(data_.parents.data());
1126 device_khU_.set_sptrsv_etree(data_.parents.data());
1127
1128 // set permutation
1129 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1130 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1131
1132 int block_size = -1; // TODO: add needed params
1133 if (block_size >= 0) {
1134 std::cout << " Block Size : " << block_size << std::endl;
1135 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1136 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1137 }
1138
1139 // Do symbolic analysis
1140 {
1141#ifdef HAVE_AMESOS2_TIMERS
1142 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1143 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1144#endif
1145 KokkosSparse::Experimental::sptrsv_symbolic
1146 (&device_khL_, &device_khU_, data_.L, data_.U);
1147 }
1148
1149 // Do numerical compute
1150 {
1151#ifdef HAVE_AMESOS2_TIMERS
1152 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1153 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1154#endif
1155 KokkosSparse::Experimental::sptrsv_compute
1156 (&device_khL_, &device_khU_, data_.L, data_.U);
1157 }
1158#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1159}
1160
1161template <class Matrix, class Vector>
1162void
1163Superlu<Matrix,Vector>::triangular_solve() const
1164{
1165#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1166 size_t ld_rhs = device_xValues_.extent(0);
1167 size_t nrhs = device_xValues_.extent(1);
1168
1169 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1170 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1171
1172 // forward pivot
1173 auto local_device_bValues = device_bValues_;
1174 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1175 auto local_device_trsv_rhs = device_trsv_rhs_;
1176
1177 if (data_.rowequ) {
1178 auto local_device_trsv_R = device_trsv_R_;
1179 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1180 KOKKOS_LAMBDA(size_t j) {
1181 for(size_t k = 0; k < nrhs; ++k) {
1182 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1183 }
1184 });
1185 } else {
1186 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1187 KOKKOS_LAMBDA(size_t j) {
1188 for(size_t k = 0; k < nrhs; ++k) {
1189 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1190 }
1191 });
1192 }
1193
1194 for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1195 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1196 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1197
1198 // do L solve= - numeric (only rhs is modified) on the default device/host space
1199 {
1200#ifdef HAVE_AMESOS2_TIMERS
1201 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1202 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1203#endif
1204 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1205 }
1206 // do L^T solve - numeric (only rhs is modified) on the default device/host space
1207 {
1208#ifdef HAVE_AMESOS2_TIMERS
1209 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1210 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1211#endif
1212 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1213 }
1214 } // end loop over rhs vectors
1215
1216 // backward pivot
1217 auto local_device_xValues = device_xValues_;
1218 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1219 if (data_.colequ) {
1220 auto local_device_trsv_C = device_trsv_C_;
1221 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1222 KOKKOS_LAMBDA(size_t j) {
1223 for(size_t k = 0; k < nrhs; ++k) {
1224 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1225 }
1226 });
1227 } else {
1228 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1229 KOKKOS_LAMBDA(size_t j) {
1230 for(size_t k = 0; k < nrhs; ++k) {
1231 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1232 }
1233 });
1234 }
1235#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1236}
1237
1238template<class Matrix, class Vector>
1239const char* Superlu<Matrix,Vector>::name = "SuperLU";
1240
1241
1242} // end namespace Amesos2
1243
1244#endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2 Superlu declarations.
@ ROOTED
Definition: Amesos2_TypeDecl.hpp:127
@ CONTIGUOUS_AND_ROOTED
Definition: Amesos2_TypeDecl.hpp:128
@ ARBITRARY
Definition: Amesos2_TypeDecl.hpp:143
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition: Amesos2_SolverCore_decl.hpp:106
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:77
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:699
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:317
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:205
static const char * name
Name of this solver interface.
Definition: Amesos2_Superlu_decl.hpp:84
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:818
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:497
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:230
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:128
Superlu(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superlu_def.hpp:77
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:710
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:962
std::string description() const
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:151
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:652