Stokhos_Tpetra_Utilities.hpp
// @HEADER
// ***********************************************************************
//
// Stokhos Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
//
// ***********************************************************************
// @HEADER
#ifndef STOKHOS_TPETRA_UTILITIES_HPP
#define STOKHOS_TPETRA_UTILITIES_HPP

#include "Stokhos_Tpetra_UQ_PCE.hpp"
#include "Stokhos_Tpetra_MP_Vector.hpp"
#include "Tpetra_CrsMatrix.hpp"

namespace Stokhos {

  //! Get mean values matrix for mean-based preconditioning
  /*!
   * Default implementation for arbitrary scalar types:  the mean values
   * are simply a copy of the given values.
   */
  template <class ViewType>
  class GetMeanValsFunc {
  public:
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetMeanValsFunc(const ViewType& vals) {
      mean_vals = ViewType("mean-values", vals.extent(0));
      Kokkos::deep_copy( mean_vals, vals );
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
  };

  //! Get mean values matrix for mean-based preconditioning
  /*!
   * Specialization for Sacado::UQ::PCE, where the mean is the
   * zeroth-order PCE coefficient.
   */
  template <class Storage, class ... P>
  class GetMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
                                       P... > > {
  public:
    typedef Sacado::UQ::PCE<Storage> Scalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetMeanValsFunc(const ViewType& vals_) : vals(vals_) {
      const size_type nnz = vals.extent(0);
      typename Scalar::cijk_type mean_cijk =
        Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
      mean_vals = Kokkos::make_view<ViewType>("mean-values", mean_cijk, nnz, 1);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const {
      mean_vals(i) = vals(i).fastAccessCoeff(0);
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    ViewType vals;
    MeanViewType mean_vals;
  };

  //! Get mean values matrix for mean-based preconditioning
  /*!
   * Specialization for Sacado::MP::Vector:  the mean value for each entry
   * is computed by summing the ensemble components.
   */
  template <class Storage, class ... P>
  class GetMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
                                       P... > > {
  public:
    typedef Sacado::MP::Vector<Storage> Scalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetMeanValsFunc(const ViewType& vals_) :
      vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
    {
      const size_type nnz = vals.extent(0);
      mean_vals = ViewType("mean-values", nnz, 1);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const
    {
      typename Scalar::value_type s = 0.0;
      for (size_type j=0; j<vec_size; ++j)
        s += vals(i).fastAccessCoeff(j);
      mean_vals(i) = s;
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    ViewType vals;
    MeanViewType mean_vals;
    const size_type vec_size;
  };

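  // Example (sketch, with illustrative placeholder names such as Storage and
  // Device):  extracting mean values from a Kokkos view of PCE matrix values.
  // The parallel_for runs in the functor's constructor, so getMeanValues() is
  // ready immediately.  This mirrors how build_mean_matrix() below uses the
  // functor.
  //
  //   typedef Kokkos::View<Sacado::UQ::PCE<Storage>*, Device> values_type;
  //   values_type vals = ...;                               // matrix values
  //   GetMeanValsFunc<values_type> meanfunc(vals);
  //   typename GetMeanValsFunc<values_type>::MeanViewType mean_vals =
  //     meanfunc.getMeanValues();
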
  //! Get mean values matrix for mean-based preconditioning
  /*!
   * Like GetMeanValsFunc, but the mean values are returned in a view of the
   * underlying base scalar type.  Default implementation for arbitrary
   * scalar types simply copies the values.
   */
  template <class ViewType>
  class GetScalarMeanValsFunc {
  public:
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetScalarMeanValsFunc(const ViewType& vals) {
      mean_vals = ViewType("mean-values", vals.extent(0));
      Kokkos::deep_copy( mean_vals, vals );
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
  };

  //! Get mean values matrix for mean-based preconditioning
  /*!
   * Specialization for Sacado::UQ::PCE:  the mean values view holds the
   * zeroth-order PCE coefficient, stored as the base scalar type.
   */
  template <class Storage, class ... P>
  class GetScalarMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
                                             P... > > {
  public:
    typedef Sacado::UQ::PCE<Storage> Scalar;
    typedef typename Scalar::value_type MeanScalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetScalarMeanValsFunc(const ViewType& vals_) : vals(vals_) {
      const size_type nnz = vals.extent(0);
      mean_vals = MeanViewType("mean-values", nnz);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const {
      mean_vals(i) = vals(i).fastAccessCoeff(0);
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    ViewType vals;
    MeanViewType mean_vals;
  };

  //! Get mean values matrix for mean-based preconditioning
  /*!
   * Specialization for Sacado::MP::Vector:  the mean value for each entry is
   * the sum of the ensemble components, stored as the base scalar type.
   */
  template <class Storage, class ... P>
  class GetScalarMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
                                             P... > > {
  public:
    typedef Sacado::MP::Vector<Storage> Scalar;
    typedef typename Scalar::value_type MeanScalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetScalarMeanValsFunc(const ViewType& vals_) :
      vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
    {
      const size_type nnz = vals.extent(0);
      mean_vals = MeanViewType("mean-values", nnz);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const
    {
      typename Scalar::value_type s = 0.0;
      for (size_type j=0; j<vec_size; ++j)
        s += vals(i).fastAccessCoeff(j);
      mean_vals(i) = s;
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    ViewType vals;
    MeanViewType mean_vals;
    const size_type vec_size;
  };

  template <typename Scalar, typename LO, typename GO, typename N>
  Teuchos::RCP< Tpetra::CrsMatrix<Scalar,LO,GO,N> >
  build_mean_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
  {
    using Teuchos::RCP;
    using Teuchos::rcp;
    typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
    typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
    typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;

    KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
    KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
    const size_t ncols = kokkos_matrix.numCols();
    typedef GetMeanValsFunc<KokkosMatrixValuesType> MeanFunc;
    typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
    MeanFunc meanfunc(matrix_values);
    KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();

    // From here on we are assuming that
    // KokkosMeanMatrixValuesType == KokkosMatrixValuesType

    RCP<MatrixType> mean_matrix =
      rcp( new MatrixType(A.getCrsGraph(), mean_matrix_values) );
    mean_matrix->fillComplete();
    return mean_matrix;
  }

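  // Example (sketch, with illustrative placeholder names):  given an assembled
  // PCE-valued matrix, build_mean_matrix() returns a matrix with the same graph
  // whose values hold only the mean (zeroth-order) PCE coefficients.
  //
  //   typedef Tpetra::CrsMatrix<Sacado::UQ::PCE<Storage>,LO,GO,Node> pce_matrix_type;
  //   Teuchos::RCP<pce_matrix_type> A = ...;  // assembled elsewhere
  //   Teuchos::RCP<pce_matrix_type> A_mean = Stokhos::build_mean_matrix(*A);
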
  template <typename Scalar, typename LO, typename GO, typename N>
  Teuchos::RCP< Tpetra::CrsMatrix<typename Scalar::value_type,LO,GO,N> >
  build_mean_scalar_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
  {
    using Teuchos::RCP;
    using Teuchos::rcp;
    typedef typename Scalar::value_type BaseScalar;
    typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
    typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
    typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
    typedef typename ScalarMatrixType::local_matrix_device_type ScalarKokkosMatrixType;
    typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;

    KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
    KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
    typedef GetScalarMeanValsFunc<KokkosMatrixValuesType> MeanFunc;
    typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
    MeanFunc meanfunc(matrix_values);
    KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();

    // From here on we are assuming that
    // KokkosMeanMatrixValuesType == ScalarKokkosMatrixType::values_type

    RCP<ScalarMatrixType> mean_matrix =
      rcp( new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
    mean_matrix->fillComplete();
    return mean_matrix;
  }

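  // Example (sketch, with illustrative placeholder names):
  // build_mean_scalar_matrix() returns the mean values in a matrix over the
  // base scalar type (e.g. double), which can be handed directly to a standard
  // scalar preconditioner or solver.
  //
  //   typedef Tpetra::CrsMatrix<Sacado::UQ::PCE<Storage>,LO,GO,Node> pce_matrix_type;
  //   typedef Tpetra::CrsMatrix<double,LO,GO,Node> scalar_matrix_type;
  //   Teuchos::RCP<pce_matrix_type> A = ...;       // assembled elsewhere
  //   Teuchos::RCP<scalar_matrix_type> A_mean =
  //     Stokhos::build_mean_scalar_matrix(*A);
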
  namespace Impl {

  // Functor for copying a PCE view to a scalar view
  // (Assumes view is rank 2, LayoutLeft)
  template <typename ExecSpace>
  struct CopyPCE2Scalar {
    typedef ExecSpace exec_space;

    template <typename DstView, typename SrcView>
    CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename SrcView::non_const_value_type Scalar;
      const size_t m = src.extent(0);
      const size_t n = src.extent(1);
      const size_t p = Kokkos::dimension_scalar(src);
      Kokkos::RangePolicy<exec_space> policy(0,m);
      Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
      {
        for (size_t j=0; j<n; ++j) {
          const Scalar& s = src(i,j);
          for (size_t k=0; k<p; ++k)
            dst(i,j*p+k) = s.fastAccessCoeff(k);
        }
      });
    }
  };

  // Functor for copying a scalar view to a PCE view
  // (Assumes view is rank 2, LayoutLeft)
  template <typename ExecSpace>
  struct CopyScalar2PCE {
    typedef ExecSpace exec_space;

    template <typename DstView, typename SrcView>
    CopyScalar2PCE(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename DstView::non_const_value_type Scalar;
      const size_t m = dst.extent(0);
      const size_t n = dst.extent(1);
      const size_t p = Kokkos::dimension_scalar(dst);

      Kokkos::RangePolicy<exec_space> policy(0,m);
      Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
      {
        for (size_t j=0; j<n; ++j) {
          Scalar& d = dst(i,j);
          for (size_t k=0; k<p; ++k)
            d.fastAccessCoeff(k) = src(i,j*p+k);
        }
      });
    }
  };

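  // Illustration of the flattened layout used by these functors:  for a
  // rank-2, LayoutLeft PCE (or MP::Vector) view src with
  // p = Kokkos::dimension_scalar(src) coefficients per entry, the scalar view
  // dst has p times as many columns and
  //
  //   dst(i, j*p + k) == src(i, j).fastAccessCoeff(k),   k = 0, ..., p-1.
  //
  // For example, with p = 3, PCE column 1 occupies scalar columns 3, 4, and 5.
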
#ifdef KOKKOS_ENABLE_CUDA
  // Specialization for CopyPCE2Scalar specifically for Cuda that ensures
  // coalesced reads and writes
  template <>
  struct CopyPCE2Scalar<Kokkos::Cuda> {
    typedef Kokkos::Cuda exec_space;

    template <typename DstView, typename SrcView>
    CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename DstView::non_const_value_type Scalar;
      typedef Kokkos::TeamPolicy<exec_space> Policy;
      typedef typename Policy::member_type Member;

      const size_t m = src.extent(0);
      const size_t n = src.extent(1);
      const size_t p = Kokkos::dimension_scalar(src);

      const size_t ChunkSize = 16;
      const size_t M = (m+ChunkSize-1)/ChunkSize;

      Policy policy(M,ChunkSize,ChunkSize);
      Kokkos::parallel_for( policy, [=] __device__(const Member& team)
      {
        __shared__ Scalar tmp[ChunkSize][ChunkSize];
        const size_t i_block = blockIdx.x*ChunkSize;

        for (size_t j=0; j<n; ++j) {
          for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {

            // Make sure previous iteration has completed before overwriting tmp
            __syncthreads();

            // Read ChunkSize x ChunkSize block (coalesced on k)
            size_t i = i_block + threadIdx.y;
            size_t k = k_block + threadIdx.x;
            if (i < m && k < p)
              tmp[threadIdx.y][threadIdx.x] = src(i,j).fastAccessCoeff(k);

            // Wait for all threads to finish
            __syncthreads();

            // Write ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
            i = i_block + threadIdx.x;
            k = k_block + threadIdx.y;
            if (i < m && k < p)
              dst(i,j*p+k) = tmp[threadIdx.x][threadIdx.y];

          }
        }
      });
    }
  };

  // Specialization for CopyScalar2PCE specifically for Cuda that ensures
  // coalesced reads and writes
  template <>
  struct CopyScalar2PCE<Kokkos::Cuda> {
    typedef Kokkos::Cuda exec_space;

    template <typename DstView, typename SrcView>
    CopyScalar2PCE(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename SrcView::non_const_value_type Scalar;
      typedef Kokkos::TeamPolicy<exec_space> Policy;
      typedef typename Policy::member_type Member;

      const size_t m = dst.extent(0);
      const size_t n = dst.extent(1);
      const size_t p = Kokkos::dimension_scalar(dst);

      const size_t ChunkSize = 16;
      const size_t M = (m+ChunkSize-1)/ChunkSize;

      Policy policy(M,ChunkSize,ChunkSize);
      Kokkos::parallel_for( policy, [=] __device__(const Member& team)
      {
        __shared__ Scalar tmp[ChunkSize][ChunkSize];
        const size_t i_block = blockIdx.x*ChunkSize;

        for (size_t j=0; j<n; ++j) {
          for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {

            // Make sure previous iteration has completed before overwriting tmp
            __syncthreads();

            // Read ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
            size_t i = i_block + threadIdx.x;
            size_t k = k_block + threadIdx.y;
            if (i < m && k < p)
              tmp[threadIdx.x][threadIdx.y] = src(i,j*p+k);

            // Wait for all threads to finish
            __syncthreads();

            // Write ChunkSize x ChunkSize block (coalesced on k)
            i = i_block + threadIdx.y;
            k = k_block + threadIdx.x;
            if (i < m && k < p)
              dst(i,j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];

          }
        }
      });
    }
  };
#endif

  } // Impl

  template <typename DstView, typename SrcView>
  void copy_pce_to_scalar(const DstView& dst, const SrcView& src)
  {
    Impl::CopyPCE2Scalar<typename DstView::execution_space>(dst, src);
  }

  template <typename DstView, typename SrcView>
  void copy_scalar_to_pce(const DstView& dst, const SrcView& src)
  {
    Impl::CopyScalar2PCE<typename DstView::execution_space>(dst, src);
  }

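  // Example (sketch, with illustrative placeholder names):  convert between a
  // PCE multi-vector's device view and a flattened scalar view using the
  // wrappers above; this mirrors how MeanBasedTpetraOperator::apply() below
  // uses them.
  //
  //   auto x_pce    = X.getLocalViewDevice(Tpetra::Access::ReadOnly);        // PCE values
  //   auto x_scalar = X_flat.getLocalViewDevice(Tpetra::Access::ReadWrite);  // flattened values
  //   Stokhos::copy_pce_to_scalar(x_scalar, x_pce);   // PCE -> scalar
  //   // ... apply a scalar operator to X_flat, producing Y_flat ...
  //   auto y_pce    = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
  //   auto y_scalar = Y_flat.getLocalViewDevice(Tpetra::Access::ReadOnly);
  //   Stokhos::copy_scalar_to_pce(y_pce, y_scalar);   // scalar -> PCE
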
  // Tpetra operator wrapper allowing a mean-based operator (with double
  // scalar type) to be applied to a UQ::PCE multi-vector
  template <typename Scalar,
            typename LocalOrdinal,
            typename GlobalOrdinal,
            typename Node>
  class MeanBasedTpetraOperator :
    virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
  public:
    typedef Scalar scalar_type;
    typedef LocalOrdinal local_ordinal_type;
    typedef GlobalOrdinal global_ordinal_type;
    typedef Node node_type;
    typedef typename scalar_type::value_type base_scalar_type;
    typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_op_type;

    MeanBasedTpetraOperator(const Teuchos::RCP<const scalar_op_type>& mb_op_) :
      mb_op(mb_op_) {}

    virtual ~MeanBasedTpetraOperator() {}

    virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
    getDomainMap() const {
      return mb_op->getDomainMap();
    }

    virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
    getRangeMap() const {
      return mb_op->getRangeMap();
    }

    virtual void
    apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
           Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
           Teuchos::ETransp mode = Teuchos::NO_TRANS,
           Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
           Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const
    {
      typedef typename scalar_mv_type::device_type device_type;

      auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
      auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
      const size_t pce_size = Kokkos::dimension_scalar(xv);
      if (X_s == Teuchos::null ||
          X_s->getNumVectors() != X.getNumVectors()*pce_size)
        X_s = Teuchos::rcp(new scalar_mv_type(X.getMap(),
                                              X.getNumVectors()*pce_size));
      if (Y_s == Teuchos::null ||
          Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
        Y_s = Teuchos::rcp(new scalar_mv_type(Y.getMap(),
                                              Y.getNumVectors()*pce_size));
      base_scalar_type alpha_s = alpha.fastAccessCoeff(0);
      base_scalar_type beta_s = beta.fastAccessCoeff(0);

      {
        auto xv_s = X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
        auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);

        copy_pce_to_scalar(xv_s, xv);
        if (beta_s != 0.0)
          copy_pce_to_scalar(yv_s, yv);
      }

      mb_op->apply(*X_s, *Y_s, mode, alpha_s, beta_s);

      {
        auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
        copy_scalar_to_pce(yv, yv_s);
      }
    }

    virtual bool hasTransposeApply() const {
      return mb_op->hasTransposeApply();
    }

  private:

    typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_mv_type;

    mutable Teuchos::RCP<scalar_mv_type> X_s, Y_s;
    Teuchos::RCP<const scalar_op_type> mb_op;

  };

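  // Example (sketch, with illustrative placeholder names):  wrap a scalar
  // preconditioner built from the mean matrix so it can be applied to
  // UQ::PCE multi-vectors.
  //
  //   typedef Sacado::UQ::PCE<Storage> pce_scalar;
  //   Teuchos::RCP<Tpetra::CrsMatrix<pce_scalar,LO,GO,Node> > A = ...;
  //   Teuchos::RCP<Tpetra::CrsMatrix<double,LO,GO,Node> > A_mean =
  //     Stokhos::build_mean_scalar_matrix(*A);
  //   Teuchos::RCP<const Tpetra::Operator<double,LO,GO,Node> > M_mean = ...;
  //     // e.g. a preconditioner constructed from A_mean
  //   Teuchos::RCP<Tpetra::Operator<pce_scalar,LO,GO,Node> > M =
  //     Teuchos::rcp(new Stokhos::MeanBasedTpetraOperator<pce_scalar,LO,GO,Node>(M_mean));
  //   // M->apply(X,Y) then applies M_mean to every PCE coefficient of X.
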
}

#endif // STOKHOS_TPETRA_UTILITIES_HPP