48#include "Sacado_mpl_range_c.hpp"
49#include "Sacado_mpl_for_each.hpp"
50#include "Sacado_mpl_integral_c.hpp"
53#include <Kokkos_UnorderedMap.hpp>
54#include <Kokkos_StaticCrsGraph.hpp>
55#include <Kokkos_Timer.hpp>
58#include <Teuchos_CommHelpers.hpp>
59#include "Teuchos_TestingHelpers.hpp"
60#include "Teuchos_VerboseObject.hpp"
// Max-reduce a single double across the ranks of `comm`.
//
// comm  : Teuchos communicator the reduction runs over.
// local : this rank's contribution.
//
// The visible call reduces exactly one value (count 1) with
// Teuchos::REDUCE_MAX, reading from `local` and writing into `global`.
// NOTE(review): the leading numbers (68, 71) look like line numbers fused in
// from a source listing, and the lines that declare `global` and return a
// value are not visible here; presumably `global` is what is returned - confirm.
68double maximum(
const Teuchos::Comm<int>& comm ,
double local )
71 Teuchos::reduceAll( comm , Teuchos::REDUCE_MAX , 1 , & local , & global );
// Assembly driver templated on the value type (Scalar) and the Kokkos Device.
//
// What the visible lines show:
//   - a mesh fixture is configured from comm size/rank, use_nodes[0..2] and
//     the three bubble factors, and a failed setup on any rank throws;
//   - a nodal import object is built from the fixture's send/recv lists;
//   - for each of `use_trials` trials a node-node graph, a jacobian, a
//     solution vector and a residual vector are (re)created and initialised.
//
// comm           : communicator used for rank/size and the setup check.
// use_print      : requests diagnostic printing (see print_flag below).
// use_trials     : number of iterations of the trial loop.
// use_nodes      : three entries are read (use_nodes[0], [1], [2]).
// nodal_residual : output view; reassigned inside the trial loop to a view
//                  sized fixture.node_count_owned().
//
// NOTE(review): the leading numbers on each line look like line numbers fused
// in from a source listing, and many original lines are missing (function
// name, several typedefs, the element/boundary computation calls, the return).
// Comments below describe only what is visible.
111template <
typename Scalar,
typename Device>
113 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm ,
114 const int use_print ,
115 const int use_trials ,
116 const int use_nodes[] ,
118 Kokkos::View< Scalar* , Kokkos::LayoutLeft, Device >& nodal_residual)
122 using Teuchos::rcpFromRef;
123 using Teuchos::arrayView;
124 using Teuchos::ParameterList;
130 typedef typename LocalMatrixType::StaticCrsGraphType
139 ElementComputationType ;
142 DirichletComputationType ;
// The vector type is whatever the element computation declares.
144 typedef typename ElementComputationType::vector_type VectorType ;
147 typename FixtureType::comm_list_type ,
148 typename FixtureType::send_nodeid_type ,
149 VectorType > ImportType ;
// Printing is enabled only when requested AND the device memory space is
// Kokkos::HostSpace.
153 const int print_flag = use_print && std::is_same< Kokkos::HostSpace , typename Device::memory_space >::value ;
155 const int comm_rank = comm->getRank();
156 const int comm_size = comm->getSize();
// All three bubble factors are fixed at 1.0; they are passed, together with
// comm size/rank and use_nodes[0..2], as constructor-style arguments below.
// NOTE(review): the line naming the object being constructed is not visible;
// presumably it is `fixture` - confirm.
160 const double bubble_x = 1.0 ;
161 const double bubble_y = 1.0 ;
162 const double bubble_z = 1.0 ;
165 comm_size , comm_rank ,
166 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
167 bubble_x , bubble_y , bubble_z );
// Each rank contributes 0 if fixture.ok() and 1 otherwise; the max over all
// ranks is non-zero when any rank failed, in which case every rank throws.
169 if (
maximum(*comm, ( fixture.ok() ? 0 : 1 ) ) ) {
170 throw std::runtime_error(std::string(
"Problem fixture setup failed"));
// Import object built from the fixture's receive/send node lists, the send
// node ids, the owned-node count and the number of non-owned nodes
// (node_count() - node_count_owned()).
175 const ImportType comm_nodal_import(
177 fixture.recv_node() ,
178 fixture.send_node() ,
179 fixture.send_nodeid() ,
180 fixture.node_count_owned() ,
181 fixture.node_count() - fixture.node_count_owned() );
// Boundary values; their use is on lines not visible in this listing.
185 const double bc_lower_value = 1 ;
186 const double bc_upper_value = 2 ;
// Coefficient object constructed with (1.0, 0.1, 1.0, 5); every entry of its
// random-variable view is then set to 1.0.
188 CoeffFunctionType diffusion_coefficient( 1.0, 0.1, 1.0, 5 );
189 Kokkos::deep_copy( diffusion_coefficient.getRandomVariables(), 1.0 );
// Dump the element-to-node connectivity to std::cout: one "elem[i]{ ... }"
// line per element, listing FixtureType::ElemNode node ids.
// NOTE(review): the guarding condition is not visible; presumably print_flag.
194 std::cout <<
"ElemNode {" << std::endl ;
195 for (
unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
196 std::cout <<
" elem[" << ielem <<
"]{" ;
197 for (
unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
198 std::cout <<
" " << fixture.elem_node(ielem,inode);
200 std::cout <<
" }" << std::endl ;
202 std::cout <<
"}" << std::endl ;
207 Kokkos::Timer wall_clock ;
// Trial loop: runs use_trials times.
211 for (
int itrial = 0 ; itrial < use_trials ; ++itrial ) {
// Build the node-node graph from the element connectivity and the owned-node
// count, then create the jacobian on that graph.
223 typename NodeNodeGraphType::Times graph_times;
224 const NodeNodeGraphType
225 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
229 LocalMatrixType jacobian( mesh_to_graph.graph );
// The solution spans all nodes (node_count()); the residual spans owned nodes
// only (node_count_owned()) and replaces the caller's view.
234 VectorType nodal_solution(
"nodal_solution" , fixture.node_count() );
235 nodal_residual = VectorType(
"nodal_residual" , fixture.node_count_owned() );
// Element and Dirichlet computation objects, both wired to the same jacobian
// and residual.
243 const ElementComputationType elemcomp( fixture , diffusion_coefficient ,
245 mesh_to_graph.elem_graph ,
246 jacobian , nodal_residual ,
250 const DirichletComputationType dirichlet(
251 fixture , nodal_solution , jacobian , nodal_residual ,
// Every solution entry starts at Scalar(1).
257 Kokkos::deep_copy( nodal_solution ,
Scalar(1) );
// Apply the import object to the solution vector.
// NOTE(review): presumably this fills the non-owned node values - confirm.
264 comm_nodal_import( nodal_solution );
// Zero the residual and the jacobian values.
275 Kokkos::deep_copy( nodal_residual ,
Scalar(0) );
276 Kokkos::deep_copy( jacobian.values ,
Scalar(0) );
// Fold this trial's `perf` into perf_stats via its min() member.
294 perf_stats.
min(perf);
// Compare a scalar residual view against an ensemble residual view on the host.
//
// ensemble_residual : view holding the ensemble-valued residual.
// (scalar_residual, the other view being compared, is used below; its
// parameter line is not visible in this listing.)
//
// Both views are mirrored to the host and copied, their extent(0) values are
// tested for equality, and then a floating-point comparison with tolerance
// `tol` is run for every (node, ensemble entry) pair. Test output goes to
// `fbuf`, and the outcome is recorded through `success`.
//
// NOTE(review): the leading numbers look like line numbers fused in from a
// source listing; the declarations of `success` and `num_ensemble`, the
// arguments being compared in the inner test, and the return are not visible.
301template <
typename ScalarViewType,
typename EnsembleViewType>
303 const EnsembleViewType& ensemble_residual)
// Tolerance passed to the floating-point equality test below.
305 const double tol = 1e-14;
// `out` is the default Teuchos output stream; `fbuf` wraps the local
// stringstream `buf` through a non-owning RCP (rcp(&buf, false)).
307 Teuchos::RCP<Teuchos::FancyOStream> out =
308 Teuchos::VerboseObjectBase::getDefaultOStream();
309 std::stringstream buf;
310 Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,
false));
// Host mirrors of both views, filled by deep_copy so the comparison below
// reads host-side data.
312 typename ScalarViewType::HostMirror host_scalar_residual =
313 Kokkos::create_mirror_view(scalar_residual);
314 typename EnsembleViewType::HostMirror host_ensemble_residual =
315 Kokkos::create_mirror_view(ensemble_residual);
316 Kokkos::deep_copy( host_scalar_residual, scalar_residual );
317 Kokkos::deep_copy( host_ensemble_residual, ensemble_residual );
// The two residuals must have the same leading extent.
319 TEUCHOS_TEST_EQUALITY( host_scalar_residual.extent(0),
320 host_ensemble_residual.extent(0), fbuf, success );
322 const size_t num_node = host_scalar_residual.extent(0);
// Entry-by-entry comparison over nodes (i) and ensemble entries (j).
324 for (
size_t i=0; i<num_node; ++i) {
325 for (
size_t j=0;
j<num_ensemble; ++
j) {
326 TEUCHOS_TEST_FLOATING_EQUALITY(
328 tol, fbuf, success );
// Functor-style class template parameterized on a Storage type.
//
// Scalar, Ordinal and Device are taken from Storage's value_type,
// ordinal_type and execution_space. It holds a communicator (`comm`) and,
// judging by the trailing-underscore parameters, is constructed from the
// print/trials/nodes settings.
//
// NOTE(review): the leading numbers look like line numbers fused in from a
// source listing; the class name, remaining members, constructor body and
// the signature of the member template are not visible here.
338template <
class Storage>
340 typedef typename Storage::value_type
Scalar;
341 typedef typename Storage::ordinal_type
Ordinal;
342 typedef typename Storage::execution_space
Device;
343 Teuchos::RCP<const Teuchos::Comm<int> >
comm ;
351 const int use_print_ ,
352 const int use_trials_ ,
353 const int use_nodes_[] ,
// Member template: ArgT::value supplies the ensemble size for this
// invocation, and Storage is rebound to that size through apply_N.
363 template <
typename ArgT>
365 const int ensemble = ArgT::value;
366 typedef typename Storage::template apply_N<ensemble> NewStorageApply;
// Rank-1 LayoutLeft views on Device: one of Scalar, one of mp_vector_type.
370 typedef Kokkos::View< Scalar* , Kokkos::LayoutLeft, Device > scalar_vector_type ;
371 typedef Kokkos::View< mp_vector_type* , Kokkos::LayoutLeft, Device > ensemble_vector_type ;
// Scalar run: fenl_assembly instantiated with the plain Scalar type, using
// scalar_dev_config; the residual is returned through scalar_residual.
373 scalar_vector_type scalar_residual;
376 fenl_assembly<Scalar,Device>(
378 scalar_dev_config, scalar_residual );
380 ensemble_vector_type ensemble_residual;
// is_cuda is true only when KOKKOS_ENABLE_CUDA is defined and Device is
// Kokkos::Cuda; the second definition sets it to false.
// NOTE(review): the preprocessor lines separating the two definitions are
// not visible in this listing.
382#if defined( KOKKOS_ENABLE_CUDA )
383 const bool is_cuda = std::is_same<Device,Kokkos::Cuda>::value;
385 const bool is_cuda = false ;
// The y block dimension of the ensemble configuration is block_size divided
// by the ensemble size.
// NOTE(review): the enclosing condition is not visible; presumably is_cuda.
390 ensemble_dev_config.
block_dim.
y = block_size/ensemble;
// Ensemble run: same assembly instantiated with mp_vector_type, using
// ensemble_dev_config; the residual is returned through ensemble_residual.
393 fenl_assembly<mp_vector_type,Device>(
395 ensemble_dev_config, ensemble_residual);
// Only rank 0 prints the result row (precision 3; the ensemble size is
// printed in a width-2 field, later columns in fixed width-6 fields).
403 if (
comm->getRank() == 0) {
404 std::cout.precision(3);
407 << std::setw(2) << ensemble <<
" , "
411 << std::fixed << std::setw(6)
416 << std::fixed << std::setw(6)
// Driver templated on a Storage type and a compile-time range of ensemble
// sizes (entry_min, entry_max, entry_step).
//
// use_print, use_trials, use_nodes : forwarded settings; use_nodes, `check`
//                                    and `dev_config` are visibly passed on
//                                    when the functor `op` is set up.
//
// Rank 0 prints a comma-separated header row of quoted column names, then a
// functor is applied over the compile-time Range of ensemble sizes.
//
// NOTE(review): the leading numbers look like line numbers fused in from a
// source listing; the function name line, some parameters, part of the header
// row and the construction of `op` are not fully visible here.
423template <
class Storage,
int entry_min,
int entry_max,
int entry_step>
425 const int use_print ,
426 const int use_trials ,
427 const int use_nodes[] ,
// Header row, printed by rank 0 only, with stream precision set to 8.
431 if (comm->getRank() == 0) {
432 std::cout.precision(8);
433 std::cout << std::endl
434 <<
"\"Grid Size\" , "
436 <<
"\"Ensemble Size\" , "
437 <<
"\"Scalar Import Time (ms)\" , "
438 <<
"\"Ensemble Import Time (ms)\" , "
439 <<
"\"Ensemble Import Speedup\" , "
440 <<
"\"Scalar Fill Time (ms)\" , "
441 <<
"\"Ensemble Fill Time (ms)\" , "
442 <<
"\"Ensemble Fill Speedup\" , "
// The range's upper argument is entry_max+1.
// NOTE(review): presumably range_c's upper bound is exclusive, which would
// make entry_max itself part of the range - confirm against Sacado.
447 typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
449 use_nodes,
check, dev_config);
// Construct the for_each helper over Range with `op`.
// NOTE(review): presumably this invokes op once per value in Range - confirm.
450 Sacado::mpl::for_each_no_kokkos<Range>
f(op);
bool check_residuals(const ScalarViewType &scalar_residual, const EnsembleViewType &ensemble_residual)
double maximum(const Teuchos::Comm< int > &comm, double local)
bool check_residuals(const ScalarViewType &scalar_residual, const EnsembleViewType &ensemble_residual)
Perf fenl_assembly(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], Kokkos::Example::FENL::DeviceConfig dev_config, Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > &nodal_residual)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
Stokhos::StandardStorage< int, double > storage_type
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements.
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
void reduceMax(const Teuchos::Comm< int > &comm)
void increment(const Perf &p)