Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
fenl_assembly_view/TestAssembly.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#include <iostream>
43
44// Tests
45#include "TestAssembly.hpp"
46
47// Devices
48#include "Kokkos_Core.hpp"
49
50// Utilities
51#include "Teuchos_CommandLineProcessor.hpp"
52#include "Teuchos_StandardCatchMacros.hpp"
53#ifdef KOKKOS_ENABLE_CUDA
54#include "cuda_runtime_api.h"
55#endif
56
57// For vtune
58#include <sys/types.h>
59#include <unistd.h>
60
61int main(int argc, char *argv[])
62{
63 bool success = true;
64 bool verbose = false;
65 try {
66
67 const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
68 const size_t num_cores_per_socket =
69 Kokkos::hwloc::get_available_cores_per_numa();
70 const size_t num_threads_per_core =
71 Kokkos::hwloc::get_available_threads_per_core();
72
73 // Setup command line options
74 Teuchos::CommandLineProcessor CLP;
75 CLP.setDocString(
76 "This test performance of MP::Vector FEM assembly.\n");
77 int nGrid = 0;
78 CLP.setOption("n", &nGrid, "Number of mesh points in each direction. Set to zero to use a range");
79 int nGridBegin = 8;
80 CLP.setOption("n-begin", &nGridBegin, "Beginning number of mesh points in each direction.");
81 int nGridEnd = 48;
82 CLP.setOption("n-end", &nGridEnd, "Ending number of mesh points in each direction.");
83 int nGridStep = 8;
84 CLP.setOption("n-step", &nGridStep, "Increment in number of mesh points in each direction.");
85 int nIter = 10;
86 CLP.setOption("ni", &nIter, "Number of assembly iterations");
87 bool print = false;
88 CLP.setOption("print", "no-print", &print, "Print debugging output");
89 bool check = false;
90 CLP.setOption("check", "no-check", &check, "Check correctness");
91 bool quadratic = false;
92 CLP.setOption("quadratic", "linear", &quadratic, "Use quadratic basis functions");
93 int num_cores = num_cores_per_socket * num_sockets;
94 CLP.setOption("cores", &num_cores,
95 "Number of CPU cores to use (defaults to all)");
96 int num_hyper_threads = num_threads_per_core;
97 CLP.setOption("hyperthreads", &num_hyper_threads,
98 "Number of hyper threads per core to use (defaults to all)");
99#ifdef KOKKOS_ENABLE_THREADS
100 bool threads = true;
101 CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
102#endif
103#ifdef KOKKOS_ENABLE_OPENMP
104 bool openmp = true;
105 CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
106#endif
107#ifdef KOKKOS_ENABLE_CUDA
108 bool cuda = true;
109 CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
110 int device_id = 0;
111 CLP.setOption("device", &device_id, "CUDA device ID.");
112#endif
113 bool vtune = false;
114 CLP.setOption("vtune", "no-vtune", &vtune, "connect to vtune");
115 CLP.parse( argc, argv );
116
117 if (nGrid > 0) {
118 nGridBegin = nGrid;
119 nGridEnd = nGrid;
120 }
121
122 // Connect to VTune if requested
123 if (vtune) {
124 std::stringstream cmd;
125 pid_t my_os_pid=getpid();
126 const std::string vtune_loc =
127 "amplxe-cl";
128 const std::string output_dir = "./vtune/vtune.0";
129 cmd << vtune_loc
130 << " -collect hotspots -result-dir " << output_dir
131 << " -target-pid " << my_os_pid << " &";
132 std::cout << cmd.str() << std::endl;
133 system(cmd.str().c_str());
134 system("sleep 10");
135 }
136
137 Kokkos::initialize(argc,argv);
138#ifdef KOKKOS_ENABLE_THREADS
139 if (threads) {
140 typedef Kokkos::Threads Device;
141
142 std::cout << std::endl
143 << "Threads performance with " << num_cores*num_hyper_threads
144 << " threads:" << std::endl;
145
146 performance_test_driver<Device>(
147 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
148 }
149#endif
150
151#ifdef KOKKOS_ENABLE_OPENMP
152 if (openmp) {
153 typedef Kokkos::OpenMP Device;
154
155 std::cout << std::endl
156 << "OpenMP performance with " << num_cores*num_hyper_threads
157 << " threads:" << std::endl;
158
159 performance_test_driver<Device>(
160 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
161 }
162#endif
163
164#ifdef KOKKOS_ENABLE_CUDA
165 if (cuda) {
166 typedef Kokkos::Cuda Device;
167
168 cudaDeviceProp deviceProp;
169 cudaGetDeviceProperties(&deviceProp, device_id);
170 std::cout << std::endl
171 << "CUDA performance performance with device " << device_id
172 << " ("
173 << deviceProp.name << "):"
174 << std::endl;
175
176 performance_test_driver<Device>(
177 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
178
179 }
180#endif
181 Kokkos::finalize();
182 }
183 TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
184
185 if (success)
186 return 0;
187 return -1;
188}
int main()
Definition: ad_example.cpp:191
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)