Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
fad_blas.cpp
Go to the documentation of this file.
1// $Id$
2// $Source$
3// @HEADER
4// ***********************************************************************
5//
6// Sacado Package
7// Copyright (2006) Sandia Corporation
8//
9// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10// the U.S. Government retains certain rights in this software.
11//
12// This library is free software; you can redistribute it and/or modify
13// it under the terms of the GNU Lesser General Public License as
14// published by the Free Software Foundation; either version 2.1 of the
15// License, or (at your option) any later version.
16//
17// This library is distributed in the hope that it will be useful, but
18// WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20// Lesser General Public License for more details.
21//
22// You should have received a copy of the GNU Lesser General Public
23// License along with this library; if not, write to the Free Software
24// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25// USA
26// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27// (etphipp@sandia.gov).
28//
29// ***********************************************************************
30// @HEADER
31
32#include "Sacado_Random.hpp"
33#include "Sacado_No_Kokkos.hpp"
34#include "Teuchos_BLAS.hpp"
35#include "Sacado_Fad_BLAS.hpp"
36
37#include "Teuchos_Time.hpp"
38#include "Teuchos_CommandLineProcessor.hpp"
39
40// A performance test that compares the cost of differentiating BLAS routines
41// with Fad
42
43double
44do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k,
45 unsigned int nloop)
46{
47 Sacado::Random<double> urand(0.0, 1.0);
48 Teuchos::BLAS<int,double> blas;
49
50 std::vector<double> A(m*k), B(k*n), C(m*n);
51 for (unsigned int j=0; j<k; j++)
52 for (unsigned int i=0; i<m; i++)
53 A[i+j*m] = urand.number();
54 for (unsigned int j=0; j<n; j++)
55 for (unsigned int i=0; i<k; i++)
56 B[i+j*k] = urand.number();
57 for (unsigned int j=0; j<n; j++)
58 for (unsigned int i=0; i<m; i++)
59 C[i+j*m] = urand.number();
60 double alpha = urand.number();
61 double beta = urand.number();
62
63 Teuchos::Time timer("Teuchos Double GEMM", false);
64 timer.start(true);
65 for (unsigned int j=0; j<nloop; j++) {
66 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
67 &B[0], k, beta, &C[0], m);
68 }
69 timer.stop();
70
71 return timer.totalElapsedTime() / nloop;
72}
73
74double
75do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
76{
77 Sacado::Random<double> urand(0.0, 1.0);
78 Teuchos::BLAS<int,double> blas;
79
80 std::vector<double> A(m*n), B(n), C(m);
81 for (unsigned int j=0; j<n; j++) {
82 for (unsigned int i=0; i<m; i++)
83 A[i+j*m] = urand.number();
84 B[j] = urand.number();
85 }
86 for (unsigned int i=0; i<m; i++)
87 C[i] = urand.number();
88 double alpha = urand.number();
89 double beta = urand.number();
90
91 Teuchos::Time timer("Teuchos Double GEMV", false);
92 timer.start(true);
93 for (unsigned int j=0; j<nloop; j++) {
94 blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
95 }
96 timer.stop();
97
98 return timer.totalElapsedTime() / nloop;
99}
100
101double
102do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
103{
104 Sacado::Random<double> urand(0.0, 1.0);
105 Teuchos::BLAS<int,double> blas;
106
107 std::vector<double> X(m), Y(m);
108 for (unsigned int i=0; i<m; i++) {
109 X[i] = urand.number();
110 Y[i] = urand.number();
111 }
112
113 Teuchos::Time timer("Teuchos Double DOT", false);
114 timer.start(true);
115 double z = 0.0;
116 for (unsigned int j=0; j<nloop; j++) {
117 z += blas.DOT(m, &X[0], 1, &Y[0], 1);
118 }
119 timer.stop();
120
121 return timer.totalElapsedTime() / nloop;
122}
123
124template <typename FadType>
125double
126do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
127 unsigned int ndot, unsigned int nloop)
128{
129 Sacado::Random<double> urand(0.0, 1.0);
130 Teuchos::BLAS<int,FadType> blas;
131
132 std::vector<FadType> A(m*k), B(k*n), C(m*n);
133 for (unsigned int j=0; j<k; j++) {
134 for (unsigned int i=0; i<m; i++) {
135 A[i+j*m] = FadType(ndot, urand.number());
136 for (unsigned int l=0; l<ndot; l++)
137 A[i+j*m].fastAccessDx(l) = urand.number();
138 }
139 }
140 for (unsigned int j=0; j<n; j++) {
141 for (unsigned int i=0; i<k; i++) {
142 B[i+j*k] = FadType(ndot, urand.number());
143 for (unsigned int l=0; l<ndot; l++)
144 B[i+j*k].fastAccessDx(l) = urand.number();
145 }
146 }
147 for (unsigned int j=0; j<n; j++) {
148 for (unsigned int i=0; i<m; i++) {
149 C[i+j*m] = FadType(ndot, urand.number());
150 for (unsigned int l=0; l<ndot; l++)
151 C[i+j*m].fastAccessDx(l) = urand.number();
152 }
153 }
154 FadType alpha(ndot, urand.number());
155 FadType beta(ndot, urand.number());
156 for (unsigned int l=0; l<ndot; l++) {
157 alpha.fastAccessDx(l) = urand.number();
158 beta.fastAccessDx(l) = urand.number();
159 }
160
161 Teuchos::Time timer("Teuchos Fad GEMM", false);
162 timer.start(true);
163 for (unsigned int j=0; j<nloop; j++) {
164 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
165 &B[0], k, beta, &C[0], m);
166 }
167 timer.stop();
168
169 return timer.totalElapsedTime() / nloop;
170}
171
172template <typename FadType>
173double
174do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
175 unsigned int nloop)
176{
177 Sacado::Random<double> urand(0.0, 1.0);
178 Teuchos::BLAS<int,FadType> blas;
179
180 std::vector<FadType> A(m*n), B(n), C(m);
181 for (unsigned int j=0; j<n; j++) {
182 for (unsigned int i=0; i<m; i++) {
183 //A[i+j*m] = urand.number();
184 A[i+j*m] = FadType(ndot, urand.number());
185 for (unsigned int k=0; k<ndot; k++)
186 A[i+j*m].fastAccessDx(k) = urand.number();
187 }
188 B[j] = FadType(ndot, urand.number());
189 for (unsigned int k=0; k<ndot; k++)
190 B[j].fastAccessDx(k) = urand.number();
191 }
192 for (unsigned int i=0; i<m; i++) {
193 C[i] = FadType(ndot, urand.number());
194 for (unsigned int k=0; k<ndot; k++)
195 C[i].fastAccessDx(k) = urand.number();
196 }
197 FadType alpha(ndot, urand.number());
198 FadType beta(ndot, urand.number());
199 for (unsigned int k=0; k<ndot; k++) {
200 alpha.fastAccessDx(k) = urand.number();
201 beta.fastAccessDx(k) = urand.number();
202 }
203
204 Teuchos::Time timer("Teuchos Fad GEMV", false);
205 timer.start(true);
206 for (unsigned int j=0; j<nloop; j++) {
207 blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
208 }
209 timer.stop();
210
211 return timer.totalElapsedTime() / nloop;
212}
213
214template <typename FadType>
215double
216do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
217{
218 Sacado::Random<double> urand(0.0, 1.0);
219 Teuchos::BLAS<int,FadType> blas;
220
221 std::vector<FadType> X(m), Y(m);
222 for (unsigned int i=0; i<m; i++) {
223 X[i] = FadType(ndot, urand.number());
224 Y[i] = FadType(ndot, urand.number());
225 for (unsigned int k=0; k<ndot; k++) {
226 X[i].fastAccessDx(k) = urand.number();
227 Y[i].fastAccessDx(k) = urand.number();
228 }
229 }
230
231 Teuchos::Time timer("Teuchos Fad DOT", false);
232 timer.start(true);
233 for (unsigned int j=0; j<nloop; j++) {
234 FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
235 }
236 timer.stop();
237
238 return timer.totalElapsedTime() / nloop;
239}
240
241template <typename FadType>
242double
243do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
244 unsigned int ndot, unsigned int nloop, bool use_dynamic)
245{
246 Sacado::Random<double> urand(0.0, 1.0);
247 unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
248 Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
249
250 Sacado::Fad::Vector<unsigned int, FadType> A(m*k,ndot), B(k*n,ndot), C
251 (m*n,ndot);
252 for (unsigned int j=0; j<k; j++) {
253 for (unsigned int i=0; i<m; i++) {
254 A[i+j*m] = FadType(ndot, urand.number());
255 for (unsigned int l=0; l<ndot; l++)
256 A[i+j*m].fastAccessDx(l) = urand.number();
257 }
258 }
259 for (unsigned int j=0; j<n; j++) {
260 for (unsigned int i=0; i<k; i++) {
261 B[i+j*k] = FadType(ndot, urand.number());
262 for (unsigned int l=0; l<ndot; l++)
263 B[i+j*k].fastAccessDx(l) = urand.number();
264 }
265 }
266 for (unsigned int j=0; j<n; j++) {
267 for (unsigned int i=0; i<m; i++) {
268 C[i+j*m] = FadType(ndot, urand.number());
269 for (unsigned int l=0; l<ndot; l++)
270 C[i+j*m].fastAccessDx(l) = urand.number();
271 }
272 }
273 FadType alpha(ndot, urand.number());
274 FadType beta(ndot, urand.number());
275 for (unsigned int l=0; l<ndot; l++) {
276 alpha.fastAccessDx(l) = urand.number();
277 beta.fastAccessDx(l) = urand.number();
278 }
279
280 Teuchos::Time timer("Teuchos Fad GEMM", false);
281 timer.start(true);
282 for (unsigned int j=0; j<nloop; j++) {
283 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
284 &B[0], k, beta, &C[0], m);
285 }
286 timer.stop();
287
288 return timer.totalElapsedTime() / nloop;
289}
290
291template <typename FadType>
292double
293do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
294 unsigned int nloop, bool use_dynamic)
295{
296 Sacado::Random<double> urand(0.0, 1.0);
297 unsigned int sz = m*n*(1+ndot) + 2*n*(1+ndot);
298 Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
299
300 Sacado::Fad::Vector<unsigned int, FadType> A(m*n,ndot), B(n,ndot), C(m,ndot);
301 for (unsigned int j=0; j<n; j++) {
302 for (unsigned int i=0; i<m; i++) {
303 //A[i+j*m] = urand.number();
304 A[i+j*m] = FadType(ndot, urand.number());
305 for (unsigned int k=0; k<ndot; k++)
306 A[i+j*m].fastAccessDx(k) = urand.number();
307 }
308 B[j] = FadType(ndot, urand.number());
309 for (unsigned int k=0; k<ndot; k++)
310 B[j].fastAccessDx(k) = urand.number();
311 }
312 for (unsigned int i=0; i<m; i++) {
313 C[i] = FadType(ndot, urand.number());
314 for (unsigned int k=0; k<ndot; k++)
315 C[i].fastAccessDx(k) = urand.number();
316 }
317 FadType alpha(ndot, urand.number());
318 FadType beta(ndot, urand.number());
319 for (unsigned int k=0; k<ndot; k++) {
320 alpha.fastAccessDx(k) = urand.number();
321 beta.fastAccessDx(k) = urand.number();
322 }
323
324 Teuchos::Time timer("Teuchos Fad GEMV", false);
325 timer.start(true);
326 for (unsigned int j=0; j<nloop; j++) {
327 blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
328 }
329 timer.stop();
330
331 return timer.totalElapsedTime() / nloop;
332}
333
334template <typename FadType>
335double
336do_time_sacado_fad_dot(unsigned int m, unsigned int ndot,
337 unsigned int nloop, bool use_dynamic)
338{
339 Sacado::Random<double> urand(0.0, 1.0);
340 unsigned int sz = 2*m*(1+ndot);
341 Teuchos::BLAS<int,FadType> blas(false,use_dynamic,sz);
342
344 for (unsigned int i=0; i<m; i++) {
345 X[i] = FadType(ndot, urand.number());
346 Y[i] = FadType(ndot, urand.number());
347 for (unsigned int k=0; k<ndot; k++) {
348 X[i].fastAccessDx(k) = urand.number();
349 Y[i].fastAccessDx(k) = urand.number();
350 }
351 }
352
353 Teuchos::Time timer("Teuchos Fad DOT", false);
354 timer.start(true);
355 for (unsigned int j=0; j<nloop; j++) {
356 FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
357 }
358 timer.stop();
359
360 return timer.totalElapsedTime() / nloop;
361}
362
363int main(int argc, char* argv[]) {
364 int ierr = 0;
365
366 try {
367 double t, tb;
368 int p = 2;
369 int w = p+7;
370
371 // Set up command line options
372 Teuchos::CommandLineProcessor clp;
373 clp.setDocString("This program tests the speed of differentiating BLAS routines using Fad");
374 int m = 10;
375 clp.setOption("m", &m, "Number of rows");
376 int n = 10;
377 clp.setOption("n", &n, "Number of columns");
378 int k = 10;
379 clp.setOption("k", &k, "Number of columns for GEMM");
380 int ndot = 10;
381 clp.setOption("ndot", &ndot, "Number of derivative components");
382 int nloop = 100000;
383 clp.setOption("nloop", &nloop, "Number of loops");
384 int dynamic = 1;
385 clp.setOption("dynamic", &dynamic, "Use dynamic allocation");
386
387 // Parse options
388 Teuchos::CommandLineProcessor::EParseCommandLineReturn
389 parseReturn= clp.parse(argc, argv);
390 if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
391 return 1;
392 bool use_dynamic = (dynamic != 0);
393
394 std::cout.setf(std::ios::scientific);
395 std::cout.precision(p);
396 std::cout << "Times (sec) for m = " << m << ", n = " << n
397 << ", ndot = " << ndot << ", nloop = " << nloop
398 << ", dynamic = " << use_dynamic << ": "
399 << std::endl;
400
401 tb = do_time_teuchos_double_gemm(m,n,k,nloop);
402 std::cout << "GEMM: " << std::setw(w) << tb << std::endl;
403
404 t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
405 std::cout << "Sacado DVFad GEMM: " << std::setw(w) << t << "\t"
406 << std::setw(w) << t/tb << std::endl;
407
408 t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
409 std::cout << "Sacado DFad GEMM: " << std::setw(w) << t << "\t"
410 << std::setw(w) << t/tb << std::endl;
411
412 t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
413 std::cout << "Teuchos DFad GEMM: " << std::setw(w) << t << "\t"
414 << std::setw(w) << t/tb << std::endl;
415
416 // t = do_time_teuchos_fad_gemm< Sacado::ELRFad::DFad<double> >(m,n,k,ndot,nloop);
417 // std::cout << "Teuchos ELRDFad GEMM: " << std::setw(w) << t << "\t"
418 // << std::setw(w) << t/tb << std::endl;
419
420 t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
421 std::cout << "Teuchos DVFad GEMM: " << std::setw(w) << t << "\t"
422 << std::setw(w) << t/tb << std::endl;
423
424 std::cout << std::endl;
425
426 tb = do_time_teuchos_double_gemv(m,n,nloop);
427 std::cout << "GEMV: " << std::setw(w) << tb << std::endl;
428
429 t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
430 std::cout << "Sacado DVFad GEMV: " << std::setw(w) << t << "\t"
431 << std::setw(w) << t/tb << std::endl;
432
433 t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
434 std::cout << "Sacado DFad GEMV: " << std::setw(w) << t << "\t"
435 << std::setw(w) << t/tb << std::endl;
436
437 t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
438 std::cout << "Teuchos DFad GEMV: " << std::setw(w) << t << "\t"
439 << std::setw(w) << t/tb << std::endl;
440
441 // t = do_time_teuchos_fad_gemv< Sacado::ELRFad::DFad<double> >(m,n,ndot,nloop*10);
442 // std::cout << "Teuchos ELRDFad GEMV: " << std::setw(w) << t << "\t"
443 // << std::setw(w) << t/tb << std::endl;
444
445 t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
446 std::cout << "Teuchos DVFad GEMV: " << std::setw(w) << t << "\t"
447 << std::setw(w) << t/tb << std::endl;
448
449 std::cout << std::endl;
450
451 tb = do_time_teuchos_double_dot(m,nloop*100);
452 std::cout << "DOT: " << std::setw(w) << tb << std::endl;
453
454 t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
455 std::cout << "Sacado DVFad DOT: " << std::setw(w) << t << "\t"
456 << std::setw(w) << t/tb << std::endl;
457
458 t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
459 std::cout << "Sacado DFad DOT: " << std::setw(w) << t << "\t"
460 << std::setw(w) << t/tb << std::endl;
461
462 t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
463 std::cout << "Teuchos DFad DOT: " << std::setw(w) << t << "\t"
464 << std::setw(w) << t/tb << std::endl;
465
466 // t = do_time_teuchos_fad_dot< Sacado::ELRFad::DFad<double> >(m,ndot,nloop*100);
467 // std::cout << "Teuchos ELRDFad DOT: " << std::setw(w) << t << "\t"
468 // << std::setw(w) << t/tb << std::endl;
469
470 t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
471 std::cout << "Teuchos DVFad DOT: " << std::setw(w) << t << "\t"
472 << std::setw(w) << t/tb << std::endl;
473
474 }
475 catch (std::exception& e) {
476 std::cout << e.what() << std::endl;
477 ierr = 1;
478 }
479 catch (const char *s) {
480 std::cout << s << std::endl;
481 ierr = 1;
482 }
483 catch (...) {
484 std::cout << "Caught unknown exception!" << std::endl;
485 ierr = 1;
486 }
487
488 return ierr;
489}
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
#define A
Definition: Sacado_rad.hpp:572
#define C(x)
int main()
Definition: ad_example.cpp:191
Sacado::Fad::DFad< double > FadType
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
A random number generator that generates random numbers uniformly distributed in the interval (a,...
ScalarT number()
Get random number.
double do_time_sacado_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:243
double do_time_teuchos_double_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int nloop)
Definition: fad_blas.cpp:44
double do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
Definition: fad_blas.cpp:75
double do_time_sacado_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:293
double do_time_sacado_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop, bool use_dynamic)
Definition: fad_blas.cpp:336
double do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:174
double do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:216
double do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k, unsigned int ndot, unsigned int nloop)
Definition: fad_blas.cpp:126
double do_time_teuchos_double_dot(unsigned int m, unsigned int nloop)
Definition: fad_blas.cpp:102
Uncopyable z
const char * p