Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineDragonflyRCA.hpp
Go to the documentation of this file.
1#ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
2#define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
3
4#include <Teuchos_Comm.hpp>
5#include <Teuchos_CommHelpers.hpp>
6#include <Zoltan2_Machine.hpp>
7
8#ifdef HAVE_ZOLTAN2_RCALIB
9extern "C"{
10#include <rca_lib.h>
11}
12#endif
13
14namespace Zoltan2{
15
72template <typename pcoord_t, typename part_t>
73class MachineDragonflyRCA : public Machine <pcoord_t, part_t> {
74
75public:
76
84 MachineDragonflyRCA(const Teuchos::Comm<int> &comm):
85 Machine<pcoord_t,part_t>(comm),
86 transformed_networkDim(3),
87 actual_networkDim(3),
88 transformed_procCoords(NULL),
89 actual_procCoords(NULL),
90 transformed_machine_extent(NULL),
91 actual_machine_extent(NULL),
92 num_unique_groups(0),
93 group_count(NULL),
94 is_transformed(false),
95 pl(NULL) {
96
97 actual_machine_extent = new int[actual_networkDim];
98 this->getActualMachineExtent(this->actual_machine_extent);
99
100 // Number of ranks in each Dragonfly network group
101 // (i.e. RCA's X coord == Grp g)
102 group_count = new part_t[actual_machine_extent[0]];
103
104 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
105
106 // Transformed dims = 1 + N_y + N_z
107 transformed_networkDim = 1 + actual_machine_extent[1] +
108 actual_machine_extent[2];
109 transformed_machine_extent = new int[transformed_networkDim];
110
111 // Allocate memory for processor coords
112 actual_procCoords = new pcoord_t *[actual_networkDim];
113 transformed_procCoords = new pcoord_t *[transformed_networkDim];
114
115 for (int i = 0; i < actual_networkDim; ++i) {
116 actual_procCoords[i] = new pcoord_t[this->numRanks];
117 memset(actual_procCoords[i], 0,
118 sizeof(pcoord_t) * this->numRanks);
119 }
120
121 pcoord_t *xyz = new pcoord_t[transformed_networkDim];
123 for (int i = 0; i < actual_networkDim; ++i)
124 actual_procCoords[i][this->myRank] = xyz[i];
125 delete [] xyz;
126
127 // Gather number of ranks in each Dragonfly network group from
128 // across all ranks
129 part_t * tmp_vec = new part_t[actual_machine_extent[0]];
130 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
131
132 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
133 actual_machine_extent[0],
134 group_count,
135 tmp_vec);
136
137 // remove zero entries from reduced array
138 num_unique_groups = 0;
139
140 for (int i = 0; i < actual_machine_extent[0]; ++i) {
141 if (tmp_vec[i] > 0) {
142 ++num_unique_groups;
143 }
144 }
145
146 // Reset group_count array to new size
147 delete[] group_count;
148 group_count = new part_t[num_unique_groups];
149
150 int pos = 0;
151 for (int i = 0; i < actual_machine_extent[0]; ++i) {
152 if (tmp_vec[i] > 0) {
153 group_count[pos] = tmp_vec[i];
154 ++pos;
155 }
156 }
157
158 delete[] tmp_vec;
159
160 // reduceAll the coordinates of each processor.
161 gatherMachineCoordinates(this->actual_procCoords,
162 this->actual_networkDim, comm);
163 }
164
165 // No necessary wrap arounds for dragonfly networks. Groups
166 // have wrap around, but group all-to-all connection makes unneccessary.
167 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
168 return false;
169 }
170
171
181 MachineDragonflyRCA(const Teuchos::Comm<int> &comm,
182 const Teuchos::ParameterList &pl_ ):
183 Machine<pcoord_t,part_t>(comm),
184 transformed_networkDim(3),
185 actual_networkDim(3),
186 transformed_procCoords(NULL),
187 actual_procCoords(NULL),
188 transformed_machine_extent(NULL),
189 actual_machine_extent(NULL),
190 num_unique_groups(0),
191 group_count(NULL),
192 is_transformed(false),
193 pl(&pl_)
194 {
195 actual_machine_extent = new int[actual_networkDim];
196 this->getActualMachineExtent(this->actual_machine_extent);
197
198 // Number of parts in each Group (i.e. RCA's X coord == Grp g)
199 group_count = new part_t[actual_machine_extent[0]];
200
201 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
202
203 // Allocate memory for processor coords
204 actual_procCoords = new pcoord_t *[actual_networkDim];
205 transformed_procCoords = new pcoord_t *[transformed_networkDim];
206
207 pcoord_t *xyz = new pcoord_t[actual_networkDim];
209
210 // Gather number of ranks in each Dragonfly network group
211 // from across all ranks
212 part_t * tmp_vec = new part_t[actual_machine_extent[0]];
213 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
214
215 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
216 actual_machine_extent[0],
217 group_count,
218 tmp_vec);
219
220 // Remove zero entries from reduced array
221 num_unique_groups = 0;
222
223 for (int i = 0; i < actual_machine_extent[0]; ++i) {
224 if (tmp_vec[i] > 0) {
225 ++num_unique_groups;
226 }
227 }
228
229 // Reset group_count array to new size
230 delete[] group_count;
231 group_count = new part_t[num_unique_groups];
232
233 int pos = 0;
234 for (int i = 0; i < actual_machine_extent[0]; ++i) {
235 if (tmp_vec[i] > 0) {
236 group_count[pos] = tmp_vec[i];
237 ++pos;
238 }
239 }
240 delete[] tmp_vec;
241
242 const Teuchos::ParameterEntry *pe2 =
243 this->pl->getEntryPtr("Machine_Optimization_Level");
244
245 // Transform with mach opt level
246 if (pe2) {
247 int optimization_level;
248 optimization_level = pe2->getValue<int>(&optimization_level);
249
250 if (optimization_level > 0) {
251 is_transformed = true;
252
253 // Transformed dims = 1 + N_y + N_z
254 transformed_networkDim = 1 + actual_machine_extent[1] +
255 actual_machine_extent[2];
256 transformed_machine_extent = new int[transformed_networkDim];
257
258 transformed_procCoords = new pcoord_t *[transformed_networkDim];
259
260 // Allocate memory for transformed coordinates
261 for (int i = 0; i < transformed_networkDim; ++i) {
262 transformed_procCoords[i] = new pcoord_t[this->numRanks];
263 memset(transformed_procCoords[i], 0,
264 sizeof(pcoord_t) * this->numRanks);
265 }
266
267 // Calculate transformed coordinates and machine extents
268 int nx = this->actual_machine_extent[0];
269 int ny = this->actual_machine_extent[1];
270 int nz = this->actual_machine_extent[2];
271
272 const Teuchos::ParameterEntry *pe_x =
273 this->pl->getEntryPtr("Machine_X_Stretch");
274 const Teuchos::ParameterEntry *pe_y =
275 this->pl->getEntryPtr("Machine_Y_Stretch");
276 const Teuchos::ParameterEntry *pe_z =
277 this->pl->getEntryPtr("Machine_Z_Stretch");
278
279 // Default X,Y,Z stretches
280 int x_stretch = 3;
281 int y_stretch = 2;
282 int z_stretch = 1;
283
284 if (pe_x)
285 x_stretch = pe_x->getValue<int>(&x_stretch);
286 if (pe_y)
287 y_stretch = pe_y->getValue<int>(&y_stretch);
288 if (pe_z)
289 z_stretch = pe_z->getValue<int>(&z_stretch);
290
291 // Transform X coords
292 transformed_procCoords[0][this->myRank] =
293 x_stretch * xyz[0] * ny * nz;
294
295 // Transform Y coords
296 for (int i = 1; i < 1 + ny; ++i) {
297 // Shift y-coord given a group, xyz[0];
298 transformed_procCoords[i][this->myRank] = 0;
299 // Increment in the dim where y-coord present
300 if (xyz[1] == i - 1)
301 transformed_procCoords[i][this->myRank] = y_stretch;
302 }
303 // Transform Z coords
304 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
305 // Shift z-coord given a group, xyz[0];
306 transformed_procCoords[i][this->myRank] = 0;
307 // Increment in the dim where z-coord present
308 if (xyz[2] == i - (1 + ny))
309 transformed_procCoords[i][this->myRank] = z_stretch;
310 }
311
312 this->transformed_machine_extent = new int[transformed_networkDim];
313
314 // Maximum extents in shifted high dim coordinate system
315 this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
316 for (int i = 1; i < 1 + ny; ++i) {
317 this->transformed_machine_extent[i] = y_stretch;
318 }
319 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
320 this->transformed_machine_extent[i] = z_stretch;
321 }
322
323 // reduceAll the transformed coordinates of each processor.
324 gatherMachineCoordinates(this->transformed_procCoords,
325 this->transformed_networkDim, comm);
326
327 this->printAllocation();
328 }
329 }
330 // If no coordinate transformation, gather actual coords
331 if (!is_transformed) {
332
333 for (int i = 0; i < actual_networkDim; ++i) {
334 actual_procCoords[i] = new pcoord_t[this->numRanks];
335 memset(actual_procCoords[i], 0,
336 sizeof(pcoord_t) * this->numRanks);
337 }
338
339 for (int i = 0; i < actual_networkDim; ++i)
340 actual_procCoords[i][this->myRank] = xyz[i];
341
342 // reduceAll the actual coordinates of each processor
343 gatherMachineCoordinates(this->actual_procCoords,
344 this->actual_networkDim, comm);
345
346 this->printAllocation();
347 }
348 delete [] xyz;
349 }
350
351 // Destructor
353 if (is_transformed) {
354 is_transformed = false;
355 if (this->numRanks > 1) {
356 for (int i = 0; i < transformed_networkDim; ++i) {
357 delete [] transformed_procCoords[i];
358 }
359 }
360 delete [] transformed_machine_extent;
361 }
362 else {
363 if (this->numRanks > 1) {
364 for (int i = 0; i < actual_networkDim; ++i) {
365 delete [] actual_procCoords[i];
366 }
367 }
368 }
369
370 delete [] actual_procCoords;
371 delete [] transformed_procCoords;
372
373 delete [] actual_machine_extent;
374 delete [] group_count;
375 }
376
377 bool hasMachineCoordinates() const { return true; }
378
379 // Return dimensions of coords, transformed or actual
380 int getMachineDim() const {
381 if (is_transformed)
382 return this->transformed_networkDim;
383 else
384 return this->actual_networkDim;
385 }
386
387 // Return the transformed maximum machine extents
388 bool getTransformedMachineExtent(int *nxyz) const {
389 if (is_transformed) {
390 for (int dim = 0; dim < transformed_networkDim; ++dim)
391 nxyz[dim] = this->transformed_machine_extent[dim];
392
393 return true;
394 }
395 else
396 return false;
397 }
398
399 // Return the actual RCA maximum machine extents
400 bool getActualMachineExtent(int *nxyz) const {
401#if defined (HAVE_ZOLTAN2_RCALIB)
402 mesh_coord_t mxyz;
403 rca_get_max_dimension(&mxyz);
404
405 int dim = 0; // Example extents on Cori
406 nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
407 nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
408 nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
409 return true;
410#else
411 return false;
412#endif
413 }
414
415 // Return machine extents, transformed or actual
416 bool getMachineExtent(int *nxyz) const {
417 if (is_transformed)
418 this->getTransformedMachineExtent(nxyz);
419 else
420 this->getActualMachineExtent(nxyz);
421
422 return true;
423 }
424
425 // Return number of groups (RCA X-dim) with allocated nodes
426 part_t getNumUniqueGroups() const override{
427 return this->num_unique_groups;
428 }
429
430 // Return number of ranks in each group (RCA X-dim) in an allocation
431 bool getGroupCount(part_t *grp_count) const override {
432
433 if (group_count != NULL) {
434 for (int i = 0; i < num_unique_groups; ++i) {
435 grp_count[i] = this->group_count[i];
436 }
437
438 return true;
439 }
440 else
441 return false;
442 }
443
444 // Print allocation coords and extents on rank 0, transformed or actual
446 if (this->myRank == 0) {
447 // Print transformed coordinates and extents
448 if (is_transformed) {
449 for (int i = 0; i < this->numRanks; ++i) {
450 std::cout << "Rank:" << i;
451 for (int j = 0; j < this->transformed_networkDim; ++j) {
452 std::cout << " " << transformed_procCoords[j][i];
453 }
454 std::cout << std::endl;
455 }
456
457 std::cout << std::endl << "Transformed Machine Extent: ";
458 for (int i = 0; i < this->transformed_networkDim; ++i) {
459 std::cout << " " << this->transformed_machine_extent[i];
460 }
461 std::cout << std::endl;
462 }
463 // Print actual coordinates and extents
464 else {
465 for (int i = 0; i < this->numRanks; ++i) {
466 std::cout << "Rank:" << i;
467 for (int j = 0; j < this->actual_networkDim; ++j) {
468 std::cout << " " << actual_procCoords[j][i];
469 }
470 std::cout << std::endl;
471 }
472
473 std::cout << std::endl << "Actual Machine Extent: ";
474 for (int i = 0; i < this->actual_networkDim; ++i) {
475 std::cout << " " << this->actual_machine_extent[i];
476 }
477 std::cout << std::endl;
478 }
479 }
480 }
481
482 // Return transformed coord for this rank
484 if (is_transformed) {
485 for (int i = 0; i < this->transformed_networkDim; ++i) {
486 xyz[i] = transformed_procCoords[i][this->myRank];
487 }
488
489 return true;
490 }
491 else
492 return false;
493 }
494
495 // Return actual RCA coord for this rank
496 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
497#if defined (HAVE_ZOLTAN2_RCALIB)
498 // Cray node info for current node
499 rs_node_t nodeInfo;
500 rca_get_nodeid(&nodeInfo);
501
502 // Current node ID
503 int NIDs = (int)nodeInfo.rs_node_s._node_id;
504
505 mesh_coord_t node_coord;
506 int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
507 if (returnval == -1) {
508 return false;
509 }
510
511 int x = node_coord.mesh_x;
512 int y = node_coord.mesh_y;
513 int z = node_coord.mesh_z;
514
515 xyz[0] = x;
516 xyz[1] = y;
517 xyz[2] = z;
518
519 group_count[x]++;
520
521 return true;
522#else
523 return false;
524#endif
525 }
526
527 // Return machine coordinate for this rank, transformed or actual
528 bool getMyMachineCoordinate(pcoord_t *xyz) {
529 if (is_transformed)
531 else
533
534 return true;
535 }
536
537 // Return machine coord of given rank, transformed or actual
538 inline bool getMachineCoordinate(const int rank,
539 pcoord_t *xyz) const {
540 if (is_transformed) {
541 for (int i = 0; i < this->transformed_networkDim; ++i) {
542 xyz[i] = transformed_procCoords[i][rank];
543 }
544 }
545 else {
546 for (int i = 0; i < this->actual_networkDim; ++i) {
547 xyz[i] = actual_procCoords[i][rank];
548 }
549 }
550
551 return true;
552 }
553
554 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
555 return false; // cannot yet return from nodename
556 }
557
558 // Return view of all machine coords, transformed or actual
559 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
560 if (is_transformed) {
561 allCoords = transformed_procCoords;
562 }
563 else {
564 allCoords = actual_procCoords;
565 }
566
567 return true;
568 }
569
570 // Return (approx) hop count from rank1 to rank2. Does not account for
571 // Dragonfly's dynamic routing.
572 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
573 hops = 0;
574
575 if (is_transformed) {
576 // Case: ranks in different groups
577 // Does not account for location of group to group connection.
578 // (Most group to group messages will take 5 hops)
579 if (this->transformed_procCoords[0][rank1] !=
580 this->transformed_procCoords[0][rank2])
581 {
582 hops = 5;
583 return true;
584 }
585
586 // Case: ranks in same group
587 // For each 2 differences in transformed_coordinates then
588 // 1 hop
589 for (int i = 1; i < this->transformed_networkDim; ++i) {
590 if (this->transformed_procCoords[i][rank1] !=
591 this->transformed_procCoords[i][rank2])
592 ++hops;
593 }
594 hops /= 2;
595 }
596 else {
597 // Case: ranks in different groups
598 // Does not account for location of group to group connection.
599 // (Nearly all group to group messages will take 5 hops)
600 if (this->actual_procCoords[0][rank1] !=
601 this->actual_procCoords[0][rank2])
602 {
603 hops = 5;
604 return true;
605 }
606
607 // Case: ranks in same group
608 // For each difference in actual_coordinates then
609 // 1 hop
610 for (int i = 1; i < actual_networkDim; ++i) {
611 if (this->actual_procCoords[i][rank1] !=
612 this->actual_procCoords[i][rank2])
613 ++hops;
614 }
615 }
616
617 return true;
618 }
619
620private:
621
622 // # of dimensions in the stored coordinates, transformed or actual
623 int transformed_networkDim;
624 int actual_networkDim;
625
626 // Machine Coordinates
627 pcoord_t **transformed_procCoords;
628 pcoord_t **actual_procCoords;
629
630 // Maximum extents for each dimension, transformed or actual
631 part_t *transformed_machine_extent;
632 part_t *actual_machine_extent;
633
634 // Number of groups (RCA X-dim) with nonzero nodes allocated
635 part_t num_unique_groups;
636 // Distribution of nodes in each group (zero node groups have been trimmed)
637 part_t *group_count;
638
639 // Are our coordinates transformed?
640 bool is_transformed;
641
642 const Teuchos::ParameterList *pl;
643
644 // reduceAll the machine coordinates
645 void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
646 const Teuchos::Comm<int> &comm) {
647 // Reduces and stores all machine coordinates.
648 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
649
650 for (int i = 0; i < netDim; ++i) {
651 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
652 this->numRanks,
653 coords[i], tmpVect);
654 pcoord_t *tmp = tmpVect;
655 tmpVect = coords[i];
656 coords[i] = tmp;
657 }
658 delete [] tmpVect;
659 }
660
661};
662
663} // namespace Zoltan2
664
665#endif
A Dragonfly (e.g. Cori, Trinity, & Theta) Machine Class for task mapping.
MachineDragonflyRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available.
part_t getNumUniqueGroups() const override
getNumUniqueGroups function: returns the number of unique Dragonfly network groups in the provided allocation.
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineDragonflyRCA(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getGroupCount(part_t *grp_count) const override
getGroupCount function: returns the number of ranks in each group (RCA X-dim, i.e. the first dimension).
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t