Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineTorusRCA.hpp
Go to the documentation of this file.
1#ifndef _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
2#define _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
3
4#include <Teuchos_Comm.hpp>
5#include <Teuchos_CommHelpers.hpp>
6#include <Zoltan2_Machine.hpp>
7
8#ifdef HAVE_ZOLTAN2_RCALIB
9extern "C"{
10#include <rca_lib.h>
11}
12#endif
13
14
15namespace Zoltan2{
16
20template <typename pcoord_t, typename part_t>
21class MachineTorusRCA : public Machine <pcoord_t, part_t> {
22
23public:
28 MachineTorusRCA(const Teuchos::Comm<int> &comm):
29 Machine<pcoord_t,part_t>(comm),
30 networkDim(3),
31 actual_networkDim(3),
32 procCoords(NULL),
33 actual_procCoords(NULL),
34 machine_extent(NULL),
35 actual_machine_extent(NULL),
36 is_transformed(false),
37 pl(NULL) {
38
39 actual_machine_extent = machine_extent = new int[networkDim];
40 this->getRealMachineExtent(this->machine_extent);
41 actual_machine_extent = machine_extent;
42
43 //allocate memory for processor coordinates.
44 actual_procCoords = procCoords = new pcoord_t *[networkDim];
45 for (int i = 0; i < networkDim; ++i) {
46 procCoords[i] = new pcoord_t[this->numRanks];
47 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
48 }
49
50 //obtain the coordinate of the processor.
51 pcoord_t *xyz = new pcoord_t[networkDim];
53 for (int i = 0; i < networkDim; i++)
54 procCoords[i][this->myRank] = xyz[i];
55 delete [] xyz;
56
57 //reduceAll the coordinates of each processor.
58 gatherMachineCoordinates(comm);
59 }
60
61 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
62 int dim = 0;
63 int transformed_network_dim = networkDim;
64 if (dim < transformed_network_dim)
65 wrap_around[dim++] = true;
66 if (dim < transformed_network_dim)
67 wrap_around[dim++] = true;
68 if (dim < transformed_network_dim)
69 wrap_around[dim++] = true;
70 return true;
71 }
72
73 MachineTorusRCA(const Teuchos::Comm<int> &comm,
74 const Teuchos::ParameterList &pl_):
75 Machine<pcoord_t,part_t>(comm),
76 networkDim(3),
77 actual_networkDim(3),
78 procCoords(NULL),
79 actual_procCoords(NULL),
80 machine_extent(NULL),
81 actual_machine_extent(NULL),
82 is_transformed(false),
83 pl(&pl_) {
84
85 actual_machine_extent = machine_extent = new int[networkDim];
86 this->getRealMachineExtent(this->machine_extent);
87 actual_machine_extent = machine_extent;
88
89 //allocate memory for processor coordinates.
90 actual_procCoords = procCoords = new pcoord_t *[networkDim];
91 for (int i = 0; i < networkDim; ++i) {
92 procCoords[i] = new pcoord_t[this->numRanks];
93 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
94 }
95 //obtain the coordinate of the processor.
96 pcoord_t *xyz = new pcoord_t[networkDim];
98 for (int i = 0; i < networkDim; i++)
99 procCoords[i][this->myRank] = xyz[i];
100 delete [] xyz;
101
102
103 //reduceAll the coordinates of each processor.
104 gatherMachineCoordinates(comm);
105
106 const Teuchos::ParameterEntry *pe2 =
107 this->pl->getEntryPtr("Machine_Optimization_Level");
108// this->printAllocation();
109
110 if (pe2) {
111 int optimization_level;
112 optimization_level = pe2->getValue<int>(&optimization_level);
113
114 if (optimization_level == 1) {
115 is_transformed = true;
116 this->networkDim = 3;
117 procCoords = new pcoord_t * [networkDim];
118 for(int i = 0; i < networkDim; ++i) {
119 procCoords[i] = new pcoord_t[this->numRanks] ;
120 //this->proc_coords[permutation[i]];
121 }
122 for (int i = 0; i < this->numRanks; ++i) {
123 procCoords[0][i] = this->actual_procCoords[0][i] * 8;
124 int yordinal = this->actual_procCoords[1][i];
125 procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
126 int zordinal = this->actual_procCoords[2][i];
127 procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
128 }
129 int mx = this->machine_extent[0];
130 int my = this->machine_extent[1];
131 int mz = this->machine_extent[2];
132
133
134 this->machine_extent = new int[networkDim];
135 this->machine_extent[0] = mx * 8;
136 this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
137 this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
138 if(this->myRank == 0)
139 std::cout << "Transforming the coordinates" << std::endl;
140// this->printAllocation();
141 }
142 else if(optimization_level >= 3) {
143 is_transformed = true;
144 this->networkDim = 6;
145 procCoords = new pcoord_t * [networkDim];
146 for(int i = 0; i < networkDim; ++i) {
147 procCoords[i] = new pcoord_t[this->numRanks] ;
148// this->proc_coords[permutation[i]];
149 }
150
151// this->machine_extent[0] = this->actual_machine_extent
152 this->machine_extent = new int[networkDim];
153
154 this->machine_extent[0] =
155 ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
156 this->machine_extent[3] = 2 * 8 ;
157 this->machine_extent[1] =
158 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
159 this->machine_extent[4] = 2 * 8;
160 this->machine_extent[2] =
161 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
162 this->machine_extent[5] = 8 * 5;
163
164 for (int k = 0; k < this->numRanks ; k++) {
165 // This part is for titan.
166 // But it holds for other 3D torus machines such as Bluewaters.
167
168 // Bandwitdh along
169 // X = 75
170 // Y = 37.5 or 75 --- everyother has 37.5
171 // --- Y[0-1] =75 but Y[1-2]=37.5
172 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
173
174 // Along X we make groups of 2. Then scale the distance with 64.
175 // First dimension is represents x/2
176 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
177 // Then the 3rd dimension is x%2. distance is scaled with 8,
178 // reversely proportional with bw=75
179 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
180
181 // Along Y. Every other one has the slowest link. So we want
182 // distances between Y/2 huge.
183 // We scale Y/2 with 2400 so that we make sure that it is the
184 // first one we divie.
185 procCoords[1][k] =
186 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
187 // The other one is scaled with 8 as in X.
188 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
189
190 // We make groups of 8 along Z. Then distances between these
191 // groups are scaled with 160.
192 // So that it is more than 2x distance than the distance with
193 // X grouping.
194 // That is we scale the groups of Zs with 160. Groups of X with 64.
195 // Zs has 8 processors connecting them, while X has only one. We
196 // want to divide along
197 // Z twice before dividing along X.
198 procCoords[2][k] =
199 ((int (this->actual_procCoords[2][k])) / 8) * 160;
200 // In the second group everything is scaled with 5, as bw=120
201 procCoords[5][k] =
202 ((int (this->actual_procCoords[2][k])) % 8) * 5;
203 }
204 }
205 else if(optimization_level == 2) {
206 // This is as above case. but we make groups of 3 along X instead.
207 is_transformed = true;
208 this->networkDim = 6;
209 procCoords = new pcoord_t * [networkDim];
210 for(int i = 0; i < networkDim; ++i) {
211 procCoords[i] = new pcoord_t[this->numRanks] ;
212// this->proc_coords[permutation[i]];
213 }
214
215// this->machine_extent[0] = this->actual_machine_extent
216 this->machine_extent = new int[networkDim];
217
218 this->machine_extent[0] =
219 ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
220 this->machine_extent[3] = 3 * 8 ;
221 this->machine_extent[1] =
222 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
223 this->machine_extent[4] = 2 * 8;
224 this->machine_extent[2] =
225 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
226 this->machine_extent[5] = 8 * 5;
227
228
229 for (int k = 0; k < this->numRanks ; k++) {
230 // This part is for titan.
231 // But it holds for other 3D torus machines such as Bluewaters.
232
233 // Bandwitdh along
234 // X = 75
235 // Y = 37.5 or 75 --- everyother has 37.5
236 // --- Y[0-1] =75 but Y[1-2]=37.5
237 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
238
239 // In this case we make groups of 3. along X.
240 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
241 // Then the 3rd dimension is x%2. distance is scaled with 8,
242 // reversely proportional with bw=75
243 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
244
245 // Along Y. Every other one has the slowest link. So we want
246 // distances between Y/2 huge.
247 // We scale Y/2 with 2400 so that we make sure that it is the
248 // first one we divie.
249 procCoords[1][k] =
250 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
251 // The other one is scaled with 8 as in X.
252 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
253
254
255 procCoords[2][k] =
256 ((int (this->actual_procCoords[2][k])) / 8) * 160;
257 // In the second group everything is scaled with 5, as bw=120
258 procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
259 }
260 }
261 }
262 }
263
264
265
266
268 if (is_transformed) {
269 is_transformed = false;
270 for (int i = 0; i < actual_networkDim; i++) {
271 delete [] actual_procCoords[i];
272 }
273 delete [] actual_procCoords;
274 delete [] actual_machine_extent;
275 }
276 for (int i = 0; i < networkDim; i++) {
277 delete [] procCoords[i];
278 }
279 delete [] procCoords;
280 delete [] machine_extent;
281 }
282
283 bool hasMachineCoordinates() const { return true; }
284
285 int getMachineDim() const { return this->networkDim; }
286 int getRealMachineDim() const { return this->actual_networkDim; }
287
288 bool getMachineExtent(int *nxyz) const {
289 if (is_transformed) {
290 return false;
291 }
292 else {
293 int dim = 0;
294 nxyz[dim++] = this->machine_extent[0]; // X
295 nxyz[dim++] = this->machine_extent[1]; // Y
296 nxyz[dim++] = this->machine_extent[2]; // Z
297 return true;
298 }
299 }
300
301 bool getRealMachineExtent(int *nxyz) const {
302#if defined (HAVE_ZOLTAN2_RCALIB)
303 mesh_coord_t mxyz;
304 rca_get_max_dimension(&mxyz);
305 int dim = 0;
306 nxyz[dim++] = mxyz.mesh_x + 1; // X
307 nxyz[dim++] = mxyz.mesh_y + 1; // Y
308 nxyz[dim++] = mxyz.mesh_z + 1; // Z
309 return true;
310#else
311 return false;
312#endif
313 }
314
315
317 if(this->myRank == 0) {
318 for (int i = 0; i < this->numRanks; ++i) {
319 std::cout << "Rank:" << i
320 << " " << procCoords[0][i]
321 << " " << procCoords[1][i]
322 << " " << procCoords[2][i] << std::endl;
323 }
324 std::cout << "Machine Extent:"
325 << " " << this->machine_extent[0]
326 << " " << this->machine_extent[1]
327 << " " << this->machine_extent[2] << std::endl;
328 }
329 }
330
331 bool getMyMachineCoordinate(pcoord_t *xyz) {
332 for (int i = 0; i < this->networkDim; ++i) {
333 xyz[i] = procCoords[i][this->myRank];
334 }
335 return true;
336 }
337
338 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
339#if defined (HAVE_ZOLTAN2_RCALIB)
340 rs_node_t nodeInfo; /* Cray node info for node running this function */
341 rca_get_nodeid(&nodeInfo);
342 int NIDs = (int)nodeInfo.rs_node_s._node_id; /* its node ID */
343
344 mesh_coord_t node_coord;
345 int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
346 if (returnval == -1) {
347 return false;
348 }
349 xyz[0] = node_coord.mesh_x;
350 xyz[1] = node_coord.mesh_y;
351 xyz[2] = node_coord.mesh_z;
352 return true;
353#else
354 return false;
355#endif
356 }
357
358 inline bool getMachineCoordinate(const int rank,
359 pcoord_t *xyz) const {
360 for (int i = 0; i < this->networkDim; ++i) {
361 xyz[i] = procCoords[i][rank];
362 }
363 return true;
364 }
365
366
367 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
368 return false; // cannot yet return from nodename
369 }
370
371 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
372 allCoords = procCoords;
373 return true;
374 }
375
376 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
377 hops = 0;
378 for (int i = 0; i < networkDim; ++i) {
379 pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
380 if (distance < 0)
381 distance = -distance;
382 if (machine_extent[i] - distance < distance)
383 distance = machine_extent[i] - distance;
384 hops += distance;
385 }
386 return true;
387 }
388
389
390private:
391
392 int networkDim;
393 int actual_networkDim;
394
395 pcoord_t **procCoords;
396 pcoord_t **actual_procCoords;
397
398 part_t *machine_extent;
399 part_t *actual_machine_extent;
400 bool is_transformed;
401
402
403 const Teuchos::ParameterList *pl;
404
405/*
406 bool delete_transformed_coords;
407 int transformed_network_dim;
408 pcoord_t **transformed_coordinates;
409*/
410
411 void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
412 // reduces and stores all machine coordinates.
413 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
414
415 for (int i = 0; i < networkDim; i++) {
416 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
417 this->numRanks,
418 procCoords[i], tmpVect);
419 pcoord_t *tmp = tmpVect;
420 tmpVect = procCoords[i];
421 procCoords[i] = tmp;
422 }
423 delete [] tmpVect;
424 }
425
426};
427
428} // namespace Zoltan2
429#endif
An RCA Machine class on Torus Networks.
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getRealMachineExtent(int *nxyz) const
MachineTorusRCA(const Teuchos::Comm< int > &comm)
Constructor: An RCA torus network machine description.
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available.
MachineTorusRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMyMachineCoordinate(pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool getMachineExtent(int *nxyz) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMyActualMachineCoordinate(pcoord_t *xyz)
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t