Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineTorusRCAForTesting.hpp
Go to the documentation of this file.
1#ifndef _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
2#define _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
3
#include <Teuchos_Comm.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Zoltan2_Machine.hpp>

#include <cmath>
#include <cstdlib> /* srand, rand */
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
11
12namespace Zoltan2{
13
18template <typename pcoord_t, typename part_t>
19class MachineTorusRCAForTesting : public Machine <pcoord_t, part_t> {
20
21public:
26 MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm):
27 Machine<pcoord_t,part_t>(comm),
28 networkDim(3), actual_networkDim(3),
29 procCoords(NULL), actual_procCoords(NULL),
30 machine_extent(NULL),actual_machine_extent(NULL),
31 is_transformed(false), pl(NULL)
32 {
33 actual_machine_extent = machine_extent = new int[networkDim];
34 this->getRealMachineExtent(this->machine_extent);
35 actual_machine_extent = machine_extent;
36
37 // Allocate memory for processor coordinates.
38 actual_procCoords = procCoords = new pcoord_t *[networkDim];
39 for (int i = 0; i < networkDim; ++i) {
40 procCoords[i] = new pcoord_t[this->numRanks];
41 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
42 }
43
44 // Obtain the coordinate of the processor.
45 pcoord_t *xyz = new pcoord_t[networkDim];
47 for (int i = 0; i < networkDim; i++)
48 procCoords[i][this->myRank] = xyz[i];
49 delete [] xyz;
50
51
52 // reduceAll the coordinates of each processor.
53 gatherMachineCoordinates(comm);
54 }
55
56 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
57 int dim = 0;
58 int transformed_network_dim = networkDim;
59
60 if (dim < transformed_network_dim)
61 wrap_around[dim++] = true;
62 if (dim < transformed_network_dim)
63 wrap_around[dim++] = true;
64 if (dim < transformed_network_dim)
65 wrap_around[dim++] = true;
66 return true;
67 }
68
69 MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm,
70 const Teuchos::ParameterList &pl_):
71 Machine<pcoord_t,part_t>(comm),
72 networkDim(3), actual_networkDim(3),
73 procCoords(NULL), actual_procCoords(NULL),
74 machine_extent(NULL),actual_machine_extent(NULL),
75 is_transformed(false), pl(&pl_)
76 {
77
78 actual_machine_extent = machine_extent = new int[networkDim];
79 this->getRealMachineExtent(this->machine_extent);
80 actual_machine_extent = machine_extent;
81
82 // Allocate memory for processor coordinates.
83 actual_procCoords = procCoords = new pcoord_t *[networkDim];
84
85
86 const Teuchos::ParameterEntry *pe1 =
87 this->pl->getEntryPtr("Input_RCA_Machine_Coords");
88 if (pe1) {
89 std::string input_coord_file;
90 input_coord_file = pe1->getValue<std::string>(&input_coord_file);
91 if (input_coord_file != "") {
92
93 if (this->myRank == 0) {
94 std::vector < std::vector <pcoord_t> > proc_coords(networkDim);
95 std::fstream machine_coord_file(input_coord_file.c_str());
96
97 part_t i = 0;
98 pcoord_t a,b, c;
99 machine_coord_file >> a >> b >> c;
100 while(!machine_coord_file.eof()) {
101 proc_coords[0].push_back(a);
102 proc_coords[1].push_back(b);
103 proc_coords[2].push_back(c);
104 ++i;
105 machine_coord_file >> a >> b >> c;
106 }
107
108 machine_coord_file.close();
109 std::cout << "Rewriting numprocs from:"
110 << this->numRanks << " to:" << i << std::endl;
111 this->numRanks = i;
112
113 for(int ii = 0; ii < networkDim; ++ii) {
114 procCoords[ii] = new pcoord_t[this->numRanks];
115 for (int j = 0; j < this->numRanks; ++j) {
116 procCoords[ii][j] = proc_coords[ii][j];
117 }
118 }
119 }
120 comm.broadcast(0, sizeof(int), (char *) &(this->numRanks));
121
122 if (this->myRank != 0) {
123 for (int i = 0; i < networkDim; ++i) {
124 procCoords[i] = new pcoord_t[this->numRanks];
125 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
126 }
127 }
128 }
129 }
130 else {
131 for (int i = 0; i < networkDim; ++i) {
132 procCoords[i] = new pcoord_t[this->numRanks];
133 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
134 }
135 // Obtain the coordinate of the processor.
136 pcoord_t *xyz = new pcoord_t[networkDim];
138 for (int i = 0; i < networkDim; i++)
139 procCoords[i][this->myRank] = xyz[i];
140 delete [] xyz;
141 }
142
143 // reduceAll the coordinates of each processor.
144 gatherMachineCoordinates(comm);
145
146 const Teuchos::ParameterEntry *pe2 =
147 this->pl->getEntryPtr("Machine_Optimization_Level");
148// this->printAllocation();
149 if (pe2) {
150 int optimization_level;
151 optimization_level = pe2->getValue<int>(&optimization_level);
152
153 if (optimization_level == 1) {
154 is_transformed = true;
155 this->networkDim = 3;
156 procCoords = new pcoord_t * [networkDim];
157 for(int i = 0; i < networkDim; ++i) {
158 procCoords[i] = new pcoord_t[this->numRanks] ;
159// this->proc_coords[permutation[i]];
160 }
161 for (int i = 0; i < this->numRanks; ++i) {
162 procCoords[0][i] = this->actual_procCoords[0][i] * 8;
163 int yordinal = this->actual_procCoords[1][i];
164 procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
165 int zordinal = this->actual_procCoords[2][i];
166 procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
167 }
168 int mx = this->machine_extent[0];
169 int my = this->machine_extent[1];
170 int mz = this->machine_extent[2];
171
172
173 this->machine_extent = new int[networkDim];
174 this->machine_extent[0] = mx * 8;
175 this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
176 this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
177 if(this->myRank == 0)
178 std::cout << "Transforming the coordinates" << std::endl;
179// this->printAllocation();
180 }
181 else if(optimization_level >= 3) {
182 is_transformed = true;
183 this->networkDim = 6;
184 procCoords = new pcoord_t * [networkDim];
185 for(int i = 0; i < networkDim; ++i) {
186 procCoords[i] = new pcoord_t[this->numRanks] ;
187// this->proc_coords[permutation[i]];
188 }
189
190// this->machine_extent[0] = this->actual_machine_extent
191 this->machine_extent = new int[networkDim];
192
193 this->machine_extent[0] =
194 ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
195 this->machine_extent[3] = 2 * 8 ;
196 this->machine_extent[1] =
197 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
198 this->machine_extent[4] = 2 * 8;
199 this->machine_extent[2] =
200 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
201 this->machine_extent[5] = 8 * 5;
202
203 for (int k = 0; k < this->numRanks ; k++) {
204 // This part is for titan.
205 // But it holds for other 3D torus machines such as Bluewaters.
206
207 // Bandwitdh along
208 // X = 75
209 // Y = 37.5 or 75 --- everyother has 37.5
210 // --- Y[0-1] =75 but Y[1-2]=37.5
211 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
212
213 // Along X we make groups of 2. Then scale the distance with 64.
214 // First dimension is represents x/2
215 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
216 // Then the 3rd dimension is x%2. distance is scaled with 8,
217 // reversely proportional with bw=75
218 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
219
220 // Along Y. Every other one has the slowest link. So we want
221 // distances between Y/2 huge.
222 // We scale Y/2 with 2400 so that we make sure that it is the
223 // first one we divie.
224 procCoords[1][k] =
225 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
226 // The other one is scaled with 8 as in X.
227 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
228
229 // We make groups of 8 along Z. Then distances between these
230 // groups are scaled with 160.
231 // So that it is more than 2x distance than the distance with X
232 // grouping.
233 // That is we scale the groups of Zs with 160. Groups of X with 64.
234 // Zs has 8 processors connecting them, while X has only one. We
235 // want to divide along Z twice before dividing along X.
236 procCoords[2][k] =
237 ((int (this->actual_procCoords[2][k])) / 8) * 160;
238 // In the second group everything is scaled with 5, as bw=120
239 procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
240 }
241 }
242 else if(optimization_level == 2) {
243 // This is as above case. but we make groups of 3 along X instead.
244 is_transformed = true;
245 this->networkDim = 6;
246 procCoords = new pcoord_t * [networkDim];
247 for(int i = 0; i < networkDim; ++i) {
248 procCoords[i] = new pcoord_t[this->numRanks] ;
249// this->proc_coords[permutation[i]];
250 }
251
252// this->machine_extent[0] = this->actual_machine_extent
253 this->machine_extent = new int[networkDim];
254
255 this->machine_extent[0] =
256 ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
257 this->machine_extent[3] = 3 * 8 ;
258 this->machine_extent[1] =
259 ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
260 this->machine_extent[4] = 2 * 8;
261 this->machine_extent[2] =
262 ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
263 this->machine_extent[5] = 8 * 5;
264
265
266 for (int k = 0; k < this->numRanks ; k++) {
267 // This part is for titan.
268 // But it holds for other 3D torus machines such as Bluewaters.
269
270 // Bandwitdh along
271 // X = 75
272 // Y = 37.5 or 75 --- everyother has 37.5
273 // --- Y[0-1] =75 but Y[1-2]=37.5
274 // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
275
276 // In this case we make groups of 3. along X.
277 procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
278 // Then the 3rd dimension is x%2. distance is scaled with 8,
279 // reversely proportional with bw=75
280 procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
281
282 // Along Y. Every other one has the slowest link. So we want
283 // distances between Y/2 huge.
284 // We scale Y/2 with 2400 so that we make sure that it is the
285 // first one we divie.
286 procCoords[1][k] =
287 (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
288 // The other one is scaled with 8 as in X.
289 procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
290
291
292 procCoords[2][k] =
293 ((int (this->actual_procCoords[2][k])) / 8) * 160;
294 // In the second group everything is scaled with 5, as bw=120
295 procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
296 }
297 }
298 }
299 }
300
302 if (is_transformed) {
303 is_transformed = false;
304 for (int i = 0; i < actual_networkDim; i++) {
305 delete [] actual_procCoords[i];
306 }
307 delete [] actual_procCoords;
308 delete [] actual_machine_extent;
309 }
310 for (int i = 0; i < networkDim; i++) {
311 delete [] procCoords[i];
312 }
313 delete [] procCoords;
314 delete [] machine_extent;
315 }
316
317 bool hasMachineCoordinates() const { return true; }
318
319 int getMachineDim() const { return this->networkDim; }
320 int getRealMachineDim() const { return this->actual_networkDim; }
321
322 bool getMachineExtent(int *nxyz) const {
323 if (is_transformed) {
324 return false;
325 }
326 else {
327 int dim = 0;
328 nxyz[dim++] = this->machine_extent[0]; //x
329 nxyz[dim++] = this->machine_extent[1]; //y
330 nxyz[dim++] = this->machine_extent[2]; //z
331 return true;
332 }
333 }
334
335 bool getRealMachineExtent(int *nxyz) const {
336 int dim = 0;
337 nxyz[dim++] = 25; //x
338 nxyz[dim++] = 16; //y
339 nxyz[dim++] = 24; //z
340 return true;
341 }
342
343
345 if(this->myRank == 0) {
346 for (int i = 0; i < this->numRanks; ++i) {
347 std::cout << "Rank:" << i
348 << " " << procCoords[0][i]
349 << " " << procCoords[1][i]
350 << " " << procCoords[2][i] << std::endl;
351 }
352 std::cout << "Machine Extent:"
353 << " " << this->machine_extent[0]
354 << " " << this->machine_extent[1]
355 << " " << this->machine_extent[2] << std::endl;
356 }
357 }
358
359 bool getMyMachineCoordinate(pcoord_t *xyz) {
360 for (int i = 0; i < this->networkDim; ++i) {
361 xyz[i] = procCoords[i][this->myRank];
362 }
363 return true;
364 }
365
366 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
367 xyz[0] = rand() % 25;
368 xyz[1] = rand() % 16;
369 xyz[2] = rand() % 24;
370 return true;
371 }
372
373 inline bool getMachineCoordinate(const int rank,
374 pcoord_t *xyz) const {
375 for (int i = 0; i < this->networkDim; ++i) {
376 xyz[i] = procCoords[i][rank];
377 }
378 return true;
379 }
380
381
382 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
383 return false; // cannot yet return from nodename
384 }
385
386 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
387 allCoords = procCoords;
388 return true;
389 }
390
391 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
392 hops = 0;
393 for (int i = 0; i < networkDim; ++i) {
394 pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
395 if (distance < 0)
396 distance = -distance;
397 if (machine_extent[i] - distance < distance)
398 distance = machine_extent[i] - distance;
399 hops += distance;
400 }
401 return true;
402 }
403
404
405private:
406
407 int networkDim;
408 int actual_networkDim;
409
410 pcoord_t **procCoords;
411 pcoord_t **actual_procCoords;
412
413 part_t *machine_extent;
414 part_t *actual_machine_extent;
415 bool is_transformed;
416
417
418 const Teuchos::ParameterList *pl;
419
420/*
421 bool delete_transformed_coords;
422 int transformed_network_dim;
423 pcoord_t **transformed_coordinates;
424*/
425
426 void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
427 // reduces and stores all machine coordinates.
428 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
429
430 for (int i = 0; i < networkDim; i++) {
431 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
432 this->numRanks,
433 procCoords[i], tmpVect);
434 pcoord_t *tmp = tmpVect;
435 tmpVect = procCoords[i];
436 procCoords[i] = tmp;
437 }
438 delete [] tmpVect;
439 }
440
441};
442
443} // namespace Zoltan2
444#endif
An RCA Machine Class (Torus Networks) for testing only A more realistic machine should be used for ta...
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available.
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: A BlueGeneQ network machine description.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t