Zoltan2
Loading...
Searching...
No Matches
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Zoltan2: A package of combinatorial algorithms for scientific computing
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Karen Devine (kddevin@sandia.gov)
39// Erik Boman (egboman@sandia.gov)
40// Siva Rajamanickam (srajama@sandia.gov)
41//
42// ***********************************************************************
43//
44// @HEADER
45
50#ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
51#define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
52
53namespace Zoltan2 {
54template <typename Adapter>
55class PartitioningSolution;
56}
57
59#include <Zoltan2_Solution.hpp>
60#include <Zoltan2_GreedyMWM.hpp>
61#include <Zoltan2_Algorithm.hpp>
63#include <cmath>
64#include <algorithm>
65#include <vector>
66#include <limits>
67
68#ifdef _MSC_VER
69#define NOMINMAX
70#include <windows.h>
71#endif
72
73
74namespace Zoltan2 {
75
90template <typename Adapter>
92{
93public:
94
95#ifndef DOXYGEN_SHOULD_SKIP_THIS
96 typedef typename Adapter::gno_t gno_t;
97 typedef typename Adapter::scalar_t scalar_t;
98 typedef typename Adapter::lno_t lno_t;
99 typedef typename Adapter::part_t part_t;
100 typedef typename Adapter::user_t user_t;
101#endif
102
120 PartitioningSolution( const RCP<const Environment> &env,
121 const RCP<const Comm<int> > &comm,
122 int nUserWeights,
123 const RCP<Algorithm<Adapter> > &algorithm = Teuchos::null);
124
154 PartitioningSolution(const RCP<const Environment> &env,
155 const RCP<const Comm<int> > &comm,
156 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
157 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
158 const RCP<Algorithm<Adapter> > &algorithm = Teuchos::null);
159
161 // Information that the algorithm may wish to query.
162
165 size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
166
169 size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
170
173 size_t getLocalNumberOfParts() const { return nLocalParts_; }
174
182 scalar_t getLocalFractionOfPart() const { return localFraction_; }
183
193 bool oneToOnePartDistribution() const { return onePartPerProc_; }
194
214 const int *getPartDistribution() const {
215 if (partDist_.size() > 0) return &partDist_[0];
216 else return NULL;
217 }
218
236 if (procDist_.size() > 0) return &procDist_[0];
237 else return NULL;
238 }
239
243 int getNumberOfCriteria() const { return nWeightsPerObj_; }
244
245
252 bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
253
266 scalar_t getCriteriaPartSize(int idx, part_t part) const {
267 if (pSizeUniform_[idx])
268 return 1.0 / nGlobalParts_;
269 else if (pCompactIndex_[idx].size())
270 return pSize_[idx][pCompactIndex_[idx][part]];
271 else
272 return pSize_[idx][part];
273 }
274
288 bool criteriaHaveSamePartSizes(int c1, int c2) const;
289
291 // Method used by the algorithm to set results.
292
319 void setParts(ArrayRCP<part_t> &partList);
320
322
334 void RemapParts();
335
337 /* Return the weight of objects staying with a given remap.
338 * If remap is NULL, compute weight of objects staying with given partition
339 */
340 long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt,
341 part_t nrhs, part_t nlhs)
342 {
343 long staying = 0;
344 for (part_t i = 0; i < nrhs; i++) {
345 part_t k = (remap ? remap[i] : i);
346 for (part_t j = idx[k]; j < idx[k+1]; j++) {
347 if (i == (adj[j]-nlhs)) {
348 staying += wgt[j];
349 break;
350 }
351 }
352 }
353 return staying;
354 }
355
357 // Results that may be queried by the user, by migration methods,
358 // or by metric calculation methods.
359 // We return raw pointers so users don't have to learn about our
360 // pointer wrappers.
361
364 inline const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
365
368 inline const RCP<const Environment> &getEnvironment() const { return env_;}
369
372 const part_t *getPartListView() const {
373 if (parts_.size() > 0) return parts_.getRawPtr();
374 else return NULL;
375 }
376
381 const int *getProcListView() const {
382 if (procs_.size() > 0) return procs_.getRawPtr();
383 else return NULL;
384 }
385
388 virtual bool isPartitioningTreeBinary() const
389 {
390 if (this->algorithm_ == Teuchos::null)
391 throw std::logic_error("no partitioning algorithm has been run yet");
392 return this->algorithm_->isPartitioningTreeBinary();
393 }
394
397 void getPartitionTree(part_t & numTreeVerts,
398 std::vector<part_t> & permPartNums,
399 std::vector<part_t> & splitRangeBeg,
400 std::vector<part_t> & splitRangeEnd,
401 std::vector<part_t> & treeVertParents) const {
402
403 part_t numParts = static_cast<part_t>(getTargetGlobalNumberOfParts());
404
405 if (this->algorithm_ == Teuchos::null)
406 throw std::logic_error("no partitioning algorithm has been run yet");
407 this->algorithm_->getPartitionTree(
408 numParts, // may want to change how this is passed through
409 numTreeVerts,
410 permPartNums,
411 splitRangeBeg,
412 splitRangeEnd,
413 treeVertParents);
414 }
415
418 std::vector<Zoltan2::coordinateModelPartBox> &
420 {
421 return this->algorithm_->getPartBoxesView();
422 }
423
425 // when a point lies on a part boundary, the lowest part
426 // number on that boundary is returned.
427 // Note that not all partitioning algorithms will support
428 // this method.
429 //
430 // \param dim : the number of dimensions specified for the point in space
431 // \param point : the coordinates of the point in space; array of size dim
432 // \return the part number of a part overlapping the given point
433 part_t pointAssign(int dim, scalar_t *point) const
434 {
435 part_t p;
436 try {
437 if (this->algorithm_ == Teuchos::null)
438 throw std::logic_error("no partitioning algorithm has been run yet");
439
440 p = this->algorithm_->pointAssign(dim, point);
441 }
443 return p;
444 }
445
447 // This method allocates memory for the return argument, but does not
448 // control that memory. The user is responsible for freeing the
449 // memory.
450 //
451 // \param dim : (in) the number of dimensions specified for the box
452 // \param lower : (in) the coordinates of the lower corner of the box;
453 // array of size dim
454 // \param upper : (in) the coordinates of the upper corner of the box;
455 // array of size dim
456 // \param nPartsFound : (out) the number of parts overlapping the box
457 // \param partsFound : (out) array of parts overlapping the box
458 void boxAssign(int dim, scalar_t *lower, scalar_t *upper,
459 size_t &nPartsFound, part_t **partsFound) const
460 {
461 try {
462 if (this->algorithm_ == Teuchos::null)
463 throw std::logic_error("no partitioning algorithm has been run yet");
464
465 this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
466 }
468 }
469
470
473 void getCommunicationGraph(ArrayRCP <part_t> &comXAdj,
474 ArrayRCP <part_t> &comAdj) const
475 {
476 try {
477 if (this->algorithm_ == Teuchos::null)
478 throw std::logic_error("no partitioning algorithm has been run yet");
479
480 this->algorithm_->getCommunicationGraph(this, comXAdj, comAdj);
481 }
483 }
484
502 void getPartsForProc(int procId, double &numParts, part_t &partMin,
503 part_t &partMax) const
504 {
505 env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
506 procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
507
508 procToPartsMap(procId, numParts, partMin, partMax);
509 }
510
522 void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
523 {
524 env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
525 partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
526
527 partToProcsMap(partId, procMin, procMax);
528 }
529
530private:
531 void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
532 int numLocalParts, int numGlobalParts);
533
534 void procToPartsMap(int procId, double &numParts, part_t &partMin,
535 part_t &partMax) const;
536
537 void partToProcsMap(part_t partId, int &procMin, int &procMax) const;
538
539 void setPartDistribution();
540
541 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
542 ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
543
544 void computePartSizes(int widx, ArrayView<part_t> ids,
545 ArrayView<scalar_t> sizes);
546
547 void broadcastPartSizes(int widx);
548
549
550 RCP<const Environment> env_; // has application communicator
551 const RCP<const Comm<int> > comm_; // the problem communicator
552
553 //part box boundaries as a result of geometric partitioning algorithm.
554 RCP<std::vector<Zoltan2::coordinateModelPartBox> > partBoxes;
555
556 part_t nGlobalParts_;// target global number of parts
557 part_t nLocalParts_; // number of parts to be on this process
558
559 scalar_t localFraction_; // approx fraction of a part on this process
560 int nWeightsPerObj_; // if user has no weights, this is 1 TODO: WHY???
561
562 // If process p is to be assigned part p for all p, then onePartPerProc_
563 // is true. Otherwise it is false, and either procDist_ or partDist_
564 // describes the allocation of parts to processes.
565 //
566 // If parts are never split across processes, then procDist_ is defined
567 // as follows:
568 //
569 // partId = procDist_[procId]
570 // partIdNext = procDist_[procId+1]
571 // globalNumberOfParts = procDist_[numProcs]
572 //
573 // meaning that the parts assigned to process procId range from
574 // [partId, partIdNext). If partIdNext is the same as partId, then
575 // process procId has no parts.
576 //
577 // If the number parts is less than the number of processes, and the
578 // user did not specify "num_local_parts" for each of the processes, then
579 // parts are split across processes, and partDist_ is defined rather than
580 // procDist_.
581 //
582 // procId = partDist_[partId]
583 // procIdNext = partDist_[partId+1]
584 // globalNumberOfProcs = partDist_[numParts]
585 //
586 // which implies that the part partId is shared by processes in the
587 // the range [procId, procIdNext).
588 //
589 // We use std::vector so we can use upper_bound algorithm
590
591 bool onePartPerProc_; // either this is true...
592 std::vector<int> partDist_; // or this is defined ...
593 std::vector<part_t> procDist_; // or this is defined.
594 bool procDistEquallySpread_; // if procDist_ is used and
595 // #parts > #procs and
596 // num_local_parts is not specified,
597 // parts are evenly distributed to procs
598
599 // In order to minimize the storage required for part sizes, we
600 // have three different representations.
601 //
602 // If the part sizes for weight index w are all the same, then:
603 // pSizeUniform_[w] = true
604 // pCompactIndex_[w].size() = 0
605 // pSize_[w].size() = 0
606 //
607 // and the size for part p is 1.0 / nparts.
608 //
609 // If part sizes differ for each part in weight index w, but there
610 // are no more than 64 distinct sizes:
611 // pSizeUniform_[w] = false
612 // pCompactIndex_[w].size() = number of parts
613 // pSize_[w].size() = number of different sizes
614 //
615 // and the size for part p is pSize_[pCompactIndex_[p]].
616 //
617 // If part sizes differ for each part in weight index w, and there
618 // are more than 64 distinct sizes:
619 // pSizeUniform_[w] = false
620 // pCompactIndex_[w].size() = 0
621 // pSize_[w].size() = nparts
622 //
623 // and the size for part p is pSize_[p].
624 //
625 // NOTE: If we expect to have similar cases, i.e. a long list of scalars
626 // where it is highly possible that just a few unique values appear,
627 // then we may want to make this a class. The advantage is that we
628 // save a long list of 1-byte indices instead of a long list of scalars.
629
630 ArrayRCP<bool> pSizeUniform_;
631 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
632 ArrayRCP<ArrayRCP<scalar_t> > pSize_;
633
635 // The algorithm sets these values upon completion.
636
637 ArrayRCP<part_t> parts_; // part number assigned to localid[i]
638
639 bool haveSolution_;
640
641 part_t nGlobalPartsSolution_; // global number of parts in solution
642
644 // The solution calculates this from the part assignments,
645 // unless onePartPerProc_.
646
647 ArrayRCP<int> procs_; // process rank assigned to localid[i]
648
650 // Algorithm used to compute the solution;
651 // needed for post-processing with pointAssign or getCommunicationGraph
652 const RCP<Algorithm<Adapter> > algorithm_; //
653};
654
656// Definitions
658
659template <typename Adapter>
661 const RCP<const Environment> &env,
662 const RCP<const Comm<int> > &comm,
663 int nUserWeights,
664 const RCP<Algorithm<Adapter> > &algorithm)
665 : env_(env), comm_(comm),
666 partBoxes(),
667 nGlobalParts_(0), nLocalParts_(0),
668 localFraction_(0), nWeightsPerObj_(),
669 onePartPerProc_(false), partDist_(), procDist_(),
670 procDistEquallySpread_(false),
671 pSizeUniform_(), pCompactIndex_(), pSize_(),
672 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
673 procs_(), algorithm_(algorithm)
674{
675 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO?
676
677 setPartDistribution();
678
679 // We must call setPartSizes() because part sizes may have
680 // been provided by the user on other processes.
681
682 ArrayRCP<part_t> *noIds = new ArrayRCP<part_t> [nWeightsPerObj_];
683 ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_];
684 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, true);
685 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true);
686
687 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
688
689 env_->memory("After construction of solution");
690}
691
692template <typename Adapter>
694 const RCP<const Environment> &env,
695 const RCP<const Comm<int> > &comm,
696 int nUserWeights,
697 ArrayView<ArrayRCP<part_t> > reqPartIds,
698 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
699 const RCP<Algorithm<Adapter> > &algorithm)
700 : env_(env), comm_(comm),
701 partBoxes(),
702 nGlobalParts_(0), nLocalParts_(0),
703 localFraction_(0), nWeightsPerObj_(),
704 onePartPerProc_(false), partDist_(), procDist_(),
705 procDistEquallySpread_(false),
706 pSizeUniform_(), pCompactIndex_(), pSize_(),
707 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
708 procs_(), algorithm_(algorithm)
709{
710 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO?
711
712 setPartDistribution();
713
714 setPartSizes(reqPartIds, reqPartSizes);
715
716 env_->memory("After construction of solution");
717}
718
719template <typename Adapter>
721{
722 // Did the caller define num_global_parts and/or num_local_parts?
723
724 const ParameterList &pl = env_->getParameters();
725 size_t haveGlobalNumParts=0, haveLocalNumParts=0;
726 int numLocal=0, numGlobal=0;
727
728 const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
729
730 if (pe){
731 haveGlobalNumParts = 1;
732 nGlobalParts_ = part_t(pe->getValue(&nGlobalParts_));
733 numGlobal = nGlobalParts_;
734 }
735
736 pe = pl.getEntryPtr("num_local_parts");
737
738 if (pe){
739 haveLocalNumParts = 1;
740 nLocalParts_ = part_t(pe->getValue(&nLocalParts_));
741 numLocal = nLocalParts_;
742 }
743
744 try{
745 // Sets onePartPerProc_, partDist_, and procDist_
746
747 partToProc(true, haveLocalNumParts, haveGlobalNumParts,
748 numLocal, numGlobal);
749 }
751
752 int nprocs = comm_->getSize();
753 int rank = comm_->getRank();
754
755 if (onePartPerProc_){
756 nGlobalParts_ = nprocs;
757 nLocalParts_ = 1;
758 }
759 else if (partDist_.size() > 0){ // more procs than parts
760 nGlobalParts_ = partDist_.size() - 1;
761 int pstart = partDist_[0];
762 for (part_t i=1; i <= nGlobalParts_; i++){
763 int pend = partDist_[i];
764 if (rank >= pstart && rank < pend){
765 int numOwners = pend - pstart;
766 nLocalParts_ = 1;
767 localFraction_ = 1.0 / numOwners;
768 break;
769 }
770 pstart = pend;
771 }
772 }
773 else if (procDist_.size() > 0){ // more parts than procs
774 nGlobalParts_ = procDist_[nprocs];
775 nLocalParts_ = procDist_[rank+1] - procDist_[rank];
776 }
777 else {
778 throw std::logic_error("partToProc error");
779 }
780
781}
782
783template <typename Adapter>
784 void PartitioningSolution<Adapter>::setPartSizes(
785 ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
786{
787 int widx = nWeightsPerObj_;
788 bool fail=false;
789
790 size_t *countBuf = new size_t [widx*2];
791 ArrayRCP<size_t> counts(countBuf, 0, widx*2, true);
792
793 fail = ((ids.size() != widx) || (sizes.size() != widx));
794
795 for (int w=0; !fail && w < widx; w++){
796 counts[w] = ids[w].size();
797 if (ids[w].size() != sizes[w].size()) fail=true;
798 }
799
800 env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0,
801 COMPLEX_ASSERTION, comm_);
802
803 // Are all part sizes the same? This is the common case.
804
805 ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx];
806 pSize_ = arcp(emptySizes, 0, widx);
807
808 ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx];
809 pCompactIndex_ = arcp(emptyIndices, 0, widx);
810
811 bool *info = new bool [widx];
812 pSizeUniform_ = arcp(info, 0, widx);
813 for (int w=0; w < widx; w++)
814 pSizeUniform_[w] = true;
815
816 if (nGlobalParts_ == 1){
817 return; // there's only one part in the whole problem
818 }
819
820 size_t *ptr1 = counts.getRawPtr();
821 size_t *ptr2 = counts.getRawPtr() + widx;
822
823 try{
824 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
825 }
827
828 bool zero = true;
829
830 for (int w=0; w < widx; w++)
831 if (counts[widx+w] > 0){
832 zero = false;
833 pSizeUniform_[w] = false;
834 }
835
836 if (zero) // Part sizes for all criteria are uniform.
837 return;
838
839 // Compute the part sizes for criteria for which part sizes were
840 // supplied. Normalize for each criteria so part sizes sum to one.
841
842 int nprocs = comm_->getSize();
843 int rank = comm_->getRank();
844
845 for (int w=0; w < widx; w++){
846 if (pSizeUniform_[w]) continue;
847
848 // Send all ids and sizes to one process.
849 // (There is no simple gather method in Teuchos.)
850
851 part_t length = ids[w].size();
852 part_t *allLength = new part_t [nprocs];
853 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
854 nprocs, allLength);
855
856 if (rank == 0){
857 int total = 0;
858 for (int i=0; i < nprocs; i++)
859 total += allLength[i];
860
861 part_t *partNums = new part_t [total];
862 scalar_t *partSizes = new scalar_t [total];
863
864 ArrayView<part_t> idArray(partNums, total);
865 ArrayView<scalar_t> sizeArray(partSizes, total);
866
867 if (length > 0){
868 for (int i=0; i < length; i++){
869 *partNums++ = ids[w][i];
870 *partSizes++ = sizes[w][i];
871 }
872 }
873
874 for (int p=1; p < nprocs; p++){
875 if (allLength[p] > 0){
876 Teuchos::receive<int, part_t>(*comm_, p,
877 allLength[p], partNums);
878 Teuchos::receive<int, scalar_t>(*comm_, p,
879 allLength[p], partSizes);
880 partNums += allLength[p];
881 partSizes += allLength[p];
882 }
883 }
884
885 delete [] allLength;
886
887 try{
888 computePartSizes(w, idArray, sizeArray);
889 }
891
892 delete [] idArray.getRawPtr();
893 delete [] sizeArray.getRawPtr();
894 }
895 else{
896 delete [] allLength;
897 if (length > 0){
898 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
899 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
900 }
901 }
902
903 broadcastPartSizes(w);
904 }
905}
906
907template <typename Adapter>
908 void PartitioningSolution<Adapter>::broadcastPartSizes(int widx)
909{
910 env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
911 pSize_.size()>widx &&
912 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
914
915 int rank = comm_->getRank();
916 int nprocs = comm_->getSize();
917 part_t nparts = nGlobalParts_;
918
919 if (nprocs < 2)
920 return;
921
922 char flag=0;
923
924 if (rank == 0){
925 if (pSizeUniform_[widx] == true)
926 flag = 1;
927 else if (pCompactIndex_[widx].size() > 0)
928 flag = 2;
929 else
930 flag = 3;
931 }
932
933 try{
934 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
935 }
937
938 if (flag == 1){
939 if (rank > 0)
940 pSizeUniform_[widx] = true;
941
942 return;
943 }
944
945 if (flag == 2){
946
947 // broadcast the indices into the size list
948
949 unsigned char *idxbuf = NULL;
950
951 if (rank > 0){
952 idxbuf = new unsigned char [nparts];
953 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
954 }
955 else{
956 env_->localBugAssertion(__FILE__, __LINE__, "index list size",
957 pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION);
958 idxbuf = pCompactIndex_[widx].getRawPtr();
959 }
960
961 try{
962 // broadcast of unsigned char is not supported
963 Teuchos::broadcast<int, char>(*comm_, 0, nparts,
964 reinterpret_cast<char *>(idxbuf));
965 }
967
968 if (rank > 0)
969 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true);
970
971 // broadcast the list of different part sizes
972
973 unsigned char maxIdx=0;
974 for (part_t p=0; p < nparts; p++)
975 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
976
977 int numSizes = maxIdx + 1;
978
979 scalar_t *sizeList = NULL;
980
981 if (rank > 0){
982 sizeList = new scalar_t [numSizes];
983 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
984 }
985 else{
986 env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
987 numSizes == pSize_[widx].size(), COMPLEX_ASSERTION);
988
989 sizeList = pSize_[widx].getRawPtr();
990 }
991
992 try{
993 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
994 }
996
997 if (rank > 0)
998 pSize_[widx] = arcp(sizeList, 0, numSizes, true);
999
1000 return;
1001 }
1002
1003 if (flag == 3){
1004
1005 // broadcast the size of each part
1006
1007 scalar_t *sizeList = NULL;
1008
1009 if (rank > 0){
1010 sizeList = new scalar_t [nparts];
1011 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
1012 }
1013 else{
1014 env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
1015 nparts == pSize_[widx].size(), COMPLEX_ASSERTION);
1016
1017 sizeList = pSize_[widx].getRawPtr();
1018 }
1019
1020 try{
1021 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
1022 }
1024
1025 if (rank > 0)
1026 pSize_[widx] = arcp(sizeList, 0, nparts);
1027
1028 return;
1029 }
1030}
1031
1032template <typename Adapter>
1033 void PartitioningSolution<Adapter>::computePartSizes(int widx,
1034 ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1035{
1036 int len = ids.size();
1037
1038 if (len == 0){
1039 pSizeUniform_[widx] = true;
1040 return;
1041 }
1042
1043 env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes",
1044 len>0 && sizes.size()==len, COMPLEX_ASSERTION);
1045
1046 env_->localBugAssertion(__FILE__, __LINE__, "bad index",
1047 widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION);
1048
1049 env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
1050 pSize_.size()>widx &&
1051 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1053
1054 // Check ids and sizes and find min, max and average sizes.
1055 // If sizes are very close to uniform, call them uniform parts.
1056
1057 part_t nparts = nGlobalParts_;
1058 unsigned char *buf = new unsigned char [nparts];
1059 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1060 memset(buf, 0, nparts);
1061 ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
1062
1063 scalar_t epsilon = 10e-5 / nparts;
1064 scalar_t min=sizes[0], max=sizes[0], sum=0;
1065
1066 for (int i=0; i < len; i++){
1067 part_t id = ids[i];
1068 scalar_t size = sizes[i];
1069
1070 env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
1071 id>=0 && id<nparts, BASIC_ASSERTION);
1072
1073 env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
1075
1076 // TODO: we could allow users to specify multiple sizes for the same
1077 // part if we add a parameter that says what we are to do with them:
1078 // add them or take the max.
1079
1080 env_->localInputAssertion(__FILE__, __LINE__,
1081 "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
1082
1083 partIdx[id] = 1; // mark that we have a size for this part
1084
1085 if (size < min) min = size;
1086 if (size > max) max = size;
1087 sum += size;
1088 }
1089
1090 if (sum == 0){
1091
1092 // User has given us a list of parts of size 0, we'll set
1093 // the rest to them to equally sized parts.
1094
1095 scalar_t *allSizes = new scalar_t [2];
1096 env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1097
1098 ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
1099
1100 int numNonZero = nparts - len;
1101
1102 allSizes[0] = 0.0;
1103 allSizes[1] = 1.0 / numNonZero;
1104
1105 for (part_t p=0; p < nparts; p++)
1106 buf[p] = 1; // index to default part size
1107
1108 for (int i=0; i < len; i++)
1109 buf[ids[i]] = 0; // index to part size zero
1110
1111 pSize_[widx] = sizeArray;
1112 pCompactIndex_[widx] = partIdx;
1113
1114 return;
1115 }
1116
1117 if (max - min <= epsilon){
1118 pSizeUniform_[widx] = true;
1119 return;
1120 }
1121
1122 // A size for parts that were not specified:
1123 scalar_t avg = sum / nparts;
1124
1125 // We are going to merge part sizes that are very close. This takes
1126 // computation time now, but can save considerably in the storage of
1127 // all part sizes on each process. For example, a common case may
1128 // be some parts are size 1 and all the rest are size 2.
1129
1130 scalar_t *tmp = new scalar_t [len];
1131 env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1132 memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
1133 ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
1134
1135 std::sort(partSizes.begin(), partSizes.end());
1136
1137 // create a list of sizes that are unique within epsilon
1138
1139 Array<scalar_t> nextUniqueSize;
1140 nextUniqueSize.push_back(partSizes[len-1]); // largest
1141 scalar_t curr = partSizes[len-1];
1142 int avgIndex = len;
1143 bool haveAvg = false;
1144 if (curr - avg <= epsilon)
1145 avgIndex = 0;
1146
1147 for (int i=len-2; i >= 0; i--){
1148 scalar_t val = partSizes[i];
1149 if (curr - val > epsilon){
1150 nextUniqueSize.push_back(val); // the highest in the group
1151 curr = val;
1152 if (avgIndex==len && val > avg && val - avg <= epsilon){
1153 // the average would be in this group
1154 avgIndex = nextUniqueSize.size() - 1;
1155 haveAvg = true;
1156 }
1157 }
1158 }
1159
1160 partSizes.clear();
1161
1162 size_t numSizes = nextUniqueSize.size();
1163 int sizeArrayLen = numSizes;
1164
1165 if (numSizes < 64){
1166 // We can store the size for every part in a compact way.
1167
1168 // Create a list of all sizes in increasing order
1169
1170 if (!haveAvg) sizeArrayLen++; // need to include average
1171
1172 scalar_t *allSizes = new scalar_t [sizeArrayLen];
1173 env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1174 ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
1175
1176 int newAvgIndex = sizeArrayLen;
1177
1178 for (int i=numSizes-1, idx=0; i >= 0; i--){
1179
1180 if (newAvgIndex == sizeArrayLen){
1181
1182 if (haveAvg && i==avgIndex)
1183 newAvgIndex = idx;
1184
1185 else if (!haveAvg && avg < nextUniqueSize[i]){
1186 newAvgIndex = idx;
1187 allSizes[idx++] = avg;
1188 }
1189 }
1190
1191 allSizes[idx++] = nextUniqueSize[i];
1192 }
1193
1194 env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
1195 newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
1196
1197 for (int i=0; i < nparts; i++){
1198 buf[i] = newAvgIndex; // index to default part size
1199 }
1200
1201 sum = (nparts - len) * allSizes[newAvgIndex];
1202
1203 for (int i=0; i < len; i++){
1204 int id = ids[i];
1205 scalar_t size = sizes[i];
1206 int index;
1207
1208 // Find the first size greater than or equal to this size.
1209
1210 if (size < avg && avg - size <= epsilon)
1211 index = newAvgIndex;
1212 else{
1213 typename ArrayRCP<scalar_t>::iterator found =
1214 std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1215
1216 env_->localBugAssertion(__FILE__, __LINE__, "size array",
1217 found != sizeArray.end(), COMPLEX_ASSERTION);
1218
1219 index = found - sizeArray.begin();
1220 }
1221
1222 buf[id] = index;
1223 sum += allSizes[index];
1224 }
1225
1226 for (int i=0; i < sizeArrayLen; i++){
1227 sizeArray[i] /= sum;
1228 }
1229
1230 pCompactIndex_[widx] = partIdx;
1231 pSize_[widx] = sizeArray;
1232 }
1233 else{
1234 // To have access to part sizes, we must store nparts scalar_ts on
1235 // every process. We expect this is a rare case.
1236
1237 tmp = new scalar_t [nparts];
1238 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1239
1240 sum += ((nparts - len) * avg);
1241
1242 for (int i=0; i < nparts; i++){
1243 tmp[i] = avg/sum;
1244 }
1245
1246 for (int i=0; i < len; i++){
1247 tmp[ids[i]] = sizes[i]/sum;
1248 }
1249
1250 pSize_[widx] = arcp(tmp, 0, nparts);
1251 }
1252}
1253
1254template <typename Adapter>
1255 void PartitioningSolution<Adapter>::setParts(ArrayRCP<part_t> &partList)
1256{
1257 env_->debug(DETAILED_STATUS, "Entering setParts");
1258
1259 size_t len = partList.size();
1260
1261 // Find the actual number of parts in the solution, which can
1262 // be more or less than the nGlobalParts_ target.
1263 // (We may want to compute the imbalance of a given solution with
1264 // respect to a desired solution. This solution may have more or
1265 // fewer parts that the desired solution.)
1266
1267 part_t lMax = -1;
1268 part_t lMin = (len > 0 ? std::numeric_limits<part_t>::max() : 0);
1269 part_t gMax, gMin;
1270
1271 for (size_t i = 0; i < len; i++) {
1272 if (partList[i] < lMin) lMin = partList[i];
1273 if (partList[i] > lMax) lMax = partList[i];
1274 }
1275 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1276 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1277
1278 nGlobalPartsSolution_ = gMax - gMin + 1;
1279 parts_ = partList;
1280
1281 // Now determine which process gets each object, if not one-to-one.
1282
1283 if (!onePartPerProc_){
1284
1285 int *procs = new int [len];
1286 env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1287 procs_ = arcp<int>(procs, 0, len);
1288
1289 if (len > 0) {
1290 part_t *parts = partList.getRawPtr();
1291
1292 if (procDist_.size() > 0){ // parts are not split across procs
1293
1294 int procId;
1295 for (size_t i=0; i < len; i++){
1296 partToProcsMap(parts[i], procs[i], procId);
1297 }
1298 }
1299 else{ // harder - we need to split the parts across multiple procs
1300
1301 lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
1302 env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1303 partCounter);
1304
1305 int numProcs = comm_->getSize();
1306
1307 //MD NOTE: there was no initialization for partCounter.
1308 //I added the line below, correct me if I am wrong.
1309 memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);
1310
1311 for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1312 partCounter[parts[i]]++;
1313
1314 lno_t *procCounter = new lno_t [numProcs];
1315 env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1316
1317 int proc1;
1318 int proc2 = partDist_[0];
1319
1320 for (part_t part=1; part < nGlobalParts_; part++){
1321 proc1 = proc2;
1322 proc2 = partDist_[part+1];
1323 int numprocs = proc2 - proc1;
1324
1325 double dNum = partCounter[part];
1326 double dProcs = numprocs;
1327
1328 //std::cout << "dNum:" << dNum << " dProcs:" << dProcs << std::endl;
1329 double each = floor(dNum/dProcs);
1330 double extra = fmod(dNum,dProcs);
1331
1332 for (int proc=proc1, i=0; proc<proc2; proc++, i++){
1333 if (i < extra)
1334 procCounter[proc] = lno_t(each) + 1;
1335 else
1336 procCounter[proc] = lno_t(each);
1337 }
1338 }
1339
1340 delete [] partCounter;
1341
1342 for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1343 if (partList[i] >= nGlobalParts_){
1344 // Solution has more parts that targeted. These
1345 // objects just remain on this process.
1346 procs[i] = comm_->getRank();
1347 continue;
1348 }
1349 part_t partNum = parts[i];
1350 proc1 = partDist_[partNum];
1351 proc2 = partDist_[partNum + 1];
1352
1353 int proc;
1354 for (proc=proc1; proc < proc2; proc++){
1355 if (procCounter[proc] > 0){
1356 procs[i] = proc;
1357 procCounter[proc]--;
1358 break;
1359 }
1360 }
1361 env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
1362 proc < proc2, COMPLEX_ASSERTION);
1363 }
1364
1365 delete [] procCounter;
1366 }
1367 }
1368 }
1369
1370 // Now that parts_ info is back on home process, remap the parts.
1371 // TODO: The parts will be inconsistent with the proc assignments after
1372 // TODO: remapping. This problem will go away after we separate process
1373 // TODO: mapping from setParts. But for MueLu's use case, the part
1374 // TODO: remapping is all that matters; they do not use the process mapping.
1375 bool doRemap = false;
1376 const Teuchos::ParameterEntry *pe =
1377 env_->getParameters().getEntryPtr("remap_parts");
1378 if (pe) doRemap = pe->getValue(&doRemap);
1379 if (doRemap) RemapParts();
1380
1381 haveSolution_ = true;
1382
1383 env_->memory("After Solution has processed algorithm's answer");
1384 env_->debug(DETAILED_STATUS, "Exiting setParts");
1385}
1386
1387
1388template <typename Adapter>
1390 double &numParts, part_t &partMin, part_t &partMax) const
1391{
1392 if (onePartPerProc_){
1393 numParts = 1.0;
1394 partMin = partMax = procId;
1395 }
1396 else if (procDist_.size() > 0){
1397 partMin = procDist_[procId];
1398 partMax = procDist_[procId+1] - 1;
1399 numParts = procDist_[procId+1] - partMin;
1400 }
1401 else{
1402 // find the first p such that partDist_[p] > procId
1403
1404 std::vector<int>::const_iterator entry;
1405 entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1406
1407 size_t partIdx = entry - partDist_.begin();
1408 int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1409 partMin = partMax = int(partIdx) - 1;
1410 numParts = 1.0 / numProcs;
1411 }
1412}
1413
1414template <typename Adapter>
1415 void PartitioningSolution<Adapter>::partToProcsMap(part_t partId,
1416 int &procMin, int &procMax) const
1417{
1418 if (partId >= nGlobalParts_){
1419 // setParts() may be given an initial solution which uses a
1420 // different number of parts than the desired solution. It is
1421 // still a solution. We keep it on this process.
1422 procMin = procMax = comm_->getRank();
1423 }
1424 else if (onePartPerProc_){
1425 procMin = procMax = int(partId);
1426 }
1427 else if (procDist_.size() > 0){
1428 if (procDistEquallySpread_) {
1429 // Avoid binary search.
1430 double fProcs = comm_->getSize();
1431 double fParts = nGlobalParts_;
1432 double each = fParts / fProcs;
1433 procMin = int(partId / each);
1434 while (procDist_[procMin] > partId) procMin--;
1435 while (procDist_[procMin+1] <= partId) procMin++;
1436 procMax = procMin;
1437 }
1438 else {
1439 // find the first p such that procDist_[p] > partId.
1440 // For now, do a binary search.
1441
1442 typename std::vector<part_t>::const_iterator entry;
1443 entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1444
1445 size_t procIdx = entry - procDist_.begin();
1446 procMin = procMax = int(procIdx) - 1;
1447 }
1448 }
1449 else{
1450 procMin = partDist_[partId];
1451 procMax = partDist_[partId+1] - 1;
1452 }
1453}
1454
1455template <typename Adapter>
1457 int c1, int c2) const
1458{
1459 if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1460 throw std::logic_error("criteriaHaveSamePartSizes error");
1461
1462 bool theSame = false;
1463
1464 if (c1 == c2)
1465 theSame = true;
1466
1467 else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
1468 theSame = true;
1469
1470 else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1471 theSame = true;
1472 bool useIndex = pCompactIndex_[c1].size() > 0;
1473 if (useIndex){
1474 for (part_t p=0; theSame && p < nGlobalParts_; p++)
1475 if (pSize_[c1][pCompactIndex_[c1][p]] !=
1476 pSize_[c2][pCompactIndex_[c2][p]])
1477 theSame = false;
1478 }
1479 else{
1480 for (part_t p=0; theSame && p < nGlobalParts_; p++)
1481 if (pSize_[c1][p] != pSize_[c2][p])
1482 theSame = false;
1483 }
1484 }
1485
1486 return theSame;
1487}
1488
1504template <typename Adapter>
1506 bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
1507 int numLocalParts, int numGlobalParts)
1508{
1509#ifdef _MSC_VER
1510 typedef SSIZE_T ssize_t;
1511#endif
1512 int nprocs = comm_->getSize();
1513 ssize_t reducevals[4];
1514 ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1515 ssize_t sumGlobal=0, sumLocal=0;
1516 ssize_t maxGlobal=0, maxLocal=0;
1517 ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1518 numGlobalParts, numLocalParts};
1519
1520 partDist_.clear();
1521 procDist_.clear();
1522
1523 if (doCheck){
1524
1525 try{
1526 reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
1527 }
1529
1530 sumHaveGlobal = reducevals[0];
1531 sumHaveLocal = reducevals[1];
1532 sumGlobal = reducevals[2];
1533 sumLocal = reducevals[3];
1534
1535 env_->localInputAssertion(__FILE__, __LINE__,
1536 "Either all procs specify num_global/local_parts or none do",
1537 (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1538 (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1540 }
1541 else{
1542 if (haveNumLocalParts)
1543 sumLocal = numLocalParts * nprocs;
1544 if (haveNumGlobalParts)
1545 sumGlobal = numGlobalParts * nprocs;
1546
1547 sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1548 sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1549
1550 maxLocal = numLocalParts;
1551 maxGlobal = numGlobalParts;
1552 }
1553
1554 if (!haveNumLocalParts && !haveNumGlobalParts){
1555 onePartPerProc_ = true; // default if user did not specify
1556 return;
1557 }
1558
1559 if (haveNumGlobalParts){
1560 if (doCheck){
1561 vals[0] = numGlobalParts;
1562 vals[1] = numLocalParts;
1563 try{
1564 reduceAll<int, ssize_t>(
1565 *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
1566 }
1568
1569 maxGlobal = reducevals[0];
1570 maxLocal = reducevals[1];
1571
1572 env_->localInputAssertion(__FILE__, __LINE__,
1573 "Value for num_global_parts is different on different processes.",
1574 maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
1575 }
1576
1577 if (sumLocal){
1578 env_->localInputAssertion(__FILE__, __LINE__,
1579 "Sum of num_local_parts does not equal requested num_global_parts",
1580 sumLocal == numGlobalParts, BASIC_ASSERTION);
1581
1582 if (sumLocal == nprocs && maxLocal == 1){
1583 onePartPerProc_ = true; // user specified one part per proc
1584 return;
1585 }
1586 }
1587 else{
1588 if (maxGlobal == nprocs){
1589 onePartPerProc_ = true; // user specified num parts is num procs
1590 return;
1591 }
1592 }
1593 }
1594
1595 // If we are here, we do not have #parts == #procs.
1596
1597 if (sumHaveLocal == nprocs){
1598 //
1599 // We will go by the number of local parts specified.
1600 //
1601
1602 try{
1603 procDist_.resize(nprocs+1);
1604 }
1605 catch (std::exception &){
1606 throw(std::bad_alloc());
1607 }
1608
1609 part_t *procArray = &procDist_[0];
1610
1611 try{
1612 part_t tmp = part_t(numLocalParts);
1613 gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1614 }
1616
1617 procArray[0] = 0;
1618
1619 for (int proc=0; proc < nprocs; proc++)
1620 procArray[proc+1] += procArray[proc];
1621 }
1622 else{
1623 //
1624 // We will allocate global number of parts to the processes.
1625 //
1626 double fParts = numGlobalParts;
1627 double fProcs = nprocs;
1628
1629 if (fParts < fProcs){
1630
1631 try{
1632 partDist_.resize(size_t(fParts+1));
1633 }
1634 catch (std::exception &){
1635 throw(std::bad_alloc());
1636 }
1637
1638 int *partArray = &partDist_[0];
1639
1640 double each = floor(fProcs / fParts);
1641 double extra = fmod(fProcs, fParts);
1642 partDist_[0] = 0;
1643
1644 for (part_t part=0; part < numGlobalParts; part++){
1645 int numOwners = int(each + ((part<extra) ? 1 : 0));
1646 partArray[part+1] = partArray[part] + numOwners;
1647 }
1648
1649 env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
1650 partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
1651 }
1652 else if (fParts > fProcs){
1653
1654 // User did not specify local number of parts per proc;
1655 // Distribute the parts evenly among the procs.
1656
1657 procDistEquallySpread_ = true;
1658
1659 try{
1660 procDist_.resize(size_t(fProcs+1));
1661 }
1662 catch (std::exception &){
1663 throw(std::bad_alloc());
1664 }
1665
1666 part_t *procArray = &procDist_[0];
1667
1668 double each = floor(fParts / fProcs);
1669 double extra = fmod(fParts, fProcs);
1670 procArray[0] = 0;
1671
1672 for (int proc=0; proc < nprocs; proc++){
1673 part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
1674 procArray[proc+1] = procArray[proc] + numParts;
1675 }
1676
1677 env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
1678 procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
1679 }
1680 else{
1681 env_->globalBugAssertion(__FILE__, __LINE__,
1682 "should never get here", 1, COMPLEX_ASSERTION, comm_);
1683 }
1684 }
1685}
1686
1688// Remap a new part assignment vector for maximum overlap with an input
1689// part assignment.
1690//
1691// Assumptions for this version:
1692// input part assignment == processor rank for every local object.
1693// assuming nGlobalParts_ <= num ranks
1694// TODO: Write a version that takes the input part number as input;
1695// this change requires input parts in adapters to be provided in
1696// the Adapter.
1697// TODO: For repartitioning, compare to old remapping results; see Zoltan1.
1698
1699template <typename Adapter>
1701{
1702 size_t len = parts_.size();
1703
1704 part_t me = comm_->getRank();
1705 int np = comm_->getSize();
1706
1707 if (np < nGlobalParts_) {
1708 if (me == 0)
1709 std::cout << "Remapping not yet supported for "
1710 << "num_global_parts " << nGlobalParts_
1711 << " > num procs " << np << std::endl;
1712 return;
1713 }
1714 // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
1715 // and edges between a process vtx and any parts to which that process'
1716 // objects are assigned.
1717 // Weight edge[parts_[i]] by the number of objects that are going from
1718 // this rank to parts_[i].
1719 // We use a std::map, assuming the number of unique parts in the parts_ array
1720 // is small to keep the binary search efficient.
1721 // TODO We use the count of objects to move; should change to SIZE of objects
1722 // to move; need SIZE function in Adapter.
1723
1724 std::map<part_t, long> edges;
1725 long lstaying = 0; // Total num of local objects staying if we keep the
1726 // current mapping. TODO: change to SIZE of local objs
1727 long gstaying = 0; // Total num of objects staying in the current partition
1728
1729 for (size_t i = 0; i < len; i++) {
1730 edges[parts_[i]]++; // TODO Use obj size instead of count
1731 if (parts_[i] == me) lstaying++; // TODO Use obj size instead of count
1732 }
1733
1734 Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1735 &lstaying, &gstaying);
1736//TODO if (gstaying == Adapter::getGlobalNumObjs()) return; // Nothing to do
1737
1738 part_t *remap = NULL;
1739
1740 int nedges = edges.size();
1741
1742 // Gather the graph to rank 0.
1743 part_t tnVtx = np + nGlobalParts_; // total # vertices
1744 int *idx = NULL; // Pointer index into graph adjacencies
1745 int *sizes = NULL; // nedges per rank
1746 if (me == 0) {
1747 idx = new int[tnVtx+1];
1748 sizes = new int[np];
1749 sizes[0] = nedges;
1750 }
1751 if (np > 1)
1752 Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1753
1754 // prefix sum to build the idx array
1755 if (me == 0) {
1756 idx[0] = 0;
1757 for (int i = 0; i < np; i++)
1758 idx[i+1] = idx[i] + sizes[i];
1759 }
1760
1761 // prepare to send edges
1762 int cnt = 0;
1763 part_t *bufv = NULL;
1764 long *bufw = NULL;
1765 if (nedges) {
1766 bufv = new part_t[nedges];
1767 bufw = new long[nedges];
1768 // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
1769 for (typename std::map<part_t, long>::iterator it = edges.begin();
1770 it != edges.end(); it++) {
1771 bufv[cnt] = it->first; // target part
1772 bufw[cnt] = it->second; // weight
1773 cnt++;
1774 }
1775 }
1776
1777 // Prepare to receive edges on rank 0
1778 part_t *adj = NULL;
1779 long *wgt = NULL;
1780 if (me == 0) {
1781//SYM adj = new part_t[2*idx[np]]; // need 2x space to symmetrize later
1782//SYM wgt = new long[2*idx[np]]; // need 2x space to symmetrize later
1783 adj = new part_t[idx[np]];
1784 wgt = new long[idx[np]];
1785 }
1786
1787 Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1788 Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1789 delete [] bufv;
1790 delete [] bufw;
1791
1792 // Now have constructed graph on rank 0.
1793 // Call the matching algorithm
1794
1795 int doRemap;
1796 if (me == 0) {
1797 // We have the "LHS" vertices of the bipartite graph; need to create
1798 // "RHS" vertices.
1799 for (int i = 0; i < idx[np]; i++) {
1800 adj[i] += np; // New RHS vertex number; offset by num LHS vertices
1801 }
1802
1803 // Build idx for RHS vertices
1804 for (part_t i = np; i < tnVtx; i++) {
1805 idx[i+1] = idx[i]; // No edges for RHS vertices
1806 }
1807
1808#ifdef KDDKDD_DEBUG
1809 std::cout << "IDX ";
1810 for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
1811 std::cout << std::endl;
1812
1813 std::cout << "ADJ ";
1814 for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
1815 std::cout << std::endl;
1816
1817 std::cout << "WGT ";
1818 for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
1819 std::cout << std::endl;
1820#endif
1821
1822 // Perform matching on the graph
1823 part_t *match = new part_t[tnVtx];
1824 for (part_t i = 0; i < tnVtx; i++) match[i] = i;
1825 part_t nmatches =
1826 Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1827
1828#ifdef KDDKDD_DEBUG
1829 std::cout << "After matching: " << nmatches << " found" << std::endl;
1830 for (part_t i = 0; i < tnVtx; i++)
1831 std::cout << "match[" << i << "] = " << match[i]
1832 << ((match[i] != i &&
1833 (i < np && match[i] != i+np))
1834 ? " *" : " ")
1835 << std::endl;
1836#endif
1837
1838 // See whether there were nontrivial changes in the matching.
1839 bool nontrivial = false;
1840 if (nmatches) {
1841 for (part_t i = 0; i < np; i++) {
1842 if ((match[i] != i) && (match[i] != (i+np))) {
1843 nontrivial = true;
1844 break;
1845 }
1846 }
1847 }
1848
1849 // Process the matches
1850 if (nontrivial) {
1851 remap = new part_t[nGlobalParts_];
1852 for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1853
1854 bool *used = new bool[np];
1855 for (part_t i = 0; i < np; i++) used[i] = false;
1856
1857 // First, process all matched parts
1858 for (part_t i = 0; i < nGlobalParts_; i++) {
1859 part_t tmp = i + np;
1860 if (match[tmp] != tmp) {
1861 remap[i] = match[tmp];
1862 used[match[tmp]] = true;
1863 }
1864 }
1865
1866 // Second, process unmatched parts; keep same part number if possible
1867 for (part_t i = 0; i < nGlobalParts_; i++) {
1868 if (remap[i] > -1) continue;
1869 if (!used[i]) {
1870 remap[i] = i;
1871 used[i] = true;
1872 }
1873 }
1874
1875 // Third, process unmatched parts; give them the next unused part
1876 for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1877 if (remap[i] > -1) continue;
1878 while (used[uidx]) uidx++;
1879 remap[i] = uidx;
1880 used[uidx] = true;
1881 }
1882 delete [] used;
1883 }
1884 delete [] match;
1885
1886#ifdef KDDKDD_DEBUG
1887 std::cout << "Remap vector: ";
1888 for (part_t i = 0; i < nGlobalParts_; i++) std::cout << remap[i] << " ";
1889 std::cout << std::endl;
1890#endif
1891
1892 long newgstaying = measure_stays(remap, idx, adj, wgt,
1893 nGlobalParts_, np);
1894 doRemap = (newgstaying > gstaying);
1895 std::cout << "gstaying " << gstaying << " measure(input) "
1896 << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1897 << " newgstaying " << newgstaying
1898 << " nontrivial " << nontrivial
1899 << " doRemap " << doRemap << std::endl;
1900 }
1901 delete [] idx;
1902 delete [] sizes;
1903 delete [] adj;
1904 delete [] wgt;
1905
1906 Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1907
1908 if (doRemap) {
1909 if (me != 0) remap = new part_t[nGlobalParts_];
1910 Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1911 for (size_t i = 0; i < len; i++) {
1912 parts_[i] = remap[parts_[i]];
1913 }
1914 }
1915
1916 delete [] remap; // TODO May want to keep for repartitioning as in Zoltan
1917}
1918
1919
1920} // namespace Zoltan2
1921
1922#endif
Zoltan2::BasicUserTypes< zscalar_t, zlno_t, zgno_t > user_t
Definition: Metric.cpp:74
Defines the Environment class.
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
Greedy Maximal Weight Matching.
Defines the Solution base class.
Algorithm defines the base class for all algorithms.
A PartitioningSolution is a solution to a partitioning problem.
virtual bool isPartitioningTreeBinary() const
calculate if partition tree is binary.
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
int getNumberOfCriteria() const
Get the number of criteria (object weights)
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
void getPartitionTree(part_t &numTreeVerts, std::vector< part_t > &permPartNums, std::vector< part_t > &splitRangeBeg, std::vector< part_t > &splitRangeEnd, std::vector< part_t > &treeVertParents) const
get the partition tree - fill the relevant arrays
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.
std::vector< Zoltan2::coordinateModelPartBox > & getPartBoxesView() const
returns the part box boundary list.
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
const int * getPartDistribution() const
Return a distribution by part.
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
const part_t * getProcDistribution() const
Return a distribution by process.
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes....
Just a placeholder for now.
map_t::local_ordinal_type lno_t
Definition: mapRemotes.cpp:17
map_t::global_ordinal_type gno_t
Definition: mapRemotes.cpp:18
Created by mbenlioglu on Aug 31, 2020.
static const std::string fail
@ DETAILED_STATUS
sub-steps, each method's entry and exit
@ BASIC_ASSERTION
fast typical checks for valid arguments
@ COMPLEX_ASSERTION
more involved, like validate a graph
#define epsilon
Definition: nd.cpp:82
SparseMatrixAdapter_t::part_t part_t