50 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_ 51 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_ 54 template <
typename Adapter>
90 template <
typename Adapter>
95 #ifndef DOXYGEN_SHOULD_SKIP_THIS 96 typedef typename Adapter::gno_t gno_t;
97 typedef typename Adapter::scalar_t scalar_t;
98 typedef typename Adapter::lno_t lno_t;
99 typedef typename Adapter::part_t part_t;
100 typedef typename Adapter::user_t user_t;
121 const RCP<
const Comm<int> > &comm,
155 const RCP<
const Comm<int> > &comm,
156 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
157 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
215 if (partDist_.size() > 0)
return &partDist_[0];
236 if (procDist_.size() > 0)
return &procDist_[0];
267 if (pSizeUniform_[idx])
268 return 1.0 / nGlobalParts_;
269 else if (pCompactIndex_[idx].size())
270 return pSize_[idx][pCompactIndex_[idx][part]];
272 return pSize_[idx][part];
319 void setParts(ArrayRCP<part_t> &partList);
341 part_t nrhs, part_t nlhs)
344 for (part_t i = 0; i < nrhs; i++) {
345 part_t k = (remap ? remap[i] : i);
346 for (part_t j = idx[k]; j < idx[k+1]; j++) {
347 if (i == (adj[j]-nlhs)) {
373 if (parts_.size() > 0)
return parts_.getRawPtr();
382 if (procs_.size() > 0)
return procs_.getRawPtr();
389 std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > &
392 return this->algorithm_->getPartBoxesView();
408 if (this->algorithm_ == Teuchos::null)
409 throw std::logic_error(
"no partitioning algorithm has been run yet");
411 p = this->algorithm_->pointAssign(dim, point);
429 void boxAssign(
int dim, scalar_t *lower, scalar_t *upper,
430 size_t &nPartsFound, part_t **partsFound)
const 433 if (this->algorithm_ == Teuchos::null)
434 throw std::logic_error(
"no partitioning algorithm has been run yet");
436 this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
445 ArrayRCP <part_t> &comAdj)
const 448 if (this->algorithm_ == Teuchos::null)
449 throw std::logic_error(
"no partitioning algorithm has been run yet");
451 this->algorithm_->getCommunicationGraph(
this, comXAdj, comAdj);
474 part_t &partMax)
const 476 env_->localInputAssertion(__FILE__, __LINE__,
"invalid process id",
479 procToPartsMap(procId, numParts, partMin, partMax);
495 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
498 partToProcsMap(partId, procMin, procMax);
502 void partToProc(
bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
503 int numLocalParts,
int numGlobalParts);
505 void procToPartsMap(
int procId,
double &numParts, part_t &partMin,
506 part_t &partMax)
const;
508 void partToProcsMap(part_t partId,
int &procMin,
int &procMax)
const;
510 void setPartDistribution();
512 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
513 ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
515 void computePartSizes(
int widx, ArrayView<part_t> ids,
516 ArrayView<scalar_t> sizes);
518 void broadcastPartSizes(
int widx);
521 RCP<const Environment> env_;
522 const RCP<const Comm<int> > comm_;
525 RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > partBoxes;
527 part_t nGlobalParts_;
530 scalar_t localFraction_;
562 bool onePartPerProc_;
563 std::vector<int> partDist_;
564 std::vector<part_t> procDist_;
565 bool procDistEquallySpread_;
601 ArrayRCP<bool> pSizeUniform_;
602 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
603 ArrayRCP<ArrayRCP<scalar_t> > pSize_;
608 ArrayRCP<part_t> parts_;
612 part_t nGlobalPartsSolution_;
618 ArrayRCP<int> procs_;
623 const RCP<Algorithm<Adapter> > algorithm_;
630 template <
typename Adapter>
632 const RCP<const Environment> &env,
633 const RCP<
const Comm<int> > &comm,
636 : env_(env), comm_(comm),
638 nGlobalParts_(0), nLocalParts_(0),
639 localFraction_(0), nWeightsPerObj_(),
640 onePartPerProc_(false), partDist_(), procDist_(),
641 procDistEquallySpread_(false),
642 pSizeUniform_(), pCompactIndex_(), pSize_(),
643 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
644 procs_(), algorithm_(algorithm)
646 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
648 setPartDistribution();
653 ArrayRCP<part_t> *noIds =
new ArrayRCP<part_t> [nWeightsPerObj_];
654 ArrayRCP<scalar_t> *noSizes =
new ArrayRCP<scalar_t> [nWeightsPerObj_];
655 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_,
true);
656 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_,
true);
658 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
660 env_->memory(
"After construction of solution");
663 template <
typename Adapter>
665 const RCP<const Environment> &env,
666 const RCP<
const Comm<int> > &comm,
668 ArrayView<ArrayRCP<part_t> > reqPartIds,
669 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
671 : env_(env), comm_(comm),
673 nGlobalParts_(0), nLocalParts_(0),
674 localFraction_(0), nWeightsPerObj_(),
675 onePartPerProc_(false), partDist_(), procDist_(),
676 procDistEquallySpread_(false),
677 pSizeUniform_(), pCompactIndex_(), pSize_(),
678 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
679 procs_(), algorithm_(algorithm)
681 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
683 setPartDistribution();
685 setPartSizes(reqPartIds, reqPartSizes);
687 env_->memory(
"After construction of solution");
690 template <
typename Adapter>
695 const ParameterList &pl = env_->getParameters();
696 size_t haveGlobalNumParts=0, haveLocalNumParts=0;
697 int numLocal=0, numGlobal=0;
699 const Teuchos::ParameterEntry *pe = pl.getEntryPtr(
"num_global_parts");
702 haveGlobalNumParts = 1;
703 nGlobalParts_ = part_t(pe->getValue(&nGlobalParts_));
704 numGlobal = nGlobalParts_;
707 pe = pl.getEntryPtr(
"num_local_parts");
710 haveLocalNumParts = 1;
711 nLocalParts_ = part_t(pe->getValue(&nLocalParts_));
712 numLocal = nLocalParts_;
718 partToProc(
true, haveLocalNumParts, haveGlobalNumParts,
719 numLocal, numGlobal);
723 int nprocs = comm_->getSize();
724 int rank = comm_->getRank();
726 if (onePartPerProc_){
727 nGlobalParts_ = nprocs;
730 else if (partDist_.size() > 0){
731 nGlobalParts_ = partDist_.size() - 1;
732 int pstart = partDist_[0];
733 for (part_t i=1; i <= nGlobalParts_; i++){
734 int pend = partDist_[i];
735 if (rank >= pstart && rank < pend){
736 int numOwners = pend - pstart;
738 localFraction_ = 1.0 / numOwners;
744 else if (procDist_.size() > 0){
745 nGlobalParts_ = procDist_[nprocs];
746 nLocalParts_ = procDist_[rank+1] - procDist_[rank];
749 throw std::logic_error(
"partToProc error");
754 template <
typename Adapter>
755 void PartitioningSolution<Adapter>::setPartSizes(
756 ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
758 int widx = nWeightsPerObj_;
761 size_t *countBuf =
new size_t [widx*2];
762 ArrayRCP<size_t> counts(countBuf, 0, widx*2,
true);
764 fail = ((ids.size() != widx) || (sizes.size() != widx));
766 for (
int w=0; !
fail && w < widx; w++){
767 counts[w] = ids[w].size();
768 if (ids[w].size() != sizes[w].size())
fail=
true;
771 env_->globalBugAssertion(__FILE__, __LINE__,
"bad argument arrays",
fail==0,
776 ArrayRCP<scalar_t> *emptySizes=
new ArrayRCP<scalar_t> [widx];
777 pSize_ = arcp(emptySizes, 0, widx);
779 ArrayRCP<unsigned char> *emptyIndices=
new ArrayRCP<unsigned char> [widx];
780 pCompactIndex_ = arcp(emptyIndices, 0, widx);
782 bool *info =
new bool [widx];
783 pSizeUniform_ = arcp(info, 0, widx);
784 for (
int w=0; w < widx; w++)
785 pSizeUniform_[w] =
true;
787 if (nGlobalParts_ == 1){
791 size_t *ptr1 = counts.getRawPtr();
792 size_t *ptr2 = counts.getRawPtr() + widx;
795 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
801 for (
int w=0; w < widx; w++)
802 if (counts[widx+w] > 0){
804 pSizeUniform_[w] =
false;
813 int nprocs = comm_->getSize();
814 int rank = comm_->getRank();
816 for (
int w=0; w < widx; w++){
817 if (pSizeUniform_[w])
continue;
822 part_t length = ids[w].size();
823 part_t *allLength =
new part_t [nprocs];
824 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
829 for (
int i=0; i < nprocs; i++)
830 total += allLength[i];
832 part_t *partNums =
new part_t [total];
833 scalar_t *partSizes =
new scalar_t [total];
835 ArrayView<part_t> idArray(partNums, total);
836 ArrayView<scalar_t> sizeArray(partSizes, total);
839 for (
int i=0; i < length; i++){
840 *partNums++ = ids[w][i];
841 *partSizes++ = sizes[w][i];
845 for (
int p=1; p < nprocs; p++){
846 if (allLength[p] > 0){
847 Teuchos::receive<int, part_t>(*comm_, p,
848 allLength[p], partNums);
849 Teuchos::receive<int, scalar_t>(*comm_, p,
850 allLength[p], partSizes);
851 partNums += allLength[p];
852 partSizes += allLength[p];
859 computePartSizes(w, idArray, sizeArray);
863 delete [] idArray.getRawPtr();
864 delete [] sizeArray.getRawPtr();
869 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
870 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
874 broadcastPartSizes(w);
878 template <
typename Adapter>
879 void PartitioningSolution<Adapter>::broadcastPartSizes(
int widx)
881 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
882 pSize_.size()>widx &&
883 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
886 int rank = comm_->getRank();
887 int nprocs = comm_->getSize();
888 part_t nparts = nGlobalParts_;
896 if (pSizeUniform_[widx] ==
true)
898 else if (pCompactIndex_[widx].size() > 0)
905 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
911 pSizeUniform_[widx] =
true;
920 unsigned char *idxbuf = NULL;
923 idxbuf =
new unsigned char [nparts];
924 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
927 env_->localBugAssertion(__FILE__, __LINE__,
"index list size",
929 idxbuf = pCompactIndex_[widx].getRawPtr();
934 Teuchos::broadcast<int, char>(*comm_, 0, nparts,
935 reinterpret_cast<char *
>(idxbuf));
940 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts,
true);
944 unsigned char maxIdx=0;
945 for (part_t p=0; p < nparts; p++)
946 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
948 int numSizes = maxIdx + 1;
950 scalar_t *sizeList = NULL;
953 sizeList =
new scalar_t [numSizes];
954 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
957 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
960 sizeList = pSize_[widx].getRawPtr();
964 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
969 pSize_[widx] = arcp(sizeList, 0, numSizes,
true);
978 scalar_t *sizeList = NULL;
981 sizeList =
new scalar_t [nparts];
982 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
985 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
988 sizeList = pSize_[widx].getRawPtr();
992 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
997 pSize_[widx] = arcp(sizeList, 0, nparts);
1003 template <
typename Adapter>
1004 void PartitioningSolution<Adapter>::computePartSizes(
int widx,
1005 ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1007 int len = ids.size();
1010 pSizeUniform_[widx] =
true;
1014 env_->localBugAssertion(__FILE__, __LINE__,
"bad array sizes",
1017 env_->localBugAssertion(__FILE__, __LINE__,
"bad index",
1020 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
1021 pSize_.size()>widx &&
1022 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1028 part_t nparts = nGlobalParts_;
1029 unsigned char *buf =
new unsigned char [nparts];
1030 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1031 memset(buf, 0, nparts);
1032 ArrayRCP<unsigned char> partIdx(buf, 0, nparts,
true);
1034 scalar_t
epsilon = 10e-5 / nparts;
1035 scalar_t min=sizes[0], max=sizes[0], sum=0;
1037 for (
int i=0; i < len; i++){
1039 scalar_t size = sizes[i];
1041 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
1044 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part size", size>=0,
1051 env_->localInputAssertion(__FILE__, __LINE__,
1052 "multiple sizes provided for one part", partIdx[
id]==0,
BASIC_ASSERTION);
1056 if (size < min) min = size;
1057 if (size > max) max = size;
1066 scalar_t *allSizes =
new scalar_t [2];
1067 env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1069 ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2,
true);
1071 int numNonZero = nparts - len;
1074 allSizes[1] = 1.0 / numNonZero;
1076 for (part_t p=0; p < nparts; p++)
1079 for (
int i=0; i < len; i++)
1082 pSize_[widx] = sizeArray;
1083 pCompactIndex_[widx] = partIdx;
1089 pSizeUniform_[widx] =
true;
1094 scalar_t avg = sum / nparts;
1101 scalar_t *tmp =
new scalar_t [len];
1102 env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1103 memcpy(tmp, sizes.getRawPtr(),
sizeof(scalar_t) * len);
1104 ArrayRCP<scalar_t> partSizes(tmp, 0, len,
true);
1106 std::sort(partSizes.begin(), partSizes.end());
1110 Array<scalar_t> nextUniqueSize;
1111 nextUniqueSize.push_back(partSizes[len-1]);
1112 scalar_t curr = partSizes[len-1];
1114 bool haveAvg =
false;
1118 for (
int i=len-2; i >= 0; i--){
1119 scalar_t val = partSizes[i];
1121 nextUniqueSize.push_back(val);
1123 if (avgIndex==len && val > avg && val - avg <=
epsilon){
1125 avgIndex = nextUniqueSize.size() - 1;
1133 size_t numSizes = nextUniqueSize.size();
1134 int sizeArrayLen = numSizes;
1141 if (!haveAvg) sizeArrayLen++;
1143 scalar_t *allSizes =
new scalar_t [sizeArrayLen];
1144 env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1145 ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen,
true);
1147 int newAvgIndex = sizeArrayLen;
1149 for (
int i=numSizes-1, idx=0; i >= 0; i--){
1151 if (newAvgIndex == sizeArrayLen){
1153 if (haveAvg && i==avgIndex)
1156 else if (!haveAvg && avg < nextUniqueSize[i]){
1158 allSizes[idx++] = avg;
1162 allSizes[idx++] = nextUniqueSize[i];
1165 env_->localBugAssertion(__FILE__, __LINE__,
"finding average in list",
1168 for (
int i=0; i < nparts; i++){
1169 buf[i] = newAvgIndex;
1172 sum = (nparts - len) * allSizes[newAvgIndex];
1174 for (
int i=0; i < len; i++){
1176 scalar_t size = sizes[i];
1181 if (size < avg && avg - size <=
epsilon)
1182 index = newAvgIndex;
1184 typename ArrayRCP<scalar_t>::iterator found =
1185 std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1187 env_->localBugAssertion(__FILE__, __LINE__,
"size array",
1190 index = found - sizeArray.begin();
1194 sum += allSizes[index];
1197 for (
int i=0; i < sizeArrayLen; i++){
1198 sizeArray[i] /= sum;
1201 pCompactIndex_[widx] = partIdx;
1202 pSize_[widx] = sizeArray;
1208 tmp =
new scalar_t [nparts];
1209 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1211 sum += ((nparts - len) * avg);
1213 for (
int i=0; i < nparts; i++){
1217 for (
int i=0; i < len; i++){
1218 tmp[ids[i]] = sizes[i]/sum;
1221 pSize_[widx] = arcp(tmp, 0, nparts);
1225 template <
typename Adapter>
1230 size_t len = partList.size();
1238 part_t lMax = std::numeric_limits<part_t>::min();
1239 part_t lMin = std::numeric_limits<part_t>::max();
1242 for (
size_t i = 0; i < len; i++) {
1243 if (partList[i] < lMin) lMin = partList[i];
1244 if (partList[i] > lMax) lMax = partList[i];
1246 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1247 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1249 nGlobalPartsSolution_ = gMax - gMin + 1;
1254 if (!onePartPerProc_){
1256 int *procs =
new int [len];
1257 env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1258 procs_ = arcp<int>(procs, 0, len);
1261 part_t *parts = partList.getRawPtr();
1263 if (procDist_.size() > 0){
1266 for (
size_t i=0; i < len; i++){
1267 partToProcsMap(parts[i], procs[i], procId);
1272 lno_t *partCounter =
new lno_t [nGlobalPartsSolution_];
1273 env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1276 int numProcs = comm_->getSize();
1280 memset(partCounter, 0,
sizeof(lno_t) * nGlobalPartsSolution_);
1282 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1283 partCounter[parts[i]]++;
1285 lno_t *procCounter =
new lno_t [numProcs];
1286 env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1289 int proc2 = partDist_[0];
1291 for (part_t part=1; part < nGlobalParts_; part++){
1293 proc2 = partDist_[part+1];
1294 int numprocs = proc2 - proc1;
1296 double dNum = partCounter[part];
1297 double dProcs = numprocs;
1300 double each = floor(dNum/dProcs);
1301 double extra = fmod(dNum,dProcs);
1303 for (
int proc=proc1, i=0; proc<proc2; proc++, i++){
1305 procCounter[proc] = lno_t(each) + 1;
1307 procCounter[proc] = lno_t(each);
1311 delete [] partCounter;
1313 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1314 if (partList[i] >= nGlobalParts_){
1317 procs[i] = comm_->getRank();
1320 part_t partNum = parts[i];
1321 proc1 = partDist_[partNum];
1322 proc2 = partDist_[partNum + 1];
1325 for (proc=proc1; proc < proc2; proc++){
1326 if (procCounter[proc] > 0){
1328 procCounter[proc]--;
1332 env_->localBugAssertion(__FILE__, __LINE__,
"part to proc",
1336 delete [] procCounter;
1347 const Teuchos::ParameterEntry *pe =
1348 env_->getParameters().getEntryPtr(
"remap_parts");
1349 if (pe) doRemap = pe->getValue(&doRemap);
1350 if (doRemap) RemapParts();
1352 haveSolution_ =
true;
1354 env_->memory(
"After Solution has processed algorithm's answer");
1359 template <
typename Adapter>
1361 double &numParts, part_t &partMin, part_t &partMax)
const 1363 if (onePartPerProc_){
1365 partMin = partMax = procId;
1367 else if (procDist_.size() > 0){
1368 partMin = procDist_[procId];
1369 partMax = procDist_[procId+1] - 1;
1370 numParts = procDist_[procId+1] - partMin;
1375 std::vector<int>::const_iterator entry;
1376 entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1378 size_t partIdx = entry - partDist_.begin();
1379 int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1380 partMin = partMax = int(partIdx) - 1;
1381 numParts = 1.0 / numProcs;
1385 template <
typename Adapter>
1386 void PartitioningSolution<Adapter>::partToProcsMap(part_t partId,
1387 int &procMin,
int &procMax)
const 1389 if (partId >= nGlobalParts_){
1393 procMin = procMax = comm_->getRank();
1395 else if (onePartPerProc_){
1396 procMin = procMax = int(partId);
1398 else if (procDist_.size() > 0){
1399 if (procDistEquallySpread_) {
1401 double fProcs = comm_->getSize();
1402 double fParts = nGlobalParts_;
1403 double each = fParts / fProcs;
1404 procMin = int(partId / each);
1405 while (procDist_[procMin] > partId) procMin--;
1406 while (procDist_[procMin+1] <= partId) procMin++;
1413 typename std::vector<part_t>::const_iterator entry;
1414 entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1416 size_t procIdx = entry - procDist_.begin();
1417 procMin = procMax = int(procIdx) - 1;
1421 procMin = partDist_[partId];
1422 procMax = partDist_[partId+1] - 1;
1426 template <
typename Adapter>
1428 int c1,
int c2)
const 1430 if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1431 throw std::logic_error(
"criteriaHaveSamePartSizes error");
1433 bool theSame =
false;
1438 else if (pSizeUniform_[c1] ==
true && pSizeUniform_[c2] ==
true)
1441 else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1443 bool useIndex = pCompactIndex_[c1].size() > 0;
1445 for (part_t p=0; theSame && p < nGlobalParts_; p++)
1446 if (pSize_[c1][pCompactIndex_[c1][p]] !=
1447 pSize_[c2][pCompactIndex_[c2][p]])
1451 for (part_t p=0; theSame && p < nGlobalParts_; p++)
1452 if (pSize_[c1][p] != pSize_[c2][p])
1475 template <
typename Adapter>
1477 bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
1478 int numLocalParts,
int numGlobalParts)
1481 typedef SSIZE_T ssize_t;
1483 int nprocs = comm_->getSize();
1484 ssize_t reducevals[4];
1485 ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1486 ssize_t sumGlobal=0, sumLocal=0;
1487 ssize_t maxGlobal=0, maxLocal=0;
1488 ssize_t
vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1489 numGlobalParts, numLocalParts};
1497 reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4,
vals, reducevals);
1501 sumHaveGlobal = reducevals[0];
1502 sumHaveLocal = reducevals[1];
1503 sumGlobal = reducevals[2];
1504 sumLocal = reducevals[3];
1506 env_->localInputAssertion(__FILE__, __LINE__,
1507 "Either all procs specify num_global/local_parts or none do",
1508 (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1509 (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1513 if (haveNumLocalParts)
1514 sumLocal = numLocalParts * nprocs;
1515 if (haveNumGlobalParts)
1516 sumGlobal = numGlobalParts * nprocs;
1518 sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1519 sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1521 maxLocal = numLocalParts;
1522 maxGlobal = numGlobalParts;
1525 if (!haveNumLocalParts && !haveNumGlobalParts){
1526 onePartPerProc_ =
true;
1530 if (haveNumGlobalParts){
1532 vals[0] = numGlobalParts;
1533 vals[1] = numLocalParts;
1535 reduceAll<int, ssize_t>(
1536 *comm_, Teuchos::REDUCE_MAX, 2,
vals, reducevals);
1540 maxGlobal = reducevals[0];
1541 maxLocal = reducevals[1];
1543 env_->localInputAssertion(__FILE__, __LINE__,
1544 "Value for num_global_parts is different on different processes.",
1549 env_->localInputAssertion(__FILE__, __LINE__,
1550 "Sum of num_local_parts does not equal requested num_global_parts",
1553 if (sumLocal == nprocs && maxLocal == 1){
1554 onePartPerProc_ =
true;
1559 if (maxGlobal == nprocs){
1560 onePartPerProc_ =
true;
1568 if (sumHaveLocal == nprocs){
1574 procDist_.resize(nprocs+1);
1576 catch (std::exception &e){
1577 throw(std::bad_alloc());
1580 part_t *procArray = &procDist_[0];
1583 part_t tmp = part_t(numLocalParts);
1584 gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1590 for (
int proc=0; proc < nprocs; proc++)
1591 procArray[proc+1] += procArray[proc];
1597 double fParts = numGlobalParts;
1598 double fProcs = nprocs;
1600 if (fParts < fProcs){
1603 partDist_.resize(
size_t(fParts+1));
1605 catch (std::exception &e){
1606 throw(std::bad_alloc());
1609 int *partArray = &partDist_[0];
1611 double each = floor(fProcs / fParts);
1612 double extra = fmod(fProcs, fParts);
1615 for (part_t part=0; part < numGlobalParts; part++){
1616 int numOwners = int(each + ((part<extra) ? 1 : 0));
1617 partArray[part+1] = partArray[part] + numOwners;
1620 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1623 else if (fParts > fProcs){
1628 procDistEquallySpread_ =
true;
1631 procDist_.resize(
size_t(fProcs+1));
1633 catch (std::exception &e){
1634 throw(std::bad_alloc());
1637 part_t *procArray = &procDist_[0];
1639 double each = floor(fParts / fProcs);
1640 double extra = fmod(fParts, fProcs);
1643 for (
int proc=0; proc < nprocs; proc++){
1644 part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
1645 procArray[proc+1] = procArray[proc] + numParts;
1648 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1652 env_->globalBugAssertion(__FILE__, __LINE__,
1670 template <
typename Adapter>
1673 size_t len = parts_.size();
1675 part_t me = comm_->getRank();
1676 int np = comm_->getSize();
1678 if (np < nGlobalParts_) {
1680 std::cout <<
"Remapping not yet supported for " 1681 <<
"num_global_parts " << nGlobalParts_
1682 <<
" > num procs " << np << std::endl;
1695 std::map<part_t, long> edges;
1700 for (
size_t i = 0; i < len; i++) {
1702 if (parts_[i] == me) lstaying++;
1705 Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1706 &lstaying, &gstaying);
1709 part_t *remap = NULL;
1711 int nedges = edges.size();
1714 part_t tnVtx = np + nGlobalParts_;
1718 idx =
new int[tnVtx+1];
1719 sizes =
new int[np];
1723 Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1728 for (
int i = 0; i < np; i++)
1729 idx[i+1] = idx[i] + sizes[i];
1734 part_t *bufv = NULL;
1737 bufv =
new part_t[nedges];
1738 bufw =
new long[nedges];
1740 for (
typename std::map<part_t, long>::iterator it = edges.begin();
1741 it != edges.end(); it++) {
1742 bufv[cnt] = it->first;
1743 bufw[cnt] = it->second;
1754 adj =
new part_t[idx[np]];
1755 wgt =
new long[idx[np]];
1758 Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1759 Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1770 for (
int i = 0; i < idx[np]; i++) {
1775 for (part_t i = np; i < tnVtx; i++) {
1781 for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] <<
" ";
1782 std::cout << std::endl;
1784 std::cout <<
"ADJ ";
1785 for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] <<
" ";
1786 std::cout << std::endl;
1788 std::cout <<
"WGT ";
1789 for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] <<
" ";
1790 std::cout << std::endl;
1794 part_t *match =
new part_t[tnVtx];
1795 for (part_t i = 0; i < tnVtx; i++) match[i] = i;
1797 Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1800 std::cout <<
"After matching: " << nmatches <<
" found" << std::endl;
1801 for (part_t i = 0; i < tnVtx; i++)
1802 std::cout <<
"match[" << i <<
"] = " << match[i]
1803 << ((match[i] != i &&
1804 (i < np && match[i] != i+np))
1810 bool nontrivial =
false;
1812 for (part_t i = 0; i < np; i++) {
1813 if ((match[i] != i) && (match[i] != (i+np))) {
1822 remap =
new part_t[nGlobalParts_];
1823 for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1825 bool *used =
new bool[np];
1826 for (part_t i = 0; i < np; i++) used[i] =
false;
1829 for (part_t i = 0; i < nGlobalParts_; i++) {
1830 part_t tmp = i + np;
1831 if (match[tmp] != tmp) {
1832 remap[i] = match[tmp];
1833 used[match[tmp]] =
true;
1838 for (part_t i = 0; i < nGlobalParts_; i++) {
1839 if (remap[i] > -1)
continue;
1847 for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1848 if (remap[i] > -1)
continue;
1849 while (used[uidx]) uidx++;
1858 cout <<
"Remap vector: ";
1859 for (part_t i = 0; i < nGlobalParts_; i++) cout << remap[i] <<
" ";
1860 std::cout << std::endl;
1863 long newgstaying = measure_stays(remap, idx, adj, wgt,
1865 doRemap = (newgstaying > gstaying);
1866 std::cout <<
"gstaying " << gstaying <<
" measure(input) " 1867 << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1868 <<
" newgstaying " << newgstaying
1869 <<
" nontrivial " << nontrivial
1870 <<
" doRemap " << doRemap << std::endl;
1877 Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1880 if (me != 0) remap =
new part_t[nGlobalParts_];
1881 Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1882 for (
size_t i = 0; i < len; i++) {
1883 parts_[i] = remap[parts_[i]];
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
Defines the Solution base class.
fast typical checks for valid arguments
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
const part_t * getProcDistribution() const
Return a distribution by process.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
Just a placeholder for now.
more involved, like validate a graph
int getNumberOfCriteria() const
Get the number of criteria (object weights)
sub-steps, each method's entry and exit
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
A PartitioningSolution is a solution to a partitioning problem.
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
Algorithm defines the base class for all algorithms.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
Greedy Maximal Weight Matching.
static const std::string fail
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
const int * getPartDistribution() const
Return a distribution by part.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
std::vector< Zoltan2::coordinateModelPartBox< scalar_t, part_t > > & getPartBoxesView() const
returns the part box boundary list.
Defines the Environment class.
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes...
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.