#ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
#define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_

#include "Teuchos_ArrayViewDecl.hpp"
#include "Teuchos_ReductionOp.hpp"
#include <zoltan_dd.h>
#include "Teuchos_Comm.hpp"
#ifdef HAVE_ZOLTAN2_MPI
#include "Teuchos_DefaultMpiComm.hpp"
#endif // HAVE_ZOLTAN2_MPI
#include <Teuchos_DefaultSerialComm.hpp>

/*! \brief Zoltan2_ReduceBestMapping: reduces (cost, rank) pairs to the
 *  minimum-cost mapping, breaking ties toward the minimum rank.
 */
template <typename Ordinal, typename T>
class Zoltan2_ReduceBestMapping :
    public Teuchos::ValueTypeReductionOp<Ordinal, T> {
private:
  T _EPSILON;

public:
  /*! \brief Default Constructor. */
  Zoltan2_ReduceBestMapping() :
      _EPSILON(std::numeric_limits<T>::epsilon()) {}

  /*! \brief Implement Teuchos::ValueTypeReductionOp interface. */
  void reduce(
      const Ordinal count,
      const T inBuffer[],
      T inoutBuffer[]) const
  {
    for (Ordinal i = 0; i < count; i++) {
      // A strictly smaller cost always wins.
      if (inBuffer[0] - inoutBuffer[0] < -_EPSILON) {
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      }
      // Costs equal within _EPSILON: break the tie on the second
      // entry, the sending rank.
      else if (inBuffer[0] - inoutBuffer[0] < _EPSILON &&
               inBuffer[1] - inoutBuffer[1] < _EPSILON) {
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      }
    }
  }
};
/*! \brief Computes the factorial of x; used to enumerate the orderings
 *  of the coordinate dimensions.
 */
template <typename it>
it z2Fact(it x) {
  return (x == 1 ? x : x * z2Fact<it>(x - 1));
}
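// A minimal usage sketch (hypothetical values): a 3-dimensional machine
// has z2Fact<int>(3) == 6 possible orderings of its coordinate
// dimensions, so six candidate partitionings are tried.
//
//   int orderings = z2Fact<int>(3);  // 6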
template <typename gno_t, typename part_t>
/* ... class declaration elided in this listing ... */
/*! \brief Computes the i-th permutation of 0..n-1 using the factorial
 *  number system.
 */
template <typename IT>
void ithPermutation(const IT n, IT i, IT *perm)
{
  IT j, k = 0;
  IT *fact = new IT[n];

  // Compute the factorials.
  fact[k] = 1;
  while (++k < n)
    fact[k] = fact[k - 1] * k;

  // Compute the factorial code of i.
  for (k = 0; k < n; ++k)
  {
    perm[k] = i / fact[n - 1 - k];
    i = i % fact[n - 1 - k];
  }

  // Readjust the values to obtain the permutation: walk from the end
  // and increment perm[k] for each preceding value that is not larger.
  for (k = n - 1; k > 0; --k)
    for (j = k - 1; j >= 0; --j)
      if (perm[j] <= perm[k])
        perm[k]++;

  delete [] fact;
}
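// A minimal usage sketch (hypothetical values): the 3rd lexicographic
// permutation of {0,1,2}.
//
//   int perm[3];
//   ithPermutation<int>(3, 3, perm);  // perm = {1, 2, 0}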
/*! \brief Builds the CSR communication graph of a Cartesian grid of
 *  tasks: each task is connected to its left/right neighbor along every
 *  grid dimension.
 */
template <typename part_t>
void getGridCommunicationGraph(
    part_t taskCount,
    part_t *&task_comm_xadj,
    part_t *&task_comm_adj,
    std::vector<int> grid_dims)
{
  int dim = grid_dims.size();
  int neighborCount = 2 * dim;
  task_comm_xadj = allocMemory<part_t>(taskCount + 1);
  task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);

  part_t neighBorIndex = 0;
  task_comm_xadj[0] = 0;
  for (part_t i = 0; i < taskCount; ++i) {
    part_t prevDimMul = 1;
    for (int j = 0; j < dim; ++j) {
      part_t lNeighbor = i - prevDimMul;
      part_t rNeighbor = i + prevDimMul;
      prevDimMul *= grid_dims[j];
      // A neighbor is valid only if it stays in the same slab of the
      // grid along dimension j.
      if (lNeighbor >= 0 && lNeighbor / prevDimMul == i / prevDimMul &&
          lNeighbor < taskCount) {
        task_comm_adj[neighBorIndex++] = lNeighbor;
      }
      if (rNeighbor >= 0 && rNeighbor / prevDimMul == i / prevDimMul &&
          rNeighbor < taskCount) {
        task_comm_adj[neighBorIndex++] = rNeighbor;
      }
    }
    task_comm_xadj[i + 1] = neighBorIndex;
  }
}
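// A minimal sketch of the output (hypothetical values): for a 2x2 grid,
// task 0 neighbors task 1 (x-direction) and task 2 (y-direction).
//
//   part_t *xadj, *adj;
//   std::vector<int> dims(2, 2);
//   getGridCommunicationGraph<part_t>(4, xadj, adj, dims);
//   // xadj = {0, 2, 4, 6, 8}; adj begins {1, 2, ...}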
/*! \brief Computes the center (mean coordinate) of each part of the
 *  solution.
 */
template <typename Adapter, typename scalar_t, typename part_t>
void getSolutionCenterCoordinates(
    const Environment *envConst,
    const Teuchos::Comm<int> *comm,
    const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords,
    const part_t *parts,
    int coordDim,
    part_t ntasks,
    scalar_t **partCenters)
{
  typedef typename Adapter::lno_t lno_t;
  typedef typename Adapter::gno_t gno_t;
  typedef StridedData<lno_t, scalar_t> input_t;

  lno_t numLocalCoords = coords->getLocalNumCoordinates();

  ArrayView<const gno_t> gnos;
  ArrayView<input_t> xyz;
  ArrayView<input_t> wgts;
  coords->getCoordinates(gnos, xyz, wgts);

  // Number of local points in each part.
  gno_t *point_counts = allocMemory<gno_t>(ntasks);
  memset(point_counts, 0, sizeof(gno_t) * ntasks);

  // Global number of points in each part.
  gno_t *global_point_counts = allocMemory<gno_t>(ntasks);

  scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);

  for (int dim = 0; dim < coordDim; dim++) {
    ArrayRCP<const scalar_t> ar;
    xyz[dim].getInputArray(ar);
    // Coordinates along each dimension.
    multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
    memset(partCenters[dim], 0, sizeof(scalar_t) * ntasks);
  }

  // Sum up the local coordinates of each part.
  for (lno_t i = 0; i < numLocalCoords; i++) {
    part_t p = parts[i];
    ++point_counts[p];
    for (int j = 0; j < coordDim; ++j) {
      scalar_t c = multiJagged_coordinates[j][i];
      partCenters[j][p] += c;
    }
  }

  reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
                        ntasks, point_counts, global_point_counts);

  // Divide by the global counts to obtain this rank's contribution to
  // the mean.
  for (int j = 0; j < coordDim; ++j) {
    for (part_t i = 0; i < ntasks; ++i) {
      partCenters[j][i] /= global_point_counts[i];
    }
  }

  // Sum the contributions of all ranks.
  scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
  for (int j = 0; j < coordDim; ++j) {
    reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
                             ntasks, partCenters[j], tmpCoords);
    // Swap the reduced buffer in.
    scalar_t *tmp = partCenters[j];
    partCenters[j] = tmpCoords;
    tmpCoords = tmp;
  }

  freeArray<gno_t>(point_counts);
  freeArray<gno_t>(global_point_counts);
  freeArray<scalar_t>(tmpCoords);
  freeArray<scalar_t *>(multiJagged_coordinates);
}
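// Note: after the two reduceAll calls every rank holds the same global
// centers; partCenters[d][p] is the mean coordinate of part p along
// dimension d.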
/*! \brief Builds the coarsened part graph: vertices are parts, edges
 *  aggregate the edge weights between parts.
 */
template <typename Adapter, typename scalar_t, typename part_t>
void getCoarsenedPartGraph(
    const Environment *envConst,
    const Teuchos::Comm<int> *comm,
    const Zoltan2::GraphModel<typename Adapter::base_adapter_t> *graph,
    part_t np,
    const part_t *parts,
    ArrayRCP<part_t> &g_part_xadj,
    ArrayRCP<part_t> &g_part_adj,
    ArrayRCP<scalar_t> &g_part_ew)
{
  typedef typename Adapter::lno_t t_lno_t;
  typedef typename Adapter::gno_t t_gno_t;
  typedef typename Adapter::scalar_t t_scalar_t;
  typedef StridedData<t_lno_t, t_scalar_t> t_input_t;

  t_lno_t localNumVertices = graph->getLocalNumVertices();
  t_lno_t localNumEdges = graph->getLocalNumEdges();

  // The vertices and their weights.
  ArrayView<const t_gno_t> Ids;
  ArrayView<t_input_t> v_wghts;
  graph->getVertexList(Ids, v_wghts);

  // The edges (CSR) and their weights.
  ArrayView<const t_gno_t> edgeIds;
  ArrayView<const t_lno_t> offsets;
  ArrayView<t_input_t> e_wgts;
  graph->getEdgeList(edgeIds, offsets, e_wgts);

  int numWeightPerEdge = graph->getNumWeightsPerEdge();

  std::vector<t_scalar_t> edge_weights;
  if (numWeightPerEdge > 0) {
    edge_weights = std::vector<t_scalar_t>(localNumEdges);
    for (t_lno_t i = 0; i < localNumEdges; ++i) {
      edge_weights[i] = e_wgts[0][i];
    }
  }

  // The part of the neighbor vertex for every local edge.
  std::vector<part_t> e_parts(localNumEdges);
#ifdef HAVE_ZOLTAN2_MPI
  if (comm->getSize() > 1)
  {
    // Distributed case: look up the parts of off-process neighbor
    // vertices through a Zoltan distributed directory.
    Zoltan_DD_Struct *dd = NULL;
    MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
    int debug_level = 0;
    Zoltan_DD_Create(&dd, mpicomm,
                     /* ... id sizes elided in this listing ... */
                     sizeof(part_t), localNumVertices, debug_level);

    ZOLTAN_ID_PTR ddnotneeded = NULL;
    // Register the part of each local vertex ...
    Zoltan_DD_Update(dd,
                     (ZOLTAN_ID_PTR) Ids.getRawPtr(),
                     ddnotneeded,
                     (char *) parts,
                     NULL,
                     int(localNumVertices));
    // ... then fetch the part of every edge endpoint.
    Zoltan_DD_Find(dd,
                   (ZOLTAN_ID_PTR) edgeIds.getRawPtr(),
                   ddnotneeded,
                   (char *)&(e_parts[0]),
                   NULL,
                   localNumEdges,
                   NULL);
    Zoltan_DD_Destroy(&dd);
  }
  else
#endif
  {
    // Serial case: every vertex is local, read the part directly.
    for (t_lno_t i = 0; i < localNumEdges; ++i) {
      t_gno_t ei = edgeIds[i];
      part_t p = parts[ei];
      e_parts[i] = p;
    }

    // Build the coarsened graph locally and return.
    std::vector<t_lno_t> part_begins(np, -1);
    std::vector<t_lno_t> part_nexts(localNumVertices, -1);

    // Link the vertices of each part into a singly linked list.
    for (t_lno_t i = 0; i < localNumVertices; ++i) {
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }

    g_part_xadj = ArrayRCP<part_t>(np + 1);
    g_part_adj = ArrayRCP<part_t>(localNumEdges);
    g_part_ew = ArrayRCP<t_scalar_t>(localNumEdges);

    part_t nindex = 0;
    g_part_xadj[0] = 0;
    std::vector<part_t> part_neighbors(np);
    std::vector<t_scalar_t> part_neighbor_weights(np, 0);
    std::vector<t_scalar_t> part_neighbor_weights_ordered(np);

    // For each part, accumulate the total edge weight to every
    // neighboring part.
    for (t_lno_t i = 0; i < np; ++i) {
      part_t num_neighbor_parts = 0;
      t_lno_t v = part_begins[i];
      while (v != -1) {
        for (t_lno_t j = offsets[v]; j < offsets[v + 1]; ++j) {
          part_t ep = e_parts[j];
          t_scalar_t ew = 1;
          if (numWeightPerEdge > 0) {
            ew = edge_weights[j];
          }
          // First time this neighbor part is seen in this round.
          if (part_neighbor_weights[ep] < 0.00001) {
            part_neighbors[num_neighbor_parts++] = ep;
          }
          part_neighbor_weights[ep] += ew;
        }
        v = part_nexts[v];
      }

      // Copy the accumulated weights out; reset them for the next part.
      for (t_lno_t j = 0; j < num_neighbor_parts; ++j) {
        part_t neighbor_part = part_neighbors[j];
        g_part_adj[nindex] = neighbor_part;
        g_part_ew[nindex++] = part_neighbor_weights[neighbor_part];
        part_neighbor_weights[neighbor_part] = 0;
      }
      g_part_xadj[i + 1] = nindex;
    }
    return;
  }
  // Distributed case: accumulate the part graph into a Tpetra matrix
  // whose rows and columns are parts, then gather it to every rank.
  RCP<const Teuchos::Comm<int> > tcomm = rcpFromRef(*comm);
  typedef Tpetra::Map<>::node_type t_node_t;
  typedef Tpetra::Map<part_t, part_t, t_node_t> map_t;
  Teuchos::RCP<const map_t> map = Teuchos::rcp(new map_t(np, 0, tcomm));
  typedef Tpetra::CrsMatrix<t_scalar_t, part_t, part_t, t_node_t>
      tcrsMatrix_t;
  Teuchos::RCP<tcrsMatrix_t> tMatrix(new tcrsMatrix_t(map, 0));

  {
    std::vector<t_lno_t> part_begins(np, -1);
    std::vector<t_lno_t> part_nexts(localNumVertices, -1);

    // Link the local vertices of each part into a list.
    for (t_lno_t i = 0; i < localNumVertices; ++i) {
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }

    std::vector<part_t> part_neighbors(np);
    std::vector<t_scalar_t> part_neighbor_weights(np, 0);
    std::vector<t_scalar_t> part_neighbor_weights_ordered(np);

    // Accumulate the local edge weight from each part to its neighbors.
    for (t_lno_t i = 0; i < np; ++i) {
      part_t num_neighbor_parts = 0;
      t_lno_t v = part_begins[i];
      while (v != -1) {
        for (t_lno_t j = offsets[v]; j < offsets[v + 1]; ++j) {
          part_t ep = e_parts[j];
          t_scalar_t ew = 1;
          if (numWeightPerEdge > 0) {
            ew = edge_weights[j];
          }
          if (part_neighbor_weights[ep] < 0.00001) {
            part_neighbors[num_neighbor_parts++] = ep;
          }
          part_neighbor_weights[ep] += ew;
        }
        v = part_nexts[v];
      }

      for (t_lno_t j = 0; j < num_neighbor_parts; ++j) {
        part_t neighbor_part = part_neighbors[j];
        part_neighbor_weights_ordered[j] =
            part_neighbor_weights[neighbor_part];
        part_neighbor_weights[neighbor_part] = 0;
      }

      // Insert the row for part i into the matrix.
      if (num_neighbor_parts > 0) {
        Teuchos::ArrayView<const part_t> destinations(
            &(part_neighbors[0]), num_neighbor_parts);
        Teuchos::ArrayView<const t_scalar_t> vals(
            &(part_neighbor_weights_ordered[0]), num_neighbor_parts);
        tMatrix->insertGlobalValues(i, destinations, vals);
      }
    }
  }
  tMatrix->fillComplete();
  std::vector<part_t> part_indices(np);

  for (part_t i = 0; i < np; ++i) part_indices[i] = i;

  Teuchos::ArrayView<const part_t> global_ids(&(part_indices[0]), np);

  // Gather the whole coarsened matrix to every rank.
  Teuchos::RCP<const map_t> gatherRowMap(new map_t(
      Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid(),
      global_ids, 0, tcomm));

  Teuchos::RCP<tcrsMatrix_t> A_gather =
      Teuchos::rcp(new tcrsMatrix_t(gatherRowMap, 0));
  typedef Tpetra::Import<typename map_t::local_ordinal_type,
                         typename map_t::global_ordinal_type,
                         typename map_t::node_type> import_type;
  import_type import(map, gatherRowMap);
  A_gather->doImport(*tMatrix, import, Tpetra::INSERT);
  A_gather->fillComplete();

  // Copy the gathered matrix into the CSR output arrays.
  g_part_xadj = ArrayRCP<part_t>(np + 1);
  g_part_adj = ArrayRCP<part_t>(A_gather->getNodeNumEntries());
  g_part_ew = ArrayRCP<t_scalar_t>(A_gather->getNodeNumEntries());

  part_t *taskidx = g_part_xadj.getRawPtr();
  part_t *taskadj = g_part_adj.getRawPtr();
  t_scalar_t *taskadjwgt = g_part_ew.getRawPtr();

  taskidx[0] = 0;
  for (part_t i = 0; i < np; i++) {
    part_t length = A_gather->getNumEntriesInLocalRow(i);
    taskidx[i + 1] = taskidx[i] + length;

    Teuchos::ArrayView<part_t> Indices(taskadj + taskidx[i], length);
    Teuchos::ArrayView<t_scalar_t> Values(taskadjwgt + taskidx[i], length);
    size_t nentries;
    A_gather->getLocalRowCopy(i, Indices, Values, nentries);
  }
}
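// The result is a CSR graph over parts: the neighbors of part p are
// g_part_adj[g_part_xadj[p] .. g_part_xadj[p+1]) and g_part_ew holds
// the total edge weight to each neighbor. A sketch with hypothetical
// values, two parts connected by weight 3:
//
//   // g_part_xadj = {0, 1, 2}
//   // g_part_adj  = {1, 0}
//   // g_part_ew   = {3, 3}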
/*! \brief KmeansHeap: a max-heap on distances that keeps the heapSize
 *  closest points seen so far (the root holds the largest kept value).
 */
template <class IT, class WT>
class KmeansHeap {
  IT heapSize;
  IT *indices;
  WT *values;
  WT _EPSILON;

public:
  void setHeapsize(IT heapsize_) {
    this->heapSize = heapsize_;
    this->indices = allocMemory<IT>(heapsize_);
    this->values = allocMemory<WT>(heapsize_);
  }

  ~KmeansHeap() {
    freeArray<IT>(this->indices);
    freeArray<WT>(this->values);
  }

  // Offers a candidate point: replaces the current maximum if the new
  // distance is smaller, then restores the heap property.
  void addPoint(IT index, WT distance) {
    WT maxVal = this->values[0];
    // Reject if the point is not closer than the current maximum.
    if (distance >= maxVal)
      return;
    this->values[0] = distance;
    this->indices[0] = index;
    this->push_down(0);
  }
  // Pushes the value at index_on_heap down until both children are
  // smaller (max-heap property).
  void push_down(IT index_on_heap) {
    IT child_index1 = 2 * index_on_heap + 1;
    IT child_index2 = 2 * index_on_heap + 2;

    IT biggerIndex = -1;
    if (child_index1 < this->heapSize && child_index2 < this->heapSize) {
      if (this->values[child_index1] < this->values[child_index2]) {
        biggerIndex = child_index2;
      }
      else {
        biggerIndex = child_index1;
      }
    }
    else if (child_index1 < this->heapSize) {
      biggerIndex = child_index1;
    }
    else if (child_index2 < this->heapSize) {
      biggerIndex = child_index2;
    }
    if (biggerIndex >= 0 &&
        this->values[biggerIndex] > this->values[index_on_heap]) {
      WT tmpVal = this->values[biggerIndex];
      this->values[biggerIndex] = this->values[index_on_heap];
      this->values[index_on_heap] = tmpVal;

      IT tmpIndex = this->indices[biggerIndex];
      this->indices[biggerIndex] = this->indices[index_on_heap];
      this->indices[index_on_heap] = tmpIndex;
      push_down(biggerIndex);
    }
  }
  // Resets the heap: all distances at the maximum representable value.
  void initValues() {
    WT MAXVAL = std::numeric_limits<WT>::max();
    for (IT i = 0; i < this->heapSize; ++i) {
      this->values[i] = MAXVAL;
      this->indices[i] = -1;
    }
  }

  // Returns the sum of the kept distances.
  WT getTotalDistance() {
    WT nc = 0;
    for (IT j = 0; j < this->heapSize; ++j) {
      nc += this->values[j];
    }
    return nc;
  }

  // Moves the center to the mean of the kept points; returns whether
  // any coordinate moved by more than _EPSILON.
  bool getNewCenters(WT *center, WT **coords, int dimension) {
    bool moved = false;
    for (int i = 0; i < dimension; ++i) {
      WT nc = 0;
      for (IT j = 0; j < this->heapSize; ++j) {
        IT k = this->indices[j];
        nc += coords[i][k];
      }
      nc /= this->heapSize;
      moved = (ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved);
      center[i] = nc;
    }
    return moved;
  }

  void copyCoordinates(IT *permutation) {
    for (IT i = 0; i < this->heapSize; ++i) {
      permutation[i] = this->indices[i];
    }
  }
};
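// A minimal usage sketch (hypothetical values): keep the 2 closest of
// three candidate points.
//
//   KmeansHeap<int, double> h;
//   h.setHeapsize(2);
//   h.initValues();
//   h.addPoint(7, 1.5);
//   h.addPoint(3, 0.5);
//   h.addPoint(9, 2.5);  // rejected: 2.5 >= current max (1.5)
//   // h now holds indices {7, 3} with distances {1.5, 0.5}.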
/*! \brief KMeansCluster: a cluster center plus the heap of its closest
 *  points.
 */
template <class IT, class WT>
class KMeansCluster {
  int dimension;
  KmeansHeap<IT, WT> closestPoints;

public:
  WT *center;

  void setParams(int dimension_, int heapsize) {
    this->dimension = dimension_;
    this->center = allocMemory<WT>(dimension_);
    this->closestPoints.setHeapsize(heapsize);
  }

  void clearHeap() { this->closestPoints.initValues(); }

  bool getNewCenters(WT **coords) {
    return this->closestPoints.getNewCenters(center, coords, dimension);
  }

  // Distance of the point at 'index' to the center; the point is also
  // offered to the closest-points heap.
  WT getDistance(IT index, WT **elementCoords) {
    WT distance = 0;
    for (int i = 0; i < this->dimension; ++i) {
      WT d = (center[i] - elementCoords[i][index]);
      distance += d * d;
    }
    distance = pow(distance, WT(1.0 / this->dimension));
    closestPoints.addPoint(index, distance);
    return distance;
  }

  WT getDistanceToCenter() { return closestPoints.getTotalDistance(); }

  void copyCoordinates(IT *permutation) {
    closestPoints.copyCoordinates(permutation);
  }
};
/*! \brief KMeansAlgorithm: clusters the processor coordinates and can
 *  return the tightest subset of them.
 */
template <class IT, class WT>
class KMeansAlgorithm {
  int dim;
  IT numElements;
  WT **elementCoords;
  IT numClusters;
  IT required_elements;
  KMeansCluster<IT, WT> *clusters;
  WT *maxCoordinates;
  WT *minCoordinates;

public:
  ~KMeansAlgorithm() {
    freeArray<KMeansCluster<IT, WT> >(clusters);
    freeArray<WT>(maxCoordinates);
    freeArray<WT>(minCoordinates);
  }

  /*! \brief KMeansAlgorithm Constructor. */
  KMeansAlgorithm(
      int dim_,
      IT numElements_,
      WT **elementCoords_,
      IT required_elements_) :
    dim(dim_),
    numElements(numElements_),
    elementCoords(elementCoords_),
    numClusters((1 << dim_) + 1),
    required_elements(required_elements_)
  {
    this->clusters =
        allocMemory<KMeansCluster<IT, WT> >(this->numClusters);
    // Each cluster keeps a heap of the required_elements closest points.
    for (int i = 0; i < numClusters; ++i) {
      this->clusters[i].setParams(this->dim, this->required_elements);
    }

    this->maxCoordinates = allocMemory<WT>(this->dim);
    this->minCoordinates = allocMemory<WT>(this->dim);

    // Obtain the min and max coordinate along each dimension.
    for (int j = 0; j < dim; ++j) {
      this->minCoordinates[j] = this->maxCoordinates[j] =
          this->elementCoords[j][0];
      for (IT i = 1; i < numElements; ++i) {
        WT t = this->elementCoords[j][i];
        if (t > this->maxCoordinates[j]) {
          this->maxCoordinates[j] = t;
        }
        if (t < minCoordinates[j]) {
          this->minCoordinates[j] = t;
        }
      }
    }

    // Place the first 2^dim cluster centers at the corners of the
    // bounding box.
    for (int j = 0; j < dim; ++j) {
      int mod = (1 << (j + 1));
      for (int i = 0; i < numClusters - 1; ++i) {
        WT c;
        if ((i % mod) < mod / 2) {
          c = this->maxCoordinates[j];
        }
        else {
          c = this->minCoordinates[j];
        }
        this->clusters[i].center[j] = c;
      }
    }

    // The last cluster center is the middle of the bounding box.
    for (int j = 0; j < dim; ++j) {
      this->clusters[numClusters - 1].center[j] =
          (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
    }
  }
  /*! \brief Performs the clustering: at most 10 rounds of assigning
   *  points and recomputing centers, stopping early if no center moves.
   */
  void kmeans() {
    for (int it = 0; it < 10; ++it) {
      for (IT j = 0; j < this->numClusters; ++j) {
        this->clusters[j].clearHeap();
      }
      for (IT i = 0; i < this->numElements; ++i) {
        for (IT j = 0; j < this->numClusters; ++j) {
          this->clusters[j].getDistance(i, this->elementCoords);
        }
      }
      bool moved = false;
      for (IT j = 0; j < this->numClusters; ++j) {
        moved = (this->clusters[j].getNewCenters(this->elementCoords) ||
                 moved);
      }
      if (!moved) {
        break;
      }
    }
  }

  // Copies out the point indices of the cluster whose kept points are
  // closest to its center in total.
  void getMinDistanceCluster(IT *procPermutation) {
    WT minDistance = this->clusters[0].getDistanceToCenter();
    IT minCluster = 0;
    for (IT j = 1; j < this->numClusters; ++j) {
      WT minTmpDistance = this->clusters[j].getDistanceToCenter();
      if (minTmpDistance < minDistance) {
        minDistance = minTmpDistance;
        minCluster = j;
      }
    }
    this->clusters[minCluster].copyCoordinates(procPermutation);
  }
};
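// A minimal usage sketch (hypothetical values): cluster 8 processors in
// 2 dimensions and take the 4 that sit closest together.
//
//   // WT *coords[2] = {xs, ys};  // 8 entries each
//   KMeansAlgorithm<int, double> kma(2, 8, coords, 4);
//   kma.kmeans();
//   int chosen[4];
//   kma.getMinDistanceCluster(chosen);  // indices of the tightest 4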
#define MINOF(a,b) (((a)<(b))?(a):(b))

/*! \brief fillContinousArray: fills arr with 0..arrSize-1 when val is
 *  NULL, otherwise with *val.
 */
template <typename T>
void fillContinousArray(T *arr, size_t arrSize, T *val) {
  if (val == NULL) {
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i) {
      arr[i] = i;
    }
  }
  else {
    T v = *val;
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i) {
      arr[i] = v;
    }
  }
}
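// A minimal usage sketch (hypothetical values):
//
//   part_t buf[4];
//   fillContinousArray<part_t>(buf, 4, NULL);   // buf = {0, 1, 2, 3}
//   part_t zero = 0;
//   fillContinousArray<part_t>(buf, 4, &zero);  // buf = {0, 0, 0, 0}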
/*! \brief CommunicationModel: base class that performs mapping between
 *  the coordinate partitioning result and the processors.
 */
template <typename part_t, typename pcoord_t>
class CommunicationModel {
  /* ... members and constructor abridged in this listing ... */

  /*! \brief Computes the weighted hop cost of a task-to-processor
   *  assignment over the task communication graph.
   */
  void calculateCommunicationCost(
      part_t *task_to_proc,
      part_t *task_communication_xadj,
      part_t *task_communication_adj,
      pcoord_t *task_communication_edge_weight)
  {
    double totalCost = 0;

    part_t commCount = 0;
    for (part_t task = 0; task < this->no_tasks; ++task) {
      int assigned_proc = task_to_proc[task];

      part_t task_adj_begin = task_communication_xadj[task];
      part_t task_adj_end = task_communication_xadj[task + 1];

      commCount += task_adj_end - task_adj_begin;

      for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2) {
        part_t neighborTask = task_communication_adj[task2];
        int neighborProc = task_to_proc[neighborTask];
        double distance = getProcDistance(assigned_proc, neighborProc);

        if (task_communication_edge_weight == NULL) {
          totalCost += distance;
        }
        else {
          totalCost += distance * task_communication_edge_weight[task2];
        }
      }
    }

    this->commCost = totalCost;
  }
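  // In effect the cost is
  //   commCost = sum over directed task edges (t, u) of
  //              dist(proc(t), proc(u)) * w(t, u),
  // with w == 1 when no edge weights are given; dist comes from the
  // virtual getProcDistance (the hop count when a machine is known).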
  /*! \brief Pure virtual: computes the task-to-processor assignment. */
  virtual void getMapping(
      int myRank,
      const RCP<const Environment> &env,
      ArrayRCP<part_t> &proc_to_task_xadj,
      ArrayRCP<part_t> &proc_to_task_adj,
      ArrayRCP<part_t> &task_to_proc) const = 0;
};
/*! \brief CoordinateCommunicationModel: maps the coordinate
 *  partitioning result to processors using processor and task
 *  coordinates.
 */
template <typename pcoord_t, typename tcoord_t, typename part_t>
class CoordinateCommunicationModel :
    public CommunicationModel<part_t, pcoord_t> {
public:
  // Constructor arguments (abridged here) include the machine extent
  // and, per dimension, whether the network wraps around (torus).
  CoordinateCommunicationModel(
      /* ... coordinate dimensions and arrays elided ... */
      int *machine_extent_,
      bool *machine_extent_wrap_around_,
      /* ... machine representation elided ... */);

  void setPartArraySize(int psize) { this->partArraySize = psize; }

  void setPartArray(part_t *pNo) { this->partNoArray = pNo; }

  /*! \brief Called when nprocs > no_tasks: selects the subset of
   *  processors that are closest to each other via k-means.
   */
  void getClosestSubset(part_t *proc_permutation, part_t nprocs,
                        part_t ntasks) const {
    part_t minCoordDim =
        MINOF(this->task_coord_dim, this->proc_coord_dim);
    KMeansAlgorithm<part_t, pcoord_t> kma(
        minCoordDim, nprocs,
        this->proc_coords, ntasks);

    kma.kmeans();
    kma.getMinDistanceCluster(proc_permutation);

    // Processors beyond ntasks are not used.
    for (int i = ntasks; i < nprocs; ++i) {
      proc_permutation[i] = -1;
    }
  }
  // Park-Miller style PRNG (Schrage's method) used for deterministic
  // shuffling; a, m, q, r are the "minimal standard" constants.
  static part_t umpa_uRandom(part_t l, int &_u_umpa_seed) {
    int a = 16807;
    int m = 2147483647;
    int q = 127773;
    int r = 2836;
    int lo, hi, test;
    double d;

    lo = _u_umpa_seed % q;
    hi = _u_umpa_seed / q;
    test = (a * lo) - (r * hi);
    if (test > 0)
      _u_umpa_seed = test;
    else
      _u_umpa_seed = test + m;
    d = (double)((double)_u_umpa_seed / (double)m);
    return (part_t)(d * (double)l);
  }
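  // The update computes seed = (16807 * seed) mod (2^31 - 1) without
  // overflowing 32-bit arithmetic, then scales the result into [0, l).
  // For example, seed 1 becomes 16807, so the first draw is
  // (16807 / 2147483647) * l.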
  virtual double getProcDistance(int procId1, int procId2) const {
    pcoord_t distance = 0;
    // With a machine representation the distance is the hop count
    // between the two ranks (the coordinate-based fallback is abridged
    // in this listing).
    this->machine->getHopCount(procId1, procId2, distance);
    return distance;
  }
  // Perturbs the task visit order; two strategies, abridged here.
  void update_visit_order(part_t *visitOrder, part_t n,
                          int &_u_umpa_seed, part_t rndm) {
    part_t *a = visitOrder;

    if (rndm) {
      part_t i, u, v, tmp;
      // Swap randomly chosen pairs.
      for (i = 0; i < n; i += 16) {
        u = umpa_uRandom(n, _u_umpa_seed);
        v = umpa_uRandom(n, _u_umpa_seed);
        tmp = a[u];
        a[u] = a[v];
        a[v] = tmp;
      }
    }
    else {
      part_t i, end = n / 4;
      for (i = 1; i < end; i++) {
        /* ... deterministic reshuffle elided in this listing ... */
      }
    }
  }
  /*! \brief getMapping: partitions both the processor coordinates and
   *  the task coordinates with sequential multi-jagged partitioning and
   *  matches the resulting parts one-to-one.
   */
  virtual void getMapping(
      int myRank,
      const RCP<const Environment> &env,
      ArrayRCP<part_t> &rcp_proc_to_task_xadj,
      ArrayRCP<part_t> &rcp_proc_to_task_adj,
      ArrayRCP<part_t> &rcp_task_to_proc) const
  {
    rcp_proc_to_task_xadj = ArrayRCP<part_t>(this->no_procs + 1);
    rcp_proc_to_task_adj = ArrayRCP<part_t>(this->no_tasks);
    rcp_task_to_proc = ArrayRCP<part_t>(this->no_tasks);

    part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
    part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
    part_t *task_to_proc = rcp_task_to_proc.getRawPtr();

    part_t invalid = 0;
    fillContinousArray<part_t>(proc_to_task_xadj,
                               this->no_procs + 1, &invalid);

    // The number of parts both coordinate sets are partitioned into.
    part_t num_parts = MINOF(this->no_procs, this->no_tasks);

    int recursion_depth =
        log(float(this->no_procs)) / log(2.0) + 1;

    // How many dimension orderings exist for tasks and processors.
    int taskPerm = z2Fact<int>(this->task_coord_dim);
    int procPerm = z2Fact<int>(this->proc_coord_dim);
    int permutations = taskPerm * procPerm;
    // Plus the variants that partition along the longest dimension.
    permutations += taskPerm;
    permutations += procPerm;
    ++permutations;

    // Holds the part begin-end pointers for the processors.
    part_t *proc_xadj = allocMemory<part_t>(num_parts + 1);
    // Holds the processors ordered by their parts.
    part_t *proc_adjList = allocMemory<part_t>(this->no_procs);

    part_t used_num_procs = this->no_procs;
    if (this->no_procs > this->no_tasks) {
      // More processors than tasks: use only the subset of processors
      // that are closest to each other.
      this->getClosestSubset(proc_adjList, this->no_procs,
                             this->no_tasks);
      used_num_procs = this->no_tasks;
    }
    else {
      fillContinousArray<part_t>(proc_adjList, this->no_procs, NULL);
    }

    // Each rank tries a different dimension permutation; the best
    // mapping is selected afterwards.
    int myPermutation = myRank % permutations;
    bool task_partition_along_longest_dim = false;
    bool proc_partition_along_longest_dim = false;

    int myProcPerm = 0;
    int myTaskPerm = 0;

    if (myPermutation == 0) {
      task_partition_along_longest_dim = true;
      proc_partition_along_longest_dim = true;
    }
    else {
      --myPermutation;
      if (myPermutation < taskPerm) {
        proc_partition_along_longest_dim = true;
        myTaskPerm = myPermutation;
      }
      else {
        myPermutation -= taskPerm;
        if (myPermutation < procPerm) {
          task_partition_along_longest_dim = true;
          myProcPerm = myPermutation;
        }
        else {
          myPermutation -= procPerm;
          myProcPerm = myPermutation % procPerm;
          myTaskPerm = myPermutation / procPerm;
        }
      }
    }

    int *permutation = allocMemory<int>(
        (this->proc_coord_dim > this->task_coord_dim)
            ? this->proc_coord_dim : this->task_coord_dim);

    // Get the myProcPerm-th ordering of the processor dimensions.
    ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);

    // Reorder the processor coordinate dimensions accordingly.
    pcoord_t **pcoords = allocMemory<pcoord_t *>(this->proc_coord_dim);
    for (int i = 0; i < this->proc_coord_dim; ++i) {
      pcoords[i] = this->proc_coords[permutation[i]];
    }

    env->timerStart(MACRO_TIMERS, "Mapping - Proc Partitioning");
    // Partition the processors (argument list abridged in this listing).
    sequential_task_partitioning(
        env,
        /* ... coordinate and part counts elided ... */
        this->proc_coord_dim,
        pcoords,
        proc_adjList,
        proc_xadj,
        recursion_depth,
        this->partNoArray,
        proc_partition_along_longest_dim);
    env->timerStop(MACRO_TIMERS, "Mapping - Proc Partitioning");
    freeArray<pcoord_t *>(pcoords);

    part_t *task_xadj = allocMemory<part_t>(num_parts + 1);
    part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
    fillContinousArray<part_t>(task_adjList, this->no_tasks, NULL);

    // Get the myTaskPerm-th ordering of the task dimensions.
    ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);

    tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
    for (int i = 0; i < this->task_coord_dim; ++i) {
      tcoords[i] = this->task_coords[permutation[i]];
    }

    env->timerStart(MACRO_TIMERS, "Mapping - Task Partitioning");
    // Partition the tasks (argument list abridged in this listing).
    sequential_task_partitioning(
        env,
        /* ... coordinate and part counts elided ... */
        this->task_coord_dim,
        tcoords,
        task_adjList,
        task_xadj,
        recursion_depth,
        this->partNoArray,
        task_partition_along_longest_dim);
    env->timerStop(MACRO_TIMERS, "Mapping - Task Partitioning");
    freeArray<pcoord_t *>(tcoords);
    freeArray<int>(permutation);

    // Match the processor parts with the task parts.
    for (part_t i = 0; i < num_parts; ++i) {
      part_t proc_index_begin = proc_xadj[i];
      part_t task_begin_index = task_xadj[i];
      part_t proc_index_end = proc_xadj[i + 1];
      part_t task_end_index = task_xadj[i + 1];

      if (proc_index_end - proc_index_begin != 1) {
        std::cerr << "Error at partitioning of processors" << std::endl;
        std::cerr << "PART:" << i << " is assigned to "
                  << proc_index_end - proc_index_begin
                  << " processors." << std::endl;
        exit(1);
      }
      part_t assigned_proc = proc_adjList[proc_index_begin];
      proc_to_task_xadj[assigned_proc] =
          task_end_index - task_begin_index;
    }

    // Convert the per-processor task counts into a CSR prefix sum.
    part_t *proc_to_task_xadj_work =
        allocMemory<part_t>(this->no_procs);
    part_t sum = 0;
    for (part_t i = 0; i < this->no_procs; ++i) {
      part_t tmp = proc_to_task_xadj[i];
      proc_to_task_xadj[i] = sum;
      sum += tmp;
      proc_to_task_xadj_work[i] = sum;
    }
    proc_to_task_xadj[this->no_procs] = sum;

    for (part_t i = 0; i < num_parts; ++i) {
      part_t proc_index_begin = proc_xadj[i];
      part_t task_begin_index = task_xadj[i];
      part_t task_end_index = task_xadj[i + 1];

      part_t assigned_proc = proc_adjList[proc_index_begin];

      for (part_t j = task_begin_index; j < task_end_index; ++j) {
        part_t taskId = task_adjList[j];
        task_to_proc[taskId] = assigned_proc;
        proc_to_task_adj[--proc_to_task_xadj_work[assigned_proc]] =
            taskId;
      }
    }

    freeArray<part_t>(proc_to_task_xadj_work);
    freeArray<part_t>(task_xadj);
    freeArray<part_t>(task_adjList);
    freeArray<part_t>(proc_xadj);
    freeArray<part_t>(proc_adjList);
  }
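  // A sketch of the CSR output (hypothetical values): with 2 procs and
  // 4 tasks, proc_to_task_xadj = {0, 2, 4} means proc 0 runs
  // proc_to_task_adj[0..2) and proc 1 runs proc_to_task_adj[2..4);
  // task_to_proc is the inverse map.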
/*! \brief CoordinateTaskMapper: maps the parts of a partitioning
 *  solution to the ranks of the machine.
 */
template <typename Adapter, typename part_t>
class CoordinateTaskMapper : public PartitionMapping<Adapter> {
#ifndef DOXYGEN_SHOULD_SKIP_THIS
  typedef typename Adapter::scalar_t pcoord_t;
  typedef typename Adapter::scalar_t tcoord_t;
  typedef typename Adapter::scalar_t scalar_t;
#endif

  /* ... members abridged in this listing ... */

public:
  /*! \brief doMapping: calls getMapping on the communication model. */
  void doMapping(int myRank) {
    if (this->proc_task_comm) {
      this->proc_task_comm->getMapping(
          myRank,
          this->env,
          this->proc_to_task_xadj,
          this->proc_to_task_adj,
          this->task_to_proc);
    }
    else {
      std::cerr << "communicationModel is not specified in the Mapper"
                << std::endl;
      exit(1);
    }
  }
  /*! \brief Creates and returns the subcommunicator for the processor
   *  group that shares the same set of candidate permutations.
   */
  RCP<Comm<int> > create_subCommunicator() {
    int procDim = this->proc_task_comm->proc_coord_dim;
    int taskDim = this->proc_task_comm->task_coord_dim;

    // Total permutations tried: all orderings of both coordinate sets,
    // plus the longest-dimension variants.
    int taskPerm = z2Fact<int>(procDim);
    int procPerm = z2Fact<int>(taskDim);
    int idealGroupSize = taskPerm * procPerm;
    idealGroupSize += taskPerm + procPerm + 1;

    int myRank = this->comm->getRank();
    int commSize = this->comm->getSize();

    int myGroupIndex = myRank / idealGroupSize;

    int prevGroupBegin = (myGroupIndex - 1) * idealGroupSize;
    if (prevGroupBegin < 0) prevGroupBegin = 0;
    int myGroupBegin = myGroupIndex * idealGroupSize;
    int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
    int nextGroupEnd = (myGroupIndex + 2) * idealGroupSize;

    // The last group absorbs the leftover ranks.
    if (myGroupEnd > commSize) {
      myGroupBegin = prevGroupBegin;
      myGroupEnd = commSize;
    }
    if (nextGroupEnd > commSize) {
      myGroupEnd = commSize;
    }
    int myGroupSize = myGroupEnd - myGroupBegin;

    part_t *myGroup = allocMemory<part_t>(myGroupSize);
    for (int i = 0; i < myGroupSize; ++i) {
      myGroup[i] = myGroupBegin + i;
    }

    ArrayView<const part_t> myGroupView(myGroup, myGroupSize);

    RCP<Comm<int> > subComm =
        this->comm->createSubcommunicator(myGroupView);
    freeArray<part_t>(myGroup);
    return subComm;
  }
  /*! \brief Finds the lowest-cost mapping across the group and
   *  broadcasts the solution to everyone.
   */
  void getBestMapping() {
    RCP<Comm<int> > subComm = this->create_subCommunicator();
    double myCost = this->proc_task_comm->getCommunicationCostMetric();

    double localCost[2], globalCost[2];

    localCost[0] = myCost;
    localCost[1] = double(subComm->getRank());

    globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
    Zoltan2_ReduceBestMapping<int, double> reduceBest;
    reduceAll<int, double>(*subComm, reduceBest,
                           2, localCost, globalCost);

    int sender = int(globalCost[1]);

    if (this->comm->getRank() == 0) {
      std::cout << "me:" << localCost[1]
                << " localcost:" << localCost[0]
                << " bestcost:" << globalCost[0]
                << " Sender:" << sender << std::endl;
    }

    // Everyone adopts the winner's mapping.
    broadcast(*subComm, sender, this->ntasks,
              this->task_to_proc.getRawPtr());
    broadcast(*subComm, sender, this->nprocs,
              this->proc_to_task_xadj.getRawPtr());
    broadcast(*subComm, sender, this->ntasks,
              this->proc_to_task_adj.getRawPtr());
  }
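  // The reduction packs (cost, rank) into a double[2] so one reduceAll
  // both finds the minimum cost and identifies which rank produced it;
  // Zoltan2_ReduceBestMapping (top of this file) breaks cost ties
  // toward the smaller rank, so all processes agree on one sender.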
  // Writes the mapping as gnuplot arrows from each processor coordinate
  // to the coordinates of its assigned tasks, one data file per rank.
  void writeMapping() {
    std::ofstream gnuPlotCode("gnuPlot.plot", std::ofstream::out);

    int mindim = MINOF(proc_task_comm->proc_coord_dim,
                       proc_task_comm->task_coord_dim);
    std::string ss = "";
    for (part_t i = 0; i < this->nprocs; ++i) {

      std::string procFile = Teuchos::toString<int>(i) + "_mapping.txt";
      if (i == 0) {
        gnuPlotCode << "plot \"" << procFile << "\"\n";
      }
      else {
        gnuPlotCode << "replot \"" << procFile << "\"\n";
      }

      std::ofstream inpFile(procFile.c_str(), std::ofstream::out);

      std::string gnuPlotArrow = "set arrow from ";
      // The processor coordinates form the arrow tail.
      for (int j = 0; j < mindim; ++j) {
        if (j == mindim - 1) {
          gnuPlotArrow += Teuchos::toString<float>(
              proc_task_comm->proc_coords[j][i]);
        }
        else {
          gnuPlotArrow += Teuchos::toString<float>(
              proc_task_comm->proc_coords[j][i]) + ",";
        }
      }
      gnuPlotArrow += " to ";

      inpFile << std::endl;
      ArrayView<part_t> a = this->getAssignedTasksForProc(i);
      for (int k = 0; k < a.size(); ++k) {
        part_t j = a[k];
        // One arrow per assigned task.
        std::string gnuPlotArrow2 = gnuPlotArrow;
        for (int z = 0; z < mindim; ++z) {
          if (z == mindim - 1) {
            gnuPlotArrow2 += Teuchos::toString<float>(
                proc_task_comm->task_coords[z][j]);
          }
          else {
            gnuPlotArrow2 += Teuchos::toString<float>(
                proc_task_comm->task_coords[z][j]) + ",";
          }
        }
        ss += gnuPlotArrow2 + "\n";
        inpFile << std::endl;
      }
      inpFile.close();
    }
    gnuPlotCode << ss;
    gnuPlotCode << "\nreplot\n pause -1 \n";
    gnuPlotCode.close();
  }
  // Variant that also writes the raw processor and task coordinates to
  // separate files for plotting.
  void writeMapping2(int myRank) {
    std::string rankStr = Teuchos::toString<int>(myRank);
    std::string gnuPlots = "gnuPlot", extentionS = ".plot";
    std::string outF = gnuPlots + rankStr + extentionS;
    std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);

    CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>
        *tmpproc_task_comm = this->proc_task_comm;

    int mindim = MINOF(tmpproc_task_comm->proc_coord_dim,
                       tmpproc_task_comm->task_coord_dim);
    std::string ss = "";
    std::string procs = "", parts = "";
    for (part_t i = 0; i < this->nprocs; ++i) {

      std::string gnuPlotArrow = "set arrow from ";
      for (int j = 0; j < mindim; ++j) {
        if (j == mindim - 1) {
          gnuPlotArrow += Teuchos::toString<float>(
              tmpproc_task_comm->proc_coords[j][i]);
          procs += Teuchos::toString<float>(
              tmpproc_task_comm->proc_coords[j][i]);
        }
        else {
          gnuPlotArrow += Teuchos::toString<float>(
              tmpproc_task_comm->proc_coords[j][i]) + ",";
          procs += Teuchos::toString<float>(
              tmpproc_task_comm->proc_coords[j][i]) + " ";
        }
      }
      procs += "\n";
      gnuPlotArrow += " to ";

      ArrayView<part_t> a = this->getAssignedTasksForProc(i);
      for (int k = 0; k < a.size(); ++k) {
        part_t j = a[k];
        std::string gnuPlotArrow2 = gnuPlotArrow;
        for (int z = 0; z < mindim; ++z) {
          if (z == mindim - 1) {
            gnuPlotArrow2 += Teuchos::toString<float>(
                tmpproc_task_comm->task_coords[z][j]);
            parts += Teuchos::toString<float>(
                tmpproc_task_comm->task_coords[z][j]);
          }
          else {
            gnuPlotArrow2 += Teuchos::toString<float>(
                tmpproc_task_comm->task_coords[z][j]) + ",";
            parts += Teuchos::toString<float>(
                tmpproc_task_comm->task_coords[z][j]) + " ";
          }
        }
        parts += "\n";
        ss += gnuPlotArrow2 + " nohead\n";
      }
    }

    std::ofstream procFile("procPlot.plot", std::ofstream::out);
    procFile << procs << "\n";
    procFile.close();

    std::ofstream partFile("partPlot.plot", std::ofstream::out);
    partFile << parts << "\n";
    partFile.close();

    std::ofstream extraProcFile("allProc.plot", std::ofstream::out);

    for (part_t j = 0; j < this->nprocs; ++j) {
      for (int i = 0; i < mindim; ++i) {
        extraProcFile << tmpproc_task_comm->proc_coords[i][j] << " ";
      }
      extraProcFile << std::endl;
    }

    extraProcFile.close();

    gnuPlotCode << ss;
    if (mindim == 2) {
      gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
    }
    else {
      gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
    }
    gnuPlotCode << "replot \"partPlot.plot\" with points pointsize 3\n";
    gnuPlotCode << "replot \"allProc.plot\" with points pointsize 0.65\n";
    gnuPlotCode << "\nreplot\n pause -1 \n";
    gnuPlotCode.close();
  }
  // Writes the part boxes and the communication arrows between part
  // centers for gnuplot.
  void writeGnuPlot(
      const Teuchos::Comm<int> *comm_,
      const Zoltan2::PartitioningSolution<Adapter> *soln_,
      int coordDim,
      tcoord_t **partCenters)
  {
    std::string file = "gggnuPlot";
    std::string exten = ".plot";
    std::ofstream mm("2d.txt");
    file += Teuchos::toString<int>(comm_->getRank()) + exten;
    std::ofstream ff(file.c_str());

    // 'outPartBoxes' holds the solution's part boxes (retrieval
    // abridged in this listing).
    for (part_t i = 0; i < this->ntasks; ++i) {
      outPartBoxes[i].writeGnuPlot(ff, mm);
    }
    if (coordDim == 2) {
      ff << "plot \"2d.txt\"" << std::endl;
    }
    else {
      ff << "splot \"2d.txt\"" << std::endl;
    }
    mm.close();

    ff << "set style arrow 5 nohead size screen 0.03,15,135 ls 1"
       << std::endl;
    for (part_t i = 0; i < this->ntasks; ++i) {
      part_t pb = task_communication_xadj[i];
      part_t pe = task_communication_xadj[i + 1];
      for (part_t p = pb; p < pe; ++p) {
        part_t n = task_communication_adj[p];

        // Arrow from the neighbor's center to this part's center.
        std::string arrowline = "set arrow from ";
        for (int j = 0; j < coordDim - 1; ++j) {
          arrowline +=
              Teuchos::toString<tcoord_t>(partCenters[j][n]) + ",";
        }
        arrowline +=
            Teuchos::toString<tcoord_t>(partCenters[coordDim - 1][n]) +
            " to ";

        for (int j = 0; j < coordDim - 1; ++j) {
          arrowline +=
              Teuchos::toString<tcoord_t>(partCenters[j][i]) + ",";
        }
        arrowline +=
            Teuchos::toString<tcoord_t>(partCenters[coordDim - 1][i]) +
            " as 5\n";

        ff << arrowline;
      }
    }
    ff << "replot\n pause -1" << std::endl;
    ff.close();
  }
  /*! \brief Returns raw pointers to the proc-to-task CSR arrays. */
  void getProcTask(part_t *&proc_to_task_xadj_,
                   part_t *&proc_to_task_adj_) {
    proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
    proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
  }

  virtual void map(RCP<MappingSolution<Adapter> > &mappingsoln) {
    // Store the computed mapping in the solution.
    mappingsoln->setMap_PartsForRank(this->proc_to_task_xadj,
                                     this->proc_to_task_adj);
  }

  virtual ~CoordinateTaskMapper() {
    // Free the models only if this mapper owns them.
    if (this->isOwnerofModel) {
      /* ... cleanup abridged in this listing ... */
    }
  }
  /*! \brief Constructor: computes the communication metric from a
   *  PartitioningSolution and performs the mapping.
   */
  CoordinateTaskMapper(
      const Teuchos::RCP<const Teuchos::Comm<int> > comm_,
      const Teuchos::RCP<const MachineRepresentation<pcoord_t, part_t> > machine_,
      const Teuchos::RCP<const Adapter> input_adapter_,
      const Teuchos::RCP<const Zoltan2::PartitioningSolution<Adapter> > soln_,
      const Teuchos::RCP<const Environment> envConst) :
    PartitionMapping<Adapter>(comm_, machine_, input_adapter_, soln_,
                              envConst)
    /* ... remaining initializers abridged ... */
  {
    // Build the coordinate and graph models from the input adapter.
    RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
    RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_;

    RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
    RCP<const Environment> envConst_ = envConst;

    RCP<const ctm_base_adapter_t> baseInputAdapter_(
        rcp(dynamic_cast<const ctm_base_adapter_t *>(
                input_adapter_.getRawPtr()), false));

    coordinateModel_ = rcp(new CoordinateModel<ctm_base_adapter_t>(
        baseInputAdapter_, envConst_, rcp_comm, coordFlags_));

    graph_model_ = rcp(new GraphModel<ctm_base_adapter_t>(
        baseInputAdapter_, envConst_, rcp_comm, graphFlags_));

    if (!machine_->hasMachineCoordinates()) {
      throw std::runtime_error("Existing machine does not provide "
                               "coordinates for coordinate task mapping");
    }

    // Obtain the processor coordinates from the machine.
    int procDim = machine_->getMachineDim();
    this->nprocs = machine_->getNumRanks();

    pcoord_t **procCoordinates = NULL;
    if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
      throw std::runtime_error("Existing machine does not implement "
                               "getAllMachineCoordinatesView");
    }

    // The machine extent and its wrap-around (torus) flags; when the
    // extent is known, shift the coordinates so the largest gap sits at
    // the boundary.
    std::vector<int> machine_extent_vec(procDim);
    int *machine_extent = &(machine_extent_vec[0]);
    bool *machine_extent_wrap_around = new bool[procDim];
    machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);

    if (machine_->getMachineExtent(machine_extent)) {
      procCoordinates = this->shiftMachineCoordinates(
          procDim,
          machine_extent,
          machine_extent_wrap_around,
          this->nprocs,
          procCoordinates);
    }

    // Obtain the task (part) coordinates: the centers of the parts.
    int coordDim = coordinateModel_->getCoordinateDim();

    tcoord_t **partCenters = NULL;
    partCenters = allocMemory<tcoord_t *>(coordDim);
    for (int i = 0; i < coordDim; ++i) {
      partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
    }

    typedef typename Adapter::scalar_t t_scalar_t;

    envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");

    // Compute the center of each part.
    getSolutionCenterCoordinates<Adapter, t_scalar_t, part_t>(
        envConst.getRawPtr(),
        comm_.getRawPtr(),
        coordinateModel_.getRawPtr(),
        this->solution_parts,
        coordDim,
        this->ntasks,
        partCenters);

    envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");

    // Compute the coarsened part graph if a graph model is available.
    if (graph_model_.getRawPtr() != NULL) {
      getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
          envConst.getRawPtr(),
          comm_.getRawPtr(),
          graph_model_.getRawPtr(),
          this->ntasks,
          this->solution_parts,
          this->task_communication_xadj,
          this->task_communication_adj,
          this->task_communication_edge_weight);
    }

    // Create the communication model that performs the mapping.
    this->proc_task_comm =
        new Zoltan2::CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
            procDim,
            procCoordinates,
            coordDim,
            partCenters,
            this->nprocs,
            this->ntasks,
            machine_extent,
            machine_extent_wrap_around,
            machine_.getRawPtr());

    int myRank = comm_->getRank();

    envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
    this->doMapping(myRank);
    envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");

    envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
    /* ... communication graph creation abridged in this listing ... */
    envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");

    if (comm_->getRank() == 0) {
      // Report the maximum neighbor count in the task graph.
      part_t taskCommCount = this->task_communication_xadj.size();
      part_t maxN = 0;
      for (part_t i = 1; i <= taskCommCount; ++i) {
        part_t nc = this->task_communication_xadj[i] -
                    this->task_communication_xadj[i - 1];
        if (maxN < nc) maxN = nc;
      }
      std::cout << " maxNeighbor:" << maxN << std::endl;
    }

    this->writeGnuPlot(comm_, soln_, coordDim, partCenters);

    envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
    this->proc_task_comm->calculateCommunicationCost(
        this->task_to_proc.getRawPtr(),
        this->task_communication_xadj.getRawPtr(),
        this->task_communication_adj.getRawPtr(),
        this->task_communication_edge_weight.getRawPtr());
    envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");

    // Pick the best of the candidate mappings and broadcast it.
    this->getBestMapping();

    delete [] machine_extent_wrap_around;
    if (machine_->getMachineExtent(machine_extent)) {
      for (int i = 0; i < procDim; ++i) {
        delete [] procCoordinates[i];
      }
      delete [] procCoordinates;
    }

    for (int i = 0; i < coordDim; ++i) {
      freeArray<tcoord_t>(partCenters[i]);
    }
    freeArray<tcoord_t *>(partCenters);
  }
  /*! \brief Constructor: takes a part count and a result-parts array
   *  instead of a PartitioningSolution.
   */
  CoordinateTaskMapper(
      const Teuchos::RCP<const Teuchos::Comm<int> > comm_,
      const Teuchos::RCP<const MachineRepresentation<pcoord_t, part_t> > machine_,
      const Teuchos::RCP<const Adapter> input_adapter_,
      const part_t num_parts_,
      const part_t *result_parts,
      const Teuchos::RCP<const Environment> envConst,
      bool is_input_adapter_distributed = true) :
    PartitionMapping<Adapter>(comm_, machine_, input_adapter_,
                              num_parts_, result_parts, envConst)
    /* ... remaining initializers abridged ... */
  {
    RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
    RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_;

    RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
    RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
    if (!is_input_adapter_distributed) {
      // Treat the adapter's data as serial.
      ia_comm = Teuchos::createSerialComm<int>();
    }
    RCP<const Environment> envConst_ = envConst;

    RCP<const ctm_base_adapter_t> baseInputAdapter_(
        rcp(dynamic_cast<const ctm_base_adapter_t *>(
                input_adapter_.getRawPtr()), false));

    coordinateModel_ = rcp(new CoordinateModel<ctm_base_adapter_t>(
        baseInputAdapter_, envConst_, ia_comm, coordFlags_));

    graph_model_ = rcp(new GraphModel<ctm_base_adapter_t>(
        baseInputAdapter_, envConst_, ia_comm, graphFlags_));

    if (!machine_->hasMachineCoordinates()) {
      throw std::runtime_error("Existing machine does not provide "
                               "coordinates for coordinate task mapping");
    }

    int procDim = machine_->getMachineDim();
    this->nprocs = machine_->getNumRanks();

    pcoord_t **procCoordinates = NULL;
    if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
      throw std::runtime_error("Existing machine does not implement "
                               "getAllMachineCoordinatesView");
    }

    std::vector<int> machine_extent_vec(procDim);
    int *machine_extent = &(machine_extent_vec[0]);
    bool *machine_extent_wrap_around = new bool[procDim];
    machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);

    if (machine_->getMachineExtent(machine_extent)) {
      procCoordinates = this->shiftMachineCoordinates(
          procDim,
          machine_extent,
          machine_extent_wrap_around,
          this->nprocs,
          procCoordinates);
    }

    int coordDim = coordinateModel_->getCoordinateDim();

    this->ntasks = num_parts_;
    this->solution_parts = result_parts;

    tcoord_t **partCenters = NULL;
    partCenters = allocMemory<tcoord_t *>(coordDim);
    for (int i = 0; i < coordDim; ++i) {
      partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
    }

    typedef typename Adapter::scalar_t t_scalar_t;

    envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");

    getSolutionCenterCoordinates<Adapter, t_scalar_t, part_t>(
        envConst.getRawPtr(),
        ia_comm.getRawPtr(),
        coordinateModel_.getRawPtr(),
        this->solution_parts,
        coordDim,
        this->ntasks,
        partCenters);

    envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");

    if (graph_model_.getRawPtr() != NULL) {
      getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
          envConst.getRawPtr(),
          ia_comm.getRawPtr(),
          graph_model_.getRawPtr(),
          this->ntasks,
          this->solution_parts,
          this->task_communication_xadj,
          this->task_communication_adj,
          this->task_communication_edge_weight);
    }

    envConst->timerStart(MACRO_TIMERS,
                         "CoordinateCommunicationModel Create");

    this->proc_task_comm =
        new Zoltan2::CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
            procDim,
            procCoordinates,
            coordDim,
            partCenters,
            this->nprocs,
            this->ntasks,
            machine_extent,
            machine_extent_wrap_around,
            machine_.getRawPtr());

    envConst->timerStop(MACRO_TIMERS,
                        "CoordinateCommunicationModel Create");

    int myRank = comm_->getRank();

    envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
    this->doMapping(myRank);
    envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");

    envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
    /* ... communication graph creation abridged in this listing ... */
    envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");

    if (comm_->getRank() == 0) {
      part_t taskCommCount = this->task_communication_xadj.size();
      part_t maxN = 0;
      for (part_t i = 1; i <= taskCommCount; ++i) {
        part_t nc = this->task_communication_xadj[i] -
                    this->task_communication_xadj[i - 1];
        if (maxN < nc) maxN = nc;
      }
      std::cout << " maxNeighbor:" << maxN << std::endl;
    }

    this->writeGnuPlot(comm_, soln_, coordDim, partCenters);

    envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
    this->proc_task_comm->calculateCommunicationCost(
        this->task_to_proc.getRawPtr(),
        this->task_communication_xadj.getRawPtr(),
        this->task_communication_adj.getRawPtr(),
        this->task_communication_edge_weight.getRawPtr());
    envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");

    this->getBestMapping();

    delete [] machine_extent_wrap_around;
    if (machine_->getMachineExtent(machine_extent)) {
      for (int i = 0; i < procDim; ++i) {
        delete [] procCoordinates[i];
      }
      delete [] procCoordinates;
    }

    for (int i = 0; i < coordDim; ++i) {
      freeArray<tcoord_t>(partCenters[i]);
    }
    freeArray<tcoord_t *>(partCenters);
  }
  /*! \brief Constructor for raw coordinate arrays: performs the mapping
   *  directly, without adapters or models.
   */
  CoordinateTaskMapper(
      const Environment *env_const_,
      const Teuchos::Comm<int> *problemComm,
      int proc_dim,
      int num_processors,
      pcoord_t **machine_coords,
      int task_dim,
      part_t num_tasks,
      tcoord_t **task_coords,
      ArrayRCP<part_t> task_comm_xadj,
      ArrayRCP<part_t> task_comm_adj,
      pcoord_t *task_communication_edge_weight_,
      int recursion_depth,
      part_t *part_no_array,
      const part_t *machine_dimensions)
    /* ... base class initializers abridged ... */
  {
    // Optionally shift the machine coordinates using the machine
    // dimensions; every dimension is assumed to wrap around.
    pcoord_t **virtual_machine_coordinates = machine_coords;
    bool *wrap_arounds = new bool[proc_dim];
    for (int i = 0; i < proc_dim; ++i) wrap_arounds[i] = true;

    if (machine_dimensions) {
      virtual_machine_coordinates = this->shiftMachineCoordinates(
          proc_dim, machine_dimensions, wrap_arounds,
          num_processors, machine_coords);
    }

    this->nprocs = num_processors;

    int coordDim = task_dim;
    this->ntasks = num_tasks;

    // The task coordinates serve directly as the part centers.
    tcoord_t **partCenters = task_coords;

    this->proc_task_comm =
        new Zoltan2::CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
            proc_dim,
            virtual_machine_coordinates,
            coordDim,
            partCenters,
            this->nprocs,
            this->ntasks
            /* ... machine extent arguments elided ... */);

    int myRank = problemComm->getRank();

    this->doMapping(myRank);

    this->proc_task_comm->calculateCommunicationCost(
        this->task_to_proc.getRawPtr(),
        this->task_communication_xadj.getRawPtr(),
        this->task_communication_adj.getRawPtr(),
        task_communication_edge_weight_);

    this->getBestMapping();

    delete [] wrap_arounds;

    if (machine_dimensions) {
      for (int i = 0; i < proc_dim; ++i) {
        delete [] virtual_machine_coordinates[i];
      }
      delete [] virtual_machine_coordinates;
    }
    if (problemComm->getRank() == 0) {
      /* ... optional gnuplot output abridged in this listing ... */
    }
  }
  /*! \brief Using the machine dimensions, creates virtual machine
   *  coordinates: along each wrap-around dimension the largest gap
   *  between used coordinates is moved to the boundary, so distances
   *  reflect the torus.
   */
  pcoord_t **shiftMachineCoordinates(
      int machine_dim,
      const part_t *machine_dimensions,
      bool *machine_extent_wrap_around,
      part_t numProcs,
      pcoord_t **mCoords)
  {
    pcoord_t **result_machine_coords = NULL;
    result_machine_coords = new pcoord_t *[machine_dim];
    for (int i = 0; i < machine_dim; ++i) {
      result_machine_coords[i] = new pcoord_t[numProcs];
    }

    for (int i = 0; i < machine_dim; ++i) {
      part_t numMachinesAlongDim = machine_dimensions[i];
      // How many processors sit at each coordinate along this dimension.
      part_t *machineCounts = new part_t[numMachinesAlongDim];
      memset(machineCounts, 0, sizeof(part_t) * numMachinesAlongDim);

      int *filledCoordinates = new int[numMachinesAlongDim];

      pcoord_t *coords = mCoords[i];
      for (part_t j = 0; j < numProcs; ++j) {
        part_t mc = (part_t)coords[j];
        ++machineCounts[mc];
      }

      part_t filledCoordinateCount = 0;
      for (part_t j = 0; j < numMachinesAlongDim; ++j) {
        if (machineCounts[j] > 0) {
          filledCoordinates[filledCoordinateCount++] = j;
        }
      }

      part_t firstProcCoord = filledCoordinates[0];
      part_t firstProcCount = machineCounts[firstProcCoord];

      part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
      part_t lastProcCount = machineCounts[lastProcCoord];

      // The wrap-around gap between the last and first used coordinate.
      part_t firstLastGap =
          numMachinesAlongDim - lastProcCoord + firstProcCoord;
      part_t firstLastGapProc = lastProcCount + firstProcCount;

      part_t leftSideProcCoord = firstProcCoord;
      part_t leftSideProcCount = firstProcCount;
      part_t biggestGap = 0;
      part_t biggestGapProc = numProcs;

      // Find the largest interior gap; on ties prefer the one with
      // fewer processors at its borders.
      part_t shiftBorderCoordinate = -1;
      for (part_t j = 1; j < filledCoordinateCount; ++j) {
        part_t rightSideProcCoord = filledCoordinates[j];
        part_t rightSideProcCount = machineCounts[rightSideProcCoord];

        part_t gap = rightSideProcCoord - leftSideProcCoord;
        part_t gapProc = rightSideProcCount + leftSideProcCount;

        if (gap > biggestGap ||
            (gap == biggestGap && biggestGapProc > gapProc)) {
          shiftBorderCoordinate = rightSideProcCoord;
          biggestGapProc = gapProc;
          biggestGap = gap;
        }
        leftSideProcCoord = rightSideProcCoord;
        leftSideProcCount = rightSideProcCount;
      }

      // Shift only if the interior gap beats the wrap-around gap.
      if (!(biggestGap > firstLastGap ||
            (biggestGap == firstLastGap &&
             biggestGapProc < firstLastGapProc))) {
        shiftBorderCoordinate = -1;
      }

      for (part_t j = 0; j < numProcs; ++j) {
        if (machine_extent_wrap_around[i] &&
            coords[j] < shiftBorderCoordinate) {
          result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
        }
        else {
          result_machine_coords[i][j] = coords[j];
        }
      }
      delete [] machineCounts;
      delete [] filledCoordinates;
    }

    return result_machine_coords;
  }
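  // A minimal sketch of the effect (hypothetical values): on a wrapped
  // dimension of extent 8 with processors at coordinates {0, 1, 6, 7},
  // the biggest interior gap lies between 1 and 6, so coordinates below
  // 6 are shifted up by 8, giving {8, 9, 6, 7}: the group is contiguous
  // and hop distances now reflect the torus.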
  /*! \brief Returns the processor assigned to the given task. */
  virtual void getProcsForPart(part_t taskId, part_t &numProcs,
                               part_t *&procs) const {
    numProcs = 1;
    procs = this->task_to_proc.getRawPtr() + taskId;
  }

  /*! \brief Returns the assigned processor id for the given task. */
  part_t getAssignedProcForTask(part_t taskId) {
    return this->task_to_proc[taskId];
  }

  /*! \brief Returns the tasks assigned to a processor, with the count. */
  virtual void getPartsForProc(int procId, part_t &numParts,
                               part_t *&parts) const {
    part_t task_begin = this->proc_to_task_xadj[procId];
    part_t taskend = this->proc_to_task_xadj[procId + 1];
    parts = this->proc_to_task_adj.getRawPtr() + task_begin;
    numParts = taskend - task_begin;
  }

  ArrayView<part_t> getAssignedTasksForProc(part_t procId) {
    part_t task_begin = this->proc_to_task_xadj[procId];
    part_t taskend = this->proc_to_task_xadj[procId + 1];

    if (taskend - task_begin > 0) {
      ArrayView<part_t> assignedParts(
          this->proc_to_task_adj.getRawPtr() + task_begin,
          taskend - task_begin);
      return assignedParts;
    }
    else {
      ArrayView<part_t> assignedParts;
      return assignedParts;
    }
  }
};
/*! \brief coordinateTaskMapperInterface: creates a
 *  CoordinateTaskMapper, performs the mapping, and copies the result
 *  into the output arrays.
 */
template <typename part_t, typename pcoord_t, typename tcoord_t>
void coordinateTaskMapperInterface(
    RCP<const Teuchos::Comm<int> > problemComm,
    int proc_dim,
    int num_processors,
    pcoord_t **machine_coords,
    int task_dim,
    part_t num_tasks,
    tcoord_t **task_coords,
    part_t *task_comm_xadj,
    part_t *task_comm_adj,
    pcoord_t *task_communication_edge_weight_,
    part_t *proc_to_task_xadj,
    part_t *proc_to_task_adj,
    int recursion_depth,
    part_t *part_no_array,
    const part_t *machine_dimensions)
{
  typedef Tpetra::MultiVector<tcoord_t, part_t, part_t> tMVector_t;

  // Wrap the CSR input in non-owning ArrayRCPs.
  Teuchos::ArrayRCP<part_t> tmp_task_communication_xadj(
      task_comm_xadj, 0, num_tasks + 1, false);
  Teuchos::ArrayRCP<part_t> tmp_task_communication_adj;
  if (task_comm_xadj) {
    tmp_task_communication_adj = Teuchos::ArrayRCP<part_t>(
        task_comm_adj, 0, task_comm_xadj[num_tasks], false);
  }

  // Construct the mapper; the mapping runs in the constructor.
  CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t> *ctm =
      new CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t>(
          /* ... environment argument elided ... */
          problemComm.getRawPtr(),
          proc_dim,
          num_processors,
          machine_coords,
          task_dim,
          num_tasks,
          task_coords,
          tmp_task_communication_xadj,
          tmp_task_communication_adj,
          task_communication_edge_weight_,
          recursion_depth,
          part_no_array,
          machine_dimensions);

  // Copy the result out.
  part_t *proc_to_task_xadj_;
  part_t *proc_to_task_adj_;

  ctm->getProcTask(proc_to_task_xadj_, proc_to_task_adj_);

  for (part_t i = 0; i <= num_processors; ++i) {
    proc_to_task_xadj[i] = proc_to_task_xadj_[i];
  }
  for (part_t i = 0; i < num_tasks; ++i) {
    proc_to_task_adj[i] = proc_to_task_adj_[i];
  }
  delete ctm;
}
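// A minimal usage sketch (hypothetical values, names as above): map 4
// tasks laid out on a line onto 4 processors of a 1-D machine.
//
//   // pcoord_t *mc[1]; tcoord_t *tc[1];   // coordinate arrays
//   // part_t xadj[5], adj[...];           // task graph in CSR
//   // part_t p2t_xadj[5], p2t_adj[4];     // outputs
//   // coordinateTaskMapperInterface<part_t, pcoord_t, tcoord_t>(
//   //     comm, 1, 4, mc, 1, 4, tc, xadj, adj, NULL,
//   //     p2t_xadj, p2t_adj, 3, NULL, NULL);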
#endif // _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_