diff options
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/packages/patches/combinatorial-blas-awpm.patch | 218 |
1 files changed, 217 insertions, 1 deletions
diff --git a/gnu/packages/patches/combinatorial-blas-awpm.patch b/gnu/packages/patches/combinatorial-blas-awpm.patch index 86d4ab2dcf..d44a6f28ba 100644 --- a/gnu/packages/patches/combinatorial-blas-awpm.patch +++ b/gnu/packages/patches/combinatorial-blas-awpm.patch @@ -1,4 +1,6 @@ -Install BipartiteMatchings headers for SuperLU_DIST. +Install BipartiteMatchings headers for SuperLU_DIST. Removes global variables +and code related to performance measurement that is not useful when used in a +library setting. --- a/BipartiteMatchings/ApproxWeightPerfectMatching.h +++ b/BipartiteMatchings/ApproxWeightPerfectMatching.h @@ -11,6 +13,167 @@ Install BipartiteMatchings headers for SuperLU_DIST. #include "BPMaximalMatching.h" #include "BPMaximumMatching.h" #include <parallel/algorithm> +@@ -39,9 +39,6 @@ + std::shared_ptr<CommGrid> commGrid; + }; + +-double t1Comp, t1Comm, t2Comp, t2Comm, t3Comp, t3Comm, t4Comp, t4Comm, t5Comp, t5Comm, tUpdateMateComp; +- +- + template <class IT, class NT> + std::vector<std::tuple<IT,IT,NT>> ExchangeData(std::vector<std::vector<std::tuple<IT,IT,NT>>> & tempTuples, MPI_Comm World) + { +@@ -391,7 +388,7 @@ + + + +-int ThreadBuffLenForBinning(int itemsize, int nbins) ++inline int ThreadBuffLenForBinning(int itemsize, int nbins) + { + // 1MB shared cache (per 2 cores) in KNL + #ifndef L2_CACHE_SIZE +@@ -417,7 +414,6 @@ + + + +- double tstart = MPI_Wtime(); + + + MPI_Comm World = param.commGrid->GetWorld(); +@@ -528,9 +524,6 @@ + } + } + +- t1Comp = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); +- + // Step 3: Communicate data + + std::vector<int> recvcnt (param.nprocs); +@@ -548,7 +541,6 @@ + std::vector< std::tuple<IT,IT,NT> > recvTuples1(totrecv); + MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World); + MPI_Type_free(&MPI_tuple); +- t1Comm = MPI_Wtime() - tstart; + return recvTuples1; + } + +@@ -730,9 +722,6 @@ + + // Step 4: Communicate data + +- t2Comp = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); +- + std::vector<int> recvcnt (param.nprocs); + std::vector<int> rdispls (param.nprocs, 0); + +@@ -748,7 +737,6 @@ + std::vector< std::tuple<IT,IT,IT,NT> > recvTuples1(totrecv); + MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World); + MPI_Type_free(&MPI_tuple); +- t2Comm = MPI_Wtime() - tstart; + return recvTuples1; + } + +@@ -836,7 +824,6 @@ + param.myrank = myrank; + param.commGrid = commGrid; + +- double t1CompAll = 0, t1CommAll = 0, t2CompAll = 0, t2CommAll = 0, t3CompAll = 0, t3CommAll = 0, t4CompAll = 0, t4CommAll = 0, t5CompAll = 0, t5CommAll = 0, tUpdateMateCompAll = 0, tUpdateWeightAll = 0; + + // ----------------------------------------------------------- + // replicate mate vectors for mateCol2Row +@@ -975,11 +962,7 @@ + } + + //vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1); +- double t3Comp = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); + recvTuples1 = ExchangeData1(tempTuples1, World); +- double t3Comm = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); + + std::vector<std::tuple<IT,IT,IT,IT, NT>> bestTuplesPhase4 (lncol); + // we could have used lnrow in both bestTuplesPhase3 and bestTuplesPhase4 +@@ -1041,14 +1024,9 @@ + + + //vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1); +- double t4Comp = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); + + std::vector<std::tuple<IT,IT,IT,IT>> recvWinnerTuples = ExchangeData1(winnerTuples, World); + +- double t4Comm = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); +- + // at the owner of (mj,j) + std::vector<std::tuple<IT,IT>> rowBcastTuples(recvWinnerTuples.size()); //(mi,mj) + std::vector<std::tuple<IT,IT>> colBcastTuples(recvWinnerTuples.size()); //(j,i) +@@ -1065,15 +1043,10 @@ + colBcastTuples[k] = std::make_tuple(j,i); + rowBcastTuples[k] = std::make_tuple(mj,mi); + } +- double t5Comp = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); + + std::vector<std::tuple<IT,IT>> updatedR2C = MateBcast(rowBcastTuples, RowWorld); + std::vector<std::tuple<IT,IT>> updatedC2R = MateBcast(colBcastTuples, ColWorld); + +- double t5Comm = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); +- + #ifdef THREADED + #pragma omp parallel for + #endif +@@ -1095,13 +1068,9 @@ + } + + +- double tUpdateMateComp = MPI_Wtime() - tstart; +- tstart = MPI_Wtime(); + // update weights of matched edges + // we can do better than this since we are doing sparse updates + ReplicateMateWeights(param, dcsc, colptr, RepMateC2R, RepMateWR2C, RepMateWC2R); +- double tUpdateWeight = MPI_Wtime() - tstart; +- + + weightPrev = weightCur; + weightCur = MatchingWeight(RepMateWC2R, RowWorld, minw); +@@ -1110,32 +1079,8 @@ + //UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R); + //CheckMatching(mateRow2Col,mateCol2Row); + +- if(myrank==0) +- { +- std::cout << t1Comp << " " << t1Comm << " "<< t2Comp << " " << t2Comm << " " << t3Comp << " " << t3Comm << " " << t4Comp << " " << t4Comm << " " << t5Comp << " " << t5Comm << " " << tUpdateMateComp << " " << tUpdateWeight << std::endl; +- +- t1CompAll += t1Comp; +- t1CommAll += t1Comm; +- t2CompAll += t2Comp; +- t2CommAll += t2Comm; +- t3CompAll += t3Comp; +- t3CommAll += t3Comm; +- t4CompAll += t4Comp; +- t4CommAll += t4Comm; +- t5CompAll += t5Comp; +- t5CommAll += t5Comm; +- tUpdateMateCompAll += tUpdateMateComp; +- tUpdateWeightAll += tUpdateWeight; +- +- } + } + +- if(myrank==0) +- { +- std::cout << "=========== overal timing ==========" << std::endl; +- std::cout << t1CompAll << " " << t1CommAll << " " << t2CompAll << " " << t2CommAll << " " << t3CompAll << " " << t3CommAll << " " << t4CompAll << " " << t4CommAll << " " << t5CompAll << " " << t5CommAll << " " << tUpdateMateCompAll << " " << tUpdateWeightAll << std::endl; +- } +- + // update the distributed mate vectors from replicated mate vectors + UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R); + //weightCur = MatchingWeight(RepMateWC2R, RowWorld); --- a/BipartiteMatchings/BPMaximalMatching.h +++ b/BipartiteMatchings/BPMaximalMatching.h @@ -1,7 +1,7 @@ @@ -22,6 +185,33 @@ Install BipartiteMatchings headers for SuperLU_DIST. #include <iostream> #include <functional> #include <algorithm> +@@ -14,8 +14,6 @@ + #define GREEDY 1 + #define KARP_SIPSER 2 + #define DMD 3 +-MTRand GlobalMT(123); // for reproducible result +-double tTotalMaximal; + + namespace combblas { + +@@ -25,7 +25,7 @@ + void MaximalMatching(Par_DCSC_Bool & A, Par_DCSC_Bool & AT, FullyDistVec<IT, IT>& mateRow2Col, + FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& degColRecv, int type, bool rand=true) + { +- ++ static MTRand GlobalMT(123); // for reproducible result + typedef VertexTypeML < IT, IT> VertexType; + int nprocs, myrank; + MPI_Comm_size(MPI_COMM_WORLD,&nprocs); +@@ -354,8 +354,6 @@ + + } + +- tTotalMaximal = MPI_Wtime() - tStart; +- + IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;}); + std::vector<double> totalTimes(timing[0].size(),0); + for(int i=0; i<timing.size(); i++) --- a/BipartiteMatchings/BPMaximumMatching.h +++ b/BipartiteMatchings/BPMaximumMatching.h @@ -1,7 +1,7 @@ @@ -33,6 +223,32 @@ Install BipartiteMatchings headers for SuperLU_DIST. #include <mpi.h> #include <sys/time.h> #include <iostream> +@@ -11,7 +11,6 @@ + #include <string> + #include <sstream> + #include "MatchingDefs.h" +-double tTotalMaximum; + + namespace combblas { + +@@ -231,7 +231,7 @@ + void maximumMatching(SpParMat < IT, NT, DER > & A, FullyDistVec<IT, IT>& mateRow2Col, + FullyDistVec<IT, IT>& mateCol2Row, bool prune=true, bool randMM = false, bool maximizeWeight = false) + { +- ++ static MTRand GlobalMT(123); // for reproducible result + typedef VertexTypeMM <IT> VertexType; + + int nthreads=1; +@@ -420,8 +420,6 @@ + + MPI_Win_free(&winLeaves); + +- tTotalMaximum = MPI_Wtime() - tstart; +- + //isMaximalmatching(A, mateRow2Col, mateCol2Row, unmatchedRow, unmatchedCol); + //isMatching(mateCol2Row, mateRow2Col); //todo there is a better way to check this + --- a/BipartiteMatchings/MatchingDefs.h +++ b/BipartiteMatchings/MatchingDefs.h @@ -9,7 +9,7 @@ |