#include <fmt/format.h>
#include <taskflow/taskflow/taskflow.hpp>
#include <taskflow/taskflow/algorithm/for_each.hpp>
#include <mpi.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <random>
#include <sstream>
#include <string>
#include <vector>

// NOTE: project headers declaring fe::Mesh, util::Point, util::io, and the mesh,
// partitioning, and parallel helpers used below are omitted in this excerpt.

using std::chrono::steady_clock;
// Prints msg either on all ranks (printMpiRank < 0) or only on rank printMpiRank.
void printMsg(std::string msg, int mpiRank, int printMpiRank) {
  if (printMpiRank < 0) {
    std::cout << msg;
  } else if (mpiRank == printMpiRank) {
    std::cout << msg;
  }
}
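// Illustrative usage (not in the original): printMsg("hi\n", mpiRank, 0) prints only
// on rank 0, while printMsg("hi\n", mpiRank, -1) prints on every rank.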
double f1(const double &x) {
  return x*x*x + std::exp(x) - std::sin(x);
}

double f2(const double &x) {
  return 2*(x-0.5)*(x-0.5)*(x-0.5) + std::exp(x-0.5) - std::cos(x-0.5);
}
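// Spot-check (illustrative, not part of the original test): f1(0) = 0 + exp(0) - sin(0) = 1
// and f2(0.5) = 0 + exp(0) - cos(0) = 0, which is handy when verifying these helpers
// in isolation, e.g.
//
//   assert(std::abs(f1(0.) - 1.) < 1e-14);
//   assert(std::abs(f2(0.5)) < 1e-14);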
// Classifies the nodes owned by this rank into internal and boundary nodes and
// builds the ghost-exchange lists. For each other rank j_proc,
//   ghostData[j_proc].first  = ghost nodes owned by j_proc whose data this rank receives,
//   ghostData[j_proc].second = nodes owned by this rank whose data is sent to j_proc.
void setupOwnerAndGhost(size_t mpiSize, size_t mpiRank,
                        const std::vector<size_t> &nodePartition,
                        const std::vector<std::vector<size_t>> &nodeNeighs,
                        std::vector<size_t> &ownedNodes,
                        std::vector<size_t> &ownedInternalNodes,
                        std::vector<size_t> &ownedBdryNodes,
                        std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> &ghostData) {

  auto numNodes = nodePartition.size();

  // clear existing data
  ownedNodes.clear();
  ownedInternalNodes.clear();
  ownedBdryNodes.clear();
  ghostData.resize(mpiSize);
  for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
    ghostData[i_proc].first.clear();
    ghostData[i_proc].second.clear();
  }

  // classify nodes owned by this rank
  for (size_t i = 0; i < numNodes; i++) {
    if (nodePartition[i] == mpiRank) {
      // this rank owns node i
      ownedNodes.push_back(i);

      // check whether any neighbor of i lives on another rank
      bool ghostExist = false;
      for (auto j : nodeNeighs[i]) {
        auto j_proc = nodePartition[j];
        if (j_proc != mpiRank) {
          ghostExist = true;

          // data of node j will be received from rank j_proc
          ghostData[j_proc].first.push_back(j);

          // data of node i will be sent to rank j_proc
          ghostData[j_proc].second.push_back(i);
        }
      }

      if (ghostExist)
        ownedBdryNodes.push_back(i);
      else
        ownedInternalNodes.push_back(i);
    }
  }

  // optional debug: rebuild the ghost data for every rank and check that the
  // number of entries is symmetric between any pair of ranks
  bool debugGhostData = false;
  if (debugGhostData and mpiRank == 0) {
    std::vector<std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>>>
        ghostDataAllProc(mpiSize);
    std::vector<std::vector<size_t>> numGhostDataAllProc(mpiSize);
    for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
      numGhostDataAllProc[i_proc].resize(mpiSize);
      ghostDataAllProc[i_proc].resize(mpiSize);
    }

    for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
      for (size_t i = 0; i < numNodes; i++) {
        if (nodePartition[i] == i_proc) {
          for (auto j : nodeNeighs[i]) {
            auto j_proc = nodePartition[j];
            if (j_proc != i_proc)
              ghostDataAllProc[i_proc][j_proc].first.push_back(j);
          }
        }
      }

      for (size_t j_proc = 0; j_proc < mpiSize; j_proc++)
        numGhostDataAllProc[i_proc][j_proc] = ghostDataAllProc[i_proc][j_proc].first.size();
    }

    std::cout << "\n\nGhost data debug output\n\n";
    bool found_asym = false;
    for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
      for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
        std::cout << fmt::format("(i,j) = ({}, {}), num data = {}\n",
                                 i_proc, j_proc,
                                 numGhostDataAllProc[i_proc][j_proc]);

        if (j_proc > i_proc) {
          if (numGhostDataAllProc[i_proc][j_proc] == numGhostDataAllProc[j_proc][i_proc]) {
            std::cout << fmt::format("  symmetric: data ({}, {}) = data ({}, {})\n",
                                     i_proc, j_proc, j_proc, i_proc);
          } else {
            found_asym = true;
            std::cout << fmt::format("  asymmetric: data ({}, {}) != data ({}, {})\n",
                                     i_proc, j_proc, j_proc, i_proc);
          }
        }
      }
    }

    if (found_asym)
      std::cout << "Found asymmetric ghost data\n";
    else
      std::cout << "No asymmetric ghost data\n";
  }
}
// Exchanges displacement data of ghost nodes between ranks using nonblocking
// point-to-point communication; each util::Point is sent as three MPI_DOUBLEs.
void exchangeDispData(size_t mpiSize, size_t mpiRank,
                      const std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> &ghostData,
                      std::vector<std::pair<std::vector<util::Point>, std::vector<util::Point>>> &dispGhostData,
                      std::vector<util::Point> &dispNodes) {

  // allocate send/receive buffers for each rank
  dispGhostData.resize(mpiSize);
  for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
    auto j_data_size = ghostData[j_proc].first.size();
    dispGhostData[j_proc].first.resize(j_data_size);
    dispGhostData[j_proc].second.resize(j_data_size);
  }

  // exchange data (at most one send and one receive per other rank)
  std::vector<MPI_Request> mpiRequests(2*(mpiSize-1));
  size_t requestCounter = 0;
  for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
    auto &sendIds = ghostData[j_proc].second;
    auto &recvIds = ghostData[j_proc].first;

    if (j_proc != mpiRank and recvIds.size() != 0) {
      // fill the send buffer with displacements of owned nodes needed by j_proc
      for (size_t k = 0; k < sendIds.size(); k++)
        dispGhostData[j_proc].second[k] = dispNodes[sendIds[k]];

      // send displacements of owned nodes to j_proc
      MPI_Isend(dispGhostData[j_proc].second.data(), 3*sendIds.size(),
                MPI_DOUBLE, j_proc, 0, MPI_COMM_WORLD,
                &mpiRequests[requestCounter++]);

      // receive displacements of ghost nodes from j_proc
      MPI_Irecv(dispGhostData[j_proc].first.data(), 3*recvIds.size(),
                MPI_DOUBLE, j_proc, 0, MPI_COMM_WORLD,
                &mpiRequests[requestCounter++]);
    }
  }

  // wait for all sends and receives to complete
  MPI_Waitall(requestCounter, mpiRequests.data(), MPI_STATUSES_IGNORE);

  // copy received ghost data into the displacement vector
  for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
    auto &recvIds = ghostData[j_proc].first;
    if (j_proc != mpiRank and recvIds.size() != 0) {
      for (size_t k = 0; k < recvIds.size(); k++)
        dispNodes[recvIds[k]] = dispGhostData[j_proc].first[k];
    }
  }
}
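// The exchange above treats each util::Point as three contiguous doubles (note
// the counts of 3*size with MPI_DOUBLE). The following compile-time check is an
// illustrative addition, assuming util::Point is a plain struct holding exactly
// three doubles; adjust or drop it if the actual layout differs.
static_assert(sizeof(util::Point) == 3 * sizeof(double),
              "exchangeDispData() assumes util::Point packs exactly three doubles");

// A minimal, self-contained sketch (not part of the original test) illustrating
// the ghostData convention produced by setupOwnerAndGhost() and consumed by
// exchangeDispData(): for a given rank, ghostData[j].first lists nodes received
// from rank j and ghostData[j].second lists nodes sent to rank j. The helper
// name demoOwnerAndGhost is hypothetical.
void demoOwnerAndGhost() {
  // toy 1d chain: nodes 0,1 owned by rank 0 and nodes 2,3 by rank 1,
  // with neighbors being the adjacent nodes
  std::vector<size_t> nodePartition = {0, 0, 1, 1};
  std::vector<std::vector<size_t>> nodeNeighs = {{1}, {0, 2}, {1, 3}, {2}};

  std::vector<size_t> owned, ownedInternal, ownedBdry;
  std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> ghostData;

  // classify nodes as seen by rank 0 of 2 ranks (no MPI calls involved)
  setupOwnerAndGhost(2, 0, nodePartition, nodeNeighs,
                     owned, ownedInternal, ownedBdry, ghostData);

  // expected: owned = {0, 1}, ownedInternal = {0}, ownedBdry = {1},
  // ghostData[1].first = {2} (receive node 2 from rank 1),
  // ghostData[1].second = {1} (send node 1 to rank 1)
  std::cout << fmt::format("rank 0: owned = {}, boundary = {}, ghosts from rank 1 = {}\n",
                           owned.size(), ownedBdry.size(), ghostData[1].first.size());
}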
std::string testTaskflow(size_t N, int seed) {
  auto nThreads = util::parallel::getNThreads();
  util::io::print(fmt::format("\n\ntestTaskflow(): Number of threads = {}\n\n", nThreads));

  // generate N random samples in [0, 1); a std::mt19937-based sampler is used
  // here as a stand-in for the project's (elided) random-number helpers
  std::mt19937 gen(seed < 0 ? std::random_device{}() : seed);
  std::uniform_real_distribution<double> uniformDist(0., 1.);
  auto dist = [&]() { return uniformDist(gen); };

  std::vector<double> x(N);
  std::vector<double> y1(N);
  std::vector<double> y2(N);
  std::generate(std::begin(x), std::end(x), [&] { return dist(); });

  // serial computation (f1 on even indices, f2 on odd indices)
  auto t1 = steady_clock::now();
  for (size_t i = 0; i < N; i++) {
    if (i % 2 == 0)
      y1[i] = f1(x[i]);
    else
      y1[i] = f2(x[i]);
  }
  auto t2 = steady_clock::now();

  // same computation using taskflow's parallel for_each_index
  tf::Executor executor(nThreads);
  tf::Taskflow taskflow;
  taskflow.for_each_index((std::size_t) 0, N, (std::size_t) 1, [&x, &y2](std::size_t i) {
    if (i % 2 == 0)
      y2[i] = f1(x[i]);
    else
      y2[i] = f2(x[i]);
  });
  executor.run(taskflow).get();
  auto t3 = steady_clock::now();

  // the two computations must agree
  double y_err = 0.;
  for (size_t i = 0; i < N; i++)
    y_err += std::pow(y1[i] - y2[i], 2);

  if (y_err > 1.e-10) {
    std::cerr << fmt::format("Error: Serial and taskflow computation results "
                             "do not match (squared error = {})\n", y_err);
    exit(EXIT_FAILURE);
  }

  // report timings in milliseconds
  auto dt12 = std::chrono::duration<double, std::milli>(t2 - t1).count();
  auto dt23 = std::chrono::duration<double, std::milli>(t3 - t2).count();

  std::ostringstream msg;
  msg << fmt::format("  Serial computation took = {}ms\n", dt12);
  msg << fmt::format("  Taskflow computation took = {}ms\n", dt23);
  msg << fmt::format("  Speed-up factor = {}\n\n\n", dt12/dt23);
  return msg.str();
}
void testMPI(size_t nGrid, size_t mHorizon,
             size_t testOption, std::string meshFilename) {

  int mpiSize, mpiRank;
  MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);

  // number of partitions = number of MPI processes
  size_t nPart(mpiSize);

  // create a 2d mesh with finite-difference (nodal) discretization
  size_t dim(2);
  auto mesh = fe::Mesh(dim);
  mesh.d_spatialDiscretization = "finite_difference";

  // filename stem for optionally writing the partitioned mesh
  // (the output step itself is not shown in this excerpt)
  std::string outMeshFilename = "";

  if (testOption == 1) {
    // option 1: create a uniform mesh on the unit domain
    std::pair<std::vector<double>, std::vector<double>> box;
    std::vector<size_t> nGridVec;
    for (size_t i = 0; i < dim; i++) {
      box.first.push_back(0.);
      box.second.push_back(1.);
      nGridVec.push_back(nGrid);
    }

    createUniformMesh(&mesh, dim, box, nGridVec);

    outMeshFilename = fmt::format("uniform_mesh_Lx_{}_Ly_{}_Nx_{}_Ny_{}",
                                  box.second[0], box.second[1],
                                  nGridVec[0], nGridVec[1]);
  }
  else if (testOption == 2) {
    // option 2: read an existing mesh from file
    if (meshFilename.empty()) {
      std::cerr << "testMPI(): mesh filename is empty.\n";
      exit(EXIT_FAILURE);
    }

    mesh.createData(meshFilename);

    outMeshFilename = util::io::removeExtensionFromFile(
        util::io::getFilenameFromPath(meshFilename));
  }
  else {
    std::cerr << "testMPI() accepts either 1 or 2 for testOption. The value "
              << testOption << " is invalid.\n";
    exit(EXIT_FAILURE);
  }

  // compute nonlocal (horizon-based) neighbor lists
  double horizon = mHorizon*mesh.d_h;
  std::vector<std::vector<size_t>> nodeNeighs(mesh.d_numNodes);
  computeNonlocalNeighborhood(mesh.d_nodes, horizon, nodeNeighs);

  // partition the nodes with metis on the root rank and broadcast the result
  // (the method string below is representative; recursive bisection or k-way works)
  mesh.d_nodePartition.resize(mesh.d_numNodes);
  if (mpiRank == 0)
    metisGraphPartition("metis_recursive", nodeNeighs, mesh.d_nodePartition, nPart);

  MPI_Bcast(mesh.d_nodePartition.data(), mesh.d_numNodes,
            MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);

  // build owned/ghost node lists for this rank
  std::vector<size_t> ownedNodes, ownedInternalNodes, ownedBdryNodes;
  std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> ghostData;
  setupOwnerAndGhost(mpiSize, mpiRank,
                     mesh.d_nodePartition, nodeNeighs,
                     ownedNodes, ownedInternalNodes, ownedBdryNodes,
                     ghostData);

  // create a displacement field; each owned node gets a value that encodes its
  // owner rank so that exchanged ghost values can be verified afterwards
  std::vector<util::Point> dispNodes(mesh.d_numNodes,
                                     util::Point(-1., -1., -1.));
  for (auto i : ownedNodes)
    dispNodes[i] = util::Point(mpiRank + 1, (mpiRank + 1)*100, (mpiRank + 1)*10000);

  // exchange ghost displacement data
  printMsg("\n\nCalling exchangeDispData()\n\n", mpiRank, 0);
  std::vector<std::pair<std::vector<util::Point>, std::vector<util::Point>>> dispGhostData;
  exchangeDispData(mpiSize, mpiRank, ghostData, dispGhostData, dispNodes);

  // verify that ghost values received from rank j_proc carry j_proc's pattern
  printMsg("\n\nDebugging dispGhostData()\n\n", mpiRank, -1);
  bool debug_failed = false;
  for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
    auto &recvIds = ghostData[j_proc].first;

    if (j_proc != mpiRank and recvIds.size() != 0) {
      for (size_t k = 0; k < recvIds.size(); k++) {
        auto &uk = dispNodes[recvIds[k]];

        if (uk[0] != j_proc + 1 or uk[1] != 100 * (j_proc + 1)
            or uk[2] != 10000 * (j_proc + 1)) {
          debug_failed = true;
          printMsg(fmt::format("  Error: incorrect ghost value from j_proc = {}, "
                               "uk = ({}, {}, {})\n", j_proc,
                               uk[0], uk[1], uk[2]),
                   mpiRank, -1);
        }
      }
    }
  }

  printMsg(debug_failed ? "Ghost data exchange test failed\n"
                        : "Ghost data exchange test passed\n",
           mpiRank, -1);
}
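// A minimal driver sketch (not part of the original file) showing how these tests
// are typically invoked: MPI must be initialized before testMPI() is called, and
// the problem sizes and seed below are illustrative values only.
//
//   int main(int argc, char *argv[]) {
//     MPI_Init(&argc, &argv);
//
//     int mpiRank;
//     MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
//
//     // taskflow test (serial vs parallel loop), run on rank 0 only
//     if (mpiRank == 0)
//       std::cout << testTaskflow(/*N=*/10000000, /*seed=*/0);
//
//     // MPI ghost-exchange test on a generated uniform mesh
//     testMPI(/*nGrid=*/50, /*mHorizon=*/3, /*testOption=*/1);
//
//     MPI_Finalize();
//     return 0;
//   }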