PeriDEM 0.2.0
PeriDEM -- Peridynamics-based high-fidelity model for granular media
testParallelCompLib.cpp
/*
 * -------------------------------------------
 * Copyright (c) 2021 - 2024 Prashant K. Jha
 * -------------------------------------------
 * PeriDEM https://github.com/prashjha/PeriDEM
 *
 * Distributed under the Boost Software License, Version 1.0. (See accompanying
 * file LICENSE)
 */

#include "testParallelCompLib.h"
#include "util/methods.h" // defines timeDiff()
#include "util/randomDist.h"
#include "util/point.h"
#include "util/io.h"
#include "fe/mesh.h"
#include "fe/meshPartitioning.h"
#include "fe/meshUtil.h"
#include <mpi.h>
#include <fmt/format.h>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <random>
#include <sstream>
#include <vector>

typedef std::mt19937 RandGenerator;
typedef std::uniform_real_distribution<> UniformDistribution;

#include <taskflow/taskflow/taskflow.hpp>
#include <taskflow/taskflow/algorithm/for_each.hpp>

namespace {

  void printMsg(std::string msg, int mpiRank, int printMpiRank) {
    if (printMpiRank < 0) {
      std::cout << msg;
    } else {
      if (mpiRank == printMpiRank)
        std::cout << msg;
    }
  }

  double f1(const double &x){
    return x*x*x + std::exp(x) - std::sin(x);
  }

  double f2(const double &x){
    return 2*(x-0.5)*(x-0.5)*(x-0.5) + std::exp(x-0.5) - std::cos(x-0.5);
  }

  void setupOwnerAndGhost(size_t mpiSize,
                          size_t mpiRank,
                          const std::vector<size_t> &nodePartition,
                          const std::vector<std::vector<size_t>> &nodeNeighs,
                          std::vector<size_t> &ownedNodes,
                          std::vector<size_t> &ownedInternalNodes,
                          std::vector<size_t> &ownedBdryNodes,
                          std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> &ghostData) {

    auto numNodes = nodePartition.size();

    // clear data
    ownedNodes.clear();
    ownedInternalNodes.clear();
    ownedBdryNodes.clear();
    ghostData.resize(mpiSize);
    for (size_t i_proc=0; i_proc<mpiSize; i_proc++) {
      ghostData[i_proc].first.clear();
      ghostData[i_proc].second.clear();
    }

    // setup
    for (size_t i=0; i<numNodes; i++) {
      if (nodePartition[i] == mpiRank) {
        // this processor owns this node
        ownedNodes.push_back(i);

        // ascertain if this node has neighboring nodes owned by other processors
        bool ghostExist = false;
        for (auto j : nodeNeighs[i]) {
          auto j_proc = nodePartition[j];
          if (j_proc != mpiRank) {
            ghostExist = true;

            // add j to ghost node list (to receive from j_proc)
            ghostData[j_proc].first.push_back(j);

            // add i to ghost node list (to send to j_proc)
            ghostData[j_proc].second.push_back(i);
          }
        }

        if (ghostExist)
          ownedBdryNodes.push_back(i);
        else
          ownedInternalNodes.push_back(i);
      } // if this processor owns node i
    } // loop over nodes

    bool debugGhostData = false;
    if (debugGhostData and mpiRank == 0) {
      std::vector<std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>>> ghostDataAllProc(
              mpiSize);
      std::vector<std::vector<size_t>> numGhostDataAllProc(mpiSize);
      for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
        numGhostDataAllProc[i_proc].resize(mpiSize);
        ghostDataAllProc[i_proc].resize(mpiSize);
      }

      for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
        // assume we are i_proc and then create ghostData for i_proc
        for (size_t i = 0; i < numNodes; i++) {
          if (nodePartition[i] == i_proc) {
            // i_proc processor owns this node
            for (auto j: nodeNeighs[i]) {
              auto j_proc = nodePartition[j];
              if (j_proc != i_proc) {
                // add to ghost node list
                ghostDataAllProc[i_proc][j_proc].first.push_back(j);
              }
            }
          } // if i_proc owns node i
        } // loop over nodes

        // total number of ghost nodes from neighboring processors
        for (size_t j_proc = 0; j_proc < mpiSize; j_proc++)
          numGhostDataAllProc[i_proc][j_proc] = ghostDataAllProc[i_proc][j_proc].first.size();
      } // loop over i_proc

      // print the information
      std::cout << "\n\nGhost data debug output\n\n";
      bool found_asym = false;
      for (size_t i_proc = 0; i_proc < mpiSize; i_proc++) {
        for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
          std::cout << fmt::format("(i,j) = ({}, {}), num data = {}\n",
                                   i_proc, j_proc,
                                   numGhostDataAllProc[i_proc][j_proc]);

          if (j_proc > i_proc) {
            if (numGhostDataAllProc[i_proc][j_proc] == numGhostDataAllProc[j_proc][i_proc])
              std::cout << fmt::format(" symmetric: data ({}, {}) = data ({}, {})\n",
                                       i_proc, j_proc, j_proc, i_proc);
            else {
              found_asym = true;
              std::cout << fmt::format(
                      " asymmetric: data ({}, {}) != data ({}, {})\n",
                      i_proc, j_proc, j_proc, i_proc);
            }
          }
        }
      }
      if (found_asym)
        std::cout << "Found asymmetric ghost data\n";
      else
        std::cout << "No asymmetric ghost data\n";

    }
  } // setupOwnerAndGhost()
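
  // Illustrative only (not part of the original test): a tiny, self-contained
  // example of what setupOwnerAndGhost() produces. Four nodes on a line are
  // partitioned as {0, 0, 1, 1} and each node neighbors its immediate neighbors.
  // Seen from rank 0 of a 2-rank run, node 0 is internal, node 1 is a boundary
  // node (its neighbor 2 lives on rank 1), ghostData[1].first = {2} (receive)
  // and ghostData[1].second = {1} (send).
  void setupOwnerAndGhostToyExample() {
    std::vector<size_t> nodePartition = {0, 0, 1, 1};
    std::vector<std::vector<size_t>> nodeNeighs = {{1}, {0, 2}, {1, 3}, {2}};

    std::vector<size_t> ownedNodes, ownedInternalNodes, ownedBdryNodes;
    std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> ghostData;

    setupOwnerAndGhost(2 /*mpiSize*/, 0 /*mpiRank*/, nodePartition, nodeNeighs,
                       ownedNodes, ownedInternalNodes, ownedBdryNodes, ghostData);

    std::cout << fmt::format("owned = {}, internal = {}, boundary = {}\n",
                             ownedNodes.size(), ownedInternalNodes.size(),
                             ownedBdryNodes.size());
  }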

  void exchangeDispData(size_t mpiSize,
                        size_t mpiRank,
                        const std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> &ghostData,
                        std::vector<std::pair<std::vector<util::Point>, std::vector<util::Point>>> &dispGhostData,
                        std::vector<util::Point> &dispNodes) {
    // resize dispGhostData if not already sized
    dispGhostData.resize(mpiSize);
    for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
      auto j_data_size = ghostData[j_proc].first.size(); // same size for second
      dispGhostData[j_proc].first.resize(j_data_size);
      dispGhostData[j_proc].second.resize(j_data_size);
    }

    // exchange data
    util::io::print("\n\nBegin exchange data\n\n");
    util::io::print(fmt::format("\n\nThis processor's rank = {}\n\n", mpiRank), util::io::print_default_tab, -1);
    std::vector<MPI_Request> mpiRequests(2*(mpiSize-1));
    size_t requestCounter = 0;
    for (size_t j_proc=0; j_proc<mpiSize; j_proc++) {
      auto & sendIds = ghostData[j_proc].second;
      auto & recvIds = ghostData[j_proc].first;

      if (j_proc != mpiRank and recvIds.size() != 0) {

        util::io::print(fmt::format("\n\n Processing j_proc = {}\n\n", j_proc), util::io::print_default_tab, -1);

        // fill in ghost displacement data that we are sending from nodal displacement data
        for (size_t k = 0; k<sendIds.size(); k++)
          dispGhostData[j_proc].second[k] = dispNodes[sendIds[k]];

        // send data from this process to j_proc
        MPI_Isend(dispGhostData[j_proc].second.data(), 3*sendIds.size(),
                  MPI_DOUBLE, j_proc, 0, MPI_COMM_WORLD,
                  &mpiRequests[requestCounter++]);

        // receive data from j_proc
        MPI_Irecv(dispGhostData[j_proc].first.data(), 3*recvIds.size(),
                  MPI_DOUBLE, j_proc, 0, MPI_COMM_WORLD,
                  &mpiRequests[requestCounter++]);
      }
    } // loop over j_proc

    util::io::print("\n\nCalling MPI_Waitall\n\n");
    MPI_Waitall(requestCounter, mpiRequests.data(), MPI_STATUSES_IGNORE);

    util::io::print("\n\nUpdate dispNodes data\n\n");

    for (size_t j_proc=0; j_proc<mpiSize; j_proc++) {
      auto & recvIds = ghostData[j_proc].first;
      if (j_proc != mpiRank and recvIds.size() != 0) {
        for (size_t k = 0; k<recvIds.size(); k++)
          dispNodes[recvIds[k]] = dispGhostData[j_proc].first[k];
      }
    } // loop over j_proc
  } // exchangeDispData()
} // namespace
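
// Note (illustrative, not part of the original file): exchangeDispData() above
// sends util::Point buffers as raw MPI_DOUBLE triplets, which assumes that
// util::Point is laid out as three contiguous doubles with no extra data members.
// A compile-time guard one could add under that assumption:
static_assert(sizeof(util::Point) == 3 * sizeof(double),
              "util::Point must be three contiguous doubles for raw MPI exchange");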

std::string test::testTaskflow(size_t N, int seed) {

  auto nThreads = util::parallel::getNThreads();
  util::io::print(fmt::format("\n\ntestTaskflow(): Number of threads = {}\n\n", nThreads));

  // task: perform N computations in serial and using taskflow for_each

  // generate vector of random numbers
  auto dist = util::DistributionSample<UniformDistribution>(0., 1., seed);

  std::vector<double> x(N);
  std::vector<double> y1(N);
  std::vector<double> y2(N);
  std::generate(std::begin(x), std::end(x), [&] { return dist(); });

  // now do serial calculation
  auto t1 = std::chrono::steady_clock::now();
  for (size_t i = 0; i < N; i++) {
    if (x[i] < 0.5)
      y1[i] = f1(x[i]);
    else
      y1[i] = f2(x[i]);
  }
  auto t2 = std::chrono::steady_clock::now();
  auto dt12 = util::methods::timeDiff(t1, t2, "microseconds");

  // now do parallel calculation using taskflow
  tf::Executor executor(nThreads);
  tf::Taskflow taskflow;

  taskflow.for_each_index((std::size_t) 0, N, (std::size_t) 1, [&x, &y2](std::size_t i) {
      if (x[i] < 0.5)
        y2[i] = f1(x[i]);
      else
        y2[i] = f2(x[i]);
    }
  ); // for_each

  executor.run(taskflow).get();
  auto t3 = std::chrono::steady_clock::now();
  auto dt23 = util::methods::timeDiff(t2, t3, "microseconds");

  // compare results
  double y_err = 0.;
  for (size_t i=0; i<N; i++)
    y_err += std::pow(y1[i] - y2[i], 2);

  if (y_err > 1.e-10) {
    std::cerr << fmt::format("Error: Serial and taskflow computation results do not match (squared error = {})\n",
                             y_err);
    exit(1);
  }

  // report timings (timeDiff above returns microseconds)
  std::ostringstream msg;
  msg << fmt::format(" Serial computation took = {} microseconds\n", dt12);
  msg << fmt::format(" Taskflow computation took = {} microseconds\n", dt23);
  msg << fmt::format(" Speed-up factor = {}\n\n\n", dt12/dt23);

  return msg.str();
}
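
// Illustrative only (not part of the original test): the single-shot timings
// above can be noisy for small N. A hypothetical helper that repeats a callable
// and keeps the best time in microseconds, using std::chrono directly:
template <typename F>
double bestTimeMicroseconds(F &&work, int repeats = 5) {
  double best = -1.;
  for (int r = 0; r < repeats; r++) {
    auto tStart = std::chrono::steady_clock::now();
    work();
    auto tEnd = std::chrono::steady_clock::now();
    double dt = std::chrono::duration<double, std::micro>(tEnd - tStart).count();
    if (best < 0. || dt < best)
      best = dt;
  }
  return best;
}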

void test::testMPI(size_t nGrid, size_t mHorizon,
                   size_t testOption, std::string meshFilename) {
  int mpiSize, mpiRank;
  MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);

  // number of partitions
  size_t nPart(mpiSize);

  // create mesh object
  size_t dim(2);
  auto mesh = fe::Mesh(dim);
  mesh.d_spatialDiscretization = "finite_difference";

  // create or read mesh data
  std::string outMeshFilename = "";
  if (testOption == 1) {
    // set geometry details
    std::pair<std::vector<double>, std::vector<double>> box;
    std::vector<size_t> nGridVec;
    for (size_t i=0; i<dim; i++) {
      box.first.push_back(0.);
      box.second.push_back(1.);
      nGridVec.push_back(nGrid);
    }

    // call utility function to create mesh
    util::io::print("\n\nCreating uniform mesh\n\n");
    fe::createUniformMesh(&mesh, dim, box, nGridVec);

    // filename for outputting
    outMeshFilename = fmt::format("uniform_mesh_Lx_{}_Ly_{}_Nx_{}_Ny_{}",
                                  box.second[0], box.second[1],
                                  nGridVec[0], nGridVec[1]);
  }
  else if (testOption == 2) {
    if (meshFilename.empty()) {
      std::cerr << "testMPI(): mesh filename is empty.\n";
      exit(1);
    }

    // call in-built function of mesh to create data from file
    util::io::print("\n\nReading mesh\n\n");
    mesh.createData(meshFilename);

    // find the name of mesh file excluding path and extension
    outMeshFilename = util::io::removeExtensionFromFile(util::io::getFilenameFromPath(meshFilename));
  }
  else {
    std::cerr << "testMPI() accepts either 1 or 2 for testOption. The value "
              << testOption << " is invalid.\n";
    exit(1);
  }

  // print mesh data
  util::io::print(mesh.printStr());

  // calculate nonlocal neighborhood
  double horizon = mHorizon*mesh.d_h;
  std::vector<std::vector<size_t>> nodeNeighs(mesh.d_numNodes);
  geometry::computeNonlocalNeighborhood(mesh.d_nodes, horizon, nodeNeighs);

  // partition the mesh on root processor and broadcast to other processors
  mesh.d_nodePartition.resize(mesh.d_numNodes);
  util::io::print("\n\nCreating partition of mesh\n\n");
  if (mpiRank == 0)
    fe::metisGraphPartition("metis_kway", &mesh, nodeNeighs, nPart);

  util::io::print("\n\nBroadcasting partition to all processors\n\n");
  MPI_Bcast(mesh.d_nodePartition.data(), mesh.d_numNodes,
            MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);


  // Tasks:
  // 1. Create list of ghost nodes associated to neighboring processors
  // 2. Create a method that updates displacement of ghost nodes via MPI communication
  // 3. Create two sets of nodes owned by this processor; the internal set has nodes
  //    that do not depend on ghost nodes and the boundary set has nodes that do

  // store id of nodes owned by this processor
  std::vector<size_t> ownedNodes, ownedInternalNodes, ownedBdryNodes;

  // for each neighboring processor, store id of ghost nodes owned by that processor
  std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>> ghostData;

  // fill the owned and ghost node vectors
  util::io::print("\n\nCalling setupOwnerAndGhost()\n\n");
  setupOwnerAndGhost(mpiSize, mpiRank,
                     mesh.d_nodePartition, nodeNeighs,
                     ownedNodes, ownedInternalNodes, ownedBdryNodes,
                     ghostData);

  // create dummy displacement vector with rank-dependent values (random variant kept commented out)
  std::vector<util::Point> dispNodes(mesh.d_numNodes, util::Point(-1., -1., -1.));
  {
    // int seed = mpiRank;
    // RandGenerator gen(util::get_rd_gen(seed));
    // auto dist = util::DistributionSample<UniformDistribution>(0., 1., seed);
    // for (auto i: ownedNodes)
    //   dispNodes[i] = util::Point(dist(), dist(), dist());
    // assign value to owned nodes
    for (auto i: ownedNodes)
      dispNodes[i] = util::Point(mpiRank + 1, (mpiRank + 1)*100, (mpiRank + 1)*10000);
  }

  // MPI communication to send and receive ghost nodes data
  printMsg("\n\nCalling exchangeDispData()\n\n", mpiRank, 0);
  std::vector<std::pair<std::vector<util::Point>, std::vector<util::Point>>> dispGhostData;
  exchangeDispData(mpiSize, mpiRank, ghostData, dispGhostData, dispNodes);

  // debug the received ghost displacement data
  if (true) {
    printMsg("\n\nDebugging dispGhostData()\n\n", mpiRank, -1);
    bool debug_failed = false;
    for (size_t j_proc = 0; j_proc < mpiSize; j_proc++) {
      auto &recvIds = ghostData[j_proc].first;

      if (j_proc != mpiRank and recvIds.size() != 0) {
        for (size_t k = 0; k < recvIds.size(); k++) {
          auto &uk = dispNodes[recvIds[k]];

          // verify we received correct value of uk
          if (uk[0] != j_proc + 1 or uk[1] != 100 * (j_proc + 1)
              or uk[2] != 10000 * (j_proc + 1)) {
            debug_failed = true;
            util::io::print(fmt::format(" MPI exchange error: j_proc = {}, "
                                        "uk = ({}, {}, {})\n", j_proc,
                                        uk[0], uk[1], uk[2]),
                            util::io::print_default_tab, -1);
          }
        }
      }
    } // loop over j_proc

    if (debug_failed)
      util::io::print(fmt::format("\n\nDEBUG failed for processor = {}\n\n", mpiRank), util::io::print_default_tab, -1);
    else
      util::io::print(fmt::format("\n\nDEBUG passed for processor = {}\n\n", mpiRank), util::io::print_default_tab, -1);
  }
}
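
// Illustrative only: a hypothetical driver showing the expected calling order
// for the two tests above. The actual PeriDEM test executable may differ
// (command-line parsing and taskflow thread-count setup are assumed to be
// handled by the caller), and MPI must be initialized before calling these tests.
void runParallelCompTestsExample() {
  int mpiRank;
  MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);

  // the taskflow test returns a timing summary; print it on the root rank only
  if (mpiRank == 0)
    std::cout << test::testTaskflow(100000, 0);

  // MPI test on a uniform 10x10 mesh with horizon = 3 * mesh size
  test::testMPI(10, 3, 1);
}
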
A class for mesh data.
Definition mesh.h:51
Templated probability distribution.
Definition randomDist.h:90
void printMsg(std::string msg, int mpiRank, int printMpiRank)
void exchangeDispData(size_t mpiSize, size_t mpiRank, const std::vector< std::pair< std::vector< size_t >, std::vector< size_t > > > &ghostData, std::vector< std::pair< std::vector< util::Point >, std::vector< util::Point > > > &dispGhostData, std::vector< util::Point > &dispNodes)
void setupOwnerAndGhost(size_t mpiSize, size_t mpiRank, const std::vector< size_t > &nodePartition, const std::vector< std::vector< size_t > > &nodeNeighs, std::vector< size_t > &ownedNodes, std::vector< size_t > &ownedInternalNodes, std::vector< size_t > &ownedBdryNodes, std::vector< std::pair< std::vector< size_t >, std::vector< size_t > > > &ghostData)
void createUniformMesh(fe::Mesh *mesh_p, size_t dim, std::pair< std::vector< double >, std::vector< double > > box, std::vector< size_t > nGrid)
Creates uniform mesh for rectangle/cuboid domain.
Definition meshUtil.cpp:23
void metisGraphPartition(std::string partitionMethod, const std::vector< std::vector< size_t > > &nodeNeighs, std::vector< size_t > &nodePartition, size_t nPartitions)
Partitions the nodes based on node neighborlist supplied. Function first creates a graph with nodes a...
void computeNonlocalNeighborhood(const std::vector< util::Point > &nodes, double horizon, std::vector< std::vector< size_t > > &nodeNeighs)
Computes the nonlocal neighbor list of each node, i.e., all nodes within distance horizon of a given node.
std::string testTaskflow(size_t N, int seed)
Perform test on taskflow.
void testMPI(size_t nGrid=10, size_t mHorizon=3, size_t testOption=0, std::string meshFilename="")
Perform parallelization test using MPI on mesh partition based on metis.
const int print_default_tab
Default value of tab used in outputting formatted information.
Definition io.h:27
void print(const T &msg, int nt=print_default_tab, int printMpiRank=print_default_mpi_rank)
Prints formatted information.
Definition io.h:108
std::string removeExtensionFromFile(std::string const &filename)
Remove extension from the filename Source - https://stackoverflow.com/a/24386991.
Definition io.h:416
std::string getFilenameFromPath(std::string const &path, std::string const &delims="/\\")
Get filename removing path from the string Source - https://stackoverflow.com/a/24386991.
Definition io.h:404
float timeDiff(std::chrono::steady_clock::time_point begin, std::chrono::steady_clock::time_point end, std::string unit="microseconds")
Returns difference between two times.
Definition methods.h:304
unsigned int getNThreads()
Get number of threads to be used by taskflow.
RandGenerator get_rd_gen(int seed=-1)
Return random number generator.
Definition randomDist.h:30
std::mt19937 RandGenerator
Definition randomDist.h:16
A structure to represent 3d vectors.
Definition point.h:30
std::uniform_real_distribution UniformDistribution
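
The utility entries above are combined in the test code in a simple pattern. A minimal sketch, assuming the signatures are exactly as listed here (the helper name below is hypothetical and not part of the repository):

  // draw n uniform samples in [0, 1) and return the elapsed time in microseconds
  double sampleAndTime(size_t n, int seed) {
    auto dist = util::DistributionSample<UniformDistribution>(0., 1., seed);
    std::vector<double> samples(n);
    auto begin = std::chrono::steady_clock::now();
    std::generate(samples.begin(), samples.end(), [&] { return dist(); });
    auto end = std::chrono::steady_clock::now();
    return util::methods::timeDiff(begin, end, "microseconds");
  }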