@@ -1,5 +1,5 @@ | |||
# | |||
# PDS homework_1 Makefile | |||
# PDS HW2 Makefile | |||
# | |||
# Copyright (C) 2024 Christos Choutouridis <christos@choutouridis.net> | |||
# | |||
@@ -10,7 +10,7 @@ | |||
# $> sbatch <this file> | |||
# | |||
# NOTE: | |||
# First compile in aristotel with | |||
# First compile in aristotle with | |||
# $> module load gcc/9.2.0 openmpi/4.0.3 | |||
# $> make -j hpc-build | |||
# | |||
@@ -1,5 +1,5 @@ | |||
/*! | |||
* \file config.h | |||
* \file | |||
* \brief Build configuration file. | |||
* | |||
* \author | |||
@@ -45,13 +45,16 @@ using distValue_t = uint32_t; | |||
* Session option for each invocation of the executable | |||
*/ | |||
struct config_t { | |||
size_t arraySize{DEFAULT_DATA_SIZE}; //!< | |||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0 | |||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds | |||
bool perf{false}; //!< Enable performance timing measurements and prints | |||
bool verbose{false}; //!< Flag to enable verbose output to stdout | |||
size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort. | |||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. | |||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds. | |||
bool perf{false}; //!< Enable performance timing measurements and prints. | |||
bool verbose{false}; //!< Flag to enable verbose output to stdout. | |||
}; | |||
/* | |||
* Exported data types | |||
*/ | |||
extern config_t config; | |||
@@ -22,7 +22,7 @@ | |||
#include "utils.hpp" | |||
extern Timing TfullSort, Texchange, Tminmax, TelbowSort; | |||
extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public | |||
/*! | |||
* Enumerator for the different versions of the sorting method | |||
@@ -231,6 +231,43 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept { | |||
elbowSortCore(data, ascending, std::greater<>()); | |||
} | |||
/*! | |||
* Predicate for exchange optimization. Returns true only if an exchange between partners is needed. | |||
* In order to do that we exchange min and max statistics of the partner's data. | |||
* | |||
* @tparam StatT Statistics data type (for min-max) | |||
* | |||
* @param lstat [const StatT] Reference to the local statistic data | |||
* @param rstat [StatT] Reference to the remote statistic data to fill | |||
* @param part [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the exchange of stats | |||
* @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values | |||
* @return True if we need data exchange, false otherwise | |||
*/ | |||
template<typename StatT> | |||
bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) { | |||
timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag); | |||
return (keepSmall) ? | |||
rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max | |||
: lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max | |||
} | |||
/*! | |||
* Update stats utility | |||
* | |||
* @tparam RangeT A range type with random access iterator | |||
* @tparam StatT Statistics data type (for min-max) | |||
* | |||
* @param stat [StatT] Reference to the statistic data to update | |||
* @param data [const RangeT] Reference to the sequence to extract stats from | |||
*/ | |||
template<typename RangeT, typename StatT> | |||
void updateMinMax(StatT& stat, const RangeT& data) noexcept { | |||
auto [min, max] = std::minmax_element(data.begin(), data.end()); | |||
stat.min = *min; | |||
stat.max = *max; | |||
} | |||
/*! | |||
* Takes two sorted sequences where one is in increasing and the other is in decreasing order | |||
* and selects either the larger or the smaller items in one-to-one comparison between them. | |||
@@ -243,7 +280,7 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept { | |||
* @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence | |||
*/ | |||
template<typename RangeT> | |||
void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept { | |||
void keepMinOrMax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept { | |||
using value_t = typename RangeT::value_type; | |||
std::transform( | |||
local.begin(), local.end(), | |||
@@ -274,6 +311,7 @@ template<typename ShadowedDataT> | |||
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
// Initially sort to create a half part of a bitonic sequence | |||
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0)); | |||
updateMinMax(localStat, data); | |||
// Sort network (O(N) iterations) | |||
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) { | |||
@@ -283,8 +321,12 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
if ( isActive(rank, Processes) && | |||
isActive(part, Processes) ) { | |||
// Exchange with partner, keep nim-or-max and sort - O(N) | |||
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, step); | |||
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks); | |||
int tag = static_cast<int>(2 * step); | |||
if (needsExchange(localStat, remoteStat, part, tag, ks)) { | |||
timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, ++tag); | |||
timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks); | |||
updateMinMax(localStat, data); | |||
} | |||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes)); | |||
} | |||
} | |||
@@ -312,6 +354,7 @@ template<typename ShadowedDataT> | |||
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
// Initially sort to create a half part of a bitonic sequence | |||
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0)); | |||
updateMinMax(localStat, data); | |||
// Run through sort network using elbow-sort ( O(LogN * LogN) iterations ) | |||
auto p = static_cast<uint32_t>(std::log2(Processes)); | |||
@@ -322,8 +365,12 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
auto part = partner<SortMode::Bitonic>(rank, step); | |||
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth); | |||
// Exchange with partner, keep nim-or-max | |||
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, (depth << 8) | step); | |||
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks); | |||
int tag = static_cast<int>( (2*p*depth) + (2*step) ); | |||
if (needsExchange(localStat, remoteStat, part, tag, ks)) { | |||
timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, tag); | |||
timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks); | |||
updateMinMax(localStat, data); | |||
} | |||
} | |||
// sort - O(N) | |||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth)); | |||
@@ -14,10 +14,24 @@ | |||
#include <chrono> | |||
#include <unistd.h> | |||
#include <mpi.h> | |||
//#include <functional> | |||
#include "config.h" | |||
/*! | |||
* Min-Max statistics data for exchange optimization | |||
* @tparam Value_t The underlying data type of the sequence data | |||
*/ | |||
template <typename Value_t> | |||
struct Stat_t { | |||
using value_type = Value_t; //!< meta-export the type | |||
Value_t min{}; //!< The minimum value of the sequence | |||
Value_t max{}; //!< The maximum value of the sequence | |||
}; | |||
//! Application data selection alias | |||
using distStat_t = Stat_t<distValue_t>; | |||
extern distStat_t localStat, remoteStat; // Make stats public | |||
/* | |||
* MPI_<type> dispatcher mechanism | |||
@@ -85,25 +99,54 @@ struct MPI_t { | |||
* | |||
* @tparam T The inner valur type used in buffer | |||
* | |||
* @param send_data [std::vector<T>] Reference to local data to send | |||
* @param recv_data [std::vector<T>] Reference to buffer to receive data from partner | |||
* @param ldata [std::vector<T>] Reference to local data to send | |||
* @param rdata [std::vector<T>] Reference to buffer to receive data from partner | |||
* @param partner [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the MPI communication | |||
*/ | |||
template<typename T> | |||
void exchange(const std::vector<T>& send_data, std::vector<T>& recv_data, ID_t partner, int tag) { | |||
using namespace std::string_literals; | |||
void exchange_data(const std::vector<T>& ldata, std::vector<T>& rdata, ID_t partner, int tag) { | |||
if (tag < 0) | |||
throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound"); | |||
MPI_Datatype datatype = MPI_TypeMapper<T>::getType(); | |||
int send_count = static_cast<int>(send_data.size()); | |||
int count = static_cast<int>(ldata.size()); | |||
MPI_Status status; | |||
int err; | |||
if ((err = MPI_Sendrecv( | |||
ldata.data(), count, datatype, partner, tag, | |||
rdata.data(), count, datatype, partner, tag, | |||
MPI_COMM_WORLD, &status | |||
)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - "); | |||
} | |||
/*! | |||
* Exchange a data object with partner as part of the sorting network of both bubbletonic | |||
* or bitonic sorting algorithms. | |||
* | |||
* This function matches a transmit and a receive in order for fully exchanged the data object | |||
* between current node and partner. | |||
* | |||
* @tparam T The object type | |||
* | |||
* @param local [const T&] Reference to the local object to send | |||
* @param remote [T&] Reference to the object to receive data from partner | |||
* @param partner [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the MPI communication | |||
*/ | |||
template<typename T> | |||
void exchange_it(const T& local, T& remote, ID_t partner, int tag) { | |||
if (tag < 0) | |||
throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound"); | |||
MPI_Status status; | |||
int err; | |||
if ((err = MPI_Sendrecv( | |||
send_data.data(), send_count, datatype, partner, tag, | |||
recv_data.data(), send_count, datatype, partner, tag, | |||
&local, sizeof(T), MPI_BYTE, partner, tag, | |||
&remote, sizeof(T), MPI_BYTE, partner, tag, | |||
MPI_COMM_WORLD, &status | |||
)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Sendrecv() - "); | |||
mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - "); | |||
} | |||
// Accessors | |||
@@ -165,11 +208,13 @@ using mpi_id_t = MPI_t<>::ID_t; | |||
/*! | |||
* A std::vector wrapper with 2 vectors, an active and a shadow. This type exposes the standard vector | |||
* @brief A std::vector wrapper with 2 vectors, an active and a shadow. | |||
* | |||
* This type exposes the standard vector | |||
* functionality of the active vector. The shadow can be used when we need to use the vector as mutable | |||
* data in algorithms that can not support "in-place" editing (like elbow-sort for example) | |||
* | |||
* @tparam Value_t the inner data type of the vectors | |||
* @tparam Value_t the underlying data type of the vectors | |||
*/ | |||
template <typename Value_t> | |||
struct ShadowedVec_t { | |||
@@ -274,8 +319,10 @@ private: | |||
} active{north}; //!< Flag to select between North and South buffer | |||
}; | |||
/* | |||
* Exported data types | |||
*/ | |||
using distBuffer_t = ShadowedVec_t<distValue_t>; | |||
extern distBuffer_t Data; | |||
/*! | |||
@@ -333,11 +380,12 @@ struct Timing { | |||
return now; | |||
} | |||
Tduration dt(Tpoint t2, Tpoint t1) noexcept { | |||
//! A duration calculation utility | |||
static Tduration dt(Tpoint t2, Tpoint t1) noexcept { | |||
return std::chrono::duration_cast<Tduration>(t2 - t1); | |||
} | |||
//! tool to print the time interval | |||
//! Tool to print the time interval | |||
void print_duration(const char *what, mpi_id_t rank) noexcept { | |||
if (std::chrono::duration_cast<microseconds>(duration_).count() < 10000) | |||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | |||
@@ -357,13 +405,17 @@ private: | |||
}; | |||
/*! | |||
* Utility high level function like to forward a function call and measure | |||
* the excecution time | |||
* Utility "high level function"-like macro to forward a function call | |||
* and accumulate the execution time to the corresponding timing object. | |||
* | |||
* @param Tim The Timing object [Needs to have methods start() and stop()] | |||
* @param Func The function name | |||
* @param ... The arguments to pass to function (the preprocessor way) | |||
*/ | |||
#define timeCall(Tim, Func, ...) \ | |||
Tim.start(); \ | |||
Func(__VA_ARGS__); \ | |||
Tim.stop(); \ | |||
Tim.start(); \ | |||
Func(__VA_ARGS__); \ | |||
Tim.stop(); \ | |||
#endif /* UTILS_HPP_ */ |
@@ -4,9 +4,9 @@ | |||
# | |||
function exchange(localid, remoteid) | |||
if verbose | |||
println("Exchange local data from $localid with partner $remoteid") | |||
end | |||
# if verbose | |||
# println("Exchange local data from $localid with partner $remoteid") | |||
# end | |||
nothing # We have all data here ;) | |||
end | |||
@@ -23,9 +23,89 @@ function minmax(data, localid, remoteid, keepsmall) | |||
end | |||
function is_bitonic(arr) | |||
n = length(arr) | |||
if n <= 2 | |||
return true # Any sequence of length <= 2 is bitonic | |||
end | |||
# State for state machine. 1: inc, -1: dec, 0: z-state | |||
state = 0 | |||
inc_count = 0 | |||
dec_count = 0 | |||
ret = false | |||
for i in 1:n-1 | |||
# Find the first order | |||
if state == 0 | |||
if arr[i] > arr[i+1] | |||
state = -1 | |||
dec_count += 1 | |||
elseif arr[i] < arr[i+1] | |||
state = 1 | |||
inc_count += 1 | |||
end | |||
elseif state == -1 # decreasing | |||
if arr[i] < arr[i + 1] | |||
state = 1 | |||
inc_count += 1 | |||
end | |||
elseif state == 1 # increasing | |||
if arr[i] > arr[i+1] | |||
state = -1 | |||
dec_count += 1 | |||
end | |||
end | |||
end | |||
if inc_count <= 1 && dec_count <= 1 | |||
ret = true # Sequence is bitonic | |||
elseif inc_count == 2 && dec_count == 1 | |||
ret = (arr[1] >= arr[n]) | |||
elseif inc_count == 1 && dec_count == 2 | |||
ret = (arr[1] <= arr[n]) | |||
end | |||
ret | |||
end | |||
function is_sort(arr) | |||
# State for state machine. 1: inc, -1: dec, 0: z-state | |||
state = 0 | |||
inc_count = 0 | |||
dec_count = 0 | |||
for i in 1:length(arr)-1 | |||
# Find the first order | |||
if state == 0 | |||
if arr[i] > arr[i+1] | |||
state = -1 | |||
dec_count += 1 | |||
elseif arr[i] < arr[i+1] | |||
state = 1 | |||
inc_count += 1 | |||
end | |||
elseif state == -1 # decreasing | |||
if arr[i] < arr[i + 1] | |||
state = 1 | |||
inc_count += 1 | |||
end | |||
elseif state == 1 # increasing | |||
if arr[i] > arr[i+1] | |||
state = -1 | |||
dec_count += 1 | |||
end | |||
end | |||
end | |||
ret = ((inc_count + dec_count) == 1) ? state : 0 | |||
ret | |||
end | |||
function sort_network!(data, n, depth) | |||
nodes = 0:n-1 | |||
bitonicFlag = zeros(Int8, size(data, 1)) | |||
sortFlag = zeros(Int8, size(data, 1)) | |||
for step = depth-1:-1:0 | |||
partnerid = nodes .⊻ (1 << step) | |||
direction = (nodes .& (1 << depth)) .== 0 .& (nodes .< partnerid) | |||
@@ -40,6 +120,13 @@ function sort_network!(data, n, depth) | |||
minmax(data, i, partnerid[i+1], keepsmall[i+1]) | |||
end | |||
end | |||
if verbose | |||
for i in 1:size(data, 1) | |||
bitonicFlag[i] = is_bitonic(data[i, :]) | |||
sortFlag[i] = is_sort(data[i, :]) | |||
end | |||
println("depth: $depth | step: $step | bitonicFlag: $bitonicFlag | sorfFlag: $sortFlag") | |||
end | |||
end | |||
end | |||
@@ -1,6 +1,6 @@ | |||
/*! | |||
* \file | |||
* \brief Distributed sort implementation. | |||
* \brief Distributed sort implementation | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
@@ -9,8 +9,13 @@ | |||
#include "utils.hpp" | |||
#include "distsort.hpp" | |||
//! Statistic variables for exchange optimization | |||
distStat_t localStat, remoteStat; | |||
//! Performance timers for each one of the "costly" functions | |||
Timing TfullSort, Texchange, Tminmax, TelbowSort; | |||
bool isActive(mpi_id_t node, size_t nodes) { | |||
if (!((nodes > 0) && | |||
(nodes <= std::numeric_limits<mpi_id_t>::max()) )) | |||
@@ -138,6 +138,9 @@ bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
} | |||
#if !defined TESTING | |||
/*! | |||
* @return Returns 0, but.... we may throw or exit(1) | |||
*/ | |||
int main(int argc, char* argv[]) try { | |||
// Initialize MPI environment | |||
mpi.init(&argc, &argv); | |||
@@ -170,6 +173,7 @@ int main(int argc, char* argv[]) try { | |||
sleep(1); | |||
#endif | |||
// Initialize local data | |||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; | |||
std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max] | |||
std::mt19937 gen(rd()); | |||
@@ -181,6 +185,7 @@ int main(int argc, char* argv[]) try { | |||
Data.resize(config.arraySize); | |||
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); }); | |||
// Run distributed sort | |||
if (mpi.rank() == 0) | |||
logger << "Starting distributed sorting ... "; | |||
Ttotal.start(); | |||
@@ -192,9 +197,9 @@ int main(int argc, char* argv[]) try { | |||
Ttotal.stop(); | |||
if (mpi.rank() == 0) | |||
logger << " Done." << logger.endl; | |||
std::string timeMsg = "rank " + std::to_string(mpi.rank()); | |||
// Print-outs and validation | |||
if (config.perf) { | |||
Ttotal.print_duration("Total ", mpi.rank()); | |||
TfullSort.print_duration("Full-Sort ", mpi.rank()); | |||
@@ -224,6 +229,9 @@ catch (std::exception& e) { | |||
#include <gtest/gtest.h> | |||
#include <exception> | |||
/*! | |||
* The testing version of our program | |||
*/ | |||
GTEST_API_ int main(int argc, char **argv) try { | |||
testing::InitGoogleTest(&argc, argv); | |||
return RUN_ALL_TESTS(); | |||
@@ -6,6 +6,9 @@ | |||
* make tests | |||
* mpirun -np <N> ./out/tests | |||
* | |||
* Note: | |||
* Yes each process runs the entire test suite!! | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
@@ -15,7 +18,11 @@ | |||
#include <mpi.h> | |||
#include <random> | |||
#include "distsort.hpp" | |||
/* | |||
* Global fixtures | |||
*/ | |||
// MPI handler for the test session | |||
MPI_t<> ts_mpi; | |||
// Mersenne seeded from hw if possible. range: [type_min, type_max] | |||
@@ -27,17 +34,11 @@ protected: | |||
static void SetUpTestSuite() { | |||
int argc = 0; | |||
char** argv = nullptr; | |||
MPI_Init(&argc, &argv); | |||
int rank, size; | |||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |||
MPI_Comm_size(MPI_COMM_WORLD, &size); | |||
ts_mpi.rank(rank); | |||
ts_mpi.size(size); | |||
ts_mpi.init(&argc, &argv); | |||
} | |||
static void TearDownTestSuite() { | |||
MPI_Finalize(); | |||
ts_mpi.finalize(); | |||
} | |||
}; | |||