HW2: RC3 - A small exchange optimization
This commit is contained in:
parent
bc98ef4e9f
commit
4e7237bd4d
@ -1,5 +1,5 @@
|
||||
#
|
||||
# PDS homework_1 Makefile
|
||||
# PDS HW2 Makefile
|
||||
#
|
||||
# Copyright (C) 2024 Christos Choutouridis <christos@choutouridis.net>
|
||||
#
|
||||
|
@ -10,7 +10,7 @@
|
||||
# $> sbatch <this file>
|
||||
#
|
||||
# NOTE:
|
||||
# First compile in aristotel with
|
||||
# First compile in aristotle with
|
||||
# $> module load gcc/9.2.0 openmpi/4.0.3
|
||||
# $> make -j hpc-build
|
||||
#
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* \file config.h
|
||||
* \file
|
||||
* \brief Build configuration file.
|
||||
*
|
||||
* \author
|
||||
@ -45,13 +45,16 @@ using distValue_t = uint32_t;
|
||||
* Session option for each invocation of the executable
|
||||
*/
|
||||
struct config_t {
|
||||
size_t arraySize{DEFAULT_DATA_SIZE}; //!<
|
||||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0
|
||||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds
|
||||
bool perf{false}; //!< Enable performance timing measurements and prints
|
||||
bool verbose{false}; //!< Flag to enable verbose output to stdout
|
||||
size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort.
|
||||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0.
|
||||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds.
|
||||
bool perf{false}; //!< Enable performance timing measurements and prints.
|
||||
bool verbose{false}; //!< Flag to enable verbose output to stdout.
|
||||
};
|
||||
|
||||
/*
|
||||
* Exported data types
|
||||
*/
|
||||
extern config_t config;
|
||||
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
|
||||
#include "utils.hpp"
|
||||
|
||||
extern Timing TfullSort, Texchange, Tminmax, TelbowSort;
|
||||
extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public
|
||||
|
||||
/*!
|
||||
* Enumerator for the different versions of the sorting method
|
||||
@ -231,6 +231,43 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
|
||||
elbowSortCore(data, ascending, std::greater<>());
|
||||
}
|
||||
|
||||
/*!
|
||||
* Predicate for exchange optimization. Returns true only if an exchange between partners is needed.
|
||||
* In order to do that we exchange min and max statistics of the partner's data.
|
||||
*
|
||||
* @tparam StatT Statistics data type (for min-max)
|
||||
*
|
||||
* @param lstat [const StatT] Reference to the local statistic data
|
||||
* @param rstat [StatT] Reference to the remote statistic data to fill
|
||||
* @param part [mpi_id_t] The partner for the exchange
|
||||
* @param tag [int] The tag to use for the exchange of stats
|
||||
* @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values
|
||||
* @return True if we need data exchange, false otherwise
|
||||
*/
|
||||
template<typename StatT>
|
||||
bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) {
|
||||
timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag);
|
||||
return (keepSmall) ?
|
||||
rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max
|
||||
: lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max
|
||||
}
|
||||
|
||||
/*!
|
||||
* Update stats utility
|
||||
*
|
||||
* @tparam RangeT A range type with random access iterator
|
||||
* @tparam StatT Statistics data type (for min-max)
|
||||
*
|
||||
* @param stat [StatT] Reference to the statistic data to update
|
||||
* @param data [const RangeT] Reference to the sequence to extract stats from
|
||||
*/
|
||||
template<typename RangeT, typename StatT>
|
||||
void updateMinMax(StatT& stat, const RangeT& data) noexcept {
|
||||
auto [min, max] = std::minmax_element(data.begin(), data.end());
|
||||
stat.min = *min;
|
||||
stat.max = *max;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Takes two sorted sequences where one is in increasing and the other is in decreasing order
|
||||
* and selects either the larger or the smaller items in one-to-one comparison between them.
|
||||
@ -243,7 +280,7 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
|
||||
* @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence
|
||||
*/
|
||||
template<typename RangeT>
|
||||
void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
|
||||
void keepMinOrMax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
|
||||
using value_t = typename RangeT::value_type;
|
||||
std::transform(
|
||||
local.begin(), local.end(),
|
||||
@ -274,6 +311,7 @@ template<typename ShadowedDataT>
|
||||
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
// Initially sort to create a half part of a bitonic sequence
|
||||
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));
|
||||
updateMinMax(localStat, data);
|
||||
|
||||
// Sort network (O(N) iterations)
|
||||
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
|
||||
@ -283,8 +321,12 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
if ( isActive(rank, Processes) &&
|
||||
isActive(part, Processes) ) {
|
||||
// Exchange with partner, keep nim-or-max and sort - O(N)
|
||||
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, step);
|
||||
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
|
||||
int tag = static_cast<int>(2 * step);
|
||||
if (needsExchange(localStat, remoteStat, part, tag, ks)) {
|
||||
timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, ++tag);
|
||||
timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks);
|
||||
updateMinMax(localStat, data);
|
||||
}
|
||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
|
||||
}
|
||||
}
|
||||
@ -312,6 +354,7 @@ template<typename ShadowedDataT>
|
||||
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
// Initially sort to create a half part of a bitonic sequence
|
||||
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));
|
||||
updateMinMax(localStat, data);
|
||||
|
||||
// Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
|
||||
auto p = static_cast<uint32_t>(std::log2(Processes));
|
||||
@ -322,8 +365,12 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
auto part = partner<SortMode::Bitonic>(rank, step);
|
||||
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth);
|
||||
// Exchange with partner, keep nim-or-max
|
||||
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, (depth << 8) | step);
|
||||
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
|
||||
int tag = static_cast<int>( (2*p*depth) + (2*step) );
|
||||
if (needsExchange(localStat, remoteStat, part, tag, ks)) {
|
||||
timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, tag);
|
||||
timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks);
|
||||
updateMinMax(localStat, data);
|
||||
}
|
||||
}
|
||||
// sort - O(N)
|
||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));
|
||||
|
@ -14,10 +14,24 @@
|
||||
#include <chrono>
|
||||
#include <unistd.h>
|
||||
#include <mpi.h>
|
||||
//#include <functional>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/*!
|
||||
* Min-Max statistics data for exchange optimization
|
||||
* @tparam Value_t The underlying data type of the sequence data
|
||||
*/
|
||||
template <typename Value_t>
|
||||
struct Stat_t {
|
||||
using value_type = Value_t; //!< meta-export the type
|
||||
|
||||
Value_t min{}; //!< The minimum value of the sequence
|
||||
Value_t max{}; //!< The maximum value of the sequence
|
||||
};
|
||||
|
||||
//! Application data selection alias
|
||||
using distStat_t = Stat_t<distValue_t>;
|
||||
extern distStat_t localStat, remoteStat; // Make stats public
|
||||
|
||||
/*
|
||||
* MPI_<type> dispatcher mechanism
|
||||
@ -85,25 +99,54 @@ struct MPI_t {
|
||||
*
|
||||
* @tparam T The inner valur type used in buffer
|
||||
*
|
||||
* @param send_data [std::vector<T>] Reference to local data to send
|
||||
* @param recv_data [std::vector<T>] Reference to buffer to receive data from partner
|
||||
* @param ldata [std::vector<T>] Reference to local data to send
|
||||
* @param rdata [std::vector<T>] Reference to buffer to receive data from partner
|
||||
* @param partner [mpi_id_t] The partner for the exchange
|
||||
* @param tag [int] The tag to use for the MPI communication
|
||||
*/
|
||||
template<typename T>
|
||||
void exchange(const std::vector<T>& send_data, std::vector<T>& recv_data, ID_t partner, int tag) {
|
||||
using namespace std::string_literals;
|
||||
void exchange_data(const std::vector<T>& ldata, std::vector<T>& rdata, ID_t partner, int tag) {
|
||||
if (tag < 0)
|
||||
throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound");
|
||||
|
||||
MPI_Datatype datatype = MPI_TypeMapper<T>::getType();
|
||||
int send_count = static_cast<int>(send_data.size());
|
||||
int count = static_cast<int>(ldata.size());
|
||||
MPI_Status status;
|
||||
int err;
|
||||
if ((err = MPI_Sendrecv(
|
||||
send_data.data(), send_count, datatype, partner, tag,
|
||||
recv_data.data(), send_count, datatype, partner, tag,
|
||||
ldata.data(), count, datatype, partner, tag,
|
||||
rdata.data(), count, datatype, partner, tag,
|
||||
MPI_COMM_WORLD, &status
|
||||
)) != MPI_SUCCESS)
|
||||
mpi_throw(err, "(MPI) MPI_Sendrecv() - ");
|
||||
mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - ");
|
||||
}
|
||||
|
||||
/*!
|
||||
* Exchange a data object with partner as part of the sorting network of both bubbletonic
|
||||
* or bitonic sorting algorithms.
|
||||
*
|
||||
* This function matches a transmit and a receive in order for fully exchanged the data object
|
||||
* between current node and partner.
|
||||
*
|
||||
* @tparam T The object type
|
||||
*
|
||||
* @param local [const T&] Reference to the local object to send
|
||||
* @param remote [T&] Reference to the object to receive data from partner
|
||||
* @param partner [mpi_id_t] The partner for the exchange
|
||||
* @param tag [int] The tag to use for the MPI communication
|
||||
*/
|
||||
template<typename T>
|
||||
void exchange_it(const T& local, T& remote, ID_t partner, int tag) {
|
||||
if (tag < 0)
|
||||
throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound");
|
||||
MPI_Status status;
|
||||
int err;
|
||||
if ((err = MPI_Sendrecv(
|
||||
&local, sizeof(T), MPI_BYTE, partner, tag,
|
||||
&remote, sizeof(T), MPI_BYTE, partner, tag,
|
||||
MPI_COMM_WORLD, &status
|
||||
)) != MPI_SUCCESS)
|
||||
mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - ");
|
||||
}
|
||||
|
||||
// Accessors
|
||||
@ -165,11 +208,13 @@ using mpi_id_t = MPI_t<>::ID_t;
|
||||
|
||||
|
||||
/*!
|
||||
* A std::vector wrapper with 2 vectors, an active and a shadow. This type exposes the standard vector
|
||||
* @brief A std::vector wrapper with 2 vectors, an active and a shadow.
|
||||
*
|
||||
* This type exposes the standard vector
|
||||
* functionality of the active vector. The shadow can be used when we need to use the vector as mutable
|
||||
* data in algorithms that can not support "in-place" editing (like elbow-sort for example)
|
||||
*
|
||||
* @tparam Value_t the inner data type of the vectors
|
||||
* @tparam Value_t the underlying data type of the vectors
|
||||
*/
|
||||
template <typename Value_t>
|
||||
struct ShadowedVec_t {
|
||||
@ -274,8 +319,10 @@ private:
|
||||
} active{north}; //!< Flag to select between North and South buffer
|
||||
};
|
||||
|
||||
/*
|
||||
* Exported data types
|
||||
*/
|
||||
using distBuffer_t = ShadowedVec_t<distValue_t>;
|
||||
|
||||
extern distBuffer_t Data;
|
||||
|
||||
/*!
|
||||
@ -333,11 +380,12 @@ struct Timing {
|
||||
return now;
|
||||
}
|
||||
|
||||
Tduration dt(Tpoint t2, Tpoint t1) noexcept {
|
||||
//! A duration calculation utility
|
||||
static Tduration dt(Tpoint t2, Tpoint t1) noexcept {
|
||||
return std::chrono::duration_cast<Tduration>(t2 - t1);
|
||||
}
|
||||
|
||||
//! tool to print the time interval
|
||||
//! Tool to print the time interval
|
||||
void print_duration(const char *what, mpi_id_t rank) noexcept {
|
||||
if (std::chrono::duration_cast<microseconds>(duration_).count() < 10000)
|
||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
|
||||
@ -357,8 +405,12 @@ private:
|
||||
};
|
||||
|
||||
/*!
|
||||
* Utility high level function like to forward a function call and measure
|
||||
* the excecution time
|
||||
* Utility "high level function"-like macro to forward a function call
|
||||
* and accumulate the execution time to the corresponding timing object.
|
||||
*
|
||||
* @param Tim The Timing object [Needs to have methods start() and stop()]
|
||||
* @param Func The function name
|
||||
* @param ... The arguments to pass to function (the preprocessor way)
|
||||
*/
|
||||
#define timeCall(Tim, Func, ...) \
|
||||
Tim.start(); \
|
||||
|
@ -4,9 +4,9 @@
|
||||
#
|
||||
|
||||
function exchange(localid, remoteid)
|
||||
if verbose
|
||||
println("Exchange local data from $localid with partner $remoteid")
|
||||
end
|
||||
# if verbose
|
||||
# println("Exchange local data from $localid with partner $remoteid")
|
||||
# end
|
||||
nothing # We have all data here ;)
|
||||
end
|
||||
|
||||
@ -23,9 +23,89 @@ function minmax(data, localid, remoteid, keepsmall)
|
||||
end
|
||||
|
||||
|
||||
function is_bitonic(arr)
|
||||
n = length(arr)
|
||||
if n <= 2
|
||||
return true # Any sequence of length <= 2 is bitonic
|
||||
end
|
||||
|
||||
# State for state machine. 1: inc, -1: dec, 0: z-state
|
||||
state = 0
|
||||
inc_count = 0
|
||||
dec_count = 0
|
||||
ret = false
|
||||
|
||||
for i in 1:n-1
|
||||
# Find the first order
|
||||
if state == 0
|
||||
if arr[i] > arr[i+1]
|
||||
state = -1
|
||||
dec_count += 1
|
||||
elseif arr[i] < arr[i+1]
|
||||
state = 1
|
||||
inc_count += 1
|
||||
end
|
||||
elseif state == -1 # decreasing
|
||||
if arr[i] < arr[i + 1]
|
||||
state = 1
|
||||
inc_count += 1
|
||||
end
|
||||
elseif state == 1 # increasing
|
||||
if arr[i] > arr[i+1]
|
||||
state = -1
|
||||
dec_count += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if inc_count <= 1 && dec_count <= 1
|
||||
ret = true # Sequence is bitonic
|
||||
elseif inc_count == 2 && dec_count == 1
|
||||
ret = (arr[1] >= arr[n])
|
||||
elseif inc_count == 1 && dec_count == 2
|
||||
ret = (arr[1] <= arr[n])
|
||||
end
|
||||
|
||||
ret
|
||||
end
|
||||
|
||||
function is_sort(arr)
|
||||
# State for state machine. 1: inc, -1: dec, 0: z-state
|
||||
state = 0
|
||||
inc_count = 0
|
||||
dec_count = 0
|
||||
|
||||
for i in 1:length(arr)-1
|
||||
# Find the first order
|
||||
if state == 0
|
||||
if arr[i] > arr[i+1]
|
||||
state = -1
|
||||
dec_count += 1
|
||||
elseif arr[i] < arr[i+1]
|
||||
state = 1
|
||||
inc_count += 1
|
||||
end
|
||||
elseif state == -1 # decreasing
|
||||
if arr[i] < arr[i + 1]
|
||||
state = 1
|
||||
inc_count += 1
|
||||
end
|
||||
elseif state == 1 # increasing
|
||||
if arr[i] > arr[i+1]
|
||||
state = -1
|
||||
dec_count += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
ret = ((inc_count + dec_count) == 1) ? state : 0
|
||||
ret
|
||||
end
|
||||
|
||||
function sort_network!(data, n, depth)
|
||||
nodes = 0:n-1
|
||||
bitonicFlag = zeros(Int8, size(data, 1))
|
||||
sortFlag = zeros(Int8, size(data, 1))
|
||||
for step = depth-1:-1:0
|
||||
partnerid = nodes .⊻ (1 << step)
|
||||
direction = (nodes .& (1 << depth)) .== 0 .& (nodes .< partnerid)
|
||||
@ -40,6 +120,13 @@ function sort_network!(data, n, depth)
|
||||
minmax(data, i, partnerid[i+1], keepsmall[i+1])
|
||||
end
|
||||
end
|
||||
if verbose
|
||||
for i in 1:size(data, 1)
|
||||
bitonicFlag[i] = is_bitonic(data[i, :])
|
||||
sortFlag[i] = is_sort(data[i, :])
|
||||
end
|
||||
println("depth: $depth | step: $step | bitonicFlag: $bitonicFlag | sorfFlag: $sortFlag")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file
|
||||
* \brief Distributed sort implementation.
|
||||
* \brief Distributed sort implementation
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
@ -9,8 +9,13 @@
|
||||
#include "utils.hpp"
|
||||
#include "distsort.hpp"
|
||||
|
||||
//! Statistic variables for exchange optimization
|
||||
distStat_t localStat, remoteStat;
|
||||
|
||||
//! Performance timers for each one of the "costly" functions
|
||||
Timing TfullSort, Texchange, Tminmax, TelbowSort;
|
||||
|
||||
|
||||
bool isActive(mpi_id_t node, size_t nodes) {
|
||||
if (!((nodes > 0) &&
|
||||
(nodes <= std::numeric_limits<mpi_id_t>::max()) ))
|
||||
|
@ -138,6 +138,9 @@ bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
}
|
||||
|
||||
#if !defined TESTING
|
||||
/*!
|
||||
* @return Returns 0, but.... we may throw or exit(1)
|
||||
*/
|
||||
int main(int argc, char* argv[]) try {
|
||||
// Initialize MPI environment
|
||||
mpi.init(&argc, &argv);
|
||||
@ -170,6 +173,7 @@ int main(int argc, char* argv[]) try {
|
||||
sleep(1);
|
||||
#endif
|
||||
|
||||
// Initialize local data
|
||||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
|
||||
std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max]
|
||||
std::mt19937 gen(rd());
|
||||
@ -181,6 +185,7 @@ int main(int argc, char* argv[]) try {
|
||||
Data.resize(config.arraySize);
|
||||
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
|
||||
|
||||
// Run distributed sort
|
||||
if (mpi.rank() == 0)
|
||||
logger << "Starting distributed sorting ... ";
|
||||
Ttotal.start();
|
||||
@ -192,9 +197,9 @@ int main(int argc, char* argv[]) try {
|
||||
Ttotal.stop();
|
||||
if (mpi.rank() == 0)
|
||||
logger << " Done." << logger.endl;
|
||||
std::string timeMsg = "rank " + std::to_string(mpi.rank());
|
||||
|
||||
|
||||
// Print-outs and validation
|
||||
if (config.perf) {
|
||||
Ttotal.print_duration("Total ", mpi.rank());
|
||||
TfullSort.print_duration("Full-Sort ", mpi.rank());
|
||||
@ -224,6 +229,9 @@ catch (std::exception& e) {
|
||||
#include <gtest/gtest.h>
|
||||
#include <exception>
|
||||
|
||||
/*!
|
||||
* The testing version of our program
|
||||
*/
|
||||
GTEST_API_ int main(int argc, char **argv) try {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
|
@ -6,6 +6,9 @@
|
||||
* make tests
|
||||
* mpirun -np <N> ./out/tests
|
||||
*
|
||||
* Note:
|
||||
* Yes each process runs the entire test suite!!
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
@ -15,7 +18,11 @@
|
||||
#include <mpi.h>
|
||||
#include <random>
|
||||
#include "distsort.hpp"
|
||||
/*
|
||||
* Global fixtures
|
||||
*/
|
||||
|
||||
// MPI handler for the test session
|
||||
MPI_t<> ts_mpi;
|
||||
|
||||
// Mersenne seeded from hw if possible. range: [type_min, type_max]
|
||||
@ -27,17 +34,11 @@ protected:
|
||||
static void SetUpTestSuite() {
|
||||
int argc = 0;
|
||||
char** argv = nullptr;
|
||||
MPI_Init(&argc, &argv);
|
||||
|
||||
int rank, size;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
ts_mpi.rank(rank);
|
||||
ts_mpi.size(size);
|
||||
ts_mpi.init(&argc, &argv);
|
||||
}
|
||||
|
||||
static void TearDownTestSuite() {
|
||||
MPI_Finalize();
|
||||
ts_mpi.finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user