@@ -42,7 +42,7 @@ BUILD_DIR := bin | |||||
OBJ_DIR := $(BUILD_DIR)/obj | OBJ_DIR := $(BUILD_DIR)/obj | ||||
DEP_DIR := $(BUILD_DIR)/.dep | DEP_DIR := $(BUILD_DIR)/.dep | ||||
OUTPUT_DIR := out-rc3b | |||||
OUTPUT_DIR := out | |||||
# ========== Compiler settings ========== | # ========== Compiler settings ========== | ||||
# Compiler flags for debug and release | # Compiler flags for debug and release | ||||
@@ -49,14 +49,19 @@ static constexpr size_t MAX_PIPELINE_SIZE = 64UL; | |||||
using distValue_t = uint32_t; | using distValue_t = uint32_t; | ||||
/*! | /*! | ||||
* Session option for each invocation of the executable | |||||
* Session option for each invocation of the executable. | |||||
* | |||||
* @note | |||||
* The values of the members are set from the command line. | |||||
*/ | */ | ||||
struct config_t { | struct config_t { | ||||
size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort. | size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort. | ||||
size_t pipeline{1UL}; //!< Pipeline stages | |||||
bool exchangeOpt{false}; //!< Flag to request the exchange optimization | |||||
size_t pipeline{1UL}; //!< Pipeline stages (1 to disable) | |||||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. | bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. | ||||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds. | bool ndebug{false}; //!< Skips debug trap on DEBUG builds. | ||||
bool perf{false}; //!< Enable performance timing measurements and prints. | |||||
size_t perf{1}; //!< Enable performance timing measurements and prints and repeat | |||||
//!< the performs the sorting <perf> times to average the measurements | |||||
bool verbose{false}; //!< Flag to enable verbose output to stdout. | bool verbose{false}; //!< Flag to enable verbose output to stdout. | ||||
}; | }; | ||||
@@ -22,7 +22,15 @@ | |||||
#include "utils.hpp" | #include "utils.hpp" | ||||
extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public | |||||
/* | |||||
* Exported timers | |||||
*/ | |||||
extern Timing Ttotal; | |||||
extern Timing TfullSort; | |||||
extern Timing Texchange; | |||||
extern Timing Tminmax; | |||||
extern Timing TelbowSort; | |||||
/*! | /*! | ||||
* Enumerator for the different versions of the sorting method | * Enumerator for the different versions of the sorting method | ||||
@@ -167,6 +175,9 @@ void fullSort(RangeT& data, bool ascending) noexcept { | |||||
else { | else { | ||||
__gnu_parallel::sort(data.begin(), data.end(), std::greater<>()); | __gnu_parallel::sort(data.begin(), data.end(), std::greater<>()); | ||||
} | } | ||||
if (config.exchangeOpt) | |||||
updateMinMax(localStat, data); | |||||
} | } | ||||
/*! | /*! | ||||
@@ -231,6 +242,43 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept { | |||||
elbowSortCore(data, ascending, std::greater<>()); | elbowSortCore(data, ascending, std::greater<>()); | ||||
} | } | ||||
/*! | |||||
* Predicate for exchange optimization. Returns true only if an exchange between partners is needed. | |||||
* In order to do that we exchange min and max statistics of the partner's data. | |||||
* | |||||
* @tparam StatT Statistics data type (for min-max) | |||||
* | |||||
* @param lstat [const StatT] Reference to the local statistic data | |||||
* @param rstat [StatT] Reference to the remote statistic data to fill | |||||
* @param part [mpi_id_t] The partner for the exchange | |||||
* @param tag [int] The tag to use for the exchange of stats | |||||
* @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values | |||||
* @return True if we need data exchange, false otherwise | |||||
*/ | |||||
template<typename StatT> | |||||
bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) { | |||||
timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag); | |||||
return (keepSmall) ? | |||||
rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max | |||||
: lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max | |||||
} | |||||
/*! | |||||
* Update stats utility | |||||
* | |||||
* @tparam RangeT A range type with random access iterator | |||||
* @tparam StatT Statistics data type (for min-max) | |||||
* | |||||
* @param stat [StatT] Reference to the statistic data to update | |||||
* @param data [const RangeT] Reference to the sequence to extract stats from | |||||
*/ | |||||
template<typename RangeT, typename StatT> | |||||
void updateMinMax(StatT& stat, const RangeT& data) noexcept { | |||||
auto [min, max] = std::minmax_element(data.begin(), data.end()); | |||||
stat.min = *min; | |||||
stat.max = *max; | |||||
} | |||||
/*! | /*! | ||||
* Takes two sequences and selects either the larger or the smaller items | * Takes two sequences and selects either the larger or the smaller items | ||||
@@ -276,7 +324,11 @@ void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSm | |||||
size_t tagGenerator(size_t depth, size_t step, size_t stage = 0); | size_t tagGenerator(size_t depth, size_t step, size_t stage = 0); | ||||
/*! | /*! | ||||
* A pipeline loop for mixing min-max process with mpi data exchange | |||||
* An exchange functionality to support both Bubbletonic and Bitonic sort algorithms. | |||||
* | |||||
* @note | |||||
* In case of pipeline request it switches to non-blocking MPI communication for | |||||
* pipelining min-max process with mpi data exchange | |||||
* | * | ||||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | ||||
* | * | ||||
@@ -289,28 +341,38 @@ size_t tagGenerator(size_t depth, size_t step, size_t stage = 0); | |||||
* The @c tag is increased inside the pipeline loop for each different data exchange | * The @c tag is increased inside the pipeline loop for each different data exchange | ||||
*/ | */ | ||||
template<typename ShadowedDataT> | template<typename ShadowedDataT> | ||||
void exchangePipeline(ShadowedDataT& data, mpi_id_t partner, bool keepSmall, int tag) { | |||||
void exchange(ShadowedDataT& data, mpi_id_t partner, bool keepSmall, int tag) { | |||||
using Value_t = typename ShadowedDataT::value_type; | using Value_t = typename ShadowedDataT::value_type; | ||||
// Init counters and pointers | // Init counters and pointers | ||||
size_t count = data.size() / config.pipeline; | |||||
Value_t* active = data.getActive().data(); | Value_t* active = data.getActive().data(); | ||||
Value_t* shadow = data.getShadow().data(); | Value_t* shadow = data.getShadow().data(); | ||||
size_t count = data.size() / config.pipeline; | |||||
// Pipeline | |||||
Texchange.start(); | |||||
mpi.exchange_start(active, shadow, count, partner, tag); | |||||
for (size_t stage = 0 ; stage < config.pipeline ; active += count, shadow += count) { | |||||
// Wait previous chunk | |||||
mpi.exchange_wait(); Texchange.stop(); | |||||
if (++stage < config.pipeline) { | |||||
// Start next chunk if there is a next one | |||||
Texchange.start(); | |||||
mpi.exchange_start(active + count, shadow + count, count, partner, ++tag); | |||||
if (config.pipeline > 1) { | |||||
// Pipeline case - use async MPI | |||||
Texchange.start(); | |||||
mpi.exchange_start(active, shadow, count, partner, tag); | |||||
for (size_t stage = 0; stage < config.pipeline; active += count, shadow += count) { | |||||
// Wait previous chunk | |||||
mpi.exchange_wait(); | |||||
Texchange.stop(); | |||||
if (++stage < config.pipeline) { | |||||
// Start next chunk if there is a next one | |||||
Texchange.start(); | |||||
mpi.exchange_start(active + count, shadow + count, count, partner, ++tag); | |||||
} | |||||
// process the arrived data | |||||
timeCall(Tminmax, keepMinOrMax, active, shadow, count, keepSmall); | |||||
} | } | ||||
// process the arrived data | |||||
} | |||||
else { | |||||
// No pipeline - use blocking MPI | |||||
timeCall(Texchange, mpi.exchange, active, shadow, count, partner, tag); | |||||
timeCall(Tminmax, keepMinOrMax, active, shadow, count, keepSmall); | timeCall(Tminmax, keepMinOrMax, active, shadow, count, keepSmall); | ||||
} | } | ||||
if (config.exchangeOpt) | |||||
updateMinMax(localStat, data); | |||||
} | } | ||||
/*! | /*! | ||||
@@ -339,8 +401,10 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||||
isActive(part, Processes) ) { | isActive(part, Processes) ) { | ||||
// Exchange with partner, keep nim-or-max and sort - O(N) | // Exchange with partner, keep nim-or-max and sort - O(N) | ||||
int tag = static_cast<int>(tagGenerator(0, step)); | int tag = static_cast<int>(tagGenerator(0, step)); | ||||
exchangePipeline(data, part, ks, tag); | |||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes)); | |||||
if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) { | |||||
exchange(data, part, ks, tag); | |||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes)); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -378,7 +442,9 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||||
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth); | auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth); | ||||
// Exchange with partner, keep nim-or-max | // Exchange with partner, keep nim-or-max | ||||
int tag = static_cast<int>(tagGenerator(depth, step)); | int tag = static_cast<int>(tagGenerator(depth, step)); | ||||
exchangePipeline(data, part, ks, tag); | |||||
if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) { | |||||
exchange(data, part, ks, tag); | |||||
} | |||||
} | } | ||||
// sort - O(N) | // sort - O(N) | ||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth)); | timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth)); | ||||
@@ -17,6 +17,22 @@ | |||||
#include "config.h" | #include "config.h" | ||||
/*! | |||||
* Min-Max statistics data for exchange optimization | |||||
* @tparam Value_t The underlying data type of the sequence data | |||||
*/ | |||||
template <typename Value_t> | |||||
struct Stat_t { | |||||
using value_type = Value_t; //!< meta-export the type | |||||
Value_t min{}; //!< The minimum value of the sequence | |||||
Value_t max{}; //!< The maximum value of the sequence | |||||
}; | |||||
//! Application data selection alias | |||||
using distStat_t = Stat_t<distValue_t>; | |||||
extern distStat_t localStat, remoteStat; // Make stats public | |||||
/* | /* | ||||
* MPI_<type> dispatcher mechanism | * MPI_<type> dispatcher mechanism | ||||
*/ | */ | ||||
@@ -78,6 +94,64 @@ struct MPI_t { | |||||
name_ = std::string (processor_name, name_len); | name_ = std::string (processor_name, name_len); | ||||
} | } | ||||
/*! | |||||
* Exchange one data object of type @c T with partner as part of the sorting network of both | |||||
* bubbletonic or bitonic sorting algorithms. | |||||
* | |||||
* This function matches a transmit and a receive in order for fully exchanged the data object | |||||
* between current node and partner. | |||||
* | |||||
* @tparam T The object type | |||||
* | |||||
* @param local [const T&] Reference to the local object to send | |||||
* @param remote [T&] Reference to the object to receive data from partner | |||||
* @param partner [mpi_id_t] The partner for the exchange | |||||
* @param tag [int] The tag to use for the MPI communication | |||||
*/ | |||||
template<typename T> | |||||
void exchange_it(const T& local, T& remote, ID_t partner, int tag) { | |||||
if (tag < 0) | |||||
throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound"); | |||||
MPI_Status status; | |||||
int err; | |||||
if ((err = MPI_Sendrecv( | |||||
&local, sizeof(T), MPI_BYTE, partner, tag, | |||||
&remote, sizeof(T), MPI_BYTE, partner, tag, | |||||
MPI_COMM_WORLD, &status | |||||
)) != MPI_SUCCESS) | |||||
mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - "); | |||||
} | |||||
/*! | |||||
* Exchange data with partner as part of the sorting network of both bubbletonic or bitonic | |||||
* sorting algorithms. | |||||
* | |||||
* This function matches a transmit and a receive in order for fully exchanged data between | |||||
* current node and partner. | |||||
* | |||||
* @tparam T The inner valur type used in buffer | |||||
* | |||||
* @param ldata [const ValueT*] Pointer to local data to send | |||||
* @param rdata [ValueT*] Pointer to buffer to receive data from partner | |||||
* @param count [size_t] The number of data to exchange | |||||
* @param partner [mpi_id_t] The partner for the exchange | |||||
* @param tag [int] The tag to use for the MPI communication | |||||
*/ | |||||
template<typename ValueT> | |||||
void exchange(const ValueT* ldata, ValueT* rdata, size_t count, ID_t partner, int tag) { | |||||
if (tag < 0) | |||||
throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound"); | |||||
MPI_Datatype datatype = MPI_TypeMapper<ValueT>::getType(); | |||||
MPI_Status status; | |||||
int err; | |||||
if ((err = MPI_Sendrecv( | |||||
ldata, count, datatype, partner, tag, | |||||
rdata, count, datatype, partner, tag, | |||||
MPI_COMM_WORLD, &status | |||||
)) != MPI_SUCCESS) | |||||
mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - "); | |||||
} | |||||
/*! | /*! | ||||
* Initiate a data exchange data with partner using non-blocking Isend-Irecv, as part of the | * Initiate a data exchange data with partner using non-blocking Isend-Irecv, as part of the | ||||
@@ -353,33 +427,51 @@ struct Timing { | |||||
using milliseconds = std::chrono::milliseconds; | using milliseconds = std::chrono::milliseconds; | ||||
using seconds = std::chrono::seconds; | using seconds = std::chrono::seconds; | ||||
//! Setup measurement rounds | |||||
void init(size_t rounds) { | |||||
duration_.resize(rounds); | |||||
for (auto& d : duration_) | |||||
d = Tduration::zero(); | |||||
} | |||||
//! tool to mark the starting point | //! tool to mark the starting point | ||||
Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); } | Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); } | ||||
//! tool to mark the ending point | //! tool to mark the ending point | ||||
Tpoint stop() noexcept { | Tpoint stop() noexcept { | ||||
Tpoint now = std::chrono::steady_clock::now(); | Tpoint now = std::chrono::steady_clock::now(); | ||||
duration_ += dt(now, mark_); | |||||
duration_[current_] += dt(now, mark_); | |||||
return now; | return now; | ||||
} | } | ||||
//! Switch timing slot | |||||
void next() noexcept { | |||||
++current_; | |||||
current_ %= duration_.size(); | |||||
} | |||||
Tduration& median() noexcept { | |||||
std::sort(duration_.begin(), duration_.end()); | |||||
return duration_[duration_.size()/2]; | |||||
} | |||||
//! A duration calculation utility | //! A duration calculation utility | ||||
static Tduration dt(Tpoint t2, Tpoint t1) noexcept { | static Tduration dt(Tpoint t2, Tpoint t1) noexcept { | ||||
return std::chrono::duration_cast<Tduration>(t2 - t1); | return std::chrono::duration_cast<Tduration>(t2 - t1); | ||||
} | } | ||||
//! Tool to print the time interval | //! Tool to print the time interval | ||||
void print_duration(const char *what, mpi_id_t rank) noexcept { | |||||
if (std::chrono::duration_cast<microseconds>(duration_).count() < 10000) | |||||
static void print_duration(const Tduration& duration, const char *what, mpi_id_t rank) noexcept { | |||||
if (std::chrono::duration_cast<microseconds>(duration).count() < 10000) | |||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | ||||
<< std::to_string(std::chrono::duration_cast<microseconds>(duration_).count()) << " [usec]\n"; | |||||
else if (std::chrono::duration_cast<milliseconds>(duration_).count() < 10000) | |||||
<< std::to_string(std::chrono::duration_cast<microseconds>(duration).count()) << " [usec]\n"; | |||||
else if (std::chrono::duration_cast<milliseconds>(duration).count() < 10000) | |||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | ||||
<< std::to_string(std::chrono::duration_cast<milliseconds>(duration_).count()) << " [msec]\n"; | |||||
<< std::to_string(std::chrono::duration_cast<milliseconds>(duration).count()) << " [msec]\n"; | |||||
else { | else { | ||||
char stime[26]; // fit ulong | char stime[26]; // fit ulong | ||||
auto sec = std::chrono::duration_cast<seconds>(duration_).count(); | |||||
auto msec = (std::chrono::duration_cast<milliseconds>(duration_).count() % 1000) / 10; // keep 2 digit | |||||
auto sec = std::chrono::duration_cast<seconds>(duration).count(); | |||||
auto msec = (std::chrono::duration_cast<milliseconds>(duration).count() % 1000) / 10; // keep 2 digit | |||||
std::sprintf(stime, "%ld.%1ld", sec, msec); | std::sprintf(stime, "%ld.%1ld", sec, msec); | ||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " << stime << " [sec]\n"; | std::cout << "[Timing] (Rank " << rank << ") " << what << ": " << stime << " [sec]\n"; | ||||
} | } | ||||
@@ -387,8 +479,9 @@ struct Timing { | |||||
} | } | ||||
private: | private: | ||||
size_t current_{0}; | |||||
Tpoint mark_{}; | Tpoint mark_{}; | ||||
Tduration duration_{}; | |||||
std::vector<Tduration> duration_{1}; | |||||
}; | }; | ||||
/*! | /*! | ||||
@@ -10,13 +10,9 @@ | |||||
#include "distsort.hpp" | #include "distsort.hpp" | ||||
//! Performance timers for each one of the "costly" functions | |||||
Timing TfullSort, Texchange, Tminmax, TelbowSort; | |||||
bool isActive(mpi_id_t node, size_t nodes) { | bool isActive(mpi_id_t node, size_t nodes) { | ||||
if (!((nodes > 0) && | if (!((nodes > 0) && | ||||
(nodes <= std::numeric_limits<mpi_id_t>::max()) )) | |||||
(nodes <= static_cast<size_t>(std::numeric_limits<mpi_id_t>::max())) )) | |||||
throw std::runtime_error("(isActive) Non-acceptable value of MPI Nodes\n"); | throw std::runtime_error("(isActive) Non-acceptable value of MPI Nodes\n"); | ||||
// ^ Assert that mpi_id_t can hold nodes, and thus we can cast without data loss! | // ^ Assert that mpi_id_t can hold nodes, and thus we can cast without data loss! | ||||
@@ -24,12 +20,14 @@ bool isActive(mpi_id_t node, size_t nodes) { | |||||
} | } | ||||
size_t tagGenerator(size_t depth, size_t step, size_t stage) { | size_t tagGenerator(size_t depth, size_t step, size_t stage) { | ||||
auto stage_bits = static_cast<uint32_t>(std::log2(MAX_PIPELINE_SIZE)); | |||||
auto step_bits = static_cast<uint32_t>(std::log2(MAX_MPI_SIZE)); | |||||
auto stage_bits = static_cast<uint32_t>(std::log2(MAX_PIPELINE_SIZE)); | |||||
auto step_bits = static_cast<uint32_t>(std::log2(MAX_MPI_SIZE)); | |||||
uint32_t stat_bit = 1UL; | |||||
// ^ We use MPI_SIZE room for steps to fit the bubbletonic version | // ^ We use MPI_SIZE room for steps to fit the bubbletonic version | ||||
// [ depth | step | stage+stats ] | |||||
size_t tag = stage | size_t tag = stage | ||||
| (step << stage_bits) | |||||
| (depth << (stage_bits + step_bits)); | |||||
| (step << (stage_bits + stat_bit)) | |||||
| (depth << (stage_bits + step_bits + stat_bit)); | |||||
return tag; | return tag; | ||||
} | } |
@@ -22,7 +22,36 @@ config_t config; | |||||
MPI_t<> mpi; | MPI_t<> mpi; | ||||
distBuffer_t Data; | distBuffer_t Data; | ||||
Log logger; | Log logger; | ||||
Timing Ttotal; | |||||
distStat_t localStat, remoteStat; | |||||
//! Performance timers for each one of the "costly" functions | |||||
Timing Ttotal; | |||||
Timing TfullSort; | |||||
Timing Texchange; | |||||
Timing Tminmax; | |||||
Timing TelbowSort; | |||||
//! Init timing objects for extra rounds | |||||
void measurements_init() { | |||||
if (config.perf > 1) { | |||||
Ttotal.init(config.perf); | |||||
TfullSort.init(config.perf); | |||||
Texchange.init(config.perf); | |||||
Tminmax.init(config.perf); | |||||
TelbowSort.init(config.perf); | |||||
} | |||||
} | |||||
//! iterate ot the next round of measurements for all measurement objects | |||||
void measurements_next() { | |||||
if (config.perf > 1) { | |||||
Ttotal.next(); | |||||
TfullSort.next(); | |||||
Texchange.next(); | |||||
Tminmax.next(); | |||||
TelbowSort.next(); | |||||
} | |||||
} | |||||
/*! | /*! | ||||
* A small command line argument parser | * A small command line argument parser | ||||
@@ -43,6 +72,9 @@ bool get_options(int argc, char* argv[]){ | |||||
status = false; | status = false; | ||||
} | } | ||||
} | } | ||||
else if (arg == "-e" || arg == "--exchange-opt") { | |||||
config.exchangeOpt = true; | |||||
} | |||||
else if (arg == "--pipeline") { | else if (arg == "--pipeline") { | ||||
if (i+1 < argc) { | if (i+1 < argc) { | ||||
auto stages = atoi(argv[++i]); | auto stages = atoi(argv[++i]); | ||||
@@ -59,7 +91,12 @@ bool get_options(int argc, char* argv[]){ | |||||
config.validation = true; | config.validation = true; | ||||
} | } | ||||
else if (arg == "--perf") { | else if (arg == "--perf") { | ||||
config.perf = true; | |||||
if (i+1 < argc) { | |||||
config.perf = atoi(argv[++i]); | |||||
} | |||||
else { | |||||
status = false; | |||||
} | |||||
} | } | ||||
else if (arg == "--ndebug") { | else if (arg == "--ndebug") { | ||||
config.ndebug = true; | config.ndebug = true; | ||||
@@ -68,22 +105,25 @@ bool get_options(int argc, char* argv[]){ | |||||
config.verbose = true; | config.verbose = true; | ||||
} | } | ||||
else if (arg == "-h" || arg == "--help") { | else if (arg == "-h" || arg == "--help") { | ||||
std::cout << "distbitonic/distbubbletonic - A distributed bitonic sort\n\n"; | |||||
std::cout << "distbitonic -q <N> [--pipeline N] [--validation] [--ndebug] [-v]\n"; | |||||
std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n"; | |||||
std::cout << "distbitonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n"; | |||||
std::cout << "distbitonic -h\n"; | std::cout << "distbitonic -h\n"; | ||||
std::cout << "distbubbletonic -q <N> [--pipeline N] [--validation] [--ndebug] [-v]\n"; | |||||
std::cout << "distbubbletonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n"; | |||||
std::cout << "distbubbletonic -h\n"; | std::cout << "distbubbletonic -h\n"; | ||||
std::cout << '\n'; | std::cout << '\n'; | ||||
std::cout << "Options:\n\n"; | std::cout << "Options:\n\n"; | ||||
std::cout << " -q | --array-size <N>\n"; | std::cout << " -q | --array-size <N>\n"; | ||||
std::cout << " Selects the array size according to size = 2^N\n\n"; | std::cout << " Selects the array size according to size = 2^N\n\n"; | ||||
std::cout << " --pipeline <N>\n"; | |||||
std::cout << " -e | --exchange-opt\n"; | |||||
std::cout << " Request an MPI data exchange optimization \n\n"; | |||||
std::cout << " -p <N> | --pipeline <N>\n"; | |||||
std::cout << " Request a pipeline of <N> stages for exchange-minmax\n"; | std::cout << " Request a pipeline of <N> stages for exchange-minmax\n"; | ||||
std::cout << " N must be power of 2 up to " << MAX_PIPELINE_SIZE << "\n\n"; | std::cout << " N must be power of 2 up to " << MAX_PIPELINE_SIZE << "\n\n"; | ||||
std::cout << " --validation\n"; | std::cout << " --validation\n"; | ||||
std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; | std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; | ||||
std::cout << " --perf\n"; | |||||
std::cout << " Request performance timing measurements to stdout.\n\n"; | |||||
std::cout << " --perf <N> \n"; | |||||
std::cout << " Enable performance timing measurements and prints, and repeat\n"; | |||||
std::cout << " the sorting <N> times to average the measurements\n\n"; | |||||
std::cout << " --ndebug\n"; | std::cout << " --ndebug\n"; | ||||
std::cout << " Skip debug breakpoint when on debug build.\n\n"; | std::cout << " Skip debug breakpoint when on debug build.\n\n"; | ||||
std::cout << " -v | --verbose\n"; | std::cout << " -v | --verbose\n"; | ||||
@@ -167,8 +207,8 @@ int main(int argc, char* argv[]) try { | |||||
" Size: " << mpi.size() << | " Size: " << mpi.size() << | ||||
logger.endl; | logger.endl; | ||||
#if defined DEBUG | |||||
#if defined TESTING | |||||
#if defined DEBUG | |||||
#if defined TESTING | |||||
/* | /* | ||||
* In case of a debug build we will wait here until sleep_wait | * In case of a debug build we will wait here until sleep_wait | ||||
* will reset via debugger. In order to do that the user must attach | * will reset via debugger. In order to do that the user must attach | ||||
@@ -179,12 +219,12 @@ int main(int argc, char* argv[]) try { | |||||
* $> gdb <program> <PID2> | * $> gdb <program> <PID2> | ||||
*/ | */ | ||||
volatile bool sleep_wait = false; | volatile bool sleep_wait = false; | ||||
#else | |||||
#else | |||||
volatile bool sleep_wait = true; | volatile bool sleep_wait = true; | ||||
#endif | |||||
#endif | |||||
while (sleep_wait && !config.ndebug) | while (sleep_wait && !config.ndebug) | ||||
sleep(1); | sleep(1); | ||||
#endif | |||||
#endif | |||||
// Initialize local data | // Initialize local data | ||||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; | logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; | ||||
@@ -201,24 +241,27 @@ int main(int argc, char* argv[]) try { | |||||
// Run distributed sort | // Run distributed sort | ||||
if (mpi.rank() == 0) | if (mpi.rank() == 0) | ||||
logger << "Starting distributed sorting ... "; | logger << "Starting distributed sorting ... "; | ||||
Ttotal.start(); | |||||
measurements_init(); | |||||
for (size_t it = 0 ; it < config.perf ; ++it) { | |||||
Ttotal.start(); | |||||
#if CODE_VERSION == BUBBLETONIC | #if CODE_VERSION == BUBBLETONIC | ||||
distBubbletonic(Data, mpi.size(), mpi.rank()); | |||||
distBubbletonic(Data, mpi.size(), mpi.rank()); | |||||
#else | #else | ||||
distBitonic (Data, mpi.size(), mpi.rank()); | |||||
distBitonic(Data, mpi.size(), mpi.rank()); | |||||
#endif | #endif | ||||
Ttotal.stop(); | |||||
Ttotal.stop(); | |||||
measurements_next(); | |||||
} | |||||
if (mpi.rank() == 0) | if (mpi.rank() == 0) | ||||
logger << " Done." << logger.endl; | logger << " Done." << logger.endl; | ||||
// Print-outs and validation | // Print-outs and validation | ||||
if (config.perf) { | |||||
Ttotal.print_duration("Total ", mpi.rank()); | |||||
TfullSort.print_duration("Full-Sort ", mpi.rank()); | |||||
Texchange.print_duration("Exchange ", mpi.rank()); | |||||
Tminmax.print_duration("Min-Max ", mpi.rank()); | |||||
TelbowSort.print_duration("Elbow-Sort", mpi.rank()); | |||||
if (config.perf > 1) { | |||||
Timing::print_duration(Ttotal.median(), "Total ", mpi.rank()); | |||||
Timing::print_duration(TfullSort.median(), "Full-Sort ", mpi.rank()); | |||||
Timing::print_duration(Texchange.median(), "Exchange ", mpi.rank()); | |||||
Timing::print_duration(Texchange.median(), "Min-Max ", mpi.rank()); | |||||
Timing::print_duration(TelbowSort.median(),"Elbow-Sort", mpi.rank()); | |||||
} | } | ||||
if (config.validation) { | if (config.validation) { | ||||
// If requested, we have the chance to fail! | // If requested, we have the chance to fail! | ||||