diff --git a/homework_3/src/distsort.cpp b/homework_3/src/distsort.cpp
index 19d8a28..b479983 100644
--- a/homework_3/src/distsort.cpp
+++ b/homework_3/src/distsort.cpp
@@ -10,24 +10,42 @@
 #include "distsort.hpp"
 
-bool isActive(mpi_id_t node, size_t nodes) {
-    if (!((nodes > 0) &&
-          (nodes <= static_cast<size_t>(std::numeric_limits<mpi_id_t>::max())) ))
-        throw std::runtime_error("(isActive) Non-acceptable value of MPI Nodes\n");
-    // ^ Assert that mpi_id_t can hold nodes, and thus we can cast without data loss!
+/*!
+ * Returns the ascending or descending configuration of the node's sequence based on
+ * the current node (MPI process) and the depth of the sorting network
+ *
+ * @param node  [mpi_id_t] The current node (MPI process)
+ * @param depth [size_t]   The total depth of the sorting network (same for each step for a given network)
+ * @return      [bool]     True if we need ascending configuration, false otherwise
+ */
+bool ascending(mpi_id_t node, size_t depth) noexcept {
+    return !(node & (1 << depth));
+}
 
-    return (node >= 0) && (node < static_cast<mpi_id_t>(nodes));
+/*!
+ * Returns the node's partner for data exchange during the sorting network iterations
+ * of Bitonic
+ *
+ * @param node [mpi_id_t] The current node
+ * @param step [size_t]   The step of the sorting network
+ * @return     [mpi_id_t] The node id of the partner for data exchange
+ */
+mpi_id_t partner(mpi_id_t node, size_t step) noexcept {
+    return (node ^ (1 << step));
 }
 
-size_t tagGenerator(size_t depth, size_t step, size_t stage) {
-    auto stage_bits = static_cast<uint32_t>(std::log2(MAX_PIPELINE_SIZE));
-    auto step_bits  = static_cast<uint32_t>(std::log2(MAX_MPI_SIZE));
-    uint32_t stat_bit = 1UL;
-    // ^ We use MPI_SIZE room for steps to fit the bubbletonic version
-    // [ depth | step | stage+stats ]
-    size_t tag = stage
-               | (step << (stage_bits + stat_bit))
-               | (depth << (stage_bits + step_bits + stat_bit));
-    return tag;
+/*!
+ * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
+ *
+ * @param node    [mpi_id_t] The node for which we check
+ * @param partner [mpi_id_t] The partner of the data exchange
+ * @param depth   [size_t]   The total depth of the sorting network (same for each step for a given network)
+ * @return        [bool]     True if the node should keep the small values, false otherwise
+ */
+
+bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) {
+    if (node == partner)
+        throw std::runtime_error("(keepSmall) Node and Partner cannot be the same\n");
+    return ascending(node, depth) == (node < partner);
 }
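For orientation, the sketch below shows how the three helpers above drive a hypercube-style bitonic schedule. It is illustrative only and not part of the patch: `bitonicSchedule` and `doExchange` are invented names, the loop bounds follow the textbook formulation (stages 1..p for 2^p nodes), and the helpers are copied from the new distsort.cpp with the `keepSmall` partner guard omitted.

```cpp
// Illustrative sketch only -- not part of the patch.
#include <cstddef>
#include <functional>

using mpi_id_t = int;   // assumption: matches MPI_t<>::ID_t from utils.hpp

static bool     ascending(mpi_id_t node, std::size_t depth) noexcept { return !(node & (1 << depth)); }
static mpi_id_t partner  (mpi_id_t node, std::size_t step)  noexcept { return node ^ (1 << step); }
static bool     keepSmall(mpi_id_t node, mpi_id_t part, std::size_t depth) {
    return ascending(node, depth) == (node < part);   // node == part guard omitted here
}

// Walks the compare-exchange schedule for one rank of a 2^p-node network.
// doExchange(partner, keepSmall) stands in for the real MPI exchange + keepMinOrMax step.
void bitonicSchedule(mpi_id_t rank, std::size_t p,
                     const std::function<void(mpi_id_t, bool)>& doExchange) {
    for (std::size_t depth = 1; depth <= p; ++depth) {       // stages of the network
        for (std::size_t step = depth; step > 0;) {
            --step;
            mpi_id_t part = partner(rank, step);              // differs from rank only in bit `step`
            doExchange(part, keepSmall(rank, part, depth));   // who keeps the low half of the pair
        }
        // after every stage the local block is re-sorted (elbowSort in the patch)
    }
}
```

At step `step` the partner differs from the local rank only in bit `step`, and `keepSmall()` reduces to comparing the stage's direction with the relative order of the two ranks.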
diff --git a/homework_3/src/distsort.hpp b/homework_3/src/distsort.hpp
index fbcbc98..1517d60 100644
--- a/homework_3/src/distsort.hpp
+++ b/homework_3/src/distsort.hpp
@@ -32,35 +32,12 @@
 extern Timing Timer_minmax;
 extern Timing Timer_elbowSort;
 
-/*!
- * Enumerator for the different versions of the sorting method
- */
-enum class SortMode {
-    Bubbletonic,    //!< The v0.5 of the algorithm where we use a bubble-sort like approach
-    Bitonic         //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach
-};
 
 /*
  * ============================== Sort utilities ==============================
  */
 
-/*!
- * The primary function template of ascending(). It is DISABLED since , it is explicitly specialized
- * for each of the \c SortMode
- */
-template <SortMode Mode> inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
-/*!
- * Returns the ascending or descending configuration of the node's sequence based on
- * the current node (MPI process) and the depth of the sorting network
- *
- * @param node [mpi_id_t] The current node (MPI process)
- * @return [bool] True if we need ascending configuration, false otherwise
- */
-template <> inline
-bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept {
-    return (node % 2) == 0;
-}
 
 /*!
  * Returns the ascending or descending configuration of the node's sequence based on
@@ -70,30 +47,7 @@ bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t dep
  * @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
  * @return [bool] True if we need ascending configuration, false otherwise
  */
-template <> inline
-bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept {
-    return !(node & (1 << depth));
-}
-
-/*!
- * The primary function template of partner(). It is DISABLED since , it is explicitly specialized
- * for each of the \c SortMode
- */
-template <SortMode Mode> inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete;
-
-/*!
- * Returns the node's partner for data exchange during the sorting network iterations
- * of Bubbletonic
- *
- * @param node [mpi_id_t] The current node
- * @param step [size_t] The step of the sorting network
- * @return [mpi_id_t] The node id of the partner for data exchange
- */
-template <> inline
-mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept {
-    //return (node % 2 == step % 2) ? node + 1 : node - 1;
-    return (((node+step) % 2) == 0) ? node + 1 : node - 1;
-}
+bool ascending(mpi_id_t node, size_t depth);
 
 /*!
  * Returns the node's partner for data exchange during the sorting network iterations
 * of Bitonic
 *
@@ -103,32 +57,9 @@ mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept {
 * @param step [size_t] The step of the sorting network
 * @return [mpi_id_t] The node id of the partner for data exchange
 */
-template <> inline
-mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept {
-    return (node ^ (1 << step));
-}
+mpi_id_t partner(mpi_id_t node, size_t step);
 
-/*!
- * The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized
- * for each of the \c SortMode
- */
-template <SortMode Mode> inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) = delete;
-
-/*!
- * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange.
- *
- * @param node [mpi_id_t] The node for which we check
- * @param partner [mpi_id_t] The partner of the data exchange
- * @return [bool] True if the node should keep the small values, false otherwise
- */
-template <> inline
-bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) {
-    if (node == partner)
-        throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
-    return (node < partner);
-}
-
 /*!
 * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
 *
@@ -137,22 +68,9 @@ bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_u
 * @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
 * @return [bool] True if the node should keep the small values, false otherwise
 */
-template <> inline
-bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) {
-    if (node == partner)
-        throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
-    return ascending<SortMode::Bitonic>(node, depth) == (node < partner);
-}
+bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth);
+
-/*!
- * Predicate to check if the node is active in the current iteration of the bubbletonic
- * sort exchange.
- *
- * @param node [mpi_id_t] The node to check
- * @param nodes [size_t] The total number of nodes
- * @return [bool] True if the node is active, false otherwise
- */
-bool isActive(mpi_id_t node, size_t nodes);
 
 /*
 * ============================== Data utilities ==============================
 */
@@ -175,9 +93,6 @@ void fullSort(RangeT& data, bool ascending) noexcept {
    else {
       __gnu_parallel::sort(data.begin(), data.end(), std::greater<>());
    }
-
-   if (config.exchangeOpt)
-      updateMinMax(localStat, data);
 }
 
 /*!
@@ -242,43 +157,6 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
       elbowSortCore(data, ascending, std::greater<>());
 }
 
-/*!
- * Predicate for exchange optimization. Returns true only if an exchange between partners is needed.
- * In order to do that we exchange min and max statistics of the partner's data.
- *
- * @tparam StatT Statistics data type (for min-max)
- *
- * @param lstat [const StatT] Reference to the local statistic data
- * @param rstat [StatT] Reference to the remote statistic data to fill
- * @param part [mpi_id_t] The partner for the exchange
- * @param tag [int] The tag to use for the exchange of stats
- * @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values
- * @return True if we need data exchange, false otherwise
- */
-template <typename StatT>
-bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) {
-    timeCall(Timer_exchange, mpi.exchange_it, lstat, rstat, part, tag);
-    return (keepSmall) ?
-        rstat.min < lstat.max   // Lmin: rstat.min - Smax: lstat.max
-      : lstat.min < rstat.max;  // Lmin: lstat.min - Smax: rstat.max
-}
-
-
-/*!
- * Update stats utility
- *
- * @tparam RangeT A range type with random access iterator
- * @tparam StatT Statistics data type (for min-max)
- *
- * @param stat [StatT] Reference to the statistic data to update
- * @param data [const RangeT] Reference to the sequence to extract stats from
- */
-template <typename RangeT, typename StatT>
-void updateMinMax(StatT& stat, const RangeT& data) noexcept {
-    auto [min, max] = std::minmax_element(data.begin(), data.end());
-    stat.min = *min;
-    stat.max = *max;
-}
 
 /*!
 * Takes two sequences and selects either the larger or the smaller items
 *
@@ -307,113 +185,6 @@ void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSm
 * ============================== Sort algorithms ==============================
 */
 
-/*!
- * A small tag generator tool to provide consistent encoding to tag communication
- *
- * @param depth The current algorithmic depth[bitonic] of the communication, if any
- * @param step The current step on the current depth
- * @param stage The stage of the pipeline.
- * @return The tag to use.
- *
- * @note
- * In case we call this function outside of the pipeline loop, we can ommit
- * @c stage argument and use the return value as starting tag for every communication
- * of the pipeline loop. We need to increase the tags for each communication of
- * the pipeline loop though!
- */
-size_t tagGenerator(size_t depth, size_t step, size_t stage = 0);
-
-/*!
- * An exchange functionality to support both Bubbletonic and Bitonic sort algorithms.
- *
- * @note
- * In case of pipeline request it switches to non-blocking MPI communication for
- * pipelining min-max process with mpi data exchange
- *
- * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
- *
- * @param data [ShadowedDataT&] Reference to the data to exchange
- * @param partner [mpi_id_t] The partner for the exchange
- * @param keepSmall [bool] Flag to indicate if we keep the small values
- * @param tag [int] The init tag to use for the loop.
- *
- * @note
- * The @c tag is increased inside the pipeline loop for each different data exchange
- */
-template <typename ShadowedDataT>
-void exchange(ShadowedDataT& data, mpi_id_t partner, bool keepSmall, int tag) {
-    using Value_t = typename ShadowedDataT::value_type;
-
-    // Init counters and pointers
-    Value_t* active = data.getActive().data();
-    Value_t* shadow = data.getShadow().data();
-    size_t   count  = data.size() / config.pipeline;
-
-    if (config.pipeline > 1) {
-        // Pipeline case - use async MPI
-        Timer_exchange.start();
-        mpi.exchange_start(active, shadow, count, partner, tag);
-        for (size_t stage = 0; stage < config.pipeline; active += count, shadow += count) {
-            // Wait previous chunk
-            mpi.exchange_wait();
-            Timer_exchange.stop();
-            if (++stage < config.pipeline) {
-                // Start next chunk if there is a next one
-                Timer_exchange.start();
-                mpi.exchange_start(active + count, shadow + count, count, partner, ++tag);
-            }
-            // process the arrived data
-            timeCall(Timer_minmax, keepMinOrMax, active, shadow, count, keepSmall);
-        }
-    }
-    else {
-        // No pipeline - use blocking MPI
-        timeCall(Timer_exchange, mpi.exchange, active, shadow, count, partner, tag);
-        timeCall(Timer_minmax, keepMinOrMax, active, shadow, count, keepSmall);
-    }
-    if (config.exchangeOpt)
-        updateMinMax(localStat, data);
-}
-
-/*!
- * A distributed version of the Bubbletonic sort algorithm.
- *
- * @note
- * Each MPI process should run an instance of this function.
- *
- * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
- *
- * @param data [ShadowedDataT] The local to MPI process data to sort
- * @param Processes [mpi_id_t] The total number of MPI processes
- * @param rank [mpi_id_t] The current process id
- */
-template <typename ShadowedDataT>
-void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
-    // Initially sort to create a half part of a bitonic sequence
-    timeCall(Timer_fullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));
-
-    // Sort network (O(N) iterations)
-    for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
-        // Find out exchange configuration
-        auto part = partner<SortMode::Bubbletonic>(rank, step);
-        auto ks   = keepSmall<SortMode::Bubbletonic>(rank, part, Processes);
-        if ( isActive(rank, Processes) &&
-             isActive(part, Processes) ) {
-            // Exchange with partner, keep nim-or-max and sort - O(N)
-            int tag = static_cast<int>(tagGenerator(0, step));
-            if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) {
-                exchange(data, part, ks, tag);
-                timeCall(Timer_elbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
-            }
-        }
-    }
-
-    // Invert if the node was descending.
-    if (!ascending<SortMode::Bubbletonic>(rank, 0)) {
-        elbowSort(data, true);
-    }
-}
-
 /*!
 * A distributed version of the Bitonic sort algorithm.
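`keepMinOrMax()` is only referenced by the hunk header above; its documented job ("takes two sequences and selects either the larger or the smaller items") boils down to an element-wise selection into the local buffer. A minimal sketch of that semantics, deliberately renamed so it is not mistaken for the patched implementation (which may well be vectorised):

```cpp
// Illustrative sketch of the documented keepMinOrMax() semantics only.
#include <algorithm>
#include <cstddef>

template <typename ValueT>
void keepMinOrMaxSketch(ValueT* local, const ValueT* remote, std::size_t count, bool keepSmall) {
    for (std::size_t i = 0; i != count; ++i)
        local[i] = keepSmall ? std::min(local[i], remote[i])    // keep the smaller of each pair
                             : std::max(local[i], remote[i]);   // or the larger one
}
```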
@@ -428,9 +199,9 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
 * @param rank [mpi_id_t] The current process id
 */
 template <typename ShadowedDataT>
-void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
+void distBitonic(ShadowedDataT& data) {
    // Initially sort to create a half part of a bitonic sequence
-   timeCall(Timer_fullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));
+   timeCall(Timer_fullSort, fullSort, data, ascending(rank, 0));
 
    // Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
    auto p = static_cast<size_t>(std::log2(Processes));
@@ -438,16 +209,14 @@
       for (size_t step = depth; step > 0;) {
          --step;
          // Find out exchange configuration
-         auto part = partner<SortMode::Bitonic>(rank, step);
-         auto ks   = keepSmall<SortMode::Bitonic>(rank, part, depth);
+         auto part = partner(rank, step);
+         auto ks   = keepSmall(rank, part, depth);
          // Exchange with partner, keep nim-or-max
-         int tag = static_cast<int>(tagGenerator(depth, step));
-         if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) {
-            exchange(data, part, ks, tag);
-         }
+         exchange(data, part, ks, tag);
+
       }
       // sort - O(N)
-      timeCall(Timer_elbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));
+      timeCall(Timer_elbowSort, elbowSort, data, ascending(rank, depth));
    }
 }
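The hunk above also drops the `config.exchangeOpt` guard, i.e. the removed `needsExchange()` test that could skip a transfer entirely. For reference, that test is a plain min/max overlap check on the two partners' statistics; the sketch below restates it with a stand-in for the removed `Stat_t` (names are illustrative):

```cpp
// Illustrative restatement of the removed needsExchange() predicate.
template <typename Value_t>
struct MinMax {          // stand-in for the removed Stat_t<Value_t>
    Value_t min{};
    Value_t max{};
};

template <typename Value_t>
bool exchangeIsUseful(const MinMax<Value_t>& local, const MinMax<Value_t>& remote, bool keepSmall) {
    return keepSmall ? (remote.min < local.max)    // partner holds something below our maximum
                     : (local.min  < remote.max);  // partner holds something above our minimum
}
```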
diff --git a/homework_3/src/main.cpp b/homework_3/src/main.cpp
index da242e5..fc7ec15 100644
--- a/homework_3/src/main.cpp
+++ b/homework_3/src/main.cpp
@@ -19,10 +19,9 @@
 // Global session data
 config_t config;
-MPI_t<> mpi;
 distBuffer_t Data;
 Log logger;
-distStat_t localStat, remoteStat;
+
 
 // Mersenne seeded from hw if possible. range: [type_min, type_max]
 std::random_device rd;
@@ -76,21 +75,6 @@ bool get_options(int argc, char* argv[]){
             status = false;
          }
       }
-      else if (arg == "-e" || arg == "--exchange-opt") {
-         config.exchangeOpt = true;
-      }
-      else if (arg == "--pipeline") {
-         if (i+1 < argc) {
-            auto stages = atoi(argv[++i]);
-            if (isPowerOfTwo(stages) && stages <= static_cast<int>(MAX_PIPELINE_SIZE))
-               config.pipeline = stages;
-            else
-               status = false;
-         }
-         else {
-            status = false;
-         }
-      }
      else if (arg == "--validation") {
         config.validation = true;
      }
@@ -109,25 +93,18 @@
         config.verbose = true;
      }
      else if (arg == "--version") {
-        std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n";
+        std::cout << "bitonic - A GPU-accelerated sort utility\n";
         std::cout << "version: " << version << "\n\n";
         exit(0);
      }
      else if (arg == "-h" || arg == "--help") {
-        std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n";
-        std::cout << " distbitonic -q <N> [-e] [-p <N> | --pipeline <N>] [--validation] [--perf <N>] [--ndebug] [-v]\n";
+        std::cout << "distbitonic - A distributed sort utility\n\n";
+        std::cout << " distbitonic -q <N> [--validation] [--perf <N>] [--ndebug] [-v]\n";
         std::cout << " distbitonic -h\n";
-        std::cout << " distbubbletonic -q <N> [-e] [-p <N> | --pipeline <N>] [--validation] [--perf <N>] [--ndebug] [-v]\n";
-        std::cout << " distbubbletonic -h\n";
         std::cout << '\n';
         std::cout << "Options:\n\n";
         std::cout << "  -q <N> | --array-size <N>\n";
         std::cout << "     Selects the array size according to size = 2^N\n\n";
-        std::cout << "  -e | --exchange-opt\n";
-        std::cout << "     Request an MPI data exchange optimization \n\n";
-        std::cout << "  -p <N> | --pipeline <N>\n";
-        std::cout << "     Request a pipeline of <N> stages for exchange-minmax\n";
-        std::cout << "     N must be power of 2 up to " << MAX_PIPELINE_SIZE << "\n\n";
         std::cout << "  --validation\n";
         std::cout << "     Request a full validation at the end, performed by process rank 0\n\n";
         std::cout << "  --perf <N>\n";
@@ -142,10 +119,8 @@
         std::cout << "  --version\n";
         std::cout << "     Prints version and exit.\n\n";
         std::cout << "Examples:\n\n";
-        std::cout << "  mpirun -np 4 distbitonic -q 24\n";
-        std::cout << "    Runs distbitonic in 4 MPI processes with 2^24 array points each\n\n";
-        std::cout << "  mpirun -np 16 distbubbletonic -q 20\n";
-        std::cout << "    Runs distbubbletonic in 16 MPI processes with 2^20 array points each\n\n";
+        std::cout << "  bitonic -q 24\n";
+        std::cout << "    Runs bitonic with GPU acceleration on 2^24 array points\n\n";
 
         exit(0);
      }
@@ -170,34 +145,10 @@
 * @return [bool] True if all are sorted and in total ascending order
 */
 template <typename ShadowedDataT>
-bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
+bool validator(ShadowedDataT& data) {
    using value_t = typename ShadowedDataT::value_type;
    bool ret = true;    // Have faith!
 
-   // Local results
-   value_t lmin  = data.front();
-   value_t lmax  = data.back();
-   value_t lsort = static_cast<value_t>(std::is_sorted(data.begin(), data.end()));
-
-   // Gather min/max/sort to rank 0
-   std::vector<value_t> mins(Processes);
-   std::vector<value_t> maxes(Processes);
-   std::vector<value_t> sorts(Processes);
-
-   MPI_Datatype datatype = MPI_TypeMapper<value_t>::getType();
-   MPI_Gather(&lmin,  1, datatype, mins.data(),  1, datatype, 0, MPI_COMM_WORLD);
-   MPI_Gather(&lmax,  1, datatype, maxes.data(), 1, datatype, 0, MPI_COMM_WORLD);
-   MPI_Gather(&lsort, 1, datatype, sorts.data(), 1, datatype, 0, MPI_COMM_WORLD);
-
-   // Check all results
-   if (rank == 0) {
-      for (mpi_id_t r = 1; r < Processes; ++r) {
-         if (sorts[r] == 0)
-            ret = false;
-         if (maxes[r - 1] > mins[r])
-            ret = false;
-      }
-   }
 
    return ret;
 }
@@ -212,31 +163,6 @@ void init(int* argc, char*** argv) {
    if (!get_options(*argc, *argv))
       exit(1);
 
-   // Initialize MPI environment
-   mpi.init(argc, argv);
-
-   logger << "MPI environment initialized." << " Rank: " << mpi.rank() << " Size: " << mpi.size()
-          << logger.endl;
-
-   #if defined DEBUG
-   #if defined TESTING
-      /*
-       * In case of a debug build we will wait here until sleep_wait
-       * will reset via debugger. In order to do that the user must attach
-       * debugger to all processes. For example:
-       *   $> mpirun -np 2 ./<executable>
-       *   $> ps aux | grep <executable>
-       *   $> gdb <executable> <pid1>
-       *   $> gdb <executable> <pid2>
-       */
-      volatile bool sleep_wait = false;
-   #else
-      volatile bool sleep_wait = true;
-   #endif
-   while (sleep_wait && !config.ndebug)
-      sleep(1);
-   #endif
-
    // Prepare vector and timing data
    Data.resize(config.arraySize);
    measurements_init();
 }
@@ -260,37 +186,28 @@ int main(int argc, char* argv[]) try {
      );
      std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
 
      // Run distributed sort
-     if (mpi.rank() == 0)
-        logger << "Starting distributed sorting ... ";
+     logger << "Starting distributed sorting ... ";
      Timer_total.start();
-     #if CODE_VERSION == BUBBLETONIC
-        distBubbletonic(Data, mpi.size(), mpi.rank());
-     #else
-        distBitonic(Data, mpi.size(), mpi.rank());
-     #endif
+     distBitonic(Data);
      Timer_total.stop();
      measurements_next();
-     if (mpi.rank() == 0)
-        logger << " Done." << logger.endl;
+     logger << " Done." << logger.endl;
   }
 
   // Print-outs and validation
   if (config.perf > 1) {
-     Timing::print_duration(Timer_total.median(),    "Total     ", mpi.rank());
-     Timing::print_duration(Timer_fullSort.median(), "Full-Sort ", mpi.rank());
-     Timing::print_duration(Timer_exchange.median(), "Exchange  ", mpi.rank());
-     Timing::print_duration(Timer_minmax.median(),   "Min-Max   ", mpi.rank());
-     Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort", mpi.rank());
+     Timing::print_duration(Timer_total.median(),    "Total     ");
+     Timing::print_duration(Timer_fullSort.median(), "Full-Sort ");
+     Timing::print_duration(Timer_exchange.median(), "Exchange  ");
+     Timing::print_duration(Timer_minmax.median(),   "Min-Max   ");
+     Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort");
   }
   if (config.validation) {
      // If requested, we have the chance to fail!
-     if (mpi.rank() == 0)
-        std::cout << "[Validation] Results validation ...";
-     bool val = validator(Data, mpi.size(), mpi.rank());
-     if (mpi.rank() == 0)
-        std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[32m [FAILED] \x1B[0m\n");
+     std::cout << "[Validation] Results validation ...";
+     bool val = validator(Data);
+     std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[32m [FAILED] \x1B[0m\n");
   }
-  mpi.finalize();
   return 0;
} catch (std::exception& e) {
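With the gathers removed, `validator()` above is left returning its optimistic default. In a single-process build the whole check collapses to one sortedness test; a possible shape for it (illustrative name, not what the patch currently does):

```cpp
// Illustrative single-process validation sketch.
#include <algorithm>

template <typename DataT>
bool isLocallySorted(const DataT& data) {
    return std::is_sorted(data.begin(), data.end());   // ascending order over the whole buffer
}
```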
diff --git a/homework_3/src/utils.hpp b/homework_3/src/utils.hpp
index 4b71366..69b7bc1 100644
--- a/homework_3/src/utils.hpp
+++ b/homework_3/src/utils.hpp
@@ -13,256 +13,10 @@
 #include
 #include
 #include
-#include <mpi.h>
+#include
 
 #include "config.h"
 
-/*!
- * Min-Max statistics data for exchange optimization
- * @tparam Value_t The underlying data type of the sequence data
- */
-template <typename Value_t>
-struct Stat_t {
-    using value_type = Value_t;   //!< meta-export the type
-
-    Value_t min{};   //!< The minimum value of the sequence
-    Value_t max{};   //!< The maximum value of the sequence
-};
-
-//! Application data selection alias
-using distStat_t = Stat_t;
-extern distStat_t localStat, remoteStat;   // Make stats public
-
-/*
- * MPI_ dispatcher mechanism
- */
-template <typename T> struct MPI_TypeMapper { };
-
-template <> struct MPI_TypeMapper<char>               { static MPI_Datatype getType() { return MPI_CHAR; } };
-template <> struct MPI_TypeMapper<short>              { static MPI_Datatype getType() { return MPI_SHORT; } };
-template <> struct MPI_TypeMapper<int>                { static MPI_Datatype getType() { return MPI_INT; } };
-template <> struct MPI_TypeMapper<long>               { static MPI_Datatype getType() { return MPI_LONG; } };
-template <> struct MPI_TypeMapper<long long>          { static MPI_Datatype getType() { return MPI_LONG_LONG; } };
-template <> struct MPI_TypeMapper<unsigned char>      { static MPI_Datatype getType() { return MPI_UNSIGNED_CHAR; } };
-template <> struct MPI_TypeMapper<unsigned short>     { static MPI_Datatype getType() { return MPI_UNSIGNED_SHORT; } };
-template <> struct MPI_TypeMapper<unsigned>           { static MPI_Datatype getType() { return MPI_UNSIGNED; } };
-template <> struct MPI_TypeMapper<unsigned long>      { static MPI_Datatype getType() { return MPI_UNSIGNED_LONG; } };
-template <> struct MPI_TypeMapper<unsigned long long> { static MPI_Datatype getType() { return MPI_UNSIGNED_LONG_LONG; } };
-template <> struct MPI_TypeMapper<float>              { static MPI_Datatype getType() { return MPI_FLOAT; } };
-template <> struct MPI_TypeMapper<double>             { static MPI_Datatype getType() { return MPI_DOUBLE; } };
-
-/*!
- * MPI wrapper type to provide MPI functionality and RAII to MPI as a resource
- *
- * @tparam TID The MPI type for process id [default: int]
- */
-template <typename TID = int>
-struct MPI_t {
-    using ID_t = TID;   // Export TID type (currently int defined by the standard)
-
-    /*!
-     * Initializes the MPI environment, must called from each process
-     *
-     * @param argc [int*]    POINTER to main's argc argument
-     * @param argv [char***] POINTER to main's argv argument
-     */
-    void init(int* argc, char*** argv) {
-        // Initialize the MPI environment
-        int err;
-        if ((err = MPI_Init(argc, argv)) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Init() - ");
-        initialized_ = true;
-
-        // Get the number of processes
-        int size_value, rank_value;
-        if ((err = MPI_Comm_size(MPI_COMM_WORLD, &size_value)) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Comm_size() - ");
-        if ((err = MPI_Comm_rank(MPI_COMM_WORLD, &rank_value)) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Comm_rank() - ");
-        size_ = static_cast<ID_t>(size_value);
-        rank_ = static_cast<ID_t>(rank_value);
-        if (size_ > static_cast<ID_t>(MAX_MPI_SIZE))
-            throw std::runtime_error(
-                "(MPI) size - Not supported number of nodes [over " + std::to_string(MAX_MPI_SIZE) + "]\n"
-            );
-
-        // Get the name of the processor
-        char processor_name[MPI_MAX_PROCESSOR_NAME];
-        int name_len;
-        if ((err = MPI_Get_processor_name(processor_name, &name_len)) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Get_processor_name() - ");
-        name_ = std::string (processor_name, name_len);
-    }
-
-    /*!
-     * Exchange one data object of type @c T with partner as part of the sorting network of both
-     * bubbletonic or bitonic sorting algorithms.
-     *
-     * This function matches a transmit and a receive in order for fully exchanged the data object
-     * between current node and partner.
-     *
-     * @tparam T The object type
-     *
-     * @param local   [const T&] Reference to the local object to send
-     * @param remote  [T&]       Reference to the object to receive data from partner
-     * @param partner [mpi_id_t] The partner for the exchange
-     * @param tag     [int]      The tag to use for the MPI communication
-     */
-    template <typename T>
-    void exchange_it(const T& local, T& remote, ID_t partner, int tag) {
-        if (tag < 0)
-            throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound");
-        MPI_Status status;
-        int err;
-        if ((err = MPI_Sendrecv(
-                &local,  sizeof(T), MPI_BYTE, partner, tag,
-                &remote, sizeof(T), MPI_BYTE, partner, tag,
-                MPI_COMM_WORLD, &status
-            )) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - ");
-    }
-
-    /*!
-     * Exchange data with partner as part of the sorting network of both bubbletonic or bitonic
-     * sorting algorithms.
-     *
-     * This function matches a transmit and a receive in order for fully exchanged data between
-     * current node and partner.
-     *
-     * @tparam ValueT The value type used in buffer
-     *
-     * @param ldata   [const ValueT*] Pointer to local data to send
-     * @param rdata   [ValueT*]       Pointer to buffer to receive data from partner
-     * @param count   [size_t]        The number of data to exchange
-     * @param partner [mpi_id_t]      The partner for the exchange
-     * @param tag     [int]           The tag to use for the MPI communication
-     */
-    template <typename ValueT>
-    void exchange(const ValueT* ldata, ValueT* rdata, size_t count, ID_t partner, int tag) {
-        if (tag < 0)
-            throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound");
-
-        MPI_Datatype datatype = MPI_TypeMapper<ValueT>::getType();
-        MPI_Status status;
-        int err;
-        if ((err = MPI_Sendrecv(
-                ldata, count, datatype, partner, tag,
-                rdata, count, datatype, partner, tag,
-                MPI_COMM_WORLD, &status
-            )) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - ");
-    }
-
-    /*!
-     * Initiate a data exchange data with partner using non-blocking Isend-Irecv, as part of the
-     * sorting network of both bubbletonic or bitonic sorting algorithms.
-     *
-     * This function matches a transmit and a receive in order for fully exchanged data between
-     * current node and partner.
-     * @note
-     *    This call MUST paired with exchange_wait() for each MPI_t object.
-     *    Calling 2 consecutive exchange_start() for the same MPI_t object is undefined.
-     *
-     * @tparam ValueT The value type used in buffers
-     *
-     * @param ldata   [const ValueT*] Pointer to local data to send
-     * @param rdata   [ValueT*]       Pointer to buffer to receive data from partner
-     * @param count   [size_t]        The number of data to exchange
-     * @param partner [mpi_id_t]      The partner for the exchange
-     * @param tag     [int]           The tag to use for the MPI communication
-     */
-    template <typename ValueT>
-    void exchange_start(const ValueT* ldata, ValueT* rdata, size_t count, ID_t partner, int tag) {
-        if (tag < 0)
-            throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound");
-
-        MPI_Datatype datatype = MPI_TypeMapper<ValueT>::getType();
-        int err;
-        err = MPI_Isend(ldata, count, datatype, partner, tag, MPI_COMM_WORLD, &handle_tx);
-        if (err != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Isend() - ");
-        err = MPI_Irecv(rdata, count, datatype, partner, tag, MPI_COMM_WORLD, &handle_rx);
-        if (err != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Irecv() - ");
-    }
-
-    /*!
-     * Block wait for the completion of the previously called exchange_start()
-     *
-     * @note
-     *    This call MUST paired with exchange_start() for each MPI_t object.
-     *    Calling 2 consecutive exchange_wait() for the same MPI_t object is undefined.
-     */
-    void exchange_wait() {
-        MPI_Status status;
-
-        int err;
-        if ((err = MPI_Wait(&handle_tx, &status)) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Wait() [send] - ");
-
-        if ((err = MPI_Wait(&handle_rx, &status)) != MPI_SUCCESS)
-            mpi_throw(err, "(MPI) MPI_Wait() [recv] - ");
-    }
-
-    // Accessors
-    [[nodiscard]] ID_t rank() const noexcept { return rank_; }
-    [[nodiscard]] ID_t size() const noexcept { return size_; }
-    [[nodiscard]] const std::string& name() const noexcept { return name_; }
-
-    // Mutators
-    ID_t rank(ID_t rank) noexcept { return rank_ = rank; }
-    ID_t size(ID_t size) noexcept { return size_ = size; }
-    std::string& name(const std::string& name) noexcept { return name_ = name; }
-
-    /*!
-     * Finalized the MPI
-     */
-    void finalize() {
-        // Finalize the MPI environment
-        initialized_ = false;
-        MPI_Finalize();
-    }
-
-    //! RAII MPI finalization
-    ~MPI_t() {
-        // Finalize the MPI environment even on unexpected errors
-        if (initialized_)
-            MPI_Finalize();
-    }
-
-
-    // Local functionality
-private:
-    /*!
-     * Throw exception helper. It bundles the prefix msg with the MPI error string retrieved by
-     * MPI API.
-     *
-     * @param err The MPI error code
-     * @param prefixMsg The prefix text for the exception error message
-     */
-    void mpi_throw(int err, const char* prefixMsg) {
-        char err_msg[MPI_MAX_ERROR_STRING];
-        int msg_len;
-        MPI_Error_string(err, err_msg, &msg_len);
-        throw std::runtime_error(prefixMsg + std::string (err_msg) + '\n');
-    }
-
-private:
-    ID_t rank_{};              //!< MPI rank of the process
-    ID_t size_{};              //!< MPI total size of the execution
-    std::string name_{};       //!< The name of the local machine
-    bool initialized_{};       //!< RAII helper flag
-    MPI_Request handle_tx{};   //!< MPI async exchange handler for Transmission
-    MPI_Request handle_rx{};   //!< MPI async exchange handler for Receptions
-};
-
-/*
- * Exported data types
- */
-extern MPI_t<> mpi;
-using mpi_id_t = MPI_t<>::ID_t;
-
-
 /*!
 * @brief A std::vector wrapper with 2 vectors, an active and a shadow.
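The context line above introduces the shadowed vector that `exchange()` used through `getActive()`/`getShadow()`. As a rough mental model only (the real class in utils.hpp carries more API than this), an active/shadow pair can be pictured as:

```cpp
// Rough sketch of the active/shadow double-buffer idea -- not the real ShadowedVec.
#include <cstddef>
#include <vector>

template <typename Value_t>
class DoubleBufferSketch {
public:
    std::vector<Value_t>& getActive()          { return buffers_[active_]; }
    std::vector<Value_t>& getShadow()          { return buffers_[1 - active_]; }
    void        swapActive() noexcept          { active_ = 1 - active_; }   // promote shadow to active
    std::size_t size() const noexcept          { return buffers_[active_].size(); }

private:
    std::vector<Value_t> buffers_[2];   // one buffer holds live data, the other receives/stages
    unsigned             active_{0};
};
```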
@@ -463,19 +217,19 @@ struct Timing {
    }
 
    //! Tool to print the time interval
-   static void print_duration(const Tduration& duration, const char *what, mpi_id_t rank) noexcept {
+   static void print_duration(const Tduration& duration, const char *what) noexcept {
      if (std::chrono::duration_cast<std::chrono::milliseconds>(duration).count() < 10000)
-        std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
+        std::cout << "[Timing] " << what << ": "
                   << std::to_string(std::chrono::duration_cast<std::chrono::microseconds>(duration).count()) << " [usec]\n";
      else if (std::chrono::duration_cast<std::chrono::seconds>(duration).count() < 10000)
-        std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
+        std::cout << "[Timing] " << what << ": "
                   << std::to_string(std::chrono::duration_cast<std::chrono::milliseconds>(duration).count()) << " [msec]\n";
      else {
        char stime[26]; // fit ulong
        auto sec  = std::chrono::duration_cast<std::chrono::seconds>(duration).count();
        auto msec = (std::chrono::duration_cast<std::chrono::milliseconds>(duration).count() % 1000) / 10; // keep 2 digit
        std::sprintf(stime, "%ld.%1ld", sec, msec);
-       std::cout << "[Timing] (Rank " << rank << ") " << what << ": " << stime << " [sec]\n";
+       std::cout << "[Timing] " << what << ": " << stime << " [sec]\n";
      }
    }
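Reading aid for the unit selection in `print_duration()` above: the `duration_cast` targets were reconstructed from the usec/msec/sec branches and are an assumption; the same bucketing can be written as a small free function.

```cpp
// Illustrative free-function version of the unit bucketing; thresholds assumed from the code above.
#include <chrono>
#include <cstdio>

inline void printScaled(std::chrono::steady_clock::duration d, const char* what) {
    using namespace std::chrono;
    if (duration_cast<milliseconds>(d).count() < 10000)         // short runs: report microseconds
        std::printf("[Timing] %s: %lld [usec]\n", what,
                    static_cast<long long>(duration_cast<microseconds>(d).count()));
    else if (duration_cast<seconds>(d).count() < 10000)         // medium runs: report milliseconds
        std::printf("[Timing] %s: %lld [msec]\n", what,
                    static_cast<long long>(duration_cast<milliseconds>(d).count()));
    else                                                        // long runs: seconds with one decimal
        std::printf("[Timing] %s: %lld.%lld [sec]\n", what,
                    static_cast<long long>(duration_cast<seconds>(d).count()),
                    static_cast<long long>((duration_cast<milliseconds>(d).count() % 1000) / 100));
}
```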