/*! * \file * \brief Distributed sort implementation header * * \author * Christos Choutouridis AEM:8997 * */ #ifndef DISTBITONIC_H_ #define DISTBITONIC_H_ #include #include #include #include #include #if !defined DEBUG #define NDEBUG #endif #include #include "utils.hpp" extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public /*! * Enumerator for the different versions of the sorting method */ enum class SortMode { Bubbletonic, //!< The v0.5 of the algorithm where we use a bubble-sort like approach Bitonic //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach }; /* * ============================== Sort utilities ============================== */ /*! * The primary function template of ascending(). It is DISABLED since , it is explicitly specialized * for each of the \c SortMode */ template inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete; /*! * Returns the ascending or descending configuration of the node's sequence based on * the current node (MPI process) and the depth of the sorting network * * @param node [mpi_id_t] The current node (MPI process) * @return [bool] True if we need ascending configuration, false otherwise */ template <> inline bool ascending(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept { return (node % 2) == 0; } /*! * Returns the ascending or descending configuration of the node's sequence based on * the current node (MPI process) and the depth of the sorting network * * @param node [mpi_id_t] The current node (MPI process) * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) * @return [bool] True if we need ascending configuration, false otherwise */ template <> inline bool ascending(mpi_id_t node, size_t depth) noexcept { return !(node & (1 << depth)); } /*! * The primary function template of partner(). It is DISABLED since , it is explicitly specialized * for each of the \c SortMode */ template inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete; /*! * Returns the node's partner for data exchange during the sorting network iterations * of Bubbletonic * * @param node [mpi_id_t] The current node * @param step [size_t] The step of the sorting network * @return [mpi_id_t] The node id of the partner for data exchange */ template <> inline mpi_id_t partner(mpi_id_t node, size_t step) noexcept { //return (node % 2 == step % 2) ? node + 1 : node - 1; return (((node+step) % 2) == 0) ? node + 1 : node - 1; } /*! * Returns the node's partner for data exchange during the sorting network iterations * of Bitonic * * @param node [mpi_id_t] The current node * @param step [size_t] The step of the sorting network * @return [mpi_id_t] The node id of the partner for data exchange */ template <> inline mpi_id_t partner(mpi_id_t node, size_t step) noexcept { return (node ^ (1 << step)); } /*! * The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized * for each of the \c SortMode */ template inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) = delete; /*! * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange. * * @param node [mpi_id_t] The node for which we check * @param partner [mpi_id_t] The partner of the data exchange * @return [bool] True if the node should keep the small values, false otherwise */ template <> inline bool keepSmall(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) { if (node == partner) throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); return (node < partner); } /*! * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange. * * @param node [mpi_id_t] The node for which we check * @param partner [mpi_id_t] The partner of the data exchange * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) * @return [bool] True if the node should keep the small values, false otherwise */ template <> inline bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) { if (node == partner) throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); return ascending(node, depth) == (node < partner); } /*! * Predicate to check if the node is active in the current iteration of the bubbletonic * sort exchange. * * @param node [mpi_id_t] The node to check * @param nodes [size_t] The total number of nodes * @return [bool] True if the node is active, false otherwise */ bool isActive(mpi_id_t node, size_t nodes); /* * ============================== Data utilities ============================== */ /*! * Sort a range using the build-in O(Nlog(N)) algorithm * * @tparam RangeT A range type with random access iterator * * @param data [RangeT] The data to be sorted * @param ascending [bool] Flag to indicate the sorting order */ template void fullSort(RangeT& data, bool ascending) noexcept { // Use introsort from stdlib++ here, unless ... __gnu_parallel if (ascending) { __gnu_parallel::sort(data.begin(), data.end(), std::less<>()); } else { __gnu_parallel::sort(data.begin(), data.end(), std::greater<>()); } } /*! * Core functionality of sort for shadowed buffer types using * the "elbow sort" algorithm. * * @note: * This algorithm can not work "in place". * We use the active buffer as source and the shadow as target. * At the end we switch which buffer is active and which is the shadow. * @note * This is the core functionality. Use the elbowSort() function instead * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * @tparam CompT A Comparison type for binary operation comparisons * * @param data [ShadowedDataT] The data to sort * @param ascending [bool] Flag to indicate the sorting order * @param comp [CompT] The binary operator object */ template void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept { auto& active = data.getActive(); // Get the source vector (the data to sort) auto& shadow = data.getShadow(); // Get the target vector (the sorted data) size_t N = data.size(); // The total size is the same or both vectors size_t left = std::distance( active.begin(), (ascending) ? std::min_element(active.begin(), active.end()) : std::max_element(active.begin(), active.end()) ); // start 'left' from elbow of the bitonic size_t right = (left == N-1) ? 0 : left + 1; // Walk in opposite directions from elbow and insert-sort to target vector for (size_t i = 0 ; i void elbowSort(ShadowedDataT& data, bool ascending) noexcept { if (ascending) elbowSortCore(data, ascending, std::less<>()); else elbowSortCore(data, ascending, std::greater<>()); } /*! * Predicate for exchange optimization. Returns true only if an exchange between partners is needed. * In order to do that we exchange min and max statistics of the partner's data. * * @tparam StatT Statistics data type (for min-max) * * @param lstat [const StatT] Reference to the local statistic data * @param rstat [StatT] Reference to the remote statistic data to fill * @param part [mpi_id_t] The partner for the exchange * @param tag [int] The tag to use for the exchange of stats * @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values * @return True if we need data exchange, false otherwise */ template bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) { timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag); return (keepSmall) ? rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max : lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max } /*! * Update stats utility * * @tparam RangeT A range type with random access iterator * @tparam StatT Statistics data type (for min-max) * * @param stat [StatT] Reference to the statistic data to update * @param data [const RangeT] Reference to the sequence to extract stats from */ template void updateMinMax(StatT& stat, const RangeT& data) noexcept { auto [min, max] = std::minmax_element(data.begin(), data.end()); stat.min = *min; stat.max = *max; } /*! * Takes two sorted sequences where one is in increasing and the other is in decreasing order * and selects either the larger or the smaller items in one-to-one comparison between them. * The result is a bitonic sequence. * * @tparam RangeT A range type with random access iterator * * @param local [RangeT] Reference to the local sequence * @param remote [const RangeT] Reference to the remote sequence (copied locally by MPI) * @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence */ template void keepMinOrMax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept { using value_t = typename RangeT::value_type; std::transform( local.begin(), local.end(), remote.begin(), local.begin(), [&keepSmall](const value_t& a, const value_t& b){ return (keepSmall) ? std::min(a, b) : std::max(a, b); }); } /* * ============================== Sort algorithms ============================== */ /*! * A distributed version of the Bubbletonic sort algorithm. * * @note * Each MPI process should run an instance of this function. * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * * @param data [ShadowedDataT] The local to MPI process data to sort * @param Processes [mpi_id_t] The total number of MPI processes * @param rank [mpi_id_t] The current process id */ template void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { // Initially sort to create a half part of a bitonic sequence timeCall(TfullSort, fullSort, data, ascending(rank, 0)); updateMinMax(localStat, data); // Sort network (O(N) iterations) for (size_t step = 0; step < static_cast(Processes); ++step) { // Find out exchange configuration auto part = partner(rank, step); auto ks = keepSmall(rank, part, Processes); if ( isActive(rank, Processes) && isActive(part, Processes) ) { // Exchange with partner, keep nim-or-max and sort - O(N) int tag = static_cast(2 * step); if (needsExchange(localStat, remoteStat, part, tag, ks)) { timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, ++tag); timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks); updateMinMax(localStat, data); } timeCall(TelbowSort, elbowSort, data, ascending(rank, Processes)); } } // Invert if the node was descending. if (!ascending(rank, 0)) elbowSort(data, true); } /*! * A distributed version of the Bitonic sort algorithm. * * @note * Each MPI process should run an instance of this function. * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * * @param data [ShadowedDataT] The local to MPI process data to sort * @param Processes [mpi_id_t] The total number of MPI processes * @param rank [mpi_id_t] The current process id */ template void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { // Initially sort to create a half part of a bitonic sequence timeCall(TfullSort, fullSort, data, ascending(rank, 0)); updateMinMax(localStat, data); // Run through sort network using elbow-sort ( O(LogN * LogN) iterations ) auto p = static_cast(std::log2(Processes)); for (size_t depth = 1; depth <= p; ++depth) { for (size_t step = depth; step > 0;) { --step; // Find out exchange configuration auto part = partner(rank, step); auto ks = keepSmall(rank, part, depth); // Exchange with partner, keep nim-or-max int tag = static_cast( (2*p*depth) + (2*step) ); if (needsExchange(localStat, remoteStat, part, tag, ks)) { timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, tag); timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks); updateMinMax(localStat, data); } } // sort - O(N) timeCall(TelbowSort, elbowSort, data, ascending(rank, depth)); } } #endif //DISTBITONIC_H_