/*! * \file * \brief Distributed sort implementation header * * \author * Christos Choutouridis AEM:8997 * */ #ifndef DISTBITONIC_H_ #define DISTBITONIC_H_ #include #include #include #include #if !defined DEBUG #define NDEBUG #endif #include #include "utils.hpp" #include "config.h" /*! * Enumerator for the different versions of the sorting method */ enum class SortMode { Bubbletonic, //!< The v0.5 of the algorithm where we use a bubble-sort like approach Bitonic //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach }; /* * ============================== Sort utilities ============================== */ /*! * The primary function template of ascending(). It is DISABLED since , it is explicitly specialized * for each of the \c SortMode */ template inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete; /*! * Returns the ascending or descending configuration of the node's sequence based on * the current node (MPI process) and the depth of the sorting network * * @param node [mpi_id_t] The current node (MPI process) * @return [bool] True if we need ascending configuration, false otherwise */ template <> inline bool ascending(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept { return (node % 2) == 0; } /*! * Returns the ascending or descending configuration of the node's sequence based on * the current node (MPI process) and the depth of the sorting network * * @param node [mpi_id_t] The current node (MPI process) * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) * @return [bool] True if we need ascending configuration, false otherwise */ template <> inline bool ascending(mpi_id_t node, size_t depth) noexcept { return !(node & (1 << depth)); } /*! * The primary function template of partner(). It is DISABLED since , it is explicitly specialized * for each of the \c SortMode */ template inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete; /*! * Returns the node's partner for data exchange during the sorting network iterations * of Bubbletonic * * @param node [mpi_id_t] The current node * @param step [size_t] The step of the sorting network * @return [mpi_id_t] The node id of the partner for data exchange */ template <> inline mpi_id_t partner(mpi_id_t node, size_t step) noexcept { //return (node % 2 == step % 2) ? node + 1 : node - 1; return (((node+step) % 2) == 0) ? node + 1 : node - 1; } /*! * Returns the node's partner for data exchange during the sorting network iterations * of Bitonic * * @param node [mpi_id_t] The current node * @param step [size_t] The step of the sorting network * @return [mpi_id_t] The node id of the partner for data exchange */ template <> inline mpi_id_t partner(mpi_id_t node, size_t step) noexcept { return (node ^ (1 << step)); } /*! * The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized * for each of the \c SortMode */ template inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) = delete; /*! * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange. * * @param node [mpi_id_t] The node for which we check * @param partner [mpi_id_t] The partner of the data exchange * @return [bool] True if the node should keep the small values, false otherwise */ template <> inline bool keepSmall(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) { if (node == partner) throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); return (node < partner); } /*! * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange. * * @param node [mpi_id_t] The node for which we check * @param partner [mpi_id_t] The partner of the data exchange * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) * @return [bool] True if the node should keep the small values, false otherwise */ template <> inline bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) { if (node == partner) throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); return ascending(node, depth) == (node < partner); } /*! * Predicate to check if the node is active in the current iteration of the bubbletonic * sort exchange. * * @param node [mpi_id_t] The node to check * @param nodes [size_t] The total number of nodes * @return [bool] True if the node is active, false otherwise */ bool isActive(mpi_id_t node, size_t nodes); /* * ============================== Data utilities ============================== */ /*! * Sort a range using the build-in O(Nlog(N)) algorithm * * @tparam RangeT A range type with random access iterator * * @param data [RangeT] The data to be sorted * @param ascending [bool] Flag to indicate the sorting order */ template void fullSort(RangeT& data, bool ascending) noexcept { // Use introsort from stdlib++ here, unless ... if (ascending) std::sort(data.begin(), data.end(), std::less<>()); else std::sort(data.begin(), data.end(), std::greater<>()); } /*! * Core functionality of sort for shadowed buffer types using * the "elbow sort" algorithm. * * @note: * This algorithm can not work "in place". * We use the active buffer as source and the shadow as target. * At the end we switch which buffer is active and which is the shadow. * @note * This is the core functionality. Use the elbowSort() function instead * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * @tparam CompT A Comparison type for binary operation comparisons * * @param data [ShadowedDataT] The data to sort * @param ascending [bool] Flag to indicate the sorting order * @param comp [CompT] The binary operator object */ template void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept { auto& active = data.getActive(); // Get the source vector (the data to sort) auto& shadow = data.getShadow(); // Get the target vector (the sorted data) size_t N = data.size(); // The total size is the same or both vectors size_t left = std::distance( active.begin(), (ascending) ? std::min_element(active.begin(), active.end()) : std::max_element(active.begin(), active.end()) ); // start 'left' from elbow of the bitonic size_t right = (left == N-1) ? 0 : left + 1; // Walk in opposite directions from elbow and insert-sort to target vector for (size_t i = 0 ; i void elbowSort(ShadowedDataT& data, bool ascending) noexcept { if (ascending) elbowSortCore(data, ascending, std::less<>()); else elbowSortCore(data, ascending, std::greater<>()); } /*! * Takes two sorted sequences where one is in increasing and the other is in decreasing order * and selects either the larger or the smaller items in one-to-one comparison between them. * The result is a bitonic sequence. * * @tparam RangeT A range type with random access iterator * * @param local [RangeT] Reference to the local sequence * @param remote [const RangeT] Reference to the remote sequence (copied locally by MPI) * @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence */ template void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept { using value_t = typename RangeT::value_type; std::transform( local.begin(), local.end(), remote.begin(), local.begin(), [&keepSmall](const value_t& a, const value_t& b){ return (keepSmall) ? std::min(a, b) : std::max(a, b); }); } /* * ============================== Sort algorithms ============================== */ /*! * A distributed version of the Bubbletonic sort algorithm. * * @note * Each MPI process should run an instance of this function. * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * * @param data [ShadowedDataT] The local to MPI process data to sort * @param Processes [mpi_id_t] The total number of MPI processes * @param rank [mpi_id_t] The current process id */ template void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { // Initially sort to create a half part of a bitonic sequence fullSort(data, ascending(rank, 0)); // Sort network (O(N) iterations) for (size_t step = 0; step < static_cast(Processes); ++step) { // Find out exchange configuration auto part = partner(rank, step); auto ks = keepSmall(rank, part, Processes); if ( isActive(rank, Processes) && isActive(part, Processes) ) { // Exchange with partner, keep nim-or-max and sort - O(N) mpi.exchange(data.getActive(), data.getShadow(), part, step); minmax(data.getActive(), data.getShadow(), ks); elbowSort(data, ascending(rank, Processes)); } } // Invert if the node was descending. if (!ascending(rank, 0)) elbowSort(data, true); } /*! * A distributed version of the Bitonic sort algorithm. * * @note * Each MPI process should run an instance of this function. * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * * @param data [ShadowedDataT] The local to MPI process data to sort * @param Processes [mpi_id_t] The total number of MPI processes * @param rank [mpi_id_t] The current process id */ template void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { // Initially sort to create a half part of a bitonic sequence fullSort(data, ascending(rank, 0)); // Run through sort network using elbow-sort ( O(LogN * LogN) iterations ) auto p = static_cast(std::log2(Processes)); for (size_t depth = 1; depth <= p; ++depth) { for (size_t step = depth; step > 0;) { --step; // Find out exchange configuration auto part = partner(rank, step); auto ks = keepSmall(rank, part, depth); // Exchange with partner, keep nim-or-max mpi.exchange(data.getActive(), data.getShadow(), part, (depth << 8) | step); minmax(data.getActive(), data.getShadow(), ks); } // sort - O(N) elbowSort (data, ascending(rank, depth)); } } #endif //DISTBITONIC_H_