AUTH's THMMY "Parallel and distributed systems" course assignments.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

381 lines
14 KiB

  1. /*!
  2. * \file
  3. * \brief Distributed sort implementation header
  4. *
  5. * \author
  6. * Christos Choutouridis AEM:8997
  7. * <cchoutou@ece.auth.gr>
  8. */
  9. #ifndef DISTBITONIC_H_
  10. #define DISTBITONIC_H_
  11. #include <vector>
  12. #include <algorithm>
  13. #include <parallel/algorithm>
  14. #include <cmath>
  15. #include <cstdint>
  16. #if !defined DEBUG
  17. #define NDEBUG
  18. #endif
  19. #include <cassert>
  20. #include "utils.hpp"
  21. extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public
  22. /*!
  23. * Enumerator for the different versions of the sorting method
  24. */
  25. enum class SortMode {
  26. Bubbletonic, //!< The v0.5 of the algorithm where we use a bubble-sort like approach
  27. Bitonic //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach
  28. };
  29. /*
  30. * ============================== Sort utilities ==============================
  31. */
  32. /*!
  33. * The primary function template of ascending(). It is DISABLED since , it is explicitly specialized
  34. * for each of the \c SortMode
  35. */
  36. template <SortMode Mode> inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
  37. /*!
  38. * Returns the ascending or descending configuration of the node's sequence based on
  39. * the current node (MPI process) and the depth of the sorting network
  40. *
  41. * @param node [mpi_id_t] The current node (MPI process)
  42. * @return [bool] True if we need ascending configuration, false otherwise
  43. */
  44. template <> inline
  45. bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept {
  46. return (node % 2) == 0;
  47. }
  48. /*!
  49. * Returns the ascending or descending configuration of the node's sequence based on
  50. * the current node (MPI process) and the depth of the sorting network
  51. *
  52. * @param node [mpi_id_t] The current node (MPI process)
  53. * @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
  54. * @return [bool] True if we need ascending configuration, false otherwise
  55. */
  56. template <> inline
  57. bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept {
  58. return !(node & (1 << depth));
  59. }
  60. /*!
  61. * The primary function template of partner(). It is DISABLED since , it is explicitly specialized
  62. * for each of the \c SortMode
  63. */
  64. template <SortMode Mode> inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete;
  65. /*!
  66. * Returns the node's partner for data exchange during the sorting network iterations
  67. * of Bubbletonic
  68. *
  69. * @param node [mpi_id_t] The current node
  70. * @param step [size_t] The step of the sorting network
  71. * @return [mpi_id_t] The node id of the partner for data exchange
  72. */
  73. template <> inline
  74. mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept {
  75. //return (node % 2 == step % 2) ? node + 1 : node - 1;
  76. return (((node+step) % 2) == 0) ? node + 1 : node - 1;
  77. }
  78. /*!
  79. * Returns the node's partner for data exchange during the sorting network iterations
  80. * of Bitonic
  81. *
  82. * @param node [mpi_id_t] The current node
  83. * @param step [size_t] The step of the sorting network
  84. * @return [mpi_id_t] The node id of the partner for data exchange
  85. */
  86. template <> inline
  87. mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept {
  88. return (node ^ (1 << step));
  89. }
  90. /*!
  91. * The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized
  92. * for each of the \c SortMode
  93. */
  94. template<SortMode Mode> inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) = delete;
  95. /*!
  96. * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange.
  97. *
  98. * @param node [mpi_id_t] The node for which we check
  99. * @param partner [mpi_id_t] The partner of the data exchange
  100. * @return [bool] True if the node should keep the small values, false otherwise
  101. */
  102. template <> inline
  103. bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) {
  104. if (node == partner)
  105. throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
  106. return (node < partner);
  107. }
  108. /*!
  109. * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
  110. *
  111. * @param node [mpi_id_t] The node for which we check
  112. * @param partner [mpi_id_t] The partner of the data exchange
  113. * @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
  114. * @return [bool] True if the node should keep the small values, false otherwise
  115. */
  116. template <> inline
  117. bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) {
  118. if (node == partner)
  119. throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
  120. return ascending<SortMode::Bitonic>(node, depth) == (node < partner);
  121. }
  122. /*!
  123. * Predicate to check if the node is active in the current iteration of the bubbletonic
  124. * sort exchange.
  125. *
  126. * @param node [mpi_id_t] The node to check
  127. * @param nodes [size_t] The total number of nodes
  128. * @return [bool] True if the node is active, false otherwise
  129. */
  130. bool isActive(mpi_id_t node, size_t nodes);
  131. /*
  132. * ============================== Data utilities ==============================
  133. */
  134. /*!
  135. * Sort a range using the build-in O(Nlog(N)) algorithm
  136. *
  137. * @tparam RangeT A range type with random access iterator
  138. *
  139. * @param data [RangeT] The data to be sorted
  140. * @param ascending [bool] Flag to indicate the sorting order
  141. */
  142. template<typename RangeT>
  143. void fullSort(RangeT& data, bool ascending) noexcept {
  144. // Use introsort from stdlib++ here, unless ... __gnu_parallel
  145. if (ascending) {
  146. __gnu_parallel::sort(data.begin(), data.end(), std::less<>());
  147. }
  148. else {
  149. __gnu_parallel::sort(data.begin(), data.end(), std::greater<>());
  150. }
  151. }
  152. /*!
  153. * Core functionality of sort for shadowed buffer types using
  154. * the "elbow sort" algorithm.
  155. *
  156. * @note:
  157. * This algorithm can not work "in place".
  158. * We use the active buffer as source and the shadow as target.
  159. * At the end we switch which buffer is active and which is the shadow.
  160. * @note
  161. * This is the core functionality. Use the elbowSort() function instead
  162. *
  163. * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
  164. * @tparam CompT A Comparison type for binary operation comparisons
  165. *
  166. * @param data [ShadowedDataT] The data to sort
  167. * @param ascending [bool] Flag to indicate the sorting order
  168. * @param comp [CompT] The binary operator object
  169. */
  170. template<typename ShadowedDataT, typename CompT>
  171. void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept {
  172. auto& active = data.getActive(); // Get the source vector (the data to sort)
  173. auto& shadow = data.getShadow(); // Get the target vector (the sorted data)
  174. size_t N = data.size(); // The total size is the same or both vectors
  175. size_t left = std::distance(
  176. active.begin(),
  177. (ascending) ?
  178. std::min_element(active.begin(), active.end()) :
  179. std::max_element(active.begin(), active.end())
  180. ); // start 'left' from elbow of the bitonic
  181. size_t right = (left == N-1) ? 0 : left + 1;
  182. // Walk in opposite directions from elbow and insert-sort to target vector
  183. for (size_t i = 0 ; i<N ; ++i) {
  184. if (comp(active[left], active[right])) {
  185. shadow[i] = active[left];
  186. left = (left == 0) ? N-1 : left -1; // cycle decrease
  187. }
  188. else {
  189. shadow[i] = active[right];
  190. right = (right + 1) % N; // cycle increase
  191. }
  192. }
  193. data.switch_active(); // Switch active-shadow buffers
  194. }
  195. /*!
  196. * Sort a shadowed buffer using the "elbow sort" algorithm.
  197. *
  198. * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
  199. *
  200. * @param data [ShadowedDataT] The data to sort
  201. * @param ascending [bool] Flag to indicate the sorting order
  202. */
  203. template<typename ShadowedDataT>
  204. void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
  205. if (ascending)
  206. elbowSortCore(data, ascending, std::less<>());
  207. else
  208. elbowSortCore(data, ascending, std::greater<>());
  209. }
  210. /*!
  211. * Predicate for exchange optimization. Returns true only if an exchange between partners is needed.
  212. * In order to do that we exchange min and max statistics of the partner's data.
  213. *
  214. * @tparam StatT Statistics data type (for min-max)
  215. *
  216. * @param lstat [const StatT] Reference to the local statistic data
  217. * @param rstat [StatT] Reference to the remote statistic data to fill
  218. * @param part [mpi_id_t] The partner for the exchange
  219. * @param tag [int] The tag to use for the exchange of stats
  220. * @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values
  221. * @return True if we need data exchange, false otherwise
  222. */
  223. template<typename StatT>
  224. bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) {
  225. timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag);
  226. return (keepSmall) ?
  227. rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max
  228. : lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max
  229. }
  230. /*!
  231. * Update stats utility
  232. *
  233. * @tparam RangeT A range type with random access iterator
  234. * @tparam StatT Statistics data type (for min-max)
  235. *
  236. * @param stat [StatT] Reference to the statistic data to update
  237. * @param data [const RangeT] Reference to the sequence to extract stats from
  238. */
  239. template<typename RangeT, typename StatT>
  240. void updateMinMax(StatT& stat, const RangeT& data) noexcept {
  241. auto [min, max] = std::minmax_element(data.begin(), data.end());
  242. stat.min = *min;
  243. stat.max = *max;
  244. }
  245. /*!
  246. * Takes two sorted sequences where one is in increasing and the other is in decreasing order
  247. * and selects either the larger or the smaller items in one-to-one comparison between them.
  248. * The result is a bitonic sequence.
  249. *
  250. * @tparam RangeT A range type with random access iterator
  251. *
  252. * @param local [RangeT] Reference to the local sequence
  253. * @param remote [const RangeT] Reference to the remote sequence (copied locally by MPI)
  254. * @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence
  255. */
  256. template<typename RangeT>
  257. void keepMinOrMax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
  258. using value_t = typename RangeT::value_type;
  259. std::transform(
  260. local.begin(), local.end(),
  261. remote.begin(),
  262. local.begin(),
  263. [&keepSmall](const value_t& a, const value_t& b){
  264. return (keepSmall) ? std::min(a, b) : std::max(a, b);
  265. });
  266. }
  267. /*
  268. * ============================== Sort algorithms ==============================
  269. */
  270. /*!
  271. * A distributed version of the Bubbletonic sort algorithm.
  272. *
  273. * @note
  274. * Each MPI process should run an instance of this function.
  275. *
  276. * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
  277. *
  278. * @param data [ShadowedDataT] The local to MPI process data to sort
  279. * @param Processes [mpi_id_t] The total number of MPI processes
  280. * @param rank [mpi_id_t] The current process id
  281. */
  282. template<typename ShadowedDataT>
  283. void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
  284. // Initially sort to create a half part of a bitonic sequence
  285. timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));
  286. updateMinMax(localStat, data);
  287. // Sort network (O(N) iterations)
  288. for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
  289. // Find out exchange configuration
  290. auto part = partner<SortMode::Bubbletonic>(rank, step);
  291. auto ks = keepSmall<SortMode::Bubbletonic>(rank, part, Processes);
  292. if ( isActive(rank, Processes) &&
  293. isActive(part, Processes) ) {
  294. // Exchange with partner, keep nim-or-max and sort - O(N)
  295. int tag = static_cast<int>(2 * step);
  296. if (needsExchange(localStat, remoteStat, part, tag, ks)) {
  297. timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, ++tag);
  298. timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks);
  299. updateMinMax(localStat, data);
  300. }
  301. timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
  302. }
  303. }
  304. // Invert if the node was descending.
  305. if (!ascending<SortMode::Bubbletonic>(rank, 0))
  306. elbowSort(data, true);
  307. }
  308. /*!
  309. * A distributed version of the Bitonic sort algorithm.
  310. *
  311. * @note
  312. * Each MPI process should run an instance of this function.
  313. *
  314. * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
  315. *
  316. * @param data [ShadowedDataT] The local to MPI process data to sort
  317. * @param Processes [mpi_id_t] The total number of MPI processes
  318. * @param rank [mpi_id_t] The current process id
  319. */
  320. template<typename ShadowedDataT>
  321. void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
  322. // Initially sort to create a half part of a bitonic sequence
  323. timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));
  324. updateMinMax(localStat, data);
  325. // Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
  326. auto p = static_cast<uint32_t>(std::log2(Processes));
  327. for (size_t depth = 1; depth <= p; ++depth) {
  328. for (size_t step = depth; step > 0;) {
  329. --step;
  330. // Find out exchange configuration
  331. auto part = partner<SortMode::Bitonic>(rank, step);
  332. auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth);
  333. // Exchange with partner, keep nim-or-max
  334. int tag = static_cast<int>( (2*p*depth) + (2*step) );
  335. if (needsExchange(localStat, remoteStat, part, tag, ks)) {
  336. timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, tag);
  337. timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks);
  338. updateMinMax(localStat, data);
  339. }
  340. }
  341. // sort - O(N)
  342. timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));
  343. }
  344. }
  345. #endif //DISTBITONIC_H_