ソースを参照

HW2: RC3 - A small exchange optimization

tags/v2.0
コミット
4e7237bd4d
12個のファイルの変更249行の追加46行の削除
  1. +1
    -1
      homework_2/Makefile
  2. +1
    -1
      homework_2/hpc/btN1P4T4Q24.sh
  3. +9
    -6
      homework_2/include/config.h
  4. +53
    -6
      homework_2/include/distsort.hpp
  5. +71
    -19
      homework_2/include/utils.hpp
  6. +90
    -3
      homework_2/julia/bitonic_v1.jl
  7. +6
    -1
      homework_2/src/distsort.cpp
  8. +9
    -1
      homework_2/src/main.cpp
  9. +0
    -0
      homework_2/test/tests_BitonicUtils.cpp
  10. +0
    -0
      homework_2/test/tests_BubbletonicUtils.cpp
  11. +0
    -0
      homework_2/test/tests_CommonUtils.cpp
  12. +9
    -8
      homework_2/test/tests_MPI.cpp

+ 1
- 1
homework_2/Makefile ファイルの表示

@@ -1,5 +1,5 @@
#
# PDS homework_1 Makefile
# PDS HW2 Makefile
#
# Copyright (C) 2024 Christos Choutouridis <christos@choutouridis.net>
#


+ 1
- 1
homework_2/hpc/btN1P4T4Q24.sh ファイルの表示

@@ -10,7 +10,7 @@
# $> sbatch <this file>
#
# NOTE:
# First compile in aristotel with
# First compile in aristotle with
# $> module load gcc/9.2.0 openmpi/4.0.3
# $> make -j hpc-build
#


+ 9
- 6
homework_2/include/config.h ファイルの表示

@@ -1,5 +1,5 @@
/*!
* \file config.h
* \file
* \brief Build configuration file.
*
* \author
@@ -45,13 +45,16 @@ using distValue_t = uint32_t;
* Session option for each invocation of the executable
*/
struct config_t {
size_t arraySize{DEFAULT_DATA_SIZE}; //!<
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0
bool ndebug{false}; //!< Skips debug trap on DEBUG builds
bool perf{false}; //!< Enable performance timing measurements and prints
bool verbose{false}; //!< Flag to enable verbose output to stdout
size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort.
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0.
bool ndebug{false}; //!< Skips debug trap on DEBUG builds.
bool perf{false}; //!< Enable performance timing measurements and prints.
bool verbose{false}; //!< Flag to enable verbose output to stdout.
};

/*
* Exported data types
*/
extern config_t config;




+ 53
- 6
homework_2/include/distsort.hpp ファイルの表示

@@ -22,7 +22,7 @@

#include "utils.hpp"

extern Timing TfullSort, Texchange, Tminmax, TelbowSort;
extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public

/*!
* Enumerator for the different versions of the sorting method
@@ -231,6 +231,43 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
elbowSortCore(data, ascending, std::greater<>());
}

/*!
* Predicate for exchange optimization. Returns true only if an exchange between partners is needed.
* In order to do that we exchange min and max statistics of the partner's data.
*
* @tparam StatT Statistics data type (for min-max)
*
* @param lstat [const StatT] Reference to the local statistic data
* @param rstat [StatT] Reference to the remote statistic data to fill
* @param part [mpi_id_t] The partner for the exchange
* @param tag [int] The tag to use for the exchange of stats
* @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values
* @return True if we need data exchange, false otherwise
*/
template<typename StatT>
bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) {
timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag);
return (keepSmall) ?
rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max
: lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max
}

/*!
* Update stats utility
*
* @tparam RangeT A range type with random access iterator
* @tparam StatT Statistics data type (for min-max)
*
* @param stat [StatT] Reference to the statistic data to update
* @param data [const RangeT] Reference to the sequence to extract stats from
*/
template<typename RangeT, typename StatT>
void updateMinMax(StatT& stat, const RangeT& data) noexcept {
auto [min, max] = std::minmax_element(data.begin(), data.end());
stat.min = *min;
stat.max = *max;
}

/*!
* Takes two sorted sequences where one is in increasing and the other is in decreasing order
* and selects either the larger or the smaller items in one-to-one comparison between them.
@@ -243,7 +280,7 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
* @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence
*/
template<typename RangeT>
void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
void keepMinOrMax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
using value_t = typename RangeT::value_type;
std::transform(
local.begin(), local.end(),
@@ -274,6 +311,7 @@ template<typename ShadowedDataT>
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
// Initially sort to create a half part of a bitonic sequence
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));
updateMinMax(localStat, data);

// Sort network (O(N) iterations)
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
@@ -283,8 +321,12 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
if ( isActive(rank, Processes) &&
isActive(part, Processes) ) {
// Exchange with partner, keep nim-or-max and sort - O(N)
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, step);
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
int tag = static_cast<int>(2 * step);
if (needsExchange(localStat, remoteStat, part, tag, ks)) {
timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, ++tag);
timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks);
updateMinMax(localStat, data);
}
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
}
}
@@ -312,6 +354,7 @@ template<typename ShadowedDataT>
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
// Initially sort to create a half part of a bitonic sequence
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));
updateMinMax(localStat, data);

// Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
auto p = static_cast<uint32_t>(std::log2(Processes));
@@ -322,8 +365,12 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
auto part = partner<SortMode::Bitonic>(rank, step);
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth);
// Exchange with partner, keep nim-or-max
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, (depth << 8) | step);
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
int tag = static_cast<int>( (2*p*depth) + (2*step) );
if (needsExchange(localStat, remoteStat, part, tag, ks)) {
timeCall(Texchange, mpi.exchange_data, data.getActive(), data.getShadow(), part, tag);
timeCall(Tminmax, keepMinOrMax, data.getActive(), data.getShadow(), ks);
updateMinMax(localStat, data);
}
}
// sort - O(N)
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));


+ 71
- 19
homework_2/include/utils.hpp ファイルの表示

@@ -14,10 +14,24 @@
#include <chrono>
#include <unistd.h>
#include <mpi.h>
//#include <functional>

#include "config.h"

/*!
* Min-Max statistics data for exchange optimization
* @tparam Value_t The underlying data type of the sequence data
*/
template <typename Value_t>
struct Stat_t {
using value_type = Value_t; //!< meta-export the type

Value_t min{}; //!< The minimum value of the sequence
Value_t max{}; //!< The maximum value of the sequence
};

//! Application data selection alias
using distStat_t = Stat_t<distValue_t>;
extern distStat_t localStat, remoteStat; // Make stats public

/*
* MPI_<type> dispatcher mechanism
@@ -85,25 +99,54 @@ struct MPI_t {
*
* @tparam T The inner valur type used in buffer
*
* @param send_data [std::vector<T>] Reference to local data to send
* @param recv_data [std::vector<T>] Reference to buffer to receive data from partner
* @param ldata [std::vector<T>] Reference to local data to send
* @param rdata [std::vector<T>] Reference to buffer to receive data from partner
* @param partner [mpi_id_t] The partner for the exchange
* @param tag [int] The tag to use for the MPI communication
*/
template<typename T>
void exchange(const std::vector<T>& send_data, std::vector<T>& recv_data, ID_t partner, int tag) {
using namespace std::string_literals;
void exchange_data(const std::vector<T>& ldata, std::vector<T>& rdata, ID_t partner, int tag) {
if (tag < 0)
throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound");

MPI_Datatype datatype = MPI_TypeMapper<T>::getType();
int send_count = static_cast<int>(send_data.size());
int count = static_cast<int>(ldata.size());
MPI_Status status;
int err;
if ((err = MPI_Sendrecv(
ldata.data(), count, datatype, partner, tag,
rdata.data(), count, datatype, partner, tag,
MPI_COMM_WORLD, &status
)) != MPI_SUCCESS)
mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - ");
}

/*!
* Exchange a data object with partner as part of the sorting network of both bubbletonic
* or bitonic sorting algorithms.
*
* This function matches a transmit and a receive in order for fully exchanged the data object
* between current node and partner.
*
* @tparam T The object type
*
* @param local [const T&] Reference to the local object to send
* @param remote [T&] Reference to the object to receive data from partner
* @param partner [mpi_id_t] The partner for the exchange
* @param tag [int] The tag to use for the MPI communication
*/
template<typename T>
void exchange_it(const T& local, T& remote, ID_t partner, int tag) {
if (tag < 0)
throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound");
MPI_Status status;
int err;
if ((err = MPI_Sendrecv(
send_data.data(), send_count, datatype, partner, tag,
recv_data.data(), send_count, datatype, partner, tag,
&local, sizeof(T), MPI_BYTE, partner, tag,
&remote, sizeof(T), MPI_BYTE, partner, tag,
MPI_COMM_WORLD, &status
)) != MPI_SUCCESS)
mpi_throw(err, "(MPI) MPI_Sendrecv() - ");
mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - ");
}

// Accessors
@@ -165,11 +208,13 @@ using mpi_id_t = MPI_t<>::ID_t;


/*!
* A std::vector wrapper with 2 vectors, an active and a shadow. This type exposes the standard vector
* @brief A std::vector wrapper with 2 vectors, an active and a shadow.
*
* This type exposes the standard vector
* functionality of the active vector. The shadow can be used when we need to use the vector as mutable
* data in algorithms that can not support "in-place" editing (like elbow-sort for example)
*
* @tparam Value_t the inner data type of the vectors
* @tparam Value_t the underlying data type of the vectors
*/
template <typename Value_t>
struct ShadowedVec_t {
@@ -274,8 +319,10 @@ private:
} active{north}; //!< Flag to select between North and South buffer
};

/*
* Exported data types
*/
using distBuffer_t = ShadowedVec_t<distValue_t>;

extern distBuffer_t Data;

/*!
@@ -333,11 +380,12 @@ struct Timing {
return now;
}

Tduration dt(Tpoint t2, Tpoint t1) noexcept {
//! A duration calculation utility
static Tduration dt(Tpoint t2, Tpoint t1) noexcept {
return std::chrono::duration_cast<Tduration>(t2 - t1);
}

//! tool to print the time interval
//! Tool to print the time interval
void print_duration(const char *what, mpi_id_t rank) noexcept {
if (std::chrono::duration_cast<microseconds>(duration_).count() < 10000)
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
@@ -357,13 +405,17 @@ private:
};

/*!
* Utility high level function like to forward a function call and measure
* the excecution time
* Utility "high level function"-like macro to forward a function call
* and accumulate the execution time to the corresponding timing object.
*
* @param Tim The Timing object [Needs to have methods start() and stop()]
* @param Func The function name
* @param ... The arguments to pass to function (the preprocessor way)
*/
#define timeCall(Tim, Func, ...) \
Tim.start(); \
Func(__VA_ARGS__); \
Tim.stop(); \
Tim.start(); \
Func(__VA_ARGS__); \
Tim.stop(); \


#endif /* UTILS_HPP_ */

+ 90
- 3
homework_2/julia/bitonic_v1.jl ファイルの表示

@@ -4,9 +4,9 @@
#

function exchange(localid, remoteid)
if verbose
println("Exchange local data from $localid with partner $remoteid")
end
# if verbose
# println("Exchange local data from $localid with partner $remoteid")
# end
nothing # We have all data here ;)
end

@@ -23,9 +23,89 @@ function minmax(data, localid, remoteid, keepsmall)
end


function is_bitonic(arr)
n = length(arr)
if n <= 2
return true # Any sequence of length <= 2 is bitonic
end

# State for state machine. 1: inc, -1: dec, 0: z-state
state = 0
inc_count = 0
dec_count = 0
ret = false

for i in 1:n-1
# Find the first order
if state == 0
if arr[i] > arr[i+1]
state = -1
dec_count += 1
elseif arr[i] < arr[i+1]
state = 1
inc_count += 1
end
elseif state == -1 # decreasing
if arr[i] < arr[i + 1]
state = 1
inc_count += 1
end
elseif state == 1 # increasing
if arr[i] > arr[i+1]
state = -1
dec_count += 1
end
end
end

if inc_count <= 1 && dec_count <= 1
ret = true # Sequence is bitonic
elseif inc_count == 2 && dec_count == 1
ret = (arr[1] >= arr[n])
elseif inc_count == 1 && dec_count == 2
ret = (arr[1] <= arr[n])
end

ret
end

function is_sort(arr)
# State for state machine. 1: inc, -1: dec, 0: z-state
state = 0
inc_count = 0
dec_count = 0

for i in 1:length(arr)-1
# Find the first order
if state == 0
if arr[i] > arr[i+1]
state = -1
dec_count += 1
elseif arr[i] < arr[i+1]
state = 1
inc_count += 1
end
elseif state == -1 # decreasing
if arr[i] < arr[i + 1]
state = 1
inc_count += 1
end
elseif state == 1 # increasing
if arr[i] > arr[i+1]
state = -1
dec_count += 1
end
end
end

ret = ((inc_count + dec_count) == 1) ? state : 0
ret
end

function sort_network!(data, n, depth)
nodes = 0:n-1
bitonicFlag = zeros(Int8, size(data, 1))
sortFlag = zeros(Int8, size(data, 1))
for step = depth-1:-1:0
partnerid = nodes .⊻ (1 << step)
direction = (nodes .& (1 << depth)) .== 0 .& (nodes .< partnerid)
@@ -40,6 +120,13 @@ function sort_network!(data, n, depth)
minmax(data, i, partnerid[i+1], keepsmall[i+1])
end
end
if verbose
for i in 1:size(data, 1)
bitonicFlag[i] = is_bitonic(data[i, :])
sortFlag[i] = is_sort(data[i, :])
end
println("depth: $depth | step: $step | bitonicFlag: $bitonicFlag | sorfFlag: $sortFlag")
end
end
end



+ 6
- 1
homework_2/src/distsort.cpp ファイルの表示

@@ -1,6 +1,6 @@
/*!
* \file
* \brief Distributed sort implementation.
* \brief Distributed sort implementation
*
* \author
* Christos Choutouridis AEM:8997
@@ -9,8 +9,13 @@
#include "utils.hpp"
#include "distsort.hpp"

//! Statistic variables for exchange optimization
distStat_t localStat, remoteStat;

//! Performance timers for each one of the "costly" functions
Timing TfullSort, Texchange, Tminmax, TelbowSort;


bool isActive(mpi_id_t node, size_t nodes) {
if (!((nodes > 0) &&
(nodes <= std::numeric_limits<mpi_id_t>::max()) ))


+ 9
- 1
homework_2/src/main.cpp ファイルの表示

@@ -138,6 +138,9 @@ bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
}

#if !defined TESTING
/*!
* @return Returns 0, but.... we may throw or exit(1)
*/
int main(int argc, char* argv[]) try {
// Initialize MPI environment
mpi.init(&argc, &argv);
@@ -170,6 +173,7 @@ int main(int argc, char* argv[]) try {
sleep(1);
#endif

// Initialize local data
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max]
std::mt19937 gen(rd());
@@ -181,6 +185,7 @@ int main(int argc, char* argv[]) try {
Data.resize(config.arraySize);
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });

// Run distributed sort
if (mpi.rank() == 0)
logger << "Starting distributed sorting ... ";
Ttotal.start();
@@ -192,9 +197,9 @@ int main(int argc, char* argv[]) try {
Ttotal.stop();
if (mpi.rank() == 0)
logger << " Done." << logger.endl;
std::string timeMsg = "rank " + std::to_string(mpi.rank());


// Print-outs and validation
if (config.perf) {
Ttotal.print_duration("Total ", mpi.rank());
TfullSort.print_duration("Full-Sort ", mpi.rank());
@@ -224,6 +229,9 @@ catch (std::exception& e) {
#include <gtest/gtest.h>
#include <exception>

/*!
* The testing version of our program
*/
GTEST_API_ int main(int argc, char **argv) try {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();


homework_2/test/tests_Bitonic.cpp → homework_2/test/tests_BitonicUtils.cpp ファイルの表示


homework_2/test/tests_Bubbletonic.cpp → homework_2/test/tests_BubbletonicUtils.cpp ファイルの表示


homework_2/test/tests_Common.cpp → homework_2/test/tests_CommonUtils.cpp ファイルの表示


+ 9
- 8
homework_2/test/tests_MPI.cpp ファイルの表示

@@ -6,6 +6,9 @@
* make tests
* mpirun -np <N> ./out/tests
*
* Note:
* Yes each process runs the entire test suite!!
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
@@ -15,7 +18,11 @@
#include <mpi.h>
#include <random>
#include "distsort.hpp"
/*
* Global fixtures
*/

// MPI handler for the test session
MPI_t<> ts_mpi;

// Mersenne seeded from hw if possible. range: [type_min, type_max]
@@ -27,17 +34,11 @@ protected:
static void SetUpTestSuite() {
int argc = 0;
char** argv = nullptr;
MPI_Init(&argc, &argv);

int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
ts_mpi.rank(rank);
ts_mpi.size(size);
ts_mpi.init(&argc, &argv);
}

static void TearDownTestSuite() {
MPI_Finalize();
ts_mpi.finalize();
}
};



読み込み中…
キャンセル
保存