Browse Source

HW2: RC1 - Model version

tags/v2.0
parent
commit
1bd0cbb8d0
7 changed files with 116 additions and 70 deletions
  1. +7
    -7
      homework_2/Makefile
  2. +11
    -11
      homework_2/hpc/btN1P4T4Q24.sh
  3. +3
    -3
      homework_2/include/config.h
  4. +15
    -12
      homework_2/include/distsort.hpp
  5. +54
    -21
      homework_2/include/utils.hpp
  6. +1
    -0
      homework_2/src/distsort.cpp
  7. +25
    -16
      homework_2/src/main.cpp

+ 7
- 7
homework_2/Makefile View File

@@ -45,18 +45,18 @@ DEP_DIR := $(BUILD_DIR)/.dep


# ========== Compiler settings ========== # ========== Compiler settings ==========
# Compiler flags for debug and release # Compiler flags for debug and release
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11
REL_CFLAGS := -Wall -Wextra -O3 -std=c11
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11 #-fopenmp
REL_CFLAGS := -Wall -Wextra -O3 -std=c11 #-fopenmp
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17 #-fopenmp
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17 #-fopenmp


# Pre-defines # Pre-defines
# PRE_DEFS := MYCAB=1729 SUPER_MODE # PRE_DEFS := MYCAB=1729 SUPER_MODE
PRE_DEFS :=
PRE_DEFS := #_GLIBCXX_PARALLEL


# ============== Linker settings ============== # ============== Linker settings ==============
# Linker flags (example: -pthread -lm) # Linker flags (example: -pthread -lm)
LDFLAGS := -pthread
LDFLAGS := -pthread # -fopenmp


# Map output file # Map output file
MAP_FILE := output.map MAP_FILE := output.map
@@ -228,7 +228,7 @@ perfbitonic: CC := mpicc
perfbitonic: CXX := mpic++ perfbitonic: CXX := mpic++
perfbitonic: CFLAGS := $(REL_CFLAGS) -g -DCODE_VERSION=BITONIC perfbitonic: CFLAGS := $(REL_CFLAGS) -g -DCODE_VERSION=BITONIC
perfbitonic: CXXFLAGS := $(REL_CXXFLAGS) -g -DCODE_VERSION=BITONIC perfbitonic: CXXFLAGS := $(REL_CXXFLAGS) -g -DCODE_VERSION=BITONIC
perfbitonic: TARGET := distbitonic
perfbitonic: TARGET := perfbitonic
perfbitonic: $(BUILD_DIR)/$(TARGET) perfbitonic: $(BUILD_DIR)/$(TARGET)
@mkdir -p out @mkdir -p out
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)


homework_2/hpc/ntasks16.sh → homework_2/hpc/btN1P4T4Q24.sh View File

@@ -1,28 +1,28 @@
#! /usr/bin/env bash #! /usr/bin/env bash


#SBATCH --partition=batch #SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4 #SBATCH --ntasks-per-node=4
#SBATCH --nodes=4
#SBATCH --cpus-per-task=4
#SBATCH --time=1:00 #SBATCH --time=1:00


# Use this as following
# $> sbatch <this file>
#
# NOTE:
# First compile with
# $> make -j hpc-build
#

module load gcc/9.2.0 openmpi/4.0.3 module load gcc/9.2.0 openmpi/4.0.3
# Note: # Note:
# The above versions are matching w/ my system's # The above versions are matching w/ my system's
# versions, thus making compiling/debugging easier. # versions, thus making compiling/debugging easier.


# Uncomment the following to compile the project
# Note:
# In order for the MPI to run properly (or to run entirely), we need
# to compile it in hpc using the loaded modules above.
# Note:
# Consider moving this to a separate stage before sbatch the tasks.
# make distbitonic
# make distbubbletonic

# Suppress unused UCX_ROOT warning # Suppress unused UCX_ROOT warning
export UCX_WARN_UNUSED_ENV_VARS=n export UCX_WARN_UNUSED_ENV_VARS=n


# Suppress CUDA-aware support is disabled warning # Suppress CUDA-aware support is disabled warning
export OMPI_MCA_opal_warn_on_missing_libcuda=0 export OMPI_MCA_opal_warn_on_missing_libcuda=0


srun ./bin/dist_v05
srun ./out/distbitonic -q 24 --perf --validation

+ 3
- 3
homework_2/include/config.h View File

@@ -44,15 +44,15 @@ using distValue_t = uint32_t;
/*! /*!
* Session option for each invocation of the executable * Session option for each invocation of the executable
*/ */
struct session_t {
struct config_t {
size_t arraySize{DEFAULT_DATA_SIZE}; //!< size_t arraySize{DEFAULT_DATA_SIZE}; //!<
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0 bool validation{false}; //!< Request a full validation at the end, performed by process rank 0
bool ndebug{false}; //!< Skips debug trap on DEBUG builds bool ndebug{false}; //!< Skips debug trap on DEBUG builds
bool timing{false}; //!< Enable timing measurements and prints
bool perf{false}; //!< Enable performance timing measurements and prints
bool verbose{false}; //!< Flag to enable verbose output to stdout bool verbose{false}; //!< Flag to enable verbose output to stdout
}; };


extern session_t session;
extern config_t config;




#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */

+ 15
- 12
homework_2/include/distsort.hpp View File

@@ -12,6 +12,7 @@


#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
//#include <parallel/algorithm>
#include <cmath> #include <cmath>
#include <cstdint> #include <cstdint>
#if !defined DEBUG #if !defined DEBUG
@@ -20,8 +21,8 @@
#include <cassert> #include <cassert>


#include "utils.hpp" #include "utils.hpp"
#include "config.h"


extern Timing TfullSort, Texchange, Tminmax, TelbowSort;


/*! /*!
* Enumerator for the different versions of the sorting method * Enumerator for the different versions of the sorting method
@@ -159,11 +160,13 @@ bool isActive(mpi_id_t node, size_t nodes);
*/ */
template<typename RangeT> template<typename RangeT>
void fullSort(RangeT& data, bool ascending) noexcept { void fullSort(RangeT& data, bool ascending) noexcept {
// Use introsort from stdlib++ here, unless ...
if (ascending)
// Use introsort from stdlib++ here, unless ... __gnu_parallel
if (ascending) {
std::sort(data.begin(), data.end(), std::less<>()); std::sort(data.begin(), data.end(), std::less<>());
else
}
else {
std::sort(data.begin(), data.end(), std::greater<>()); std::sort(data.begin(), data.end(), std::greater<>());
}
} }


/*! /*!
@@ -270,7 +273,7 @@ void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
template<typename ShadowedDataT> template<typename ShadowedDataT>
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
// Initially sort to create a half part of a bitonic sequence // Initially sort to create a half part of a bitonic sequence
fullSort(data, ascending<SortMode::Bubbletonic>(rank, 0));
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));


// Sort network (O(N) iterations) // Sort network (O(N) iterations)
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) { for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
@@ -280,9 +283,9 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
if ( isActive(rank, Processes) && if ( isActive(rank, Processes) &&
isActive(part, Processes) ) { isActive(part, Processes) ) {
// Exchange with partner, keep min-or-max and sort - O(N) // Exchange with partner, keep min-or-max and sort - O(N)
mpi.exchange(data.getActive(), data.getShadow(), part, step);
minmax(data.getActive(), data.getShadow(), ks);
elbowSort(data, ascending<SortMode::Bubbletonic>(rank, Processes));
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, step);
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
} }
} }


@@ -308,7 +311,7 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
template<typename ShadowedDataT> template<typename ShadowedDataT>
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
// Initially sort to create a half part of a bitonic sequence // Initially sort to create a half part of a bitonic sequence
fullSort(data, ascending<SortMode::Bitonic>(rank, 0));
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));


// Run through sort network using elbow-sort ( O(LogN * LogN) iterations ) // Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
auto p = static_cast<uint32_t>(std::log2(Processes)); auto p = static_cast<uint32_t>(std::log2(Processes));
@@ -319,11 +322,11 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
auto part = partner<SortMode::Bitonic>(rank, step); auto part = partner<SortMode::Bitonic>(rank, step);
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth); auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth);
// Exchange with partner, keep min-or-max // Exchange with partner, keep min-or-max
mpi.exchange(data.getActive(), data.getShadow(), part, (depth << 8) | step);
minmax(data.getActive(), data.getShadow(), ks);
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, (depth << 8) | step);
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
} }
// sort - O(N) // sort - O(N)
elbowSort (data, ascending<SortMode::Bitonic>(rank, depth));
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));
} }
} }




+ 54
- 21
homework_2/include/utils.hpp View File

@@ -14,6 +14,7 @@
#include <chrono> #include <chrono>
#include <unistd.h> #include <unistd.h>
#include <mpi.h> #include <mpi.h>
//#include <functional>


#include "config.h" #include "config.h"


@@ -286,7 +287,7 @@ struct Log {
//! We provide logging via << operator //! We provide logging via << operator
template<typename T> template<typename T>
Log &operator<<(T &&t) { Log &operator<<(T &&t) {
if (session.verbose) {
if (config.verbose) {
if (line_) { if (line_) {
std::cout << "[Log]: " << t; std::cout << "[Log]: " << t;
line_ = false; line_ = false;
@@ -299,7 +300,7 @@ struct Log {
// overload for special end line handling // overload for special end line handling
Log &operator<<(Endl e) { Log &operator<<(Endl e) {
(void) e; (void) e;
if (session.verbose) {
if (config.verbose) {
std::cout << '\n'; std::cout << '\n';
line_ = true; line_ = true;
} }
@@ -317,39 +318,71 @@ extern Log logger;
*/ */
struct Timing { struct Timing {
using Tpoint = std::chrono::steady_clock::time_point; using Tpoint = std::chrono::steady_clock::time_point;
using Tduration = std::chrono::microseconds;
using microseconds = std::chrono::microseconds; using microseconds = std::chrono::microseconds;
using milliseconds = std::chrono::milliseconds; using milliseconds = std::chrono::milliseconds;
using seconds = std::chrono::seconds; using seconds = std::chrono::seconds;


//! tool to mark the starting point //! tool to mark the starting point
Tpoint start() noexcept { return start_ = std::chrono::steady_clock::now(); }
Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); }


//! tool to mark the ending point //! tool to mark the ending point
Tpoint stop() noexcept { return stop_ = std::chrono::steady_clock::now(); }
Tpoint stop() noexcept {
Tpoint now = std::chrono::steady_clock::now();
duration_ += dt(now, mark_);
return now;
}


auto dt() noexcept {
return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
Tduration dt(Tpoint t2, Tpoint t1) noexcept {
return std::chrono::duration_cast<Tduration>(t2 - t1);
} }


//! tool to print the time interval //! tool to print the time interval
void print_dt(const char *what) noexcept {
if (session.timing) {
auto t = stop_ - start_;
if (std::chrono::duration_cast<microseconds>(t).count() < 10000)
std::cout << "[Timing]: " << what << ": "
<< std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
std::cout << "[Timing]: " << what << ": "
<< std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
else
std::cout << "[Timing]: " << what << ": "
<< std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
}
void print_duration(const char *what, mpi_id_t rank) noexcept {
if (std::chrono::duration_cast<microseconds>(duration_).count() < 10000)
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
<< std::to_string(std::chrono::duration_cast<microseconds>(duration_).count()) << " [usec]\n";
else if (std::chrono::duration_cast<milliseconds>(duration_).count() < 10000)
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
<< std::to_string(std::chrono::duration_cast<milliseconds>(duration_).count()) << " [msec]\n";
else
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
<< std::to_string(std::chrono::duration_cast<seconds>(duration_).count()) << " [sec]\n";

} }


private: private:
Tpoint start_;
Tpoint stop_;
Tpoint mark_{};
Tduration duration_{};
}; };


/*!
 * Utility macro that measures the execution time of a single call.
 *
 * Expands to: start \p Tim, call \p Func with the remaining arguments, stop \p Tim.
 * When \p Tim is a Timing object, each stop() adds the elapsed interval to the
 * timer's accumulated duration, so reusing the same timer sums up all timed calls.
 *
 * The expansion is wrapped in do { ... } while (0) so that it always behaves as
 * exactly one statement. Without the wrapper, `if (cond) timeCall(T, f, x);` would
 * guard only the start() call and run the other two steps unconditionally.
 *
 * @note The value returned by \p Func (if any) is discarded.
 * @note \p Func is intentionally not parenthesized, so that a member access
 *       expression (e.g. obj.method) or an unqualified function name is called
 *       exactly as if it had been written in place.
 *
 * @param Tim   The timer object to use; it must provide start() and stop()
 * @param Func  The callable to invoke
 * @param ...   The arguments to pass to \p Func
 */
#define timeCall(Tim, Func, ...)    \
   do {                             \
      (Tim).start();                \
      Func(__VA_ARGS__);            \
      (Tim).stop();                 \
   } while (0)


//template <typename Ret, typename Func, typename... Args>
//auto timeCall_r(Ret& ret, Func&& func, Args&&... args) {
// Timing timer;
//
// timer.start();
// ret = std::invoke(std::forward<Func>(func), std::forward<Args>(args)...);
// timer.stop();
//
// return timer.dt();
//}

#endif /* UTILS_HPP_ */ #endif /* UTILS_HPP_ */

+ 1
- 0
homework_2/src/distsort.cpp View File

@@ -9,6 +9,7 @@
#include "utils.hpp" #include "utils.hpp"
#include "distsort.hpp" #include "distsort.hpp"


Timing TfullSort, Texchange, Tminmax, TelbowSort;


bool isActive(mpi_id_t node, size_t nodes) { bool isActive(mpi_id_t node, size_t nodes) {
if (!((nodes > 0) && if (!((nodes > 0) &&


+ 25
- 16
homework_2/src/main.cpp View File

@@ -17,12 +17,12 @@
#include "distsort.hpp" #include "distsort.hpp"




// Global session data
session_t session;
// Global config data
config_t config;
MPI_t<> mpi; MPI_t<> mpi;
distBuffer_t Data; distBuffer_t Data;
Log logger; Log logger;
Timing timer;
Timing Ttotal;


/*! /*!
* A small command line argument parser * A small command line argument parser
@@ -37,23 +37,23 @@ bool get_options(int argc, char* argv[]){


if (arg == "-q" || arg == "--array-size") { if (arg == "-q" || arg == "--array-size") {
if (i+1 < argc) { if (i+1 < argc) {
session.arraySize = 1 << atoi(argv[++i]);
config.arraySize = 1 << atoi(argv[++i]);
} }
else { else {
status = false; status = false;
} }
} }
else if (arg == "--validation") { else if (arg == "--validation") {
session.validation = true;
config.validation = true;
} }
else if (arg == "--ndebug") { else if (arg == "--ndebug") {
session.ndebug = true;
config.ndebug = true;
} }
else if (arg == "-t" || arg == "--timing") {
session.timing = true;
else if (arg == "--perf") {
config.perf = true;
} }
else if (arg == "-v" || arg == "--verbose") { else if (arg == "-v" || arg == "--verbose") {
session.verbose = true;
config.verbose = true;
} }
else if (arg == "-h" || arg == "--help") { else if (arg == "-h" || arg == "--help") {
std::cout << "distbitonic/distbubbletonic - A distributed bitonic sort\n\n"; std::cout << "distbitonic/distbubbletonic - A distributed bitonic sort\n\n";
@@ -65,6 +65,8 @@ bool get_options(int argc, char* argv[]){
std::cout << "Options:\n\n"; std::cout << "Options:\n\n";
std::cout << " -q | --array-size <N>\n"; std::cout << " -q | --array-size <N>\n";
std::cout << " Selects the array size according to size = 2^N\n\n"; std::cout << " Selects the array size according to size = 2^N\n\n";
std::cout << " --par-sort\n";
std::cout << " Request a parallel full sorting algorithm\n\n";
std::cout << " --validation\n"; std::cout << " --validation\n";
std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; std::cout << " Request a full validation at the end, performed by process rank 0\n\n";
std::cout << " --ndebug\n"; std::cout << " --ndebug\n";
@@ -164,11 +166,11 @@ int main(int argc, char* argv[]) try {
#else #else
volatile bool sleep_wait = true; volatile bool sleep_wait = true;
#endif #endif
while (sleep_wait && !session.ndebug)
while (sleep_wait && !config.ndebug)
sleep(1); sleep(1);
#endif #endif


logger << "Initialize local array of " << session.arraySize << " elements" << logger.endl;
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max] std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max]
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::uniform_int_distribution<distValue_t > dis( std::uniform_int_distribution<distValue_t > dis(
@@ -176,24 +178,31 @@ int main(int argc, char* argv[]) try {
std::numeric_limits<distValue_t>::max() std::numeric_limits<distValue_t>::max()
); );
// Fill vector // Fill vector
Data.resize(session.arraySize);
Data.resize(config.arraySize);
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); }); std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });


if (mpi.rank() == 0) if (mpi.rank() == 0)
logger << "Starting distributed sorting ... "; logger << "Starting distributed sorting ... ";
timer.start();
Ttotal.start();
#if CODE_VERSION == BUBBLETONIC #if CODE_VERSION == BUBBLETONIC
distBubbletonic(Data, mpi.size(), mpi.rank()); distBubbletonic(Data, mpi.size(), mpi.rank());
#else #else
distBitonic (Data, mpi.size(), mpi.rank()); distBitonic (Data, mpi.size(), mpi.rank());
#endif #endif
timer.stop();
Ttotal.stop();
if (mpi.rank() == 0) if (mpi.rank() == 0)
logger << " Done." << logger.endl; logger << " Done." << logger.endl;
std::string timeMsg = "rank " + std::to_string(mpi.rank()); std::string timeMsg = "rank " + std::to_string(mpi.rank());
timer.print_dt(timeMsg.c_str());


if (session.validation) {

if (config.perf) {
Ttotal.print_duration("Total ", mpi.rank());
TfullSort.print_duration("Full-Sort ", mpi.rank());
Texchange.print_duration("Exchange ", mpi.rank());
Tminmax.print_duration("Min-Max ", mpi.rank());
TelbowSort.print_duration("Elbow-Sort", mpi.rank());
}
if (config.validation) {
// If requested, we have the chance to fail! // If requested, we have the chance to fail!
if (mpi.rank() == 0) if (mpi.rank() == 0)
std::cout << "Results validation ..."; std::cout << "Results validation ...";


Loading…
Cancel
Save