HW2: RC1 - Model version
This commit is contained in:
parent
4dc47bb8f4
commit
1bd0cbb8d0
@ -45,18 +45,18 @@ DEP_DIR := $(BUILD_DIR)/.dep
|
||||
|
||||
# ========== Compiler settings ==========
|
||||
# Compiler flags for debug and release
|
||||
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11
|
||||
REL_CFLAGS := -Wall -Wextra -O3 -std=c11
|
||||
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17
|
||||
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17
|
||||
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11 #-fopenmp
|
||||
REL_CFLAGS := -Wall -Wextra -O3 -std=c11 #-fopenmp
|
||||
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17 #-fopenmp
|
||||
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17 #-fopenmp
|
||||
|
||||
# Pre-defines
|
||||
# PRE_DEFS := MYCAB=1729 SUPER_MODE
|
||||
PRE_DEFS :=
|
||||
PRE_DEFS := #_GLIBCXX_PARALLEL
|
||||
|
||||
# ============== Linker settings ==============
|
||||
# Linker flags (example: -pthread -lm)
|
||||
LDFLAGS := -pthread
|
||||
LDFLAGS := -pthread # -fopenmp
|
||||
|
||||
# Map output file
|
||||
MAP_FILE := output.map
|
||||
@ -228,7 +228,7 @@ perfbitonic: CC := mpicc
|
||||
perfbitonic: CXX := mpic++
|
||||
perfbitonic: CFLAGS := $(REL_CFLAGS) -g -DCODE_VERSION=BITONIC
|
||||
perfbitonic: CXXFLAGS := $(REL_CXXFLAGS) -g -DCODE_VERSION=BITONIC
|
||||
perfbitonic: TARGET := distbitonic
|
||||
perfbitonic: TARGET := perfbitonic
|
||||
perfbitonic: $(BUILD_DIR)/$(TARGET)
|
||||
@mkdir -p out
|
||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
|
||||
|
@ -1,28 +1,28 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
#SBATCH --partition=batch
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks-per-node=4
|
||||
#SBATCH --nodes=4
|
||||
#SBATCH --cpus-per-task=4
|
||||
#SBATCH --time=1:00
|
||||
|
||||
# Use this as follows
|
||||
# $> sbatch <this file>
|
||||
#
|
||||
# NOTE:
|
||||
# First compile with
|
||||
# $> make -j hpc-build
|
||||
#
|
||||
|
||||
module load gcc/9.2.0 openmpi/4.0.3
|
||||
# Note:
|
||||
# The above versions are matching w/ my system's
|
||||
# versions, thus making compiling/debugging easier.
|
||||
|
||||
# Uncomment the following to compile the project
|
||||
# Note:
|
||||
# In order for the MPI to run properly (or to run entirely), we need
|
||||
# to compile it in hpc using the loaded modules above.
|
||||
# Note:
|
||||
# Consider moving this to a separate stage before sbatch the tasks.
|
||||
# make distbitonic
|
||||
# make distbubbletonic
|
||||
|
||||
# Suppress unused UCX_ROOT warning
|
||||
export UCX_WARN_UNUSED_ENV_VARS=n
|
||||
|
||||
# Suppress CUDA-aware support is disabled warning
|
||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0
|
||||
|
||||
srun ./bin/dist_v05
|
||||
srun ./out/distbitonic -q 24 --perf --validation
|
@ -44,15 +44,15 @@ using distValue_t = uint32_t;
|
||||
/*!
|
||||
* Session option for each invocation of the executable
|
||||
*/
|
||||
struct session_t {
|
||||
struct config_t {
|
||||
size_t arraySize{DEFAULT_DATA_SIZE}; //!<
|
||||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0
|
||||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds
|
||||
bool timing{false}; //!< Enable timing measurements and prints
|
||||
bool perf{false}; //!< Enable performance timing measurements and prints
|
||||
bool verbose{false}; //!< Flag to enable verbose output to stdout
|
||||
};
|
||||
|
||||
extern session_t session;
|
||||
extern config_t config;
|
||||
|
||||
|
||||
#endif /* CONFIG_H_ */
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
//#include <parallel/algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#if !defined DEBUG
|
||||
@ -20,8 +21,8 @@
|
||||
#include <cassert>
|
||||
|
||||
#include "utils.hpp"
|
||||
#include "config.h"
|
||||
|
||||
extern Timing TfullSort, Texchange, Tminmax, TelbowSort;
|
||||
|
||||
/*!
|
||||
* Enumerator for the different versions of the sorting method
|
||||
@ -159,11 +160,13 @@ bool isActive(mpi_id_t node, size_t nodes);
|
||||
*/
|
||||
template<typename RangeT>
|
||||
void fullSort(RangeT& data, bool ascending) noexcept {
|
||||
// Use introsort from stdlib++ here, unless ...
|
||||
if (ascending)
|
||||
// Use introsort from stdlib++ here, unless ... __gnu_parallel
|
||||
if (ascending) {
|
||||
std::sort(data.begin(), data.end(), std::less<>());
|
||||
else
|
||||
}
|
||||
else {
|
||||
std::sort(data.begin(), data.end(), std::greater<>());
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -270,7 +273,7 @@ void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
|
||||
template<typename ShadowedDataT>
|
||||
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
// Initially sort to create a half part of a bitonic sequence
|
||||
fullSort(data, ascending<SortMode::Bubbletonic>(rank, 0));
|
||||
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));
|
||||
|
||||
// Sort network (O(N) iterations)
|
||||
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
|
||||
@ -280,9 +283,9 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
if ( isActive(rank, Processes) &&
|
||||
isActive(part, Processes) ) {
|
||||
// Exchange with partner, keep min-or-max and sort - O(N)
|
||||
mpi.exchange(data.getActive(), data.getShadow(), part, step);
|
||||
minmax(data.getActive(), data.getShadow(), ks);
|
||||
elbowSort(data, ascending<SortMode::Bubbletonic>(rank, Processes));
|
||||
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, step);
|
||||
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
|
||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
|
||||
}
|
||||
}
|
||||
|
||||
@ -308,7 +311,7 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
template<typename ShadowedDataT>
|
||||
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
// Initially sort to create a half part of a bitonic sequence
|
||||
fullSort(data, ascending<SortMode::Bitonic>(rank, 0));
|
||||
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));
|
||||
|
||||
// Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
|
||||
auto p = static_cast<uint32_t>(std::log2(Processes));
|
||||
@ -319,11 +322,11 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
|
||||
auto part = partner<SortMode::Bitonic>(rank, step);
|
||||
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth);
|
||||
// Exchange with partner, keep min-or-max
|
||||
mpi.exchange(data.getActive(), data.getShadow(), part, (depth << 8) | step);
|
||||
minmax(data.getActive(), data.getShadow(), ks);
|
||||
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, (depth << 8) | step);
|
||||
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
|
||||
}
|
||||
// sort - O(N)
|
||||
elbowSort (data, ascending<SortMode::Bitonic>(rank, depth));
|
||||
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <chrono>
|
||||
#include <unistd.h>
|
||||
#include <mpi.h>
|
||||
//#include <functional>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
@ -286,7 +287,7 @@ struct Log {
|
||||
//! We provide logging via << operator
|
||||
template<typename T>
|
||||
Log &operator<<(T &&t) {
|
||||
if (session.verbose) {
|
||||
if (config.verbose) {
|
||||
if (line_) {
|
||||
std::cout << "[Log]: " << t;
|
||||
line_ = false;
|
||||
@ -299,7 +300,7 @@ struct Log {
|
||||
// overload for special end line handling
|
||||
Log &operator<<(Endl e) {
|
||||
(void) e;
|
||||
if (session.verbose) {
|
||||
if (config.verbose) {
|
||||
std::cout << '\n';
|
||||
line_ = true;
|
||||
}
|
||||
@ -317,39 +318,71 @@ extern Log logger;
|
||||
*/
|
||||
struct Timing {
|
||||
using Tpoint = std::chrono::steady_clock::time_point;
|
||||
using Tduration = std::chrono::microseconds;
|
||||
using microseconds = std::chrono::microseconds;
|
||||
using milliseconds = std::chrono::milliseconds;
|
||||
using seconds = std::chrono::seconds;
|
||||
|
||||
//! tool to mark the starting point
|
||||
Tpoint start() noexcept { return start_ = std::chrono::steady_clock::now(); }
|
||||
Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); }
|
||||
|
||||
//! tool to mark the ending point
|
||||
Tpoint stop() noexcept { return stop_ = std::chrono::steady_clock::now(); }
|
||||
Tpoint stop() noexcept {
|
||||
Tpoint now = std::chrono::steady_clock::now();
|
||||
duration_ += dt(now, mark_);
|
||||
return now;
|
||||
}
|
||||
|
||||
auto dt() noexcept {
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
|
||||
Tduration dt(Tpoint t2, Tpoint t1) noexcept {
|
||||
return std::chrono::duration_cast<Tduration>(t2 - t1);
|
||||
}
|
||||
|
||||
//! tool to print the time interval
|
||||
void print_dt(const char *what) noexcept {
|
||||
if (session.timing) {
|
||||
auto t = stop_ - start_;
|
||||
if (std::chrono::duration_cast<microseconds>(t).count() < 10000)
|
||||
std::cout << "[Timing]: " << what << ": "
|
||||
<< std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
|
||||
else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
|
||||
std::cout << "[Timing]: " << what << ": "
|
||||
<< std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
|
||||
else
|
||||
std::cout << "[Timing]: " << what << ": "
|
||||
<< std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
|
||||
}
|
||||
void print_duration(const char *what, mpi_id_t rank) noexcept {
|
||||
if (std::chrono::duration_cast<microseconds>(duration_).count() < 10000)
|
||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
|
||||
<< std::to_string(std::chrono::duration_cast<microseconds>(duration_).count()) << " [usec]\n";
|
||||
else if (std::chrono::duration_cast<milliseconds>(duration_).count() < 10000)
|
||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
|
||||
<< std::to_string(std::chrono::duration_cast<milliseconds>(duration_).count()) << " [msec]\n";
|
||||
else
|
||||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": "
|
||||
<< std::to_string(std::chrono::duration_cast<seconds>(duration_).count()) << " [sec]\n";
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
Tpoint start_;
|
||||
Tpoint stop_;
|
||||
Tpoint mark_{};
|
||||
Tduration duration_{};
|
||||
};
|
||||
|
||||
/*!
 * Utility macro that wraps a single call with a timing measurement.
 *
 * The expansion calls \c Tim.start(), then \c Func(args...), then \c Tim.stop(),
 * in that order. Any value returned by \c Func is discarded.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to exactly
 * one statement. This keeps it safe to use as the un-braced body of an if/else or
 * a loop. The caller supplies the terminating semicolon.
 *
 * @note \c Tim is expanded twice, so pass a plain object name, not an expression
 *       with side effects.
 *
 * @param Tim    The timer object to use. It must provide start() and stop().
 * @param Func   The callable to invoke (e.g. a function name or obj.method)
 * @param ...    The arguments to pass to \c Func
 */
#define timeCall(Tim, Func, ...)    \
   do {                             \
      (Tim).start();                \
      Func(__VA_ARGS__);            \
      (Tim).stop();                 \
   } while (0)
|
||||
|
||||
|
||||
//template <typename Ret, typename Func, typename... Args>
|
||||
//auto timeCall_r(Ret& ret, Func&& func, Args&&... args) {
|
||||
// Timing timer;
|
||||
//
|
||||
// timer.start();
|
||||
// ret = std::invoke(std::forward<Func>(func), std::forward<Args>(args)...);
|
||||
// timer.stop();
|
||||
//
|
||||
// return timer.dt();
|
||||
//}
|
||||
|
||||
#endif /* UTILS_HPP_ */
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "utils.hpp"
|
||||
#include "distsort.hpp"
|
||||
|
||||
Timing TfullSort, Texchange, Tminmax, TelbowSort;
|
||||
|
||||
bool isActive(mpi_id_t node, size_t nodes) {
|
||||
if (!((nodes > 0) &&
|
||||
|
@ -17,12 +17,12 @@
|
||||
#include "distsort.hpp"
|
||||
|
||||
|
||||
// Global session data
|
||||
session_t session;
|
||||
// Global config data
|
||||
config_t config;
|
||||
MPI_t<> mpi;
|
||||
distBuffer_t Data;
|
||||
Log logger;
|
||||
Timing timer;
|
||||
Timing Ttotal;
|
||||
|
||||
/*!
|
||||
* A small command line argument parser
|
||||
@ -37,23 +37,23 @@ bool get_options(int argc, char* argv[]){
|
||||
|
||||
if (arg == "-q" || arg == "--array-size") {
|
||||
if (i+1 < argc) {
|
||||
session.arraySize = 1 << atoi(argv[++i]);
|
||||
config.arraySize = 1 << atoi(argv[++i]);
|
||||
}
|
||||
else {
|
||||
status = false;
|
||||
}
|
||||
}
|
||||
else if (arg == "--validation") {
|
||||
session.validation = true;
|
||||
config.validation = true;
|
||||
}
|
||||
else if (arg == "--ndebug") {
|
||||
session.ndebug = true;
|
||||
config.ndebug = true;
|
||||
}
|
||||
else if (arg == "-t" || arg == "--timing") {
|
||||
session.timing = true;
|
||||
else if (arg == "--perf") {
|
||||
config.perf = true;
|
||||
}
|
||||
else if (arg == "-v" || arg == "--verbose") {
|
||||
session.verbose = true;
|
||||
config.verbose = true;
|
||||
}
|
||||
else if (arg == "-h" || arg == "--help") {
|
||||
std::cout << "distbitonic/distbubbletonic - A distributed bitonic sort\n\n";
|
||||
@ -65,6 +65,8 @@ bool get_options(int argc, char* argv[]){
|
||||
std::cout << "Options:\n\n";
|
||||
std::cout << " -q | --array-size <N>\n";
|
||||
std::cout << " Selects the array size according to size = 2^N\n\n";
|
||||
std::cout << " --par-sort\n";
|
||||
std::cout << " Request a parallel full sorting algorithm\n\n";
|
||||
std::cout << " --validation\n";
|
||||
std::cout << " Request a full validation at the end, performed by process rank 0\n\n";
|
||||
std::cout << " --ndebug\n";
|
||||
@ -164,11 +166,11 @@ int main(int argc, char* argv[]) try {
|
||||
#else
|
||||
volatile bool sleep_wait = true;
|
||||
#endif
|
||||
while (sleep_wait && !session.ndebug)
|
||||
while (sleep_wait && !config.ndebug)
|
||||
sleep(1);
|
||||
#endif
|
||||
|
||||
logger << "Initialize local array of " << session.arraySize << " elements" << logger.endl;
|
||||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
|
||||
std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max]
|
||||
std::mt19937 gen(rd());
|
||||
std::uniform_int_distribution<distValue_t > dis(
|
||||
@ -176,24 +178,31 @@ int main(int argc, char* argv[]) try {
|
||||
std::numeric_limits<distValue_t>::max()
|
||||
);
|
||||
// Fill vector
|
||||
Data.resize(session.arraySize);
|
||||
Data.resize(config.arraySize);
|
||||
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
|
||||
|
||||
if (mpi.rank() == 0)
|
||||
logger << "Starting distributed sorting ... ";
|
||||
timer.start();
|
||||
Ttotal.start();
|
||||
#if CODE_VERSION == BUBBLETONIC
|
||||
distBubbletonic(Data, mpi.size(), mpi.rank());
|
||||
#else
|
||||
distBitonic (Data, mpi.size(), mpi.rank());
|
||||
#endif
|
||||
timer.stop();
|
||||
Ttotal.stop();
|
||||
if (mpi.rank() == 0)
|
||||
logger << " Done." << logger.endl;
|
||||
std::string timeMsg = "rank " + std::to_string(mpi.rank());
|
||||
timer.print_dt(timeMsg.c_str());
|
||||
|
||||
if (session.validation) {
|
||||
|
||||
if (config.perf) {
|
||||
Ttotal.print_duration("Total ", mpi.rank());
|
||||
TfullSort.print_duration("Full-Sort ", mpi.rank());
|
||||
Texchange.print_duration("Exchange ", mpi.rank());
|
||||
Tminmax.print_duration("Min-Max ", mpi.rank());
|
||||
TelbowSort.print_duration("Elbow-Sort", mpi.rank());
|
||||
}
|
||||
if (config.validation) {
|
||||
// If requested, we have the chance to fail!
|
||||
if (mpi.rank() == 0)
|
||||
std::cout << "Results validation ...";
|
||||
|
Loading…
x
Reference in New Issue
Block a user