@@ -0,0 +1,23 @@ | |||
# project | |||
bin/ | |||
out/ | |||
mat/ | |||
mtx/ | |||
.unused/ | |||
various/ | |||
# hpc | |||
# IDEs | |||
.idea/ | |||
.clangd | |||
# eclipse | |||
.project | |||
.cproject | |||
.settings/ | |||
.vs/ | |||
.vscode/ | |||
@@ -0,0 +1,205 @@ | |||
# | |||
# PDS HW3 Makefile | |||
# | |||
# Copyright (C) 2025 Christos Choutouridis <christos@choutouridis.net> | |||
# | |||
# This program is free software: you can redistribute it and/or modify | |||
# it under the terms of the GNU Lesser General Public License as | |||
# published by the Free Software Foundation, either version 3 | |||
# of the License, or (at your option) any later version. | |||
# | |||
# This program is distributed in the hope that it will be useful, | |||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
# GNU Lesser General Public License for more details. | |||
# | |||
# You should have received a copy of the GNU Lesser General Public License | |||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |||
# | |||
# ============== Project settings ============== | |||
# Project's name | |||
PROJECT := PDS_homework_3 | |||
# Excecutable's name | |||
TARGET := bitonic | |||
# Source directories list(space seperated). Makefile-relative path, UNDER current directory. | |||
SRC_DIR_LIST := src test test/gtest | |||
# Include directories list(space seperated). Makefile-relative path. | |||
INC_DIR_LIST := src \ | |||
test \ | |||
test/gtest/ \ | |||
# Exclude files list(space seperated). Filenames only. | |||
# EXC_FILE_LIST := bad.cpp old.cpp | |||
# Build directories | |||
BUILD_DIR := bin | |||
OBJ_DIR := $(BUILD_DIR)/obj | |||
DEP_DIR := $(BUILD_DIR)/.dep | |||
OUTPUT_DIR := out | |||
# ========== Compiler settings ========== | |||
# Compiler flags for debug and release | |||
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11 -fopenmp | |||
REL_CFLAGS := -Wall -Wextra -O3 -std=c11 -fopenmp | |||
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17 -fopenmp | |||
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17 -fopenmp | |||
# Pre-defines | |||
# PRE_DEFS := MYCAB=1729 SUPER_MODE | |||
PRE_DEFS := | |||
# ============== Linker settings ============== | |||
# Linker flags (example: -pthread -lm) | |||
LDFLAGS := -pthread | |||
# Map output file | |||
MAP_FILE := output.map | |||
MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE) | |||
# ============== Docker settings ============== | |||
# We need: | |||
# - Bind the entire project directory(the dir that icludes all the code) as volume. | |||
# - In docker instance, change to working directory(where the makefile is). | |||
DOCKER_VOL_DIR := $(shell pwd) | |||
DOCKER_WRK_DIR := | |||
DOCKER_RUN := docker run --rm | |||
DOCKER_FLAGS := -v $(DOCKER_VOL_DIR):/usr/src/$(PROJECT) -w /usr/src/$(PROJECT)/$(DOCKER_WRK_DIR) | |||
# docker invoke mechanism (edit with care) | |||
# note: | |||
# Here, `DOCKER` variable is empty. Rules can assign `DOCKER := DOCKER_CMD` when docker | |||
# functionality is needed. | |||
DOCKER_CMD = $(DOCKER_RUN) $(DOCKER_FLAGS) $(IMAGE) | |||
DOCKER := | |||
# ============== Tool selection ============== | |||
# compiler and compiler flags. | |||
CSIZE := size | |||
CFLAGS := $(DEB_CFLAGS) | |||
CXXFLAGS := $(DEB_CXXFLAGS) | |||
CXX := g++ #mpic++ | |||
CC := gcc #mpicc | |||
# | |||
# =========== Main body and Patterns =========== | |||
# | |||
#ifeq ($(OS), Windows_NT) | |||
# TARGET := $(TARGET).exe | |||
#endif | |||
INC := $(foreach dir,$(INC_DIR_LIST),-I$(dir)) | |||
DEF := $(foreach def,$(PRE_DEFS),-D$(def)) | |||
EXC := $(foreach fil,$(EXC_FILE_LIST), \ | |||
$(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/$(fil))) \ | |||
) | |||
# source files. object and dependencies list | |||
# recursive search into current and source directories | |||
SRC := $(wildcard *.cpp) | |||
SRC += $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/*.cpp)) | |||
SRC += $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/**/*.cpp)) | |||
SRC := $(filter-out $(EXC),$(SRC)) | |||
#SRC := $(abspath $(SRC)) | |||
OBJ := $(foreach file,$(SRC:%.cpp=%.o),$(OBJ_DIR)/$(file)) | |||
DEP := $(foreach file,$(SRC:%.cpp=%.d),$(DEP_DIR)/$(file)) | |||
# Make Dependencies pattern. | |||
# This little trick enables recompilation only when dependencies change | |||
# and it does so for changes both in source AND header files ;) | |||
# | |||
# It is based on Tom Tromey's method. | |||
# | |||
# Invoke cpp to create makefile rules with dependencies for each source file | |||
$(DEP_DIR)/%.d: %.c | |||
@mkdir -p $(@D) | |||
@$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $< | |||
# c file objects depent on .c AND dependency files, which have an empty recipe | |||
$(OBJ_DIR)/%.o: %.c $(DEP_DIR)/%.d | |||
@mkdir -p $(@D) | |||
@$(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $< | |||
$(DEP_DIR)/%.d: %.cpp | |||
@mkdir -p $(@D) | |||
@$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $< | |||
# cpp file objects depent on .cpp AND dependency files, which have an empty recipe | |||
$(OBJ_DIR)/%.o: %.cpp $(DEP_DIR)/%.d | |||
@mkdir -p $(@D) | |||
@$(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $< | |||
# empty recipe for dependency files. This prevents make errors | |||
$(DEP): | |||
# now include all dependencies | |||
# After all they are makefile dependency rules ;) | |||
include $(wildcard $(DEP)) | |||
# main target rule | |||
$(BUILD_DIR)/$(TARGET): $(OBJ) | |||
@mkdir -p $(@D) | |||
@echo Linking to target: $(TARGET) | |||
@echo $(DOCKER) $(CXX) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET) | |||
@$(DOCKER) $(CXX) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET) | |||
@echo | |||
@echo Print size information | |||
@$(CSIZE) $(@D)/$(TARGET) | |||
@echo Done | |||
# | |||
# ================ Default local build rules ================= | |||
# example: | |||
# make debug | |||
.DEFAULT_GOAL := all | |||
.PHONY: clean | |||
clean: | |||
@echo Cleaning build directories | |||
@rm -rf $(OBJ_DIR) | |||
@rm -rf $(DEP_DIR) | |||
@rm -rf $(BUILD_DIR) | |||
debug: CFLAGS := $(DEB_CFLAGS) | |||
debug: $(BUILD_DIR)/$(TARGET) | |||
release: CFLAGS := $(REL_CFLAGS) | |||
release: $(BUILD_DIR)/$(TARGET) | |||
# | |||
# ================ Build rules ================= | |||
# | |||
bitonic_v0: CC := nvcc | |||
bitonic_v0: CXX := nvcc | |||
bitonic_v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=V0 | |||
bitonic_v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=V0 | |||
bitonic_v0: TARGET := bitonic_v0 | |||
bitonic_v0: $(BUILD_DIR)/$(TARGET) | |||
@mkdir -p $(OUTPUT_DIR) | |||
cp $(BUILD_DIR)/$(TARGET) $(OUTPUT_DIR)/$(TARGET) | |||
hpc-build: | |||
make clean | |||
make distbubbletonic | |||
make clean | |||
make distbitonic | |||
make clean | |||
make tests | |||
all: debug bitonic_v0 | |||
# Note: | |||
# Add a gcc based make rule here in order for clangd to successfully scan the project files. | |||
# Otherwise we do not need the gcc build. | |||
@@ -0,0 +1,74 @@ | |||
/*! | |||
* \file | |||
* \brief Build configuration file. | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#ifndef CONFIG_H_ | |||
#define CONFIG_H_ | |||
#include <cstdint> | |||
/* | |||
* Versioning: | |||
* - RC1: | |||
*/ | |||
static constexpr char version[] = "0.0"; | |||
/* | |||
* Defines for different version of the exercise | |||
*/ | |||
#define V0 (0) | |||
#define V1 (1) | |||
#define V2 (2) | |||
// Fail-safe version selection | |||
#if !defined CODE_VERSION | |||
#define CODE_VERSION V0 | |||
#endif | |||
// Default Data size (in case -q <N> is not present) | |||
static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16; | |||
/*! | |||
* Value type selection | |||
* | |||
* We support the following compiler types or the <cstdint> that translate to them: | |||
* char - unsigned char | |||
* short - unsigned short | |||
* int - unsigned int | |||
* long - unsigned long | |||
* long long - unsigned long long | |||
* float | |||
* double | |||
*/ | |||
using distValue_t = uint32_t; | |||
/*! | |||
* Session option for each invocation of the executable. | |||
* | |||
* @note | |||
* The values of the members are set from the command line. | |||
*/ | |||
struct config_t { | |||
size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort. | |||
bool exchangeOpt{false}; //!< Flag to request the exchange optimization | |||
size_t pipeline{1UL}; //!< Pipeline stages (1 to disable) | |||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. | |||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds. | |||
size_t perf{1}; //!< Enable performance timing measurements and prints and repeat | |||
//!< the sorting <perf> times. | |||
bool verbose{false}; //!< Flag to enable verbose output to stdout. | |||
}; | |||
/* | |||
* Exported data types | |||
*/ | |||
extern config_t config; | |||
#endif /* CONFIG_H_ */ |
@@ -0,0 +1,33 @@ | |||
/*! | |||
* \file | |||
* \brief Distributed sort implementation | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#include "utils.hpp" | |||
#include "distsort.hpp" | |||
bool isActive(mpi_id_t node, size_t nodes) { | |||
if (!((nodes > 0) && | |||
(nodes <= static_cast<size_t>(std::numeric_limits<mpi_id_t>::max())) )) | |||
throw std::runtime_error("(isActive) Non-acceptable value of MPI Nodes\n"); | |||
// ^ Assert that mpi_id_t can hold nodes, and thus we can cast without data loss! | |||
return (node >= 0) && (node < static_cast<mpi_id_t>(nodes)); | |||
} | |||
size_t tagGenerator(size_t depth, size_t step, size_t stage) { | |||
auto stage_bits = static_cast<uint32_t>(std::log2(MAX_PIPELINE_SIZE)); | |||
auto step_bits = static_cast<uint32_t>(std::log2(MAX_MPI_SIZE)); | |||
uint32_t stat_bit = 1UL; | |||
// ^ We use MPI_SIZE room for steps to fit the bubbletonic version | |||
// [ depth | step | stage+stats ] | |||
size_t tag = stage | |||
| (step << (stage_bits + stat_bit)) | |||
| (depth << (stage_bits + step_bits + stat_bit)); | |||
return tag; | |||
} |
@@ -0,0 +1,454 @@ | |||
/*! | |||
* \file | |||
* \brief Distributed sort implementation header | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#ifndef DISTBITONIC_H_ | |||
#define DISTBITONIC_H_ | |||
#include <vector> | |||
#include <algorithm> | |||
#include <parallel/algorithm> | |||
#include <cmath> | |||
#include <cstdint> | |||
#if !defined DEBUG | |||
#define NDEBUG | |||
#endif | |||
#include <cassert> | |||
#include "utils.hpp" | |||
/* | |||
* Exported timers | |||
*/ | |||
extern Timing Timer_total; | |||
extern Timing Timer_fullSort; | |||
extern Timing Timer_exchange; | |||
extern Timing Timer_minmax; | |||
extern Timing Timer_elbowSort; | |||
/*! | |||
* Enumerator for the different versions of the sorting method | |||
*/ | |||
enum class SortMode { | |||
Bubbletonic, //!< The v0.5 of the algorithm where we use a bubble-sort like approach | |||
Bitonic //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach | |||
}; | |||
/* | |||
* ============================== Sort utilities ============================== | |||
*/ | |||
/*! | |||
* The primary function template of ascending(). It is DISABLED since , it is explicitly specialized | |||
* for each of the \c SortMode | |||
*/ | |||
template <SortMode Mode> inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete; | |||
/*! | |||
* Returns the ascending or descending configuration of the node's sequence based on | |||
* the current node (MPI process) and the depth of the sorting network | |||
* | |||
* @param node [mpi_id_t] The current node (MPI process) | |||
* @return [bool] True if we need ascending configuration, false otherwise | |||
*/ | |||
template <> inline | |||
bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept { | |||
return (node % 2) == 0; | |||
} | |||
/*! | |||
* Returns the ascending or descending configuration of the node's sequence based on | |||
* the current node (MPI process) and the depth of the sorting network | |||
* | |||
* @param node [mpi_id_t] The current node (MPI process) | |||
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network) | |||
* @return [bool] True if we need ascending configuration, false otherwise | |||
*/ | |||
template <> inline | |||
bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept { | |||
return !(node & (1 << depth)); | |||
} | |||
/*! | |||
* The primary function template of partner(). It is DISABLED since , it is explicitly specialized | |||
* for each of the \c SortMode | |||
*/ | |||
template <SortMode Mode> inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete; | |||
/*! | |||
* Returns the node's partner for data exchange during the sorting network iterations | |||
* of Bubbletonic | |||
* | |||
* @param node [mpi_id_t] The current node | |||
* @param step [size_t] The step of the sorting network | |||
* @return [mpi_id_t] The node id of the partner for data exchange | |||
*/ | |||
template <> inline | |||
mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept { | |||
//return (node % 2 == step % 2) ? node + 1 : node - 1; | |||
return (((node+step) % 2) == 0) ? node + 1 : node - 1; | |||
} | |||
/*! | |||
* Returns the node's partner for data exchange during the sorting network iterations | |||
* of Bitonic | |||
* | |||
* @param node [mpi_id_t] The current node | |||
* @param step [size_t] The step of the sorting network | |||
* @return [mpi_id_t] The node id of the partner for data exchange | |||
*/ | |||
template <> inline | |||
mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept { | |||
return (node ^ (1 << step)); | |||
} | |||
/*! | |||
* The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized | |||
* for each of the \c SortMode | |||
*/ | |||
template<SortMode Mode> inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) = delete; | |||
/*! | |||
* Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange. | |||
* | |||
* @param node [mpi_id_t] The node for which we check | |||
* @param partner [mpi_id_t] The partner of the data exchange | |||
* @return [bool] True if the node should keep the small values, false otherwise | |||
*/ | |||
template <> inline | |||
bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) { | |||
if (node == partner) | |||
throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); | |||
return (node < partner); | |||
} | |||
/*! | |||
* Predicate to check if a node keeps the small numbers during the bitonic sort network exchange. | |||
* | |||
* @param node [mpi_id_t] The node for which we check | |||
* @param partner [mpi_id_t] The partner of the data exchange | |||
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network) | |||
* @return [bool] True if the node should keep the small values, false otherwise | |||
*/ | |||
template <> inline | |||
bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) { | |||
if (node == partner) | |||
throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); | |||
return ascending<SortMode::Bitonic>(node, depth) == (node < partner); | |||
} | |||
/*! | |||
* Predicate to check if the node is active in the current iteration of the bubbletonic | |||
* sort exchange. | |||
* | |||
* @param node [mpi_id_t] The node to check | |||
* @param nodes [size_t] The total number of nodes | |||
* @return [bool] True if the node is active, false otherwise | |||
*/ | |||
bool isActive(mpi_id_t node, size_t nodes); | |||
/* | |||
* ============================== Data utilities ============================== | |||
*/ | |||
/*! | |||
* Sort a range using the build-in O(Nlog(N)) algorithm | |||
* | |||
* @tparam RangeT A range type with random access iterator | |||
* | |||
* @param data [RangeT] The data to be sorted | |||
* @param ascending [bool] Flag to indicate the sorting order | |||
*/ | |||
template<typename RangeT> | |||
void fullSort(RangeT& data, bool ascending) noexcept { | |||
// Use introsort from stdlib++ here, unless ... __gnu_parallel | |||
if (ascending) { | |||
__gnu_parallel::sort(data.begin(), data.end(), std::less<>()); | |||
} | |||
else { | |||
__gnu_parallel::sort(data.begin(), data.end(), std::greater<>()); | |||
} | |||
if (config.exchangeOpt) | |||
updateMinMax(localStat, data); | |||
} | |||
/*! | |||
* Core functionality of sort for shadowed buffer types using | |||
* the "elbow sort" algorithm. | |||
* | |||
* @note: | |||
* This algorithm can not work "in place". | |||
* We use the active buffer as source and the shadow as target. | |||
* At the end we switch which buffer is active and which is the shadow. | |||
* @note | |||
* This is the core functionality. Use the elbowSort() function instead | |||
* | |||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | |||
* @tparam CompT A Comparison type for binary operation comparisons | |||
* | |||
* @param data [ShadowedDataT] The data to sort | |||
* @param ascending [bool] Flag to indicate the sorting order | |||
* @param comp [CompT] The binary operator object | |||
*/ | |||
template<typename ShadowedDataT, typename CompT> | |||
void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept { | |||
auto& active = data.getActive(); // Get the source vector (the data to sort) | |||
auto& shadow = data.getShadow(); // Get the target vector (the sorted data) | |||
size_t N = data.size(); // The total size is the same or both vectors | |||
size_t left = std::distance( | |||
active.begin(), | |||
(ascending) ? | |||
std::min_element(active.begin(), active.end()) : | |||
std::max_element(active.begin(), active.end()) | |||
); // start 'left' from elbow of the bitonic | |||
size_t right = (left == N-1) ? 0 : left + 1; | |||
// Walk in opposite directions from elbow and insert-sort to target vector | |||
for (size_t i = 0 ; i<N ; ++i) { | |||
if (comp(active[left], active[right])) { | |||
shadow[i] = active[left]; | |||
left = (left == 0) ? N-1 : left -1; // cycle decrease | |||
} | |||
else { | |||
shadow[i] = active[right]; | |||
right = (right + 1) % N; // cycle increase | |||
} | |||
} | |||
data.switch_active(); // Switch active-shadow buffers | |||
} | |||
/*! | |||
* Sort a shadowed buffer using the "elbow sort" algorithm. | |||
* | |||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | |||
* | |||
* @param data [ShadowedDataT] The data to sort | |||
* @param ascending [bool] Flag to indicate the sorting order | |||
*/ | |||
template<typename ShadowedDataT> | |||
void elbowSort(ShadowedDataT& data, bool ascending) noexcept { | |||
if (ascending) | |||
elbowSortCore(data, ascending, std::less<>()); | |||
else | |||
elbowSortCore(data, ascending, std::greater<>()); | |||
} | |||
/*! | |||
* Predicate for exchange optimization. Returns true only if an exchange between partners is needed. | |||
* In order to do that we exchange min and max statistics of the partner's data. | |||
* | |||
* @tparam StatT Statistics data type (for min-max) | |||
* | |||
* @param lstat [const StatT] Reference to the local statistic data | |||
* @param rstat [StatT] Reference to the remote statistic data to fill | |||
* @param part [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the exchange of stats | |||
* @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values | |||
* @return True if we need data exchange, false otherwise | |||
*/ | |||
template<typename StatT> | |||
bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) { | |||
timeCall(Timer_exchange, mpi.exchange_it, lstat, rstat, part, tag); | |||
return (keepSmall) ? | |||
rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max | |||
: lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max | |||
} | |||
/*! | |||
* Update stats utility | |||
* | |||
* @tparam RangeT A range type with random access iterator | |||
* @tparam StatT Statistics data type (for min-max) | |||
* | |||
* @param stat [StatT] Reference to the statistic data to update | |||
* @param data [const RangeT] Reference to the sequence to extract stats from | |||
*/ | |||
template<typename RangeT, typename StatT> | |||
void updateMinMax(StatT& stat, const RangeT& data) noexcept { | |||
auto [min, max] = std::minmax_element(data.begin(), data.end()); | |||
stat.min = *min; | |||
stat.max = *max; | |||
} | |||
/*! | |||
* Takes two sequences and selects either the larger or the smaller items | |||
* in one-to-one comparison between them. If the initial sequences are bitonic, then | |||
* the result is a bitonic sequence too! | |||
* | |||
* @tparam ValueT The underlying type of the sequences | |||
* | |||
* @param local [ValueT*] Pointer to the local sequence | |||
* @param remote [const ValueT*] Pointer to the remote sequence (copied locally by MPI) | |||
* @param count [size_t] The number of items to process | |||
* @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence | |||
*/ | |||
template<typename ValueT> | |||
void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSmall) noexcept { | |||
std::transform( | |||
local, local + count, | |||
remote, | |||
local, | |||
[&keepSmall](const ValueT& a, const ValueT& b){ | |||
return (keepSmall) ? std::min(a, b) : std::max(a, b); | |||
}); | |||
} | |||
/* | |||
* ============================== Sort algorithms ============================== | |||
*/ | |||
/*! | |||
* A small tag generator tool to provide consistent encoding to tag communication | |||
* | |||
* @param depth The current algorithmic depth[bitonic] of the communication, if any | |||
* @param step The current step on the current depth | |||
* @param stage The stage of the pipeline. | |||
* @return The tag to use. | |||
* | |||
* @note | |||
* In case we call this function outside of the pipeline loop, we can ommit | |||
* @c stage argument and use the return value as starting tag for every communication | |||
* of the pipeline loop. We need to increase the tags for each communication of | |||
* the pipeline loop though! | |||
*/ | |||
size_t tagGenerator(size_t depth, size_t step, size_t stage = 0); | |||
/*! | |||
* An exchange functionality to support both Bubbletonic and Bitonic sort algorithms. | |||
* | |||
* @note | |||
* In case of pipeline request it switches to non-blocking MPI communication for | |||
* pipelining min-max process with mpi data exchange | |||
* | |||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | |||
* | |||
* @param data [ShadowedDataT&] Reference to the data to exchange | |||
* @param partner [mpi_id_t] The partner for the exchange | |||
* @param keepSmall [bool] Flag to indicate if we keep the small values | |||
* @param tag [int] The init tag to use for the loop. | |||
* | |||
* @note | |||
* The @c tag is increased inside the pipeline loop for each different data exchange | |||
*/ | |||
template<typename ShadowedDataT> | |||
void exchange(ShadowedDataT& data, mpi_id_t partner, bool keepSmall, int tag) { | |||
using Value_t = typename ShadowedDataT::value_type; | |||
// Init counters and pointers | |||
Value_t* active = data.getActive().data(); | |||
Value_t* shadow = data.getShadow().data(); | |||
size_t count = data.size() / config.pipeline; | |||
if (config.pipeline > 1) { | |||
// Pipeline case - use async MPI | |||
Timer_exchange.start(); | |||
mpi.exchange_start(active, shadow, count, partner, tag); | |||
for (size_t stage = 0; stage < config.pipeline; active += count, shadow += count) { | |||
// Wait previous chunk | |||
mpi.exchange_wait(); | |||
Timer_exchange.stop(); | |||
if (++stage < config.pipeline) { | |||
// Start next chunk if there is a next one | |||
Timer_exchange.start(); | |||
mpi.exchange_start(active + count, shadow + count, count, partner, ++tag); | |||
} | |||
// process the arrived data | |||
timeCall(Timer_minmax, keepMinOrMax, active, shadow, count, keepSmall); | |||
} | |||
} | |||
else { | |||
// No pipeline - use blocking MPI | |||
timeCall(Timer_exchange, mpi.exchange, active, shadow, count, partner, tag); | |||
timeCall(Timer_minmax, keepMinOrMax, active, shadow, count, keepSmall); | |||
} | |||
if (config.exchangeOpt) | |||
updateMinMax(localStat, data); | |||
} | |||
/*! | |||
* A distributed version of the Bubbletonic sort algorithm. | |||
* | |||
* @note | |||
* Each MPI process should run an instance of this function. | |||
* | |||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | |||
* | |||
* @param data [ShadowedDataT] The local to MPI process data to sort | |||
* @param Processes [mpi_id_t] The total number of MPI processes | |||
* @param rank [mpi_id_t] The current process id | |||
*/ | |||
template<typename ShadowedDataT> | |||
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
// Initially sort to create a half part of a bitonic sequence | |||
timeCall(Timer_fullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0)); | |||
// Sort network (O(N) iterations) | |||
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) { | |||
// Find out exchange configuration | |||
auto part = partner<SortMode::Bubbletonic>(rank, step); | |||
auto ks = keepSmall<SortMode::Bubbletonic>(rank, part, Processes); | |||
if ( isActive(rank, Processes) && | |||
isActive(part, Processes) ) { | |||
// Exchange with partner, keep nim-or-max and sort - O(N) | |||
int tag = static_cast<int>(tagGenerator(0, step)); | |||
if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) { | |||
exchange(data, part, ks, tag); | |||
timeCall(Timer_elbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes)); | |||
} | |||
} | |||
} | |||
// Invert if the node was descending. | |||
if (!ascending<SortMode::Bubbletonic>(rank, 0)) { | |||
elbowSort(data, true); | |||
} | |||
} | |||
/*! | |||
* A distributed version of the Bitonic sort algorithm. | |||
* | |||
* @note | |||
* Each MPI process should run an instance of this function. | |||
* | |||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | |||
* | |||
* @param data [ShadowedDataT] The local to MPI process data to sort | |||
* @param Processes [mpi_id_t] The total number of MPI processes | |||
* @param rank [mpi_id_t] The current process id | |||
*/ | |||
template<typename ShadowedDataT> | |||
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
// Initially sort to create a half part of a bitonic sequence | |||
timeCall(Timer_fullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0)); | |||
// Run through sort network using elbow-sort ( O(LogN * LogN) iterations ) | |||
auto p = static_cast<uint32_t>(std::log2(Processes)); | |||
for (size_t depth = 1; depth <= p; ++depth) { | |||
for (size_t step = depth; step > 0;) { | |||
--step; | |||
// Find out exchange configuration | |||
auto part = partner<SortMode::Bitonic>(rank, step); | |||
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth); | |||
// Exchange with partner, keep nim-or-max | |||
int tag = static_cast<int>(tagGenerator(depth, step)); | |||
if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) { | |||
exchange(data, part, ks, tag); | |||
} | |||
} | |||
// sort - O(N) | |||
timeCall(Timer_elbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth)); | |||
} | |||
} | |||
#endif //DISTBITONIC_H_ |
@@ -0,0 +1,318 @@ | |||
/*! | |||
* \file | |||
* \brief Main application file for PDS HW3 (CUDA) | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#include <exception> | |||
#include <iostream> | |||
#include <algorithm> | |||
#include <random> | |||
#include "utils.hpp" | |||
#include "config.h" | |||
#include "distsort.hpp" | |||
// Global session data | |||
config_t config; | |||
MPI_t<> mpi; | |||
distBuffer_t Data; | |||
Log logger; | |||
distStat_t localStat, remoteStat; | |||
// Mersenne seeded from hw if possible. range: [type_min, type_max] | |||
std::random_device rd; | |||
std::mt19937 gen(rd()); | |||
//! Performance timers for each one of the "costly" functions | |||
Timing Timer_total; | |||
Timing Timer_fullSort; | |||
Timing Timer_exchange; | |||
Timing Timer_minmax; | |||
Timing Timer_elbowSort; | |||
//! Init timing objects for extra rounds | |||
void measurements_init() { | |||
if (config.perf > 1) { | |||
Timer_total.init(config.perf); | |||
Timer_fullSort.init(config.perf); | |||
Timer_exchange.init(config.perf); | |||
Timer_minmax.init(config.perf); | |||
Timer_elbowSort.init(config.perf); | |||
} | |||
} | |||
//! iterate ot the next round of measurements for all measurement objects | |||
void measurements_next() { | |||
if (config.perf > 1) { | |||
Timer_total.next(); | |||
Timer_fullSort.next(); | |||
Timer_exchange.next(); | |||
Timer_minmax.next(); | |||
Timer_elbowSort.next(); | |||
} | |||
} | |||
/*! | |||
* A small command line argument parser | |||
* \return The status of the operation | |||
*/ | |||
bool get_options(int argc, char* argv[]){ | |||
bool status =true; | |||
// iterate over the passed arguments | |||
for (int i=1 ; i<argc ; ++i) { | |||
std::string arg(argv[i]); // get current argument | |||
if (arg == "-q" || arg == "--array-size") { | |||
if (i+1 < argc) { | |||
config.arraySize = 1 << atoi(argv[++i]); | |||
} | |||
else { | |||
status = false; | |||
} | |||
} | |||
else if (arg == "-e" || arg == "--exchange-opt") { | |||
config.exchangeOpt = true; | |||
} | |||
else if (arg == "--pipeline") { | |||
if (i+1 < argc) { | |||
auto stages = atoi(argv[++i]); | |||
if (isPowerOfTwo(stages) && stages <= static_cast<int>(MAX_PIPELINE_SIZE)) | |||
config.pipeline = stages; | |||
else | |||
status = false; | |||
} | |||
else { | |||
status = false; | |||
} | |||
} | |||
else if (arg == "--validation") { | |||
config.validation = true; | |||
} | |||
else if (arg == "--perf") { | |||
if (i+1 < argc) { | |||
config.perf = atoi(argv[++i]); | |||
} | |||
else { | |||
status = false; | |||
} | |||
} | |||
else if (arg == "--ndebug") { | |||
config.ndebug = true; | |||
} | |||
else if (arg == "-v" || arg == "--verbose") { | |||
config.verbose = true; | |||
} | |||
else if (arg == "--version") { | |||
std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n"; | |||
std::cout << "version: " << version << "\n\n"; | |||
exit(0); | |||
} | |||
else if (arg == "-h" || arg == "--help") { | |||
std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n"; | |||
std::cout << " distbitonic -q <N> [-e] [-p | --pipeline <N>] [--validation] [--perf <N>] [--ndebug] [-v]\n"; | |||
std::cout << " distbitonic -h\n"; | |||
std::cout << " distbubbletonic -q <N> [-e] [-p | --pipeline <N>] [--validation] [--perf <N> ] [--ndebug] [-v]\n"; | |||
std::cout << " distbubbletonic -h\n"; | |||
std::cout << '\n'; | |||
std::cout << "Options:\n\n"; | |||
std::cout << " -q | --array-size <N>\n"; | |||
std::cout << " Selects the array size according to size = 2^N\n\n"; | |||
std::cout << " -e | --exchange-opt\n"; | |||
std::cout << " Request an MPI data exchange optimization \n\n"; | |||
std::cout << " -p <N> | --pipeline <N>\n"; | |||
std::cout << " Request a pipeline of <N> stages for exchange-minmax\n"; | |||
std::cout << " N must be power of 2 up to " << MAX_PIPELINE_SIZE << "\n\n"; | |||
std::cout << " --validation\n"; | |||
std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; | |||
std::cout << " --perf <N> \n"; | |||
std::cout << " Enable performance timing measurements and prints, and repeat\n"; | |||
std::cout << " the sorting <N> times.\n\n"; | |||
std::cout << " --ndebug\n"; | |||
std::cout << " Skip debug breakpoint when on debug build.\n\n"; | |||
std::cout << " -v | --verbose\n"; | |||
std::cout << " Request a more verbose output to stdout.\n\n"; | |||
std::cout << " -h | --help\n"; | |||
std::cout << " Prints this and exit.\n\n"; | |||
std::cout << " --version\n"; | |||
std::cout << " Prints version and exit.\n\n"; | |||
std::cout << "Examples:\n\n"; | |||
std::cout << " mpirun -np 4 distbitonic -q 24\n"; | |||
std::cout << " Runs distbitonic in 4 MPI processes with 2^24 array points each\n\n"; | |||
std::cout << " mpirun -np 16 distbubbletonic -q 20\n"; | |||
std::cout << " Runs distbubbletonic in 16 MPI processes with 2^20 array points each\n\n"; | |||
exit(0); | |||
} | |||
else { // parse error | |||
std::cout << "Invocation error. Try -h for details.\n"; | |||
status = false; | |||
} | |||
} | |||
return status; | |||
} | |||
/*! | |||
* A simple validator for the entire distributed process | |||
* | |||
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator. | |||
* | |||
* @param data [ShadowedDataT] The local to MPI process | |||
* @param Processes [mpi_id_t] The total number of MPI processes | |||
* @param rank [mpi_id_t] The current process id | |||
* | |||
* @return [bool] True if all are sorted and in total ascending order | |||
*/ | |||
template<typename ShadowedDataT> | |||
bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||
using value_t = typename ShadowedDataT::value_type; | |||
bool ret = true; // Have faith! | |||
// Local results | |||
value_t lmin = data.front(); | |||
value_t lmax = data.back(); | |||
value_t lsort = static_cast<value_t>(std::is_sorted(data.begin(), data.end())); | |||
// Gather min/max/sort to rank 0 | |||
std::vector<value_t> mins(Processes); | |||
std::vector<value_t> maxes(Processes); | |||
std::vector<value_t> sorts(Processes); | |||
MPI_Datatype datatype = MPI_TypeMapper<value_t>::getType(); | |||
MPI_Gather(&lmin, 1, datatype, mins.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||
MPI_Gather(&lmax, 1, datatype, maxes.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||
MPI_Gather(&lsort, 1, datatype, sorts.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||
// Check all results | |||
if (rank == 0) { | |||
for (mpi_id_t r = 1; r < Processes; ++r) { | |||
if (sorts[r] == 0) | |||
ret = false; | |||
if (maxes[r - 1] > mins[r]) | |||
ret = false; | |||
} | |||
} | |||
return ret; | |||
} | |||
/*! | |||
* Initializes the environment, must called from each process | |||
* | |||
* @param argc [int*] POINTER to main's argc argument | |||
* @param argv [char***] POINTER to main's argv argument | |||
*/ | |||
void init(int* argc, char*** argv) { | |||
// try to read command line | |||
if (!get_options(*argc, *argv)) | |||
exit(1); | |||
// Initialize MPI environment | |||
mpi.init(argc, argv); | |||
logger << "MPI environment initialized." << " Rank: " << mpi.rank() << " Size: " << mpi.size() | |||
<< logger.endl; | |||
#if defined DEBUG | |||
#if defined TESTING | |||
/* | |||
* In case of a debug build we will wait here until sleep_wait | |||
* will reset via debugger. In order to do that the user must attach | |||
* debugger to all processes. For example: | |||
* $> mpirun -np 2 ./<program path> | |||
* $> ps aux | grep <program> | |||
* $> gdb <program> <PID1> | |||
* $> gdb <program> <PID2> | |||
*/ | |||
volatile bool sleep_wait = false; | |||
#else | |||
volatile bool sleep_wait = true; | |||
#endif | |||
while (sleep_wait && !config.ndebug) | |||
sleep(1); | |||
#endif | |||
// Prepare vector and timing data | |||
Data.resize(config.arraySize); | |||
measurements_init(); | |||
} | |||
#if !defined TESTING | |||
/*! | |||
* @return Returns 0, but.... we may throw or exit(0) / exit(1) | |||
*/ | |||
int main(int argc, char* argv[]) try { | |||
// Init everything | |||
init(&argc, &argv); | |||
for (size_t it = 0 ; it < config.perf ; ++it) { | |||
// Initialize local data | |||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; | |||
std::uniform_int_distribution<distValue_t > dis( | |||
std::numeric_limits<distValue_t>::min(), | |||
std::numeric_limits<distValue_t>::max() | |||
); | |||
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); }); | |||
// Run distributed sort | |||
if (mpi.rank() == 0) | |||
logger << "Starting distributed sorting ... "; | |||
Timer_total.start(); | |||
#if CODE_VERSION == BUBBLETONIC | |||
distBubbletonic(Data, mpi.size(), mpi.rank()); | |||
#else | |||
distBitonic(Data, mpi.size(), mpi.rank()); | |||
#endif | |||
Timer_total.stop(); | |||
measurements_next(); | |||
if (mpi.rank() == 0) | |||
logger << " Done." << logger.endl; | |||
} | |||
// Print-outs and validation | |||
if (config.perf > 1) { | |||
Timing::print_duration(Timer_total.median(), "Total ", mpi.rank()); | |||
Timing::print_duration(Timer_fullSort.median(), "Full-Sort ", mpi.rank()); | |||
Timing::print_duration(Timer_exchange.median(), "Exchange ", mpi.rank()); | |||
Timing::print_duration(Timer_minmax.median(), "Min-Max ", mpi.rank()); | |||
Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort", mpi.rank()); | |||
} | |||
if (config.validation) { | |||
// If requested, we have the chance to fail! | |||
if (mpi.rank() == 0) | |||
std::cout << "[Validation] Results validation ..."; | |||
bool val = validator(Data, mpi.size(), mpi.rank()); | |||
if (mpi.rank() == 0) | |||
std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[32m [FAILED] \x1B[0m\n"); | |||
} | |||
mpi.finalize(); | |||
return 0; | |||
} | |||
catch (std::exception& e) { | |||
//we probably pollute the user's screen. Comment `cerr << ...` if you don't like it. | |||
std::cerr << "Error: " << e.what() << '\n'; | |||
exit(1); | |||
} | |||
#else | |||
#include <gtest/gtest.h> | |||
#include <exception> | |||
/*! | |||
* The testing version of our program | |||
*/ | |||
GTEST_API_ int main(int argc, char **argv) try { | |||
testing::InitGoogleTest(&argc, argv); | |||
return RUN_ALL_TESTS(); | |||
} | |||
catch (std::exception& e) { | |||
std::cout << "Exception: " << e.what() << '\n'; | |||
} | |||
#endif |
@@ -0,0 +1,516 @@ | |||
/** | |||
* \file | |||
* \brief Utilities header | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#ifndef UTILS_HPP_ | |||
#define UTILS_HPP_ | |||
#include <vector> | |||
#include <iostream> | |||
#include <chrono> | |||
#include <unistd.h> | |||
#include <mpi.h> | |||
#include "config.h" | |||
/*! | |||
* Min-Max statistics data for exchange optimization | |||
* @tparam Value_t The underlying data type of the sequence data | |||
*/ | |||
template <typename Value_t> | |||
struct Stat_t { | |||
using value_type = Value_t; //!< meta-export the type | |||
Value_t min{}; //!< The minimum value of the sequence | |||
Value_t max{}; //!< The maximum value of the sequence | |||
}; | |||
//! Application data selection alias | |||
using distStat_t = Stat_t<distValue_t>; | |||
extern distStat_t localStat, remoteStat; // Make stats public | |||
/* | |||
* MPI_<type> dispatcher mechanism | |||
*/ | |||
template <typename T> struct MPI_TypeMapper { }; | |||
template <> struct MPI_TypeMapper<char> { static MPI_Datatype getType() { return MPI_CHAR; } }; | |||
template <> struct MPI_TypeMapper<short> { static MPI_Datatype getType() { return MPI_SHORT; } }; | |||
template <> struct MPI_TypeMapper<int> { static MPI_Datatype getType() { return MPI_INT; } }; | |||
template <> struct MPI_TypeMapper<long> { static MPI_Datatype getType() { return MPI_LONG; } }; | |||
template <> struct MPI_TypeMapper<long long> { static MPI_Datatype getType() { return MPI_LONG_LONG; } }; | |||
template <> struct MPI_TypeMapper<unsigned char> { static MPI_Datatype getType() { return MPI_UNSIGNED_CHAR; } }; | |||
template <> struct MPI_TypeMapper<unsigned short>{ static MPI_Datatype getType() { return MPI_UNSIGNED_SHORT; } }; | |||
template <> struct MPI_TypeMapper<unsigned int> { static MPI_Datatype getType() { return MPI_UNSIGNED; } }; | |||
template <> struct MPI_TypeMapper<unsigned long> { static MPI_Datatype getType() { return MPI_UNSIGNED_LONG; } }; | |||
template <> struct MPI_TypeMapper<unsigned long long> { static MPI_Datatype getType() { return MPI_UNSIGNED_LONG_LONG; } }; | |||
template <> struct MPI_TypeMapper<float> { static MPI_Datatype getType() { return MPI_FLOAT; } }; | |||
template <> struct MPI_TypeMapper<double> { static MPI_Datatype getType() { return MPI_DOUBLE; } }; | |||
/*! | |||
* MPI wrapper type to provide MPI functionality and RAII to MPI as a resource | |||
* | |||
* @tparam TID The MPI type for process id [default: int] | |||
*/ | |||
template<typename TID = int> | |||
struct MPI_t { | |||
using ID_t = TID; // Export TID type (currently int defined by the standard) | |||
/*! | |||
* Initializes the MPI environment, must called from each process | |||
* | |||
* @param argc [int*] POINTER to main's argc argument | |||
* @param argv [char***] POINTER to main's argv argument | |||
*/ | |||
void init(int* argc, char*** argv) { | |||
// Initialize the MPI environment | |||
int err; | |||
if ((err = MPI_Init(argc, argv)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Init() - "); | |||
initialized_ = true; | |||
// Get the number of processes | |||
int size_value, rank_value; | |||
if ((err = MPI_Comm_size(MPI_COMM_WORLD, &size_value)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Comm_size() - "); | |||
if ((err = MPI_Comm_rank(MPI_COMM_WORLD, &rank_value)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Comm_rank() - "); | |||
size_ = static_cast<ID_t>(size_value); | |||
rank_ = static_cast<ID_t>(rank_value); | |||
if (size_ > static_cast<ID_t>(MAX_MPI_SIZE)) | |||
throw std::runtime_error( | |||
"(MPI) size - Not supported number of nodes [over " + std::to_string(MAX_MPI_SIZE) + "]\n" | |||
); | |||
// Get the name of the processor | |||
char processor_name[MPI_MAX_PROCESSOR_NAME]; | |||
int name_len; | |||
if ((err = MPI_Get_processor_name(processor_name, &name_len)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Get_processor_name() - "); | |||
name_ = std::string (processor_name, name_len); | |||
} | |||
/*! | |||
* Exchange one data object of type @c T with partner as part of the sorting network of both | |||
* bubbletonic or bitonic sorting algorithms. | |||
* | |||
* This function matches a transmit and a receive in order for fully exchanged the data object | |||
* between current node and partner. | |||
* | |||
* @tparam T The object type | |||
* | |||
* @param local [const T&] Reference to the local object to send | |||
* @param remote [T&] Reference to the object to receive data from partner | |||
* @param partner [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the MPI communication | |||
*/ | |||
template<typename T> | |||
void exchange_it(const T& local, T& remote, ID_t partner, int tag) { | |||
if (tag < 0) | |||
throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound"); | |||
MPI_Status status; | |||
int err; | |||
if ((err = MPI_Sendrecv( | |||
&local, sizeof(T), MPI_BYTE, partner, tag, | |||
&remote, sizeof(T), MPI_BYTE, partner, tag, | |||
MPI_COMM_WORLD, &status | |||
)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - "); | |||
} | |||
/*! | |||
* Exchange data with partner as part of the sorting network of both bubbletonic or bitonic | |||
* sorting algorithms. | |||
* | |||
* This function matches a transmit and a receive in order for fully exchanged data between | |||
* current node and partner. | |||
* | |||
* @tparam ValueT The value type used in buffer | |||
* | |||
* @param ldata [const ValueT*] Pointer to local data to send | |||
* @param rdata [ValueT*] Pointer to buffer to receive data from partner | |||
* @param count [size_t] The number of data to exchange | |||
* @param partner [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the MPI communication | |||
*/ | |||
template<typename ValueT> | |||
void exchange(const ValueT* ldata, ValueT* rdata, size_t count, ID_t partner, int tag) { | |||
if (tag < 0) | |||
throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound"); | |||
MPI_Datatype datatype = MPI_TypeMapper<ValueT>::getType(); | |||
MPI_Status status; | |||
int err; | |||
if ((err = MPI_Sendrecv( | |||
ldata, count, datatype, partner, tag, | |||
rdata, count, datatype, partner, tag, | |||
MPI_COMM_WORLD, &status | |||
)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - "); | |||
} | |||
/*! | |||
* Initiate a data exchange data with partner using non-blocking Isend-Irecv, as part of the | |||
* sorting network of both bubbletonic or bitonic sorting algorithms. | |||
* | |||
* This function matches a transmit and a receive in order for fully exchanged data between | |||
* current node and partner. | |||
* @note | |||
* This call MUST paired with exchange_wait() for each MPI_t object. | |||
* Calling 2 consecutive exchange_start() for the same MPI_t object is undefined. | |||
* | |||
* @tparam ValueT The value type used in buffers | |||
* | |||
* @param ldata [const ValueT*] Pointer to local data to send | |||
* @param rdata [ValueT*] Pointer to buffer to receive data from partner | |||
* @param count [size_t] The number of data to exchange | |||
* @param partner [mpi_id_t] The partner for the exchange | |||
* @param tag [int] The tag to use for the MPI communication | |||
*/ | |||
template<typename ValueT> | |||
void exchange_start(const ValueT* ldata, ValueT* rdata, size_t count, ID_t partner, int tag) { | |||
if (tag < 0) | |||
throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound"); | |||
MPI_Datatype datatype = MPI_TypeMapper<ValueT>::getType(); | |||
int err; | |||
err = MPI_Isend(ldata, count, datatype, partner, tag, MPI_COMM_WORLD, &handle_tx); | |||
if (err != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Isend() - "); | |||
err = MPI_Irecv(rdata, count, datatype, partner, tag, MPI_COMM_WORLD, &handle_rx); | |||
if (err != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Irecv() - "); | |||
} | |||
/*! | |||
* Block wait for the completion of the previously called exchange_start() | |||
* | |||
* @note | |||
* This call MUST paired with exchange_start() for each MPI_t object. | |||
* Calling 2 consecutive exchange_wait() for the same MPI_t object is undefined. | |||
*/ | |||
void exchange_wait() { | |||
MPI_Status status; | |||
int err; | |||
if ((err = MPI_Wait(&handle_tx, &status)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Wait() [send] - "); | |||
if ((err = MPI_Wait(&handle_rx, &status)) != MPI_SUCCESS) | |||
mpi_throw(err, "(MPI) MPI_Wait() [recv] - "); | |||
} | |||
// Accessors | |||
[[nodiscard]] ID_t rank() const noexcept { return rank_; } | |||
[[nodiscard]] ID_t size() const noexcept { return size_; } | |||
[[nodiscard]] const std::string& name() const noexcept { return name_; } | |||
// Mutators | |||
ID_t rank(ID_t rank) noexcept { return rank_ = rank; } | |||
ID_t size(ID_t size) noexcept { return size_ = size; } | |||
std::string& name(const std::string& name) noexcept { return name_ = name; } | |||
/*! | |||
* Finalized the MPI | |||
*/ | |||
void finalize() { | |||
// Finalize the MPI environment | |||
initialized_ = false; | |||
MPI_Finalize(); | |||
} | |||
//! RAII MPI finalization | |||
~MPI_t() { | |||
// Finalize the MPI environment even on unexpected errors | |||
if (initialized_) | |||
MPI_Finalize(); | |||
} | |||
// Local functionality | |||
private: | |||
/*! | |||
* Throw exception helper. It bundles the prefix msg with the MPI error string retrieved by | |||
* MPI API. | |||
* | |||
* @param err The MPI error code | |||
* @param prefixMsg The prefix text for the exception error message | |||
*/ | |||
void mpi_throw(int err, const char* prefixMsg) { | |||
char err_msg[MPI_MAX_ERROR_STRING]; | |||
int msg_len; | |||
MPI_Error_string(err, err_msg, &msg_len); | |||
throw std::runtime_error(prefixMsg + std::string (err_msg) + '\n'); | |||
} | |||
private: | |||
ID_t rank_{}; //!< MPI rank of the process | |||
ID_t size_{}; //!< MPI total size of the execution | |||
std::string name_{}; //!< The name of the local machine | |||
bool initialized_{}; //!< RAII helper flag | |||
MPI_Request handle_tx{}; //!< MPI async exchange handler for Transmission | |||
MPI_Request handle_rx{}; //!< MPI async exchange handler for Receptions | |||
}; | |||
/* | |||
* Exported data types | |||
*/ | |||
extern MPI_t<> mpi; | |||
using mpi_id_t = MPI_t<>::ID_t; | |||
/*! | |||
* @brief A std::vector wrapper with 2 vectors, an active and a shadow. | |||
* | |||
* This type exposes the standard vector functionality of the active vector. | |||
* The shadow can be used when we need to use the vector as mutable | |||
* data in algorithms that can not support "in-place" editing (like elbow-sort for example) | |||
* | |||
* @tparam Value_t the underlying data type of the vectors | |||
*/ | |||
template <typename Value_t> | |||
struct ShadowedVec_t { | |||
// STL requirements | |||
using value_type = Value_t; | |||
using iterator = typename std::vector<Value_t>::iterator; | |||
using const_iterator = typename std::vector<Value_t>::const_iterator; | |||
using size_type = typename std::vector<Value_t>::size_type; | |||
// Default constructor | |||
ShadowedVec_t() = default; | |||
// Constructor from an std::vector | |||
explicit ShadowedVec_t(const std::vector<Value_t>& vec) | |||
: North(vec), South(), active(north) { | |||
South.resize(North.size()); | |||
} | |||
explicit ShadowedVec_t(std::vector<Value_t>&& vec) | |||
: North(std::move(vec)), South(), active(north) { | |||
South.resize(North.size()); | |||
} | |||
// Copy assignment operator | |||
ShadowedVec_t& operator=(const ShadowedVec_t& other) { | |||
if (this != &other) { // Avoid self-assignment | |||
North = other.North; | |||
South = other.South; | |||
active = other.active; | |||
} | |||
return *this; | |||
} | |||
// Move assignment operator | |||
ShadowedVec_t& operator=(ShadowedVec_t&& other) noexcept { | |||
if (this != &other) { // Avoid self-assignment | |||
North = std::move(other.North); | |||
South = std::move(other.South); | |||
active = other.active; | |||
// There is no need to zero out other since it is valid but in a non-defined state | |||
} | |||
return *this; | |||
} | |||
// Type accessors | |||
std::vector<Value_t>& getActive() { return (active == north) ? North : South; } | |||
std::vector<Value_t>& getShadow() { return (active == north) ? South : North; } | |||
const std::vector<Value_t>& getActive() const { return (active == north) ? North : South; } | |||
const std::vector<Value_t>& getShadow() const { return (active == north) ? South : North; } | |||
// Swap vectors | |||
void switch_active() { active = (active == north) ? south : north; } | |||
// Dispatch vector functionality to active vector | |||
Value_t& operator[](size_type index) { return getActive()[index]; } | |||
const Value_t& operator[](size_type index) const { return getActive()[index]; } | |||
Value_t& at(size_type index) { return getActive().at(index); } | |||
const Value_t& at(size_type index) const { return getActive().at(index); } | |||
void push_back(const Value_t& value) { getActive().push_back(value); } | |||
void push_back(Value_t&& value) { getActive().push_back(std::move(value)); } | |||
void pop_back() { getActive().pop_back(); } | |||
Value_t& front() { return getActive().front(); } | |||
Value_t& back() { return getActive().back(); } | |||
const Value_t& front() const { return getActive().front(); } | |||
const Value_t& back() const { return getActive().back(); } | |||
iterator begin() { return getActive().begin(); } | |||
const_iterator begin() const { return getActive().begin(); } | |||
iterator end() { return getActive().end(); } | |||
const_iterator end() const { return getActive().end(); } | |||
size_type size() const { return getActive().size(); } | |||
void resize(size_t new_size) { | |||
North.resize(new_size); | |||
South.resize(new_size); | |||
} | |||
void reserve(size_t new_capacity) { | |||
North.reserve(new_capacity); | |||
South.reserve(new_capacity); | |||
} | |||
[[nodiscard]] size_t capacity() const { return getActive().capacity(); } | |||
[[nodiscard]] bool empty() const { return getActive().empty(); } | |||
void clear() { getActive().clear(); } | |||
void swap(std::vector<Value_t>& other) { getActive().swap(other); } | |||
// Comparisons | |||
bool operator== (const ShadowedVec_t& other) { return getActive() == other.getActive(); } | |||
bool operator!= (const ShadowedVec_t& other) { return getActive() != other.getActive(); } | |||
bool operator== (const std::vector<value_type>& other) { return getActive() == other; } | |||
bool operator!= (const std::vector<value_type>& other) { return getActive() != other; } | |||
private: | |||
std::vector<Value_t> North{}; //!< Actual buffer to be used either as active or shadow | |||
std::vector<Value_t> South{}; //!< Actual buffer to be used either as active or shadow | |||
enum { | |||
north, south | |||
} active{north}; //!< Flag to select between North and South buffer | |||
}; | |||
/* | |||
* Exported data types | |||
*/ | |||
using distBuffer_t = ShadowedVec_t<distValue_t>; | |||
extern distBuffer_t Data; | |||
/*! | |||
* A Logger for entire program. | |||
*/ | |||
struct Log { | |||
struct Endl {} endl; //!< a tag object to to use it as a new line request. | |||
//! We provide logging via << operator | |||
template<typename T> | |||
Log &operator<<(T &&t) { | |||
if (config.verbose) { | |||
if (line_) { | |||
std::cout << "[Log]: " << t; | |||
line_ = false; | |||
} else | |||
std::cout << t; | |||
} | |||
return *this; | |||
} | |||
// overload for special end line handling | |||
Log &operator<<(Endl e) { | |||
(void) e; | |||
if (config.verbose) { | |||
std::cout << '\n'; | |||
line_ = true; | |||
} | |||
return *this; | |||
} | |||
private: | |||
bool line_{true}; | |||
}; | |||
extern Log logger; | |||
/*! | |||
* A small timing utility based on chrono that supports timing rounds | |||
* and returning the median of them. Time can accumulate to the measurement | |||
* for each round. | |||
*/ | |||
struct Timing { | |||
using Tpoint = std::chrono::steady_clock::time_point; | |||
using Tduration = std::chrono::microseconds; | |||
using microseconds = std::chrono::microseconds; | |||
using milliseconds = std::chrono::milliseconds; | |||
using seconds = std::chrono::seconds; | |||
//! Setup measurement rounds | |||
void init(size_t rounds) { | |||
duration_.resize(rounds); | |||
for (auto& d : duration_) | |||
d = Tduration::zero(); | |||
} | |||
//! tool to mark the starting point | |||
Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); } | |||
//! tool to mark the ending point | |||
Tpoint stop() noexcept { | |||
Tpoint now = std::chrono::steady_clock::now(); | |||
duration_[current_] += dt(now, mark_); | |||
return now; | |||
} | |||
//! Switch timing slot | |||
void next() noexcept { | |||
++current_; | |||
current_ %= duration_.size(); | |||
} | |||
Tduration& median() noexcept { | |||
std::sort(duration_.begin(), duration_.end()); | |||
return duration_[duration_.size()/2]; | |||
} | |||
//! A duration calculation utility | |||
static Tduration dt(Tpoint t2, Tpoint t1) noexcept { | |||
return std::chrono::duration_cast<Tduration>(t2 - t1); | |||
} | |||
//! Tool to print the time interval | |||
static void print_duration(const Tduration& duration, const char *what, mpi_id_t rank) noexcept { | |||
if (std::chrono::duration_cast<microseconds>(duration).count() < 10000) | |||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | |||
<< std::to_string(std::chrono::duration_cast<microseconds>(duration).count()) << " [usec]\n"; | |||
else if (std::chrono::duration_cast<milliseconds>(duration).count() < 10000) | |||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " | |||
<< std::to_string(std::chrono::duration_cast<milliseconds>(duration).count()) << " [msec]\n"; | |||
else { | |||
char stime[26]; // fit ulong | |||
auto sec = std::chrono::duration_cast<seconds>(duration).count(); | |||
auto msec = (std::chrono::duration_cast<milliseconds>(duration).count() % 1000) / 10; // keep 2 digit | |||
std::sprintf(stime, "%ld.%1ld", sec, msec); | |||
std::cout << "[Timing] (Rank " << rank << ") " << what << ": " << stime << " [sec]\n"; | |||
} | |||
} | |||
private: | |||
size_t current_{0}; | |||
Tpoint mark_{}; | |||
std::vector<Tduration> duration_{1}; | |||
}; | |||
/*! | |||
* A "high level function"-like utility macro to forward a function call | |||
* and accumulate the execution time to the corresponding timing object. | |||
* | |||
* @param Tim The Timing object [Needs to have methods start() and stop()] | |||
* @param Func The function name | |||
* @param ... The arguments to pass to function (the preprocessor way) | |||
*/ | |||
#define timeCall(Tim, Func, ...) \ | |||
Tim.start(); \ | |||
Func(__VA_ARGS__); \ | |||
Tim.stop(); \ | |||
/*! | |||
* A utility to check if a number is power of two | |||
* | |||
* @tparam Integral The integral type of the number to check | |||
* @param x The number to check | |||
* @return True if it is power of 2, false otherwise | |||
*/ | |||
template <typename Integral> | |||
constexpr inline bool isPowerOfTwo(Integral x) noexcept { | |||
return (!(x & (x - 1)) && x); | |||
} | |||
#endif /* UTILS_HPP_ */ |
@@ -0,0 +1,34 @@ | |||
/** | |||
* \file | |||
* \brief PDS HW3 tests | |||
* | |||
* To run these test execute: | |||
* ... | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#include <gtest/gtest.h> | |||
/* | |||
* Global fixtures | |||
*/ | |||
class TCUDAbitonic : public ::testing::Test { | |||
protected: | |||
static void SetUpTestSuite() { } | |||
static void TearDownTestSuite() { } | |||
}; | |||
/* | |||
* MPI: SysTest (acceptance) | |||
* Each process executes distBubbletonic for uin8_t [16] | |||
*/ | |||
TEST_F(TCUDAbitonic, test1) { | |||
EXPECT_EQ(true, true); | |||
} |