Compare commits

..

2 Commits

10 changed files with 29891 additions and 0 deletions

23
homework_3/.gitignore vendored Normal file
View File

@ -0,0 +1,23 @@
# project
bin/
out/
mat/
mtx/
.unused/
various/
# hpc
# IDEs
.idea/
.clangd
# eclipse
.project
.cproject
.settings/
.vs/
.vscode/

205
homework_3/Makefile Normal file
View File

@ -0,0 +1,205 @@
#
# PDS HW3 Makefile
#
# Copyright (C) 2025 Christos Choutouridis <christos@choutouridis.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# ============== Project settings ==============
# Project's name
PROJECT := PDS_homework_3
# Excecutable's name
TARGET := bitonic
# Source directories list(space seperated). Makefile-relative path, UNDER current directory.
SRC_DIR_LIST := src test test/gtest
# Include directories list(space seperated). Makefile-relative path.
INC_DIR_LIST := src \
test \
test/gtest/ \
# Exclude files list(space seperated). Filenames only.
# EXC_FILE_LIST := bad.cpp old.cpp
# Build directories
BUILD_DIR := bin
OBJ_DIR := $(BUILD_DIR)/obj
DEP_DIR := $(BUILD_DIR)/.dep
OUTPUT_DIR := out
# ========== Compiler settings ==========
# Compiler flags for debug and release
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11 -fopenmp
REL_CFLAGS := -Wall -Wextra -O3 -std=c11 -fopenmp
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17 -fopenmp
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17 -fopenmp
# Pre-defines
# PRE_DEFS := MYCAB=1729 SUPER_MODE
PRE_DEFS :=
# ============== Linker settings ==============
# Linker flags (example: -pthread -lm)
LDFLAGS := -pthread
# Map output file
MAP_FILE := output.map
MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
# ============== Docker settings ==============
# We need:
# - Bind the entire project directory(the dir that icludes all the code) as volume.
# - In docker instance, change to working directory(where the makefile is).
DOCKER_VOL_DIR := $(shell pwd)
DOCKER_WRK_DIR :=
DOCKER_RUN := docker run --rm
DOCKER_FLAGS := -v $(DOCKER_VOL_DIR):/usr/src/$(PROJECT) -w /usr/src/$(PROJECT)/$(DOCKER_WRK_DIR)
# docker invoke mechanism (edit with care)
# note:
# Here, `DOCKER` variable is empty. Rules can assign `DOCKER := DOCKER_CMD` when docker
# functionality is needed.
DOCKER_CMD = $(DOCKER_RUN) $(DOCKER_FLAGS) $(IMAGE)
DOCKER :=
# ============== Tool selection ==============
# compiler and compiler flags.
CSIZE := size
CFLAGS := $(DEB_CFLAGS)
CXXFLAGS := $(DEB_CXXFLAGS)
CXX := g++ #mpic++
CC := gcc #mpicc
#
# =========== Main body and Patterns ===========
#
#ifeq ($(OS), Windows_NT)
# TARGET := $(TARGET).exe
#endif
INC := $(foreach dir,$(INC_DIR_LIST),-I$(dir))
DEF := $(foreach def,$(PRE_DEFS),-D$(def))
EXC := $(foreach fil,$(EXC_FILE_LIST), \
$(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/$(fil))) \
)
# source files. object and dependencies list
# recursive search into current and source directories
SRC := $(wildcard *.cpp)
SRC += $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/*.cpp))
SRC += $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/**/*.cpp))
SRC := $(filter-out $(EXC),$(SRC))
#SRC := $(abspath $(SRC))
OBJ := $(foreach file,$(SRC:%.cpp=%.o),$(OBJ_DIR)/$(file))
DEP := $(foreach file,$(SRC:%.cpp=%.d),$(DEP_DIR)/$(file))
# Make Dependencies pattern.
# This little trick enables recompilation only when dependencies change
# and it does so for changes both in source AND header files ;)
#
# It is based on Tom Tromey's method.
#
# Invoke cpp to create makefile rules with dependencies for each source file
$(DEP_DIR)/%.d: %.c
@mkdir -p $(@D)
@$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $<
# c file objects depent on .c AND dependency files, which have an empty recipe
$(OBJ_DIR)/%.o: %.c $(DEP_DIR)/%.d
@mkdir -p $(@D)
@$(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $<
$(DEP_DIR)/%.d: %.cpp
@mkdir -p $(@D)
@$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $<
# cpp file objects depent on .cpp AND dependency files, which have an empty recipe
$(OBJ_DIR)/%.o: %.cpp $(DEP_DIR)/%.d
@mkdir -p $(@D)
@$(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $<
# empty recipe for dependency files. This prevents make errors
$(DEP):
# now include all dependencies
# After all they are makefile dependency rules ;)
include $(wildcard $(DEP))
# main target rule
$(BUILD_DIR)/$(TARGET): $(OBJ)
@mkdir -p $(@D)
@echo Linking to target: $(TARGET)
@echo $(DOCKER) $(CXX) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
@$(DOCKER) $(CXX) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
@echo
@echo Print size information
@$(CSIZE) $(@D)/$(TARGET)
@echo Done
#
# ================ Default local build rules =================
# example:
# make debug
.DEFAULT_GOAL := all
.PHONY: clean
clean:
@echo Cleaning build directories
@rm -rf $(OBJ_DIR)
@rm -rf $(DEP_DIR)
@rm -rf $(BUILD_DIR)
debug: CFLAGS := $(DEB_CFLAGS)
debug: $(BUILD_DIR)/$(TARGET)
release: CFLAGS := $(REL_CFLAGS)
release: $(BUILD_DIR)/$(TARGET)
#
# ================ Build rules =================
#
bitonic_v0: CC := nvcc
bitonic_v0: CXX := nvcc
bitonic_v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=V0
bitonic_v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=V0
bitonic_v0: TARGET := bitonic_v0
bitonic_v0: $(BUILD_DIR)/$(TARGET)
@mkdir -p $(OUTPUT_DIR)
cp $(BUILD_DIR)/$(TARGET) $(OUTPUT_DIR)/$(TARGET)
hpc-build:
make clean
make distbubbletonic
make clean
make distbitonic
make clean
make tests
all: debug bitonic_v0
# Note:
# Add a gcc based make rule here in order for clangd to successfully scan the project files.
# Otherwise we do not need the gcc build.

74
homework_3/src/config.h Normal file
View File

@ -0,0 +1,74 @@
/*!
* \file
* \brief Build configuration file.
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef CONFIG_H_
#define CONFIG_H_
#include <cstdint>
/*
* Versioning:
* - RC1:
*/
static constexpr char version[] = "0.0";
/*
* Defines for different version of the exercise
*/
#define V0 (0)
#define V1 (1)
#define V2 (2)
// Fail-safe version selection
#if !defined CODE_VERSION
#define CODE_VERSION V0
#endif
// Default Data size (in case -q <N> is not present)
static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16;
/*!
* Value type selection
*
* We support the following compiler types or the <cstdint> that translate to them:
* char - unsigned char
* short - unsigned short
* int - unsigned int
* long - unsigned long
* long long - unsigned long long
* float
* double
*/
using distValue_t = uint32_t;
/*!
* Session option for each invocation of the executable.
*
* @note
* The values of the members are set from the command line.
*/
struct config_t {
size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort.
bool exchangeOpt{false}; //!< Flag to request the exchange optimization
size_t pipeline{1UL}; //!< Pipeline stages (1 to disable)
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0.
bool ndebug{false}; //!< Skips debug trap on DEBUG builds.
size_t perf{1}; //!< Enable performance timing measurements and prints and repeat
//!< the sorting <perf> times.
bool verbose{false}; //!< Flag to enable verbose output to stdout.
};
/*
* Exported data types
*/
extern config_t config;
#endif /* CONFIG_H_ */

View File

@ -0,0 +1,51 @@
/*!
* \file
* \brief Distributed sort implementation
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#include "utils.hpp"
#include "distsort.hpp"
/*!
* Returns the ascending or descending configuration of the node's sequence based on
* the current node (MPI process) and the depth of the sorting network
*
* @param node [mpi_id_t] The current node (MPI process)
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if we need ascending configuration, false otherwise
*/
bool ascending(mpi_id_t node, size_t depth) noexcept {
return !(node & (1 << depth));
}
/*!
* Returns the node's partner for data exchange during the sorting network iterations
* of Bitonic
*
* @param node [mpi_id_t] The current node
* @param step [size_t] The step of the sorting network
* @return [mpi_id_t] The node id of the partner for data exchange
*/
mpi_id_t partner(mpi_id_t node, size_t step) noexcept {
return (node ^ (1 << step));
}
/*!
* Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
*
* @param node [mpi_id_t] The node for which we check
* @param partner [mpi_id_t] The partner of the data exchange
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if the node should keep the small values, false otherwise
*/
bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) {
if (node == partner)
throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
return ascending(node, depth) == (node < partner);
}

223
homework_3/src/distsort.hpp Normal file
View File

@ -0,0 +1,223 @@
/*!
* \file
* \brief Distributed sort implementation header
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef DISTBITONIC_H_
#define DISTBITONIC_H_
#include <vector>
#include <algorithm>
#include <parallel/algorithm>
#include <cmath>
#include <cstdint>
#if !defined DEBUG
#define NDEBUG
#endif
#include <cassert>
#include "utils.hpp"
/*
* Exported timers
*/
extern Timing Timer_total;
extern Timing Timer_fullSort;
extern Timing Timer_exchange;
extern Timing Timer_minmax;
extern Timing Timer_elbowSort;
/*
* ============================== Sort utilities ==============================
*/
/*!
* Returns the ascending or descending configuration of the node's sequence based on
* the current node (MPI process) and the depth of the sorting network
*
* @param node [mpi_id_t] The current node (MPI process)
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if we need ascending configuration, false otherwise
*/
bool ascending(mpi_id_t node, size_t depth);
/*!
* Returns the node's partner for data exchange during the sorting network iterations
* of Bitonic
*
* @param node [mpi_id_t] The current node
* @param step [size_t] The step of the sorting network
* @return [mpi_id_t] The node id of the partner for data exchange
*/
mpi_id_t partner(mpi_id_t node, size_t step);
/*!
* Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
*
* @param node [mpi_id_t] The node for which we check
* @param partner [mpi_id_t] The partner of the data exchange
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if the node should keep the small values, false otherwise
*/
bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth);
/*
* ============================== Data utilities ==============================
*/
/*!
* Sort a range using the build-in O(Nlog(N)) algorithm
*
* @tparam RangeT A range type with random access iterator
*
* @param data [RangeT] The data to be sorted
* @param ascending [bool] Flag to indicate the sorting order
*/
template<typename RangeT>
void fullSort(RangeT& data, bool ascending) noexcept {
// Use introsort from stdlib++ here, unless ... __gnu_parallel
if (ascending) {
__gnu_parallel::sort(data.begin(), data.end(), std::less<>());
}
else {
__gnu_parallel::sort(data.begin(), data.end(), std::greater<>());
}
}
/*!
* Core functionality of sort for shadowed buffer types using
* the "elbow sort" algorithm.
*
* @note:
* This algorithm can not work "in place".
* We use the active buffer as source and the shadow as target.
* At the end we switch which buffer is active and which is the shadow.
* @note
* This is the core functionality. Use the elbowSort() function instead
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
* @tparam CompT A Comparison type for binary operation comparisons
*
* @param data [ShadowedDataT] The data to sort
* @param ascending [bool] Flag to indicate the sorting order
* @param comp [CompT] The binary operator object
*/
template<typename ShadowedDataT, typename CompT>
void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept {
auto& active = data.getActive(); // Get the source vector (the data to sort)
auto& shadow = data.getShadow(); // Get the target vector (the sorted data)
size_t N = data.size(); // The total size is the same or both vectors
size_t left = std::distance(
active.begin(),
(ascending) ?
std::min_element(active.begin(), active.end()) :
std::max_element(active.begin(), active.end())
); // start 'left' from elbow of the bitonic
size_t right = (left == N-1) ? 0 : left + 1;
// Walk in opposite directions from elbow and insert-sort to target vector
for (size_t i = 0 ; i<N ; ++i) {
if (comp(active[left], active[right])) {
shadow[i] = active[left];
left = (left == 0) ? N-1 : left -1; // cycle decrease
}
else {
shadow[i] = active[right];
right = (right + 1) % N; // cycle increase
}
}
data.switch_active(); // Switch active-shadow buffers
}
/*!
* Sort a shadowed buffer using the "elbow sort" algorithm.
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The data to sort
* @param ascending [bool] Flag to indicate the sorting order
*/
template<typename ShadowedDataT>
void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
if (ascending)
elbowSortCore(data, ascending, std::less<>());
else
elbowSortCore(data, ascending, std::greater<>());
}
/*!
* Takes two sequences and selects either the larger or the smaller items
* in one-to-one comparison between them. If the initial sequences are bitonic, then
* the result is a bitonic sequence too!
*
* @tparam ValueT The underlying type of the sequences
*
* @param local [ValueT*] Pointer to the local sequence
* @param remote [const ValueT*] Pointer to the remote sequence (copied locally by MPI)
* @param count [size_t] The number of items to process
* @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence
*/
template<typename ValueT>
void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSmall) noexcept {
std::transform(
local, local + count,
remote,
local,
[&keepSmall](const ValueT& a, const ValueT& b){
return (keepSmall) ? std::min(a, b) : std::max(a, b);
});
}
/*
* ============================== Sort algorithms ==============================
*/
/*!
* A distributed version of the Bitonic sort algorithm.
*
* @note
* Each MPI process should run an instance of this function.
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The local to MPI process data to sort
* @param Processes [mpi_id_t] The total number of MPI processes
* @param rank [mpi_id_t] The current process id
*/
template<typename ShadowedDataT>
void distBitonic(ShadowedDataT& data) {
// Initially sort to create a half part of a bitonic sequence
timeCall(Timer_fullSort, fullSort, data, ascending(rank, 0));
// Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
auto p = static_cast<uint32_t>(std::log2(Processes));
for (size_t depth = 1; depth <= p; ++depth) {
for (size_t step = depth; step > 0;) {
--step;
// Find out exchange configuration
auto part = partner(rank, step);
auto ks = keepSmall(rank, part, depth);
// Exchange with partner, keep nim-or-max
exchange(data, part, ks, tag);
}
// sort - O(N)
timeCall(Timer_elbowSort, elbowSort, data, ascending(rank, depth));
}
}
#endif //DISTBITONIC_H_

235
homework_3/src/main.cpp Normal file
View File

@ -0,0 +1,235 @@
/*!
* \file
* \brief Main application file for PDS HW3 (CUDA)
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#include <exception>
#include <iostream>
#include <algorithm>
#include <random>
#include "utils.hpp"
#include "config.h"
#include "distsort.hpp"
// Global session data
config_t config;
distBuffer_t Data;
Log logger;
// Mersenne seeded from hw if possible. range: [type_min, type_max]
std::random_device rd;
std::mt19937 gen(rd());
//! Performance timers for each one of the "costly" functions
Timing Timer_total;
Timing Timer_fullSort;
Timing Timer_exchange;
Timing Timer_minmax;
Timing Timer_elbowSort;
//! Init timing objects for extra rounds
void measurements_init() {
if (config.perf > 1) {
Timer_total.init(config.perf);
Timer_fullSort.init(config.perf);
Timer_exchange.init(config.perf);
Timer_minmax.init(config.perf);
Timer_elbowSort.init(config.perf);
}
}
//! iterate ot the next round of measurements for all measurement objects
void measurements_next() {
if (config.perf > 1) {
Timer_total.next();
Timer_fullSort.next();
Timer_exchange.next();
Timer_minmax.next();
Timer_elbowSort.next();
}
}
/*!
* A small command line argument parser
* \return The status of the operation
*/
bool get_options(int argc, char* argv[]){
bool status =true;
// iterate over the passed arguments
for (int i=1 ; i<argc ; ++i) {
std::string arg(argv[i]); // get current argument
if (arg == "-q" || arg == "--array-size") {
if (i+1 < argc) {
config.arraySize = 1 << atoi(argv[++i]);
}
else {
status = false;
}
}
else if (arg == "--validation") {
config.validation = true;
}
else if (arg == "--perf") {
if (i+1 < argc) {
config.perf = atoi(argv[++i]);
}
else {
status = false;
}
}
else if (arg == "--ndebug") {
config.ndebug = true;
}
else if (arg == "-v" || arg == "--verbose") {
config.verbose = true;
}
else if (arg == "--version") {
std::cout << "bitonic - A GPU accelerated sort utility\n";
std::cout << "version: " << version << "\n\n";
exit(0);
}
else if (arg == "-h" || arg == "--help") {
std::cout << "distbitonic - A distributed sort utility\n\n";
std::cout << " distbitonic -q <N> [--validation] [--perf <N>] [--ndebug] [-v]\n";
std::cout << " distbitonic -h\n";
std::cout << '\n';
std::cout << "Options:\n\n";
std::cout << " -q | --array-size <N>\n";
std::cout << " Selects the array size according to size = 2^N\n\n";
std::cout << " --validation\n";
std::cout << " Request a full validation at the end, performed by process rank 0\n\n";
std::cout << " --perf <N> \n";
std::cout << " Enable performance timing measurements and prints, and repeat\n";
std::cout << " the sorting <N> times.\n\n";
std::cout << " --ndebug\n";
std::cout << " Skip debug breakpoint when on debug build.\n\n";
std::cout << " -v | --verbose\n";
std::cout << " Request a more verbose output to stdout.\n\n";
std::cout << " -h | --help\n";
std::cout << " Prints this and exit.\n\n";
std::cout << " --version\n";
std::cout << " Prints version and exit.\n\n";
std::cout << "Examples:\n\n";
std::cout << " bitonic -q 24\n";
std::cout << " Runs bitonic with GPU acceleration with 2^24 array points\n\n";
exit(0);
}
else { // parse error
std::cout << "Invocation error. Try -h for details.\n";
status = false;
}
}
return status;
}
/*!
* A simple validator for the entire distributed process
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The local to MPI process
* @param Processes [mpi_id_t] The total number of MPI processes
* @param rank [mpi_id_t] The current process id
*
* @return [bool] True if all are sorted and in total ascending order
*/
template<typename ShadowedDataT>
bool validator(ShadowedDataT& data) {
using value_t = typename ShadowedDataT::value_type;
bool ret = true; // Have faith!
return ret;
}
/*!
* Initializes the environment, must called from each process
*
* @param argc [int*] POINTER to main's argc argument
* @param argv [char***] POINTER to main's argv argument
*/
void init(int* argc, char*** argv) {
// try to read command line
if (!get_options(*argc, *argv))
exit(1);
// Prepare vector and timing data
Data.resize(config.arraySize);
measurements_init();
}
#if !defined TESTING
/*!
* @return Returns 0, but.... we may throw or exit(0) / exit(1)
*/
int main(int argc, char* argv[]) try {
// Init everything
init(&argc, &argv);
for (size_t it = 0 ; it < config.perf ; ++it) {
// Initialize local data
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
std::uniform_int_distribution<distValue_t > dis(
std::numeric_limits<distValue_t>::min(),
std::numeric_limits<distValue_t>::max()
);
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
// Run distributed sort
logger << "Starting distributed sorting ... ";
Timer_total.start();
distBitonic(Data);
Timer_total.stop();
measurements_next();
logger << " Done." << logger.endl;
}
// Print-outs and validation
if (config.perf > 1) {
Timing::print_duration(Timer_total.median(), "Total ", 0);
Timing::print_duration(Timer_fullSort.median(), "Full-Sort ", 0);
Timing::print_duration(Timer_exchange.median(), "Exchange ", 0);
Timing::print_duration(Timer_minmax.median(), "Min-Max ", 0);
Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort", 0);
}
if (config.validation) {
// If requested, we have the chance to fail!
std::cout << "[Validation] Results validation ...";
bool val = validator(Data);
std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[32m [FAILED] \x1B[0m\n");
}
return 0;
}
catch (std::exception& e) {
//we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
std::cerr << "Error: " << e.what() << '\n';
exit(1);
}
#else
#include <gtest/gtest.h>
#include <exception>
/*!
* The testing version of our program
*/
GTEST_API_ int main(int argc, char **argv) try {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
catch (std::exception& e) {
std::cout << "Exception: " << e.what() << '\n';
}
#endif

270
homework_3/src/utils.hpp Normal file
View File

@ -0,0 +1,270 @@
/**
* \file
* \brief Utilities header
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef UTILS_HPP_
#define UTILS_HPP_
#include <vector>
#include <iostream>
#include <chrono>
#include <unistd.h>
#include <algorithm>
#include "config.h"
/*!
* @brief A std::vector wrapper with 2 vectors, an active and a shadow.
*
* This type exposes the standard vector functionality of the active vector.
* The shadow can be used when we need to use the vector as mutable
* data in algorithms that can not support "in-place" editing (like elbow-sort for example)
*
* @tparam Value_t the underlying data type of the vectors
*/
template <typename Value_t>
struct ShadowedVec_t {
// STL requirements
using value_type = Value_t;
using iterator = typename std::vector<Value_t>::iterator;
using const_iterator = typename std::vector<Value_t>::const_iterator;
using size_type = typename std::vector<Value_t>::size_type;
// Default constructor
ShadowedVec_t() = default;
// Constructor from an std::vector
explicit ShadowedVec_t(const std::vector<Value_t>& vec)
: North(vec), South(), active(north) {
South.resize(North.size());
}
explicit ShadowedVec_t(std::vector<Value_t>&& vec)
: North(std::move(vec)), South(), active(north) {
South.resize(North.size());
}
// Copy assignment operator
ShadowedVec_t& operator=(const ShadowedVec_t& other) {
if (this != &other) { // Avoid self-assignment
North = other.North;
South = other.South;
active = other.active;
}
return *this;
}
// Move assignment operator
ShadowedVec_t& operator=(ShadowedVec_t&& other) noexcept {
if (this != &other) { // Avoid self-assignment
North = std::move(other.North);
South = std::move(other.South);
active = other.active;
// There is no need to zero out other since it is valid but in a non-defined state
}
return *this;
}
// Type accessors
std::vector<Value_t>& getActive() { return (active == north) ? North : South; }
std::vector<Value_t>& getShadow() { return (active == north) ? South : North; }
const std::vector<Value_t>& getActive() const { return (active == north) ? North : South; }
const std::vector<Value_t>& getShadow() const { return (active == north) ? South : North; }
// Swap vectors
void switch_active() { active = (active == north) ? south : north; }
// Dispatch vector functionality to active vector
Value_t& operator[](size_type index) { return getActive()[index]; }
const Value_t& operator[](size_type index) const { return getActive()[index]; }
Value_t& at(size_type index) { return getActive().at(index); }
const Value_t& at(size_type index) const { return getActive().at(index); }
void push_back(const Value_t& value) { getActive().push_back(value); }
void push_back(Value_t&& value) { getActive().push_back(std::move(value)); }
void pop_back() { getActive().pop_back(); }
Value_t& front() { return getActive().front(); }
Value_t& back() { return getActive().back(); }
const Value_t& front() const { return getActive().front(); }
const Value_t& back() const { return getActive().back(); }
iterator begin() { return getActive().begin(); }
const_iterator begin() const { return getActive().begin(); }
iterator end() { return getActive().end(); }
const_iterator end() const { return getActive().end(); }
size_type size() const { return getActive().size(); }
void resize(size_t new_size) {
North.resize(new_size);
South.resize(new_size);
}
void reserve(size_t new_capacity) {
North.reserve(new_capacity);
South.reserve(new_capacity);
}
[[nodiscard]] size_t capacity() const { return getActive().capacity(); }
[[nodiscard]] bool empty() const { return getActive().empty(); }
void clear() { getActive().clear(); }
void swap(std::vector<Value_t>& other) { getActive().swap(other); }
// Comparisons
bool operator== (const ShadowedVec_t& other) { return getActive() == other.getActive(); }
bool operator!= (const ShadowedVec_t& other) { return getActive() != other.getActive(); }
bool operator== (const std::vector<value_type>& other) { return getActive() == other; }
bool operator!= (const std::vector<value_type>& other) { return getActive() != other; }
private:
std::vector<Value_t> North{}; //!< Actual buffer to be used either as active or shadow
std::vector<Value_t> South{}; //!< Actual buffer to be used either as active or shadow
enum {
north, south
} active{north}; //!< Flag to select between North and South buffer
};
/*
* Exported data types
*/
using distBuffer_t = ShadowedVec_t<distValue_t>;
extern distBuffer_t Data;
/*!
* A Logger for entire program.
*/
struct Log {
struct Endl {} endl; //!< a tag object to to use it as a new line request.
//! We provide logging via << operator
template<typename T>
Log &operator<<(T &&t) {
if (config.verbose) {
if (line_) {
std::cout << "[Log]: " << t;
line_ = false;
} else
std::cout << t;
}
return *this;
}
// overload for special end line handling
Log &operator<<(Endl e) {
(void) e;
if (config.verbose) {
std::cout << '\n';
line_ = true;
}
return *this;
}
private:
bool line_{true};
};
extern Log logger;
/*!
* A small timing utility based on chrono that supports timing rounds
* and returning the median of them. Time can accumulate to the measurement
* for each round.
*/
struct Timing {
using Tpoint = std::chrono::steady_clock::time_point;
using Tduration = std::chrono::microseconds;
using microseconds = std::chrono::microseconds;
using milliseconds = std::chrono::milliseconds;
using seconds = std::chrono::seconds;
//! Setup measurement rounds
void init(size_t rounds) {
duration_.resize(rounds);
for (auto& d : duration_)
d = Tduration::zero();
}
//! tool to mark the starting point
Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); }
//! tool to mark the ending point
Tpoint stop() noexcept {
Tpoint now = std::chrono::steady_clock::now();
duration_[current_] += dt(now, mark_);
return now;
}
//! Switch timing slot
void next() noexcept {
++current_;
current_ %= duration_.size();
}
Tduration& median() noexcept {
std::sort(duration_.begin(), duration_.end());
return duration_[duration_.size()/2];
}
//! A duration calculation utility
static Tduration dt(Tpoint t2, Tpoint t1) noexcept {
return std::chrono::duration_cast<Tduration>(t2 - t1);
}
//! Tool to print the time interval
static void print_duration(const Tduration& duration, const char *what) noexcept {
if (std::chrono::duration_cast<microseconds>(duration).count() < 10000)
std::cout << "[Timing] " << what << ": "
<< std::to_string(std::chrono::duration_cast<microseconds>(duration).count()) << " [usec]\n";
else if (std::chrono::duration_cast<milliseconds>(duration).count() < 10000)
std::cout << "[Timing] " << what << ": "
<< std::to_string(std::chrono::duration_cast<milliseconds>(duration).count()) << " [msec]\n";
else {
char stime[26]; // fit ulong
auto sec = std::chrono::duration_cast<seconds>(duration).count();
auto msec = (std::chrono::duration_cast<milliseconds>(duration).count() % 1000) / 10; // keep 2 digit
std::sprintf(stime, "%ld.%1ld", sec, msec);
std::cout << "[Timing] " << what << ": " << stime << " [sec]\n";
}
}
private:
size_t current_{0};
Tpoint mark_{};
std::vector<Tduration> duration_{1};
};
/*!
* A "high level function"-like utility macro to forward a function call
* and accumulate the execution time to the corresponding timing object.
*
* @param Tim The Timing object [Needs to have methods start() and stop()]
* @param Func The function name
* @param ... The arguments to pass to function (the preprocessor way)
*/
#define timeCall(Tim, Func, ...) \
Tim.start(); \
Func(__VA_ARGS__); \
Tim.stop(); \
/*!
* A utility to check if a number is power of two
*
* @tparam Integral The integral type of the number to check
* @param x The number to check
* @return True if it is power of 2, false otherwise
*/
template <typename Integral>
constexpr inline bool isPowerOfTwo(Integral x) noexcept {
return (!(x & (x - 1)) && x);
}
#endif /* UTILS_HPP_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

34
homework_3/test/tests.cpp Normal file
View File

@ -0,0 +1,34 @@
/**
* \file
* \brief PDS HW3 tests
*
* To run these test execute:
* ...
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#include <gtest/gtest.h>
/*
* Global fixtures
*/
class TCUDAbitonic : public ::testing::Test {
protected:
static void SetUpTestSuite() { }
static void TearDownTestSuite() { }
};
/*
* MPI: SysTest (acceptance)
* Each process executes distBubbletonic for uin8_t [16]
*/
TEST_F(TCUDAbitonic, test1) {
EXPECT_EQ(true, true);
}