HW3: [no compile] A first clean up

HW3: [No Compile] Init HW3 with HW2 files
2025-01-21 22:52:28 +02:00 · 2025-01-21 22:25:49 +02:00
10 changed files with 29891 additions and 0 deletions
--- a/homework_3/.gitignore
+++ b/homework_3/.gitignore
@ -0,0 +1,23 @@
+# project
+bin/
+out/
+mat/
+mtx/
+.unused/
+various/
+
+# hpc
+
+# IDEs
+.idea/
+.clangd
+
+# eclipse
+.project
+.cproject
+.settings/
+
+.vs/
+.vscode/
+
+
--- a/homework_3/Makefile
+++ b/homework_3/Makefile
@ -0,0 +1,205 @@
+#
+# PDS HW3 Makefile
+#
+# Copyright (C) 2025 Christos Choutouridis <christos@choutouridis.net>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, either version 3
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# ============== Project settings ==============
+# Project's name
+PROJECT         := PDS_homework_3
+
+# Executable's name
+TARGET          := bitonic
+
+# Source directories list (space separated). Makefile-relative path, UNDER current directory.
+SRC_DIR_LIST    := src test test/gtest
+
+# Include directories list (space separated). Makefile-relative path.
+INC_DIR_LIST    := src \
+                   test \
+                   test/gtest/ \
+
+
+# Exclude files list (space separated). Filenames only.
+# EXC_FILE_LIST := bad.cpp old.cpp
+
+# Build directories
+BUILD_DIR       := bin
+OBJ_DIR         := $(BUILD_DIR)/obj
+DEP_DIR         := $(BUILD_DIR)/.dep
+
+OUTPUT_DIR      := out
+
+# ========== Compiler settings ==========
+# Compiler flags for debug and release
+DEB_CFLAGS      := -DDEBUG -g3 -Wall -Wextra -std=c11 -fopenmp
+REL_CFLAGS      := -Wall -Wextra -O3 -std=c11 -fopenmp
+DEB_CXXFLAGS    := -DDEBUG -g3 -Wall -Wextra -std=c++17 -fopenmp
+REL_CXXFLAGS    := -Wall -Wextra -O3 -std=c++17 -fopenmp
+
+# Pre-defines
+# PRE_DEFS := MYCAB=1729 SUPER_MODE
+PRE_DEFS        :=
+
+# ============== Linker settings ==============
+# Linker flags (example: -pthread -lm)
+LDFLAGS         := -pthread
+
+# Map output file
+MAP_FILE        := output.map
+MAP_FLAG        := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
+
+# ============== Docker settings ==============
+# We need:
+#  - Bind the entire project directory (the dir that includes all the code) as volume.
+#  - In docker instance, change to working directory(where the makefile is).
+DOCKER_VOL_DIR  := $(shell pwd)
+DOCKER_WRK_DIR  :=
+DOCKER_RUN      := docker run --rm
+DOCKER_FLAGS    := -v $(DOCKER_VOL_DIR):/usr/src/$(PROJECT) -w /usr/src/$(PROJECT)/$(DOCKER_WRK_DIR)
+
+# docker invoke mechanism (edit with care)
+#   note:
+#   Here, `DOCKER` variable is empty. Rules can assign `DOCKER := DOCKER_CMD` when docker
+#   functionality is needed.
+DOCKER_CMD      = $(DOCKER_RUN) $(DOCKER_FLAGS) $(IMAGE)
+DOCKER          :=
+
+# ============== Tool selection ==============
+# compiler and compiler flags.
+# Note: comments are kept on their own line. With `VAR := value #comment` the
+# white space between the value and the `#` becomes part of the value.
+# MPI alternatives used by the previous homework: mpic++ / mpicc
+CSIZE           := size
+CFLAGS          := $(DEB_CFLAGS)
+CXXFLAGS        := $(DEB_CXXFLAGS)
+CXX             := g++
+CC              := gcc
+
+#
+# =========== Main body and Patterns ===========
+#
+
+#ifeq ($(OS), Windows_NT)
+#	TARGET := $(TARGET).exe
+#endif
+INC     := $(foreach dir,$(INC_DIR_LIST),-I$(dir))
+DEF     := $(foreach def,$(PRE_DEFS),-D$(def))
+EXC     := $(foreach fil,$(EXC_FILE_LIST),                              \
+               $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/$(fil))) \
+           )
+# source files. object and dependencies list
+# Search the current directory, every source directory and ONE level of
+# sub-directories below each source directory. $(wildcard) has no recursive
+# `**` pattern; here it matches like a single `*`.
+SRC     := $(wildcard *.cpp)
+SRC     += $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/*.cpp))
+SRC     += $(foreach dir,$(SRC_DIR_LIST),$(wildcard $(dir)/**/*.cpp))
+# Nested entries in SRC_DIR_LIST (ex: test and test/gtest) can match the same file
+# twice. $(sort) also removes duplicates, so every object is linked exactly once.
+SRC     := $(sort $(filter-out $(EXC),$(SRC)))
+#SRC     := $(abspath $(SRC))
+
+OBJ     := $(foreach file,$(SRC:%.cpp=%.o),$(OBJ_DIR)/$(file))
+DEP     := $(foreach file,$(SRC:%.cpp=%.d),$(DEP_DIR)/$(file))
+
+
+# Make Dependencies pattern.
+# This little trick enables recompilation only when dependencies change
+# and it does so for changes both in source AND header files ;)
+#
+# It is based on Tom Tromey's method.
+#
+# Invoke the preprocessor to create makefile rules with dependencies for each source file.
+#   -MM  list the headers a source includes (system headers are left out)
+#   -MT  name the object file as the target of the generated rule
+#   -MF  write the generated rule to the .d file
+$(DEP_DIR)/%.d: %.c
+	@mkdir -p $(@D)
+	@$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $<
+
+# c file objects depend on .c AND dependency files, which have an empty recipe
+$(OBJ_DIR)/%.o: %.c $(DEP_DIR)/%.d
+	@mkdir -p $(@D)
+	@$(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $<
+
+# Same as above, for C++ sources (uses CXX and CXXFLAGS)
+$(DEP_DIR)/%.d: %.cpp
+	@mkdir -p $(@D)
+	@$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $<
+
+# cpp file objects depend on .cpp AND dependency files, which have an empty recipe
+$(OBJ_DIR)/%.o: %.cpp $(DEP_DIR)/%.d
+	@mkdir -p $(@D)
+	@$(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $<
+
+# empty recipe for dependency files. This prevents make errors
+$(DEP):
+
+# now include all dependencies
+# After all they are makefile dependency rules ;)
+include $(wildcard $(DEP))
+
+# main target rule
+# Links all the objects to the executable, writes the linker map file and prints
+# the size information.
+# Note: the recipe writes to $(@D)/$(TARGET) and not to $@. A rule that sets a
+# target-specific TARGET (ex: bitonic_v0) therefore gets an executable with that
+# name, in the directory of this rule's target.
+$(BUILD_DIR)/$(TARGET): $(OBJ)
+	@mkdir -p $(@D)
+	@echo Linking to target: $(TARGET)
+	@echo $(DOCKER) $(CXX) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
+	@$(DOCKER) $(CXX) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
+	@echo
+	@echo Print size information
+	@$(CSIZE) $(@D)/$(TARGET)
+	@echo Done
+
+
+#
+# ================ Default local build rules =================
+# example:
+# make debug
+
+.DEFAULT_GOAL := all
+
+.PHONY: clean
+# Remove the object directory, the dependency directory and the build directory
+clean:
+	@echo Cleaning build directories
+	@rm -rf $(OBJ_DIR) $(DEP_DIR) $(BUILD_DIR)
+
+# Local builds with the default tools.
+# Both flag sets are selected: the project sources are .cpp files, which are
+# compiled with CXXFLAGS, so switching CFLAGS alone would not change the build.
+.PHONY: debug release
+debug: CFLAGS := $(DEB_CFLAGS)
+debug: CXXFLAGS := $(DEB_CXXFLAGS)
+debug: $(BUILD_DIR)/$(TARGET)
+
+release: CFLAGS := $(REL_CFLAGS)
+release: CXXFLAGS := $(REL_CXXFLAGS)
+release: $(BUILD_DIR)/$(TARGET)
+
+#
+# ================ Build rules =================
+#
+
+
+# bitonic_v0: build of code version V0 using nvcc.
+# The target-specific variables below are inherited by the prerequisite, so the
+# link recipe of the main target rule writes $(BUILD_DIR)/bitonic_v0. That file
+# is then copied to $(OUTPUT_DIR).
+.PHONY: bitonic_v0
+bitonic_v0: CC := nvcc
+bitonic_v0: CXX := nvcc
+bitonic_v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=V0
+bitonic_v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=V0
+bitonic_v0: TARGET := bitonic_v0
+bitonic_v0: $(BUILD_DIR)/$(TARGET)
+	@mkdir -p $(OUTPUT_DIR)
+	cp $(BUILD_DIR)/$(TARGET) $(OUTPUT_DIR)/$(TARGET)
+
+
+# Clean build of every code version, for the HPC environment.
+#  - $(MAKE) is used instead of a literal `make`, so that the command line flags
+#    and the jobserver are passed to the sub-make.
+#  - The former goals (distbubbletonic, distbitonic, tests) have no rule in this
+#    Makefile. Only the versions that do have a build rule are listed.
+.PHONY: hpc-build
+hpc-build:
+	$(MAKE) clean
+	$(MAKE) bitonic_v0
+
+
+# Default goal. Declared phony, so a file named `all` can not mask it.
+.PHONY: all
+all: debug bitonic_v0
+# Note:
+#	Add a gcc based make rule here in order for clangd to successfully scan the project files.
+#	Otherwise we do not need the gcc build.
+
--- a/homework_3/src/config.h
+++ b/homework_3/src/config.h
@ -0,0 +1,74 @@
+/*!
+ * \file
+ * \brief   Build configuration file.
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#ifndef CONFIG_H_
+#define CONFIG_H_
+
+#include <cstdint>
+
+/*
+ * Versioning:
+ * - RC1:
+ */
+static constexpr char version[] = "0.0";
+
+/*
+ * Defines for different version of the exercise
+ */
+#define  V0        (0)
+#define  V1        (1)
+#define  V2        (2)
+
+// Fail-safe version selection
+#if !defined CODE_VERSION
+#define CODE_VERSION   V0
+#endif
+
+// Default Data size (in case -q <N> is not present)
+static constexpr size_t DEFAULT_DATA_SIZE   = 1 << 16;
+
+
+/*!
+ * Value type selection
+ *
+ * We support the following compiler types or the <cstdint> that translate to them:
+ *  char       -  unsigned char
+ *  short      -  unsigned short
+ *  int        -  unsigned int
+ *  long       -  unsigned long
+ *  long long  -  unsigned long long
+ *  float
+ *  double
+ */
+using   distValue_t = uint32_t;
+
+/*!
+ * The options of a session (one invocation of the executable).
+ *
+ * @note
+ *  Every member has a default value. The command line parser overrides them.
+ */
+struct config_t {
+    size_t  arraySize   = DEFAULT_DATA_SIZE;    //!< Number of items of the array to sort.
+    bool    exchangeOpt = false;                //!< Flag to request the exchange optimization
+    size_t  pipeline    = 1UL;                  //!< Pipeline stages (1 to disable)
+    bool    validation  = false;                //!< Request a validation of the result at the end.
+    bool    ndebug      = false;                //!< Skips debug trap on DEBUG builds.
+    size_t  perf        = 1;                    //!< How many times the sorting is repeated. Timing
+                                                //!< results are printed for values greater than 1.
+    bool    verbose     = false;                //!< Flag to enable verbose output to stdout.
+};
+
+/*
+ * Exported data types
+ */
+extern config_t config;
+
+
+#endif /* CONFIG_H_ */
--- a/homework_3/src/distsort.cpp
+++ b/homework_3/src/distsort.cpp
@ -0,0 +1,51 @@
+/*!
+ * \file
+ * \brief   Distributed sort implementation
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+#include "utils.hpp"
+#include "distsort.hpp"
+
+
+/*!
+ * Returns the ascending or descending configuration of the node's sequence based on
+ * the current node (MPI process) and the depth of the sorting network
+ *
+ * @param node      [mpi_id_t] The current node (MPI process)
+ * @param depth     [size_t]   The total depth of the sorting network (same for each step for a given network)
+ * @return          [bool]     True if we need ascending configuration, false otherwise
+ */
+bool ascending(mpi_id_t node, size_t depth) noexcept {
+    // Ascending, when the bit of the node id at position `depth` is zero
+    const auto depthBit = node & (1 << depth);
+    return depthBit == 0;
+}
+
+/*!
+ * Returns the node's partner for data exchange during the sorting network iterations
+ * of Bitonic
+ *
+ * @param node      [mpi_id_t] The current node
+ * @param step      [size_t]   The step of the sorting network
+ * @return          [mpi_id_t] The node id of the partner for data exchange
+ */
+mpi_id_t partner(mpi_id_t node, size_t step) noexcept {
+    // The partner id differs from the node id only in the bit at position `step`
+    const auto stepBit = 1 << step;
+    return node ^ stepBit;
+}
+
+
+/*!
+ * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
+ *
+ * @param node      [mpi_id_t] The node for which we check
+ * @param partner   [mpi_id_t] The partner of the data exchange
+ * @param depth     [size_t]   The total depth of the sorting network (same for each step for a given network)
+ * @return          [bool]     True if the node should keep the small values, false otherwise
+ */
+
+bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) {
+    // A node can not exchange data with itself
+    const bool sameNode = (node == partner);
+    if (sameNode) {
+        throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
+    }
+    // Keep the small values when the sorting direction agrees with the id order of the pair
+    const bool lowerId = (node < partner);
+    return ascending(node, depth) == lowerId;
+}
--- a/homework_3/src/distsort.hpp
+++ b/homework_3/src/distsort.hpp
@ -0,0 +1,223 @@
+/*!
+ * \file
+ * \brief   Distributed sort implementation header
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#ifndef DISTBITONIC_H_
+#define DISTBITONIC_H_
+
+#include <vector>
+#include <algorithm>
+#include <parallel/algorithm>
+#include <cmath>
+#include <cstdint>
+#if !defined DEBUG
+#define NDEBUG
+#endif
+#include <cassert>
+
+#include "utils.hpp"
+
+/*
+ * Exported timers
+ */
+extern Timing Timer_total;
+extern Timing Timer_fullSort;
+extern Timing Timer_exchange;
+extern Timing Timer_minmax;
+extern Timing Timer_elbowSort;
+
+
+
+/*
+ * ============================== Sort utilities ==============================
+ */
+
+
+
+/*!
+ * Returns the ascending or descending configuration of the node's sequence based on
+ * the current node (MPI process) and the depth of the sorting network
+ *
+ * @note
+ *  Declared noexcept, to match the exception specification of the definition.
+ *
+ * @param node      [mpi_id_t] The current node (MPI process)
+ * @param depth     [size_t]   The total depth of the sorting network (same for each step for a given network)
+ * @return          [bool]     True if we need ascending configuration, false otherwise
+ */
+bool ascending(mpi_id_t node, size_t depth) noexcept;
+
+/*!
+ * Returns the node's partner for data exchange during the sorting network iterations
+ * of Bitonic
+ *
+ * @note
+ *  Declared noexcept, to match the exception specification of the definition.
+ *
+ * @param node      [mpi_id_t] The current node
+ * @param step      [size_t]   The step of the sorting network
+ * @return          [mpi_id_t] The node id of the partner for data exchange
+ */
+mpi_id_t partner(mpi_id_t node, size_t step) noexcept;
+
+
+/*!
+ * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
+ *
+ * @param node      [mpi_id_t] The node for which we check
+ * @param partner   [mpi_id_t] The partner of the data exchange
+ * @param depth     [size_t]   The total depth of the sorting network (same for each step for a given network)
+ * @return          [bool]     True if the node should keep the small values, false otherwise
+ */
+bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth);
+
+
+
+/*
+ * ============================== Data utilities ==============================
+ */
+
+/*!
+ * Sort a range using the build-in O(Nlog(N)) algorithm
+ *
+ * @tparam RangeT   A range type with random access iterator
+ *
+ * @param data      [RangeT] The data to be sorted
+ * @param ascending [bool]   Flag to indicate the sorting order
+ */
+template<typename RangeT>
+void fullSort(RangeT& data, bool ascending) noexcept {
+    // Sort with __gnu_parallel::sort. Only the comparator depends on the requested order.
+    auto first = data.begin();
+    auto last  = data.end();
+    if (!ascending) {
+        __gnu_parallel::sort(first, last, std::greater<>());
+        return;
+    }
+    __gnu_parallel::sort(first, last, std::less<>());
+}
+
+/*!
+ * Core functionality of sort for shadowed buffer types using
+ * the "elbow sort" algorithm.
+ *
+ * @note:
+ *  This algorithm can not work "in place".
+ *  We use the active buffer as source and the shadow as target.
+ *  At the end we switch which buffer is active and which is the shadow.
+ * @note
+ *  This is the core functionality. Use the elbowSort() function instead
+ *
+ * @tparam ShadowedDataT    A Shadowed buffer type with random access iterator.
+ * @tparam CompT            A Comparison type for binary operation comparisons
+ *
+ * @param data          [ShadowedDataT] The data to sort
+ * @param ascending     [bool]          Flag to indicate the sorting order
+ * @param comp          [CompT]         The binary operator object
+ */
+template<typename ShadowedDataT, typename CompT>
+void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept {
+    auto& src = data.getActive();   // source: the sequence to sort
+    auto& dst = data.getShadow();   // target: receives the sorted sequence
+    const size_t N = data.size();   // item count, used for both buffers
+
+    // The elbow is the minimum item for ascending order, the maximum for descending
+    auto elbow = (ascending) ?
+            std::min_element(src.begin(), src.end()) :
+            std::max_element(src.begin(), src.end());
+    size_t left  = std::distance(src.begin(), elbow);
+    size_t right = left + 1;
+    if (left == N-1)
+        right = 0;                  // wrap around the end of the buffer
+
+    // Walk away from the elbow in both directions (cyclically). In every iteration
+    // the item that comp() orders first is copied to the next slot of the target.
+    for (size_t i = 0 ; i<N ; ++i) {
+        if (!comp(src[left], src[right])) {
+            dst[i] = src[right];
+            right = (right + 1) % N;            // cyclic increment
+        }
+        else {
+            dst[i] = src[left];
+            left = (left == 0) ? N-1 : left -1; // cyclic decrement
+        }
+    }
+    data.switch_active();           // the target buffer becomes the active one
+}
+
+/*!
+ * Sort a shadowed buffer using the "elbow sort" algorithm.
+ *
+ * @tparam ShadowedDataT    A Shadowed buffer type with random access iterator.
+ *
+ * @param data          [ShadowedDataT] The data to sort
+ * @param ascending     [bool]          Flag to indicate the sorting order
+ */
+template<typename ShadowedDataT>
+void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
+    // The comparator has to agree with the requested order
+    if (!ascending) {
+        elbowSortCore(data, ascending, std::greater<>());
+        return;
+    }
+    elbowSortCore(data, ascending, std::less<>());
+}
+
+
+/*!
+ * Takes two sequences and selects either the larger or the smaller items
+ * in one-to-one comparison between them. If the initial sequences are bitonic, then
+ * the result is a bitonic sequence too!
+ *
+ * @tparam ValueT   The underlying type of the sequences
+ *
+ * @param local     [ValueT*]       Pointer to the local sequence
+ * @param remote    [const ValueT*] Pointer to the remote sequence (copied locally by MPI)
+ * @param count     [size_t]        The number of items to process
+ * @param keepSmall [bool]          Flag to indicate if we keep the small items in local sequence
+ */
+template<typename ValueT>
+void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSmall) noexcept {
+    // One-to-one selection between the sequences; the result overwrites the local one
+    ValueT* const last = local + count;
+    std::transform(local, last, remote, local,
+            [&keepSmall](const ValueT& mine, const ValueT& theirs){
+                if (keepSmall)
+                    return std::min(mine, theirs);
+                return std::max(mine, theirs);
+            });
+}
+
+/*
+ * ============================== Sort algorithms ==============================
+ */
+
+
+/*!
+ * A distributed version of the Bitonic sort algorithm.
+ *
+ * The data are first sorted with fullSort(). Then, for every depth of the sorting
+ * network, the node exchanges data with one partner per step and finally sorts
+ * the result with elbowSort().
+ *
+ * @note
+ *  Each MPI process should run an instance of this function.
+ * @note
+ *  NOTE(review): `rank`, `Processes`, `exchange` and `tag` are used in the body, but they
+ *  are neither parameters of this function nor declared in this header. They look like
+ *  leftovers from the MPI version - confirm where they should come from.
+ *
+ * @tparam ShadowedDataT    A Shadowed buffer type with random access iterator.
+ *
+ * @param data          [ShadowedDataT] The local to MPI process data to sort
+ */
+template<typename ShadowedDataT>
+void distBitonic(ShadowedDataT& data) {
+    // Initially sort to create a half part of a bitonic sequence
+    timeCall(Timer_fullSort, fullSort, data, ascending(rank, 0));
+
+    // Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
+    auto p = static_cast<uint32_t>(std::log2(Processes));
+    for (size_t depth = 1; depth <= p; ++depth) {
+        // steps count down from depth-1 to 0 (decrement first, so the unsigned counter never wraps)
+        for (size_t step = depth; step > 0;) {
+            --step;
+            // Find out exchange configuration
+            auto part = partner(rank, step);
+            auto ks = keepSmall(rank, part, depth);
+            // Exchange with partner, keep min-or-max
+            exchange(data, part, ks, tag);
+
+        }
+        // sort - O(N)
+        timeCall(Timer_elbowSort, elbowSort, data, ascending(rank, depth));
+    }
+}
+
+#endif //DISTBITONIC_H_
--- a/homework_3/src/main.cpp
+++ b/homework_3/src/main.cpp
@ -0,0 +1,235 @@
+/*!
+ * \file
+ * \brief   Main application file for PDS HW3 (CUDA)
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <string>
+
+#include "utils.hpp"
+#include "config.h"
+#include "distsort.hpp"
+
+
+// Global session data
+config_t        config;
+distBuffer_t    Data;
+Log             logger;
+
+
+// Mersenne seeded from hw if possible. range: [type_min, type_max]
+std::random_device  rd;
+std::mt19937        gen(rd());
+
+//! Performance timers for each one of the "costly" functions
+Timing Timer_total;
+Timing Timer_fullSort;
+Timing Timer_exchange;
+Timing Timer_minmax;
+Timing Timer_elbowSort;
+
+//! Prepare all the timers for `config.perf` measurement rounds
+void measurements_init() {
+    // Nothing to prepare, unless more than one round is requested
+    if (config.perf <= 1)
+        return;
+
+    const auto rounds = config.perf;
+    Timer_total.init(rounds);
+    Timer_fullSort.init(rounds);
+    Timer_exchange.init(rounds);
+    Timer_minmax.init(rounds);
+    Timer_elbowSort.init(rounds);
+}
+
+//! Move all the timers to the next round of measurements
+void measurements_next() {
+    // Nothing to switch, unless more than one round is requested
+    if (config.perf <= 1)
+        return;
+
+    Timer_total.next();
+    Timer_fullSort.next();
+    Timer_exchange.next();
+    Timer_minmax.next();
+    Timer_elbowSort.next();
+}
+
+/*!
+ * Parse a non-negative decimal number from a command line argument.
+ *
+ * \param text   [const char*]  The text of the argument
+ * \param value  [long&]        Receives the number. It is not modified on failure.
+ * \return  True if the entire text is a non-negative number that fits in a long.
+ */
+static bool parse_number(const char* text, long& value) {
+    char* end = nullptr;
+    errno = 0;
+    const long parsed = std::strtol(text, &end, 10);
+    // reject: no digits at all, trailing garbage, out of range values and negatives
+    if (end == text || *end != '\0' || errno == ERANGE || parsed < 0)
+        return false;
+    value = parsed;
+    return true;
+}
+
+/*!
+ * A small command line argument parser
+ *
+ * Stores the options to the global `config`. The numeric values are validated:
+ * the value of -q must be a number N for which 2^N fits in size_t, and the value
+ * of --perf must be a non-negative number.
+ *
+ * \note  --version and --help print their text and exit(0) directly.
+ *
+ * \param argc  [int]       The number of arguments, as passed to main
+ * \param argv  [char*[]]   The arguments, as passed to main
+ * \return  The status of the operation. False on any invocation error.
+ */
+bool get_options(int argc, char* argv[]){
+    bool status =true;
+
+    // iterate over the passed arguments
+    for (int i=1 ; i<argc ; ++i) {
+        std::string arg(argv[i]);     // get current argument
+
+        if (arg == "-q" || arg == "--array-size") {
+            long q = 0;
+            // Shift a size_t and not an int, so that sizes of 2^31 and above do not overflow
+            if (i+1 < argc && parse_number(argv[++i], q)
+                    && q < std::numeric_limits<size_t>::digits) {
+                config.arraySize = size_t{1} << q;
+            }
+            else {
+                std::cout << "Invocation error. Try -h for details.\n";
+                status = false;
+            }
+        }
+        else if (arg == "--validation") {
+            config.validation = true;
+        }
+        else if (arg == "--perf") {
+            long rounds = 0;
+            if (i+1 < argc && parse_number(argv[++i], rounds)) {
+                config.perf = static_cast<size_t>(rounds);
+            }
+            else {
+                std::cout << "Invocation error. Try -h for details.\n";
+                status = false;
+            }
+        }
+        else if (arg == "--ndebug") {
+            config.ndebug = true;
+        }
+        else if (arg == "-v" || arg == "--verbose") {
+            config.verbose = true;
+        }
+        else if (arg == "--version") {
+            std::cout << "bitonic - A GPU accelerated sort utility\n";
+            std::cout << "version: " << version << "\n\n";
+            exit(0);
+        }
+        else if (arg == "-h" || arg == "--help") {
+            // The name and the description match the --version text and the examples below
+            std::cout << "bitonic - A GPU accelerated sort utility\n\n";
+            std::cout << "  bitonic -q <N> [--validation] [--perf <N>] [--ndebug] [-v]\n";
+            std::cout << "  bitonic -h\n";
+            std::cout << '\n';
+            std::cout << "Options:\n\n";
+            std::cout << "   -q | --array-size <N>\n";
+            std::cout << "      Selects the array size according to size = 2^N\n\n";
+            std::cout << "   --validation\n";
+            std::cout << "      Request a full validation at the end, performed by process rank 0\n\n";
+            std::cout << "   --perf <N> \n";
+            std::cout << "      Enable performance timing measurements and prints, and repeat\n";
+            std::cout << "      the sorting <N> times.\n\n";
+            std::cout << "   --ndebug\n";
+            std::cout << "      Skip debug breakpoint when on debug build.\n\n";
+            std::cout << "   -v | --verbose\n";
+            std::cout << "      Request a more verbose output to stdout.\n\n";
+            std::cout << "   -h | --help\n";
+            std::cout << "      Prints this and exit.\n\n";
+            std::cout << "   --version\n";
+            std::cout << "      Prints version and exit.\n\n";
+            std::cout << "Examples:\n\n";
+            std::cout << "   bitonic -q 24\n";
+            std::cout << "      Runs bitonic with GPU acceleration with 2^24 array points\n\n";
+
+            exit(0);
+        }
+        else {   // parse error
+            std::cout << "Invocation error. Try -h for details.\n";
+            status = false;
+        }
+    }
+
+    return status;
+}
+
+/*!
+ * A simple validator for the sorted data
+ *
+ * @tparam ShadowedDataT    A buffer type that provides begin() and end().
+ *
+ * @param data          [ShadowedDataT] The data to check
+ *
+ * @return              [bool]          True if the data are in ascending order.
+ *                                      An empty buffer counts as sorted.
+ */
+template<typename ShadowedDataT>
+bool validator(ShadowedDataT& data) {
+    // Check the data for real, instead of always reporting success
+    return std::is_sorted(data.begin(), data.end());
+}
+
+/*!
+ * Initializes the environment, must called from each process
+ *
+ * @param argc  [int*]      POINTER to main's argc argument
+ * @param argv  [char***]   POINTER to main's argv argument
+ */
+void init(int* argc, char*** argv) {
+    // Parse the command line first. A bad invocation ends the program.
+    const bool parsed = get_options(*argc, *argv);
+    if (!parsed) {
+        exit(1);
+    }
+
+    // Size the data buffer and prepare the timers
+    Data.resize(config.arraySize);
+    measurements_init();
+}
+
+#if !defined TESTING
+/*!
+ * @return Returns 0, but.... we may throw or exit(0) / exit(1)
+ */
+int main(int argc, char* argv[]) try {
+
+    // Init everything
+    init(&argc, &argv);
+
+    // One sorting round per requested measurement
+    for (size_t it = 0 ; it < config.perf ; ++it) {
+        // Initialize local data
+        logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
+        std::uniform_int_distribution<distValue_t > dis(
+                std::numeric_limits<distValue_t>::min(),
+                std::numeric_limits<distValue_t>::max()
+        );
+        std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
+        // Run distributed sort
+        logger << "Starting distributed sorting ... ";
+        Timer_total.start();
+        distBitonic(Data);
+        Timer_total.stop();
+        measurements_next();
+        logger << " Done." << logger.endl;
+    }
+
+    // Print-outs and validation
+    if (config.perf > 1) {
+        // Timing::print_duration() takes the duration and the label only
+        Timing::print_duration(Timer_total.median(),    "Total     ");
+        Timing::print_duration(Timer_fullSort.median(), "Full-Sort ");
+        Timing::print_duration(Timer_exchange.median(), "Exchange  ");
+        Timing::print_duration(Timer_minmax.median(),   "Min-Max   ");
+        Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort");
+    }
+    if (config.validation) {
+        // If requested, we have the chance to fail!
+        std::cout << "[Validation] Results validation ...";
+        bool val = validator(Data);
+        // green (32) for success, red (31) for failure
+        std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[31m [FAILED] \x1B[0m\n");
+    }
+    return 0;
+}
+catch (std::exception& e) {
+    //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
+    std::cerr << "Error: " << e.what() << '\n';
+    exit(1);
+}
+
+#else
+
+#include <gtest/gtest.h>
+#include <exception>
+
+/*!
+ * The testing version of our program
+ *
+ * @return The result of RUN_ALL_TESTS(), or 1 if an exception reached main.
+ */
+GTEST_API_ int main(int argc, char **argv) try {
+   testing::InitGoogleTest(&argc, argv);
+   return RUN_ALL_TESTS();
+}
+catch (std::exception& e) {
+    std::cout << "Exception: " << e.what() << '\n';
+    // Flowing off the end of this handler would make main return 0 (success)
+    return 1;
+}
+
+#endif
--- a/homework_3/src/utils.hpp
+++ b/homework_3/src/utils.hpp
@ -0,0 +1,270 @@
+/**
+ * \file
+ * \brief   Utilities header
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+#ifndef UTILS_HPP_
+#define UTILS_HPP_
+
+#include <vector>
+#include <iostream>
+#include <chrono>
+#include <unistd.h>
+#include <algorithm>
+
+#include "config.h"
+
+
+/*!
+ * @brief A std::vector wrapper with 2 vectors, an active and a shadow.
+ *
+ * This type exposes the standard vector functionality of the active vector.
+ * The shadow can be used when we need to use the vector as mutable
+ * data in algorithms that can not support "in-place" editing (like elbow-sort for example)
+ *
+ * @tparam Value_t  the underlying data type of the vectors
+ */
+template <typename Value_t>
+struct ShadowedVec_t {
+    // STL requirements
+    using value_type     = Value_t;
+    using iterator       = typename std::vector<Value_t>::iterator;
+    using const_iterator = typename std::vector<Value_t>::const_iterator;
+    using size_type      = typename std::vector<Value_t>::size_type;
+
+    // Default constructor
+    ShadowedVec_t() = default;
+
+    // Copy and move constructors.
+    // They have to be declared: the user-declared move assignment below would otherwise
+    // delete the implicit copy constructor and suppress the implicit move constructor.
+    ShadowedVec_t(const ShadowedVec_t&) = default;
+    ShadowedVec_t(ShadowedVec_t&&) noexcept = default;
+
+    // Constructor from an std::vector. The shadow gets the same size as the active buffer.
+    explicit ShadowedVec_t(const std::vector<Value_t>& vec)
+            : North(vec), South(), active(north) {
+        South.resize(North.size());
+    }
+
+    explicit ShadowedVec_t(std::vector<Value_t>&& vec)
+            : North(std::move(vec)), South(), active(north) {
+        South.resize(North.size());
+    }
+
+    // Copy assignment operator
+    ShadowedVec_t& operator=(const ShadowedVec_t& other) {
+        if (this != &other) { // Avoid self-assignment
+            North = other.North;
+            South = other.South;
+            active = other.active;
+        }
+        return *this;
+    }
+
+    // Move assignment operator
+    ShadowedVec_t& operator=(ShadowedVec_t&& other) noexcept {
+        if (this != &other) { // Avoid self-assignment
+            North = std::move(other.North);
+            South = std::move(other.South);
+            active = other.active;
+
+            // There is no need to zero out other since it is valid but in a non-defined state
+        }
+        return *this;
+    }
+
+    // Type accessors
+    std::vector<Value_t>& getActive() { return (active == north) ? North : South; }
+    std::vector<Value_t>& getShadow() { return (active == north) ? South : North; }
+    const std::vector<Value_t>& getActive() const { return (active == north) ? North : South; }
+    const std::vector<Value_t>& getShadow() const { return (active == north) ? South : North; }
+
+    // Swap the roles of the two vectors (no data are copied)
+    void switch_active() { active = (active == north) ? south : north; }
+
+    // Dispatch vector functionality to active vector
+    Value_t& operator[](size_type index) { return getActive()[index]; }
+    const Value_t& operator[](size_type index) const { return getActive()[index]; }
+
+    Value_t& at(size_type index) { return getActive().at(index); }
+    const Value_t& at(size_type index) const { return getActive().at(index); }
+
+    // Note: the following modifiers change the active vector only
+    void push_back(const Value_t& value) { getActive().push_back(value); }
+    void push_back(Value_t&& value)      { getActive().push_back(std::move(value)); }
+    void pop_back()                      { getActive().pop_back(); }
+    Value_t& front() { return getActive().front(); }
+    Value_t& back()  { return getActive().back(); }
+    const Value_t& front() const { return getActive().front(); }
+    const Value_t& back()  const { return getActive().back(); }
+
+    iterator begin() { return getActive().begin(); }
+    const_iterator begin() const { return getActive().begin(); }
+    iterator end() { return getActive().end(); }
+    const_iterator end() const { return getActive().end(); }
+
+    size_type size() const { return getActive().size(); }
+    // resize() and reserve() are applied to both vectors
+    void resize(size_t new_size) {
+        North.resize(new_size);
+        South.resize(new_size);
+    }
+
+    void reserve(size_t new_capacity) {
+        North.reserve(new_capacity);
+        South.reserve(new_capacity);
+    }
+    [[nodiscard]] size_t capacity() const { return getActive().capacity(); }
+    [[nodiscard]] bool empty() const { return getActive().empty(); }
+
+    void clear() { getActive().clear(); }
+    void swap(std::vector<Value_t>& other) { getActive().swap(other); }
+
+    // Comparisons of the active vectors. They are const, so const objects can be compared too.
+    bool operator== (const ShadowedVec_t& other) const { return getActive() == other.getActive(); }
+    bool operator!= (const ShadowedVec_t& other) const { return getActive() != other.getActive(); }
+    bool operator== (const std::vector<value_type>& other) const { return getActive() == other; }
+    bool operator!= (const std::vector<value_type>& other) const { return getActive() != other; }
+
+private:
+    std::vector<Value_t> North{};       //!< Actual buffer to be used either as active or shadow
+    std::vector<Value_t> South{};       //!< Actual buffer to be used either as active or shadow
+    enum {
+        north, south
+    } active{north};                    //!< Flag to select between North and South buffer
+};
+
+/*
+ * Exported data types
+ */
+using distBuffer_t = ShadowedVec_t<distValue_t>;
+extern distBuffer_t Data;
+
+/*!
+ * A Logger for entire program.
+ */
+struct Log {
+    struct Endl {} endl;    //!< Tag object. Streaming it ends the current log line.
+
+    //! Stream a value to stdout, if the verbose output is enabled.
+    //! The first value of each line is preceded by the "[Log]: " prefix.
+    template<typename T>
+    Log &operator<<(T &&t) {
+        if (!config.verbose)
+            return *this;
+
+        if (line_)
+            std::cout << "[Log]: ";
+        std::cout << t;
+        line_ = false;          // we are in the middle of a line from now on
+        return *this;
+    }
+
+    //! End the current line. The next value will start a new one.
+    Log &operator<<(Endl) {
+        if (!config.verbose)
+            return *this;
+
+        std::cout << '\n';
+        line_ = true;
+        return *this;
+    }
+
+private:
+    bool line_{true};       //!< True when the next value is the first of a line
+};
+
+extern Log logger;
+
+/*!
+ * A small timing utility based on chrono that supports timing rounds
+ * and returning the median of them. Time can accumulate to the measurement
+ * for each round.
+ */
+struct Timing {
+    using Tpoint = std::chrono::steady_clock::time_point;
+    using Tduration = std::chrono::microseconds;
+    using microseconds = std::chrono::microseconds;
+    using milliseconds = std::chrono::milliseconds;
+    using seconds = std::chrono::seconds;
+
+    //! Setup measurement rounds. All the rounds are zeroed and the first one is selected.
+    //! A request for zero rounds keeps one slot, so that stop() and next() stay valid.
+    void init(size_t rounds) {
+        duration_.assign((rounds > 0) ? rounds : 1, Tduration::zero());
+        current_ = 0;
+    }
+
+    //! tool to mark the starting point
+    Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); }
+
+    //! tool to mark the ending point. The elapsed time since the last start()
+    //! is accumulated to the current round.
+    Tpoint stop() noexcept {
+        Tpoint now = std::chrono::steady_clock::now();
+        duration_[current_] += dt(now, mark_);
+        return now;
+    }
+
+    //! Switch timing slot (wraps around to the first slot after the last one)
+    void next() noexcept {
+        ++current_;
+        current_ %= duration_.size();
+    }
+
+    //! Returns the median of the rounds.
+    //! \note The rounds are sorted in place, so their original order is lost.
+    Tduration& median() noexcept {
+        std::sort(duration_.begin(), duration_.end());
+        return duration_[duration_.size()/2];
+    }
+
+    //! A duration calculation utility
+    static Tduration dt(Tpoint t2, Tpoint t1) noexcept {
+        return std::chrono::duration_cast<Tduration>(t2 - t1);
+    }
+
+    //! Tool to print the time interval.
+    //! The unit is selected from the value: [usec] below 10000 usec, [msec] below
+    //! 10000 msec and [sec] with two decimal digits otherwise.
+    static void print_duration(const Tduration& duration, const char *what) noexcept {
+        const auto usec = std::chrono::duration_cast<microseconds>(duration).count();
+        const auto msec = std::chrono::duration_cast<milliseconds>(duration).count();
+
+        if (usec < 10000)
+            std::cout << "[Timing] " << what << ": " << usec << " [usec]\n";
+        else if (msec < 10000)
+            std::cout << "[Timing] " << what << ": " << msec << " [msec]\n";
+        else {
+            const auto sec  = std::chrono::duration_cast<seconds>(duration).count();
+            const auto csec = (msec % 1000) / 10;   // hundredths of a second, 0..99
+            // zero-pad the fraction: 12 sec and 50 msec is 12.05 and not 12.5
+            std::cout << "[Timing] " << what << ": " << sec << '.'
+                      << ((csec < 10) ? "0" : "") << csec << " [sec]\n";
+        }
+
+    }
+
+private:
+    size_t current_{0};                     //!< The index of the current round
+    Tpoint mark_{};                         //!< The time point of the last start()
+    std::vector<Tduration> duration_{1};    //!< One accumulated duration per round
+};
+
+/*!
+ * A "high level function"-like utility macro to forward a function call
+ * and accumulate the execution time to the corresponding timing object.
+ *
+ * @param   Tim     The Timing object [Needs to have methods start() and stop()]
+ * @param   Func    The function name
+ * @param   ...     The arguments to pass to function (the preprocessor way)
+ */
+#define timeCall(Tim, Func, ...)    \
+    Tim.start();                    \
+    Func(__VA_ARGS__);              \
+    Tim.stop();                     \
+
+
+/*!
+ * A utility to check if a number is power of two
+ *
+ * @tparam Integral     The integral type of the number to check
+ * @param x             The number to check
+ * @return              True if it is power of 2, false otherwise
+ */
+template <typename Integral>
+constexpr inline bool isPowerOfTwo(Integral x) noexcept {
+    // A power of two is non-zero and has exactly one bit set; x & (x-1) clears the lowest set bit
+    return (x != 0) && ((x & (x - 1)) == 0);
+}
+
+
+#endif /* UTILS_HPP_ */
--- a/homework_3/test/gtest/gtest/gtest-all.cpp
+++ b/homework_3/test/gtest/gtest/gtest-all.cpp
--- a/homework_3/test/gtest/gtest/gtest.h
+++ b/homework_3/test/gtest/gtest/gtest.h
--- a/homework_3/test/tests.cpp
+++ b/homework_3/test/tests.cpp
@ -0,0 +1,34 @@
+/**
+ * \file
+ * \brief   PDS HW3 tests
+ *
+ * To run these test execute:
+ *  ...
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#include <gtest/gtest.h>
+
+/*
+ * Global fixtures
+ */
+
+//! Test fixture for the bitonic tests. Both suite-level hooks are empty for now.
+class TCUDAbitonic : public ::testing::Test {
+protected:
+    static void SetUpTestSuite() { }        // nothing to prepare yet
+
+    static void TearDownTestSuite() { }     // nothing to release yet
+};
+
+
+/*
+ * Placeholder test. It always passes and only proves that the test
+ * executable builds and runs.
+ */
+TEST_F(TCUDAbitonic, test1) {
+    EXPECT_TRUE(true);
+}
Author	SHA1	Message	Date
Christos Choutouridis	146e975ac1	HW3: [no compile] A first clean up	2025-01-21 22:52:28 +02:00
Christos Choutouridis	2ff6ae171a	HW3: [No Compile] Init HW3 with HW2 files	2025-01-21 22:25:49 +02:00