From 1fe5ab4da72fc9a3c01b47a3dca71479cac52755 Mon Sep 17 00:00:00 2001 From: Christos Choutouridis Date: Wed, 29 Jan 2025 00:48:45 +0200 Subject: [PATCH] HW3: [WIP] V0 version added --- homework_3/Makefile | 67 +++++----- homework_3/exersize.md | 33 +++++ homework_3/src/bitonicsort.hpp | 145 +++++++++++++++++++++ homework_3/src/config.h | 5 +- homework_3/src/distsort.cpp | 51 -------- homework_3/src/distsort.hpp | 223 --------------------------------- homework_3/src/main.cpp | 50 +++----- homework_3/src/utils.hpp | 118 ----------------- homework_3/test/tests.cpp | 3 +- 9 files changed, 229 insertions(+), 466 deletions(-) create mode 100644 homework_3/exersize.md create mode 100644 homework_3/src/bitonicsort.hpp delete mode 100644 homework_3/src/distsort.cpp delete mode 100644 homework_3/src/distsort.hpp diff --git a/homework_3/Makefile b/homework_3/Makefile index 1f6e0e3..4856d50 100644 --- a/homework_3/Makefile +++ b/homework_3/Makefile @@ -25,12 +25,12 @@ PROJECT := PDS_homework_3 TARGET := bitonic # Source directories list(space seperated). Makefile-relative path, UNDER current directory. -SRC_DIR_LIST := src test test/gtest +SRC_DIR_LIST := src #test test/gtest # Include directories list(space seperated). Makefile-relative path. -INC_DIR_LIST := src \ - test \ - test/gtest/ \ +INC_DIR_LIST := src +# test \ +# test/gtest/ \ # Exclude files list(space seperated). Filenames only. 
@@ -45,10 +45,10 @@ OUTPUT_DIR := out # ========== Compiler settings ========== # Compiler flags for debug and release -DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11 -fopenmp -REL_CFLAGS := -Wall -Wextra -O3 -std=c11 -fopenmp -DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17 -fopenmp -REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17 -fopenmp +DEB_CFLAGS := -DDEBUG -g3 -std=c11 -Xcompiler "-Wall -Wextra" +REL_CFLAGS := -O3 -std=c11 -Xcompiler "-Wall -Wextra" +DEB_CXXFLAGS := -DDEBUG -g3 -std=c++17 -Xcompiler "-Wall -Wextra" +REL_CXXFLAGS := -O3 -std=c++17 -Xcompiler "-Wall -Wextra" # Pre-defines # PRE_DEFS := MYCAB=1729 SUPER_MODE @@ -56,15 +56,15 @@ PRE_DEFS := # ============== Linker settings ============== # Linker flags (example: -pthread -lm) -LDFLAGS := -pthread +LDFLAGS := # Map output file -MAP_FILE := output.map -MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE) +MAP_FILE := # output.map +MAP_FLAG := # -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE) # ============== Docker settings ============== # We need: -# - Bind the entire project directory(the dir that icludes all the code) as volume. +# - Bind the entire project directory(the dir that includes all the code) as volume. # - In docker instance, change to working directory(where the makefile is). DOCKER_VOL_DIR := $(shell pwd) DOCKER_WRK_DIR := @@ -85,6 +85,7 @@ CFLAGS := $(DEB_CFLAGS) CXXFLAGS := $(DEB_CXXFLAGS) CXX := g++ #mpic++ CC := gcc #mpicc +LINKER := g++ # # =========== Main body and Patterns =========== @@ -117,37 +118,37 @@ DEP := $(foreach file,$(SRC:%.cpp=%.d),$(DEP_DIR)/$(file)) # It is based on Tom Tromey's method. 
# # Invoke cpp to create makefile rules with dependencies for each source file -$(DEP_DIR)/%.d: %.c - @mkdir -p $(@D) - @$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $< +#$(DEP_DIR)/%.d: %.c +# @mkdir -p $(@D) +# @$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $< # c file objects depent on .c AND dependency files, which have an empty recipe -$(OBJ_DIR)/%.o: %.c $(DEP_DIR)/%.d +$(OBJ_DIR)/%.o: %.c @mkdir -p $(@D) - @$(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $< + $(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $< -$(DEP_DIR)/%.d: %.cpp - @mkdir -p $(@D) - @$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $< +#$(DEP_DIR)/%.d: %.cpp +# @mkdir -p $(@D) +# @$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $< -# cpp file objects depent on .cpp AND dependency files, which have an empty recipe -$(OBJ_DIR)/%.o: %.cpp $(DEP_DIR)/%.d +# cpp file objects depend on .cpp AND dependency files, which have an empty recipe +$(OBJ_DIR)/%.o: %.cpp @mkdir -p $(@D) - @$(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $< + $(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $< # empty recipe for dependency files. 
This prevents make errors -$(DEP): +#$(DEP): # now include all dependencies # After all they are makefile dependency rules ;) -include $(wildcard $(DEP)) +#include $(wildcard $(DEP)) # main target rule $(BUILD_DIR)/$(TARGET): $(OBJ) @mkdir -p $(@D) @echo Linking to target: $(TARGET) - @echo $(DOCKER) $(CXX) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET) - @$(DOCKER) $(CXX) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET) + @echo $(DOCKER) $(LINKER) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET) + @$(DOCKER) $(LINKER) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET) @echo @echo Print size information @$(CSIZE) $(@D)/$(TARGET) @@ -179,10 +180,12 @@ release: $(BUILD_DIR)/$(TARGET) # -bitonic_v0: CC := nvcc -bitonic_v0: CXX := nvcc +bitonic_v0: CC := nvcc -x cu +bitonic_v0: CXX := nvcc -x cu +bitonic_v0: LINKER := nvcc bitonic_v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=V0 bitonic_v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=V0 +bitonic_v0: OUTPUT_DIR := $(OUTPUT_DIR)/v0 bitonic_v0: TARGET := bitonic_v0 bitonic_v0: $(BUILD_DIR)/$(TARGET) @mkdir -p $(OUTPUT_DIR) @@ -191,11 +194,7 @@ bitonic_v0: $(BUILD_DIR)/$(TARGET) hpc-build: make clean - make distbubbletonic - make clean - make distbitonic - make clean - make tests + make bitonic_v0 all: debug bitonic_v0 diff --git a/homework_3/exersize.md b/homework_3/exersize.md new file mode 100644 index 0000000..26decdd --- /dev/null +++ b/homework_3/exersize.md @@ -0,0 +1,33 @@ +Parallel & Distributed Computer Systems HW3 + +January, 2025 + +Write a program that sorts $N$ integers in ascending order, using CUDA. + +The program must perform the following tasks: + +- The user specifies a positive integer $q$. + +- Start a process with an array of $N = 2^q$ random integers in each process. + +- Sort all $N$ elements in ascending order. + +- Check the correctness of the final result. + +Your implementation should be based on the following steps: + +V0. A kernel where each thread only compares and exchanges.
This "eliminates" the 1:n innermost loop. Easy to write, but too many function calls and global synchronizations. + +V1. Include the k inner loop in the kernel function. How do we handle the synchronization? Fewer calls, fewer global synchronizations. Faster than V0! + +V2. Modify the kernel of V1 to work with local memory instead of global. + +You must deliver: +- A report (about $3-4$ pages) that describes your parallel algorithm and implementation. + +- Your comments on the speed of your parallel program compared to the serial sort, after trying you program on aristotelis for $q = [20:27]$. + +- The source code of your program uploaded online. + +Ethics: If you use code found on the web or by an LLM, you should mention your source and the changes you made. You may work in pairs; both partners must submit a single report with both names. +Deadline: 2 February, $2025$. diff --git a/homework_3/src/bitonicsort.hpp b/homework_3/src/bitonicsort.hpp new file mode 100644 index 0000000..100cb2d --- /dev/null +++ b/homework_3/src/bitonicsort.hpp @@ -0,0 +1,145 @@ +/*! + * \file + * \brief Bitonic sort CUDA implementation header + * + * \author + * Christos Choutouridis AEM:8997 + * + */ + +#ifndef BITONICSORTCUDA_H_ +#define BITONICSORTCUDA_H_ + +#include +#include +#include +#include +#include + +#include "utils.hpp" + +/* + * Exported timers + */ +extern Timing Timer_total; + +using threadId_t = size_t; + + +/* + * ============================== Sort utilities ============================== + */ +
+/*!
+ * Returns the ascending or descending configuration (up/down phase) of the thread id
+ * depending on the current stage
+ *
+ * @param tid     [threadId_t]  The current thread
+ * @param stage   [size_t]      The current stage of the sorting network (same for each step)
+ * @return        [bool]        True if we need ascending configuration, false otherwise
+ */
+__device__ inline bool ascending(threadId_t tid, size_t stage) noexcept {
+    // Build the mask in threadId_t width. A plain `1` is an int, so `1 << stage`
+    // is undefined for stage >= 31 although tid is a size_t.
+    return (tid & (threadId_t{1} << stage)) == 0;
+}
+
+/*!
+ * Returns the thread's partner for data exchange during the sorting network iterations
+ * of Bitonic
+ *
+ * @param tid     [threadId_t]  The current thread
+ * @param step    [size_t]      The step of the sorting network
+ * @return        [threadId_t]  The thread id of the partner for data exchange
+ */
+__device__ inline threadId_t partner(threadId_t tid, size_t step) noexcept {
+    // Same width concern as in ascending(): shift a threadId_t, not an int.
+    return tid ^ (threadId_t{1} << step);
+}
+
+
+/*!
+ * Predicate to check if a thread keeps the small numbers during the bitonic sort network exchange.
+ *
+ * @param tid     [threadId_t]  The thread for which we check
+ * @param partner [threadId_t]  The partner of the data exchange
+ * @param stage   [size_t]      The current stage of the sorting network (same for each step)
+ * @return        [bool]        True if the thread should keep the small values, false otherwise
+ */
+__device__ inline bool keepSmall(threadId_t tid, threadId_t partner, size_t stage) noexcept {
+    return ascending(tid, stage) == (tid < partner);
+}
+
+
+
+/*
+ * ============================== Sort algorithms ==============================
+ */
+
+
+/*!
+ * Compare-and-exchange of two elements of the device buffer.
+ *
+ * @tparam ValueT     The underlying type of the data
+ *
+ * @param data        [ValueT*]     The buffer holding both elements
+ * @param tid         [threadId_t]  Index of the caller's own element
+ * @param partner     [threadId_t]  Index of the partner element
+ * @param keepSmall   [bool]        True if data[tid] must end up holding the smaller value,
+ *                                  false if it must end up holding the larger one
+ */
+template <typename ValueT>
+__device__ void cudaExchange(ValueT* data, threadId_t tid, threadId_t partner, bool keepSmall) {
+    // Indices are threadId_t (not int) so large buffers are not truncated.
+    const bool outOfOrder = keepSmall ? (data[tid] > data[partner])
+                                      : (data[tid] < data[partner]);
+    if (outOfOrder) {
+        ValueT temp = data[tid];
+        data[tid] = data[partner];
+        data[partner] = temp;
+    }
+}
+
+
+/*!
+ * One compare-exchange step of the bitonic sorting network.
+ *
+ * @note
+ * Only the lower-indexed thread of each (tid, partner) pair performs the exchange.
+ * If both threads of a pair call cudaExchange() on the same two elements they race:
+ * both evaluate the same swap condition on unsynchronized data and the pair may be
+ * swapped twice, undoing the exchange.
+ *
+ * @tparam ValueT     The underlying type of the data
+ *
+ * @param data        [ValueT*]  The buffer to work on
+ * @param n           [size_t]   The number of elements in the buffer
+ * @param step        [size_t]   The step of the sorting network (selects the partner bit)
+ * @param stage       [size_t]   The stage of the sorting network (selects the direction bit)
+ */
+template <typename ValueT>
+__global__ void bitonicStep(ValueT* data, size_t n, size_t step, size_t stage) {
+    // Compute global thread ID. Widen before multiplying so the product is not
+    // truncated to 32 bits.
+    const threadId_t tid = threadId_t{blockIdx.x} * blockDim.x + threadIdx.x;
+    if (tid >= n)
+        return;
+
+    const threadId_t pid = partner(tid, step);
+    if (tid < pid && pid < n) {     // one worker per pair, partner inside the buffer
+        cudaExchange(data, tid, pid, keepSmall(tid, pid, stage));
+    }
+}
+
+
+/*!
+ * Bitonic sort of a host container, executed on the CUDA device (V0).
+ *
+ * Copies the data to the device, walks the sorting network launching one
+ * bitonicStep kernel per (stage, step) pair and copies the result back in place.
+ *
+ * @note
+ * The network depth is floor(log2(size)); partners outside the buffer are skipped.
+ * NOTE(review): this presumably requires size to be a power of two (the exercise
+ * uses N = 2^q) -- confirm before using other sizes.
+ * @note
+ * NOTE(review): return codes of the CUDA runtime calls are not checked.
+ *
+ * @tparam DataT      A contiguous buffer type providing value_type, size() and data()
+ *
+ * @param data        [DataT]  The data to sort; it is overwritten with the result
+ */
+template <typename DataT>
+void bitonicSort(DataT& data) {
+    using value_t = typename DataT::value_type;
+
+    const size_t size = data.size();
+    if (size < 2)
+        return;     // nothing to sort; also avoids log2(0) and a zero-block launch
+
+    const size_t bytes = size * sizeof(value_t);
+    value_t* dev_data = nullptr;
+
+    cudaMalloc(&dev_data, bytes);
+    cudaMemcpy(dev_data, data.data(), bytes, cudaMemcpyHostToDevice);
+
+    const unsigned int Nthreads = 1024;
+    const unsigned int Nblocks  = (unsigned int)((size + Nthreads - 1) / Nthreads);
+
+    // Integer floor(log2(size)): same value as the floating point log2() for size >= 1
+    size_t max_depth = 0;
+    for (size_t s = size; s > 1; s >>= 1)
+        ++max_depth;
+
+    for (size_t stage = 1; stage <= max_depth; ++stage) {
+        for (size_t step = stage; step > 0; ) {
+            --step;
+            bitonicStep<<<Nblocks, Nthreads>>>(dev_data, size, step, stage);
+            cudaDeviceSynchronize();
+        }
+    }
+
+    cudaMemcpy(data.data(), dev_data, bytes, cudaMemcpyDeviceToHost);
+    cudaFree(dev_data);
+}
+
+
+
+
+
+#endif //BITONICSORTCUDA_H_
diff --git a/homework_3/src/config.h b/homework_3/src/config.h index b28387b..3ff1235 100644 --- a/homework_3/src/config.h +++ b/homework_3/src/config.h @@ -35,7 +35,7 @@ static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16; /*! - * Value type selection + * Value and Buffer type selection * * We support the following compiler types or the that translate to them: * char - unsigned char @@ -46,7 +46,8 @@ static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16; * float * double */ -using distValue_t = uint32_t; +using Value_t = uint32_t; +using Data_t = std::vector; /*! * Session option for each invocation of the executable. diff --git a/homework_3/src/distsort.cpp b/homework_3/src/distsort.cpp deleted file mode 100644 index b479983..0000000 --- a/homework_3/src/distsort.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/*! - * \file - * \brief Distributed sort implementation - * - * \author - * Christos Choutouridis AEM:8997 - * - */ -#include "utils.hpp" -#include "distsort.hpp" - - -/*!
- * Returns the ascending or descending configuration of the node's sequence based on - * the current node (MPI process) and the depth of the sorting network - * - * @param node [mpi_id_t] The current node (MPI process) - * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) - * @return [bool] True if we need ascending configuration, false otherwise - */ -bool ascending(mpi_id_t node, size_t depth) noexcept { - return !(node & (1 << depth)); -} - -/*! - * Returns the node's partner for data exchange during the sorting network iterations - * of Bitonic - * - * @param node [mpi_id_t] The current node - * @param step [size_t] The step of the sorting network - * @return [mpi_id_t] The node id of the partner for data exchange - */ -mpi_id_t partner(mpi_id_t node, size_t step) noexcept { - return (node ^ (1 << step)); -} - - -/*! - * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange. - * - * @param node [mpi_id_t] The node for which we check - * @param partner [mpi_id_t] The partner of the data exchange - * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) - * @return [bool] True if the node should keep the small values, false otherwise - */ - -bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) { - if (node == partner) - throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n"); - return ascending(node, depth) == (node < partner); -} diff --git a/homework_3/src/distsort.hpp b/homework_3/src/distsort.hpp deleted file mode 100644 index 1517d60..0000000 --- a/homework_3/src/distsort.hpp +++ /dev/null @@ -1,223 +0,0 @@ -/*! 
- * \file - * \brief Distributed sort implementation header - * - * \author - * Christos Choutouridis AEM:8997 - * - */ - -#ifndef DISTBITONIC_H_ -#define DISTBITONIC_H_ - -#include -#include -#include -#include -#include -#if !defined DEBUG -#define NDEBUG -#endif -#include - -#include "utils.hpp" - -/* - * Exported timers - */ -extern Timing Timer_total; -extern Timing Timer_fullSort; -extern Timing Timer_exchange; -extern Timing Timer_minmax; -extern Timing Timer_elbowSort; - - - -/* - * ============================== Sort utilities ============================== - */ - - - -/*! - * Returns the ascending or descending configuration of the node's sequence based on - * the current node (MPI process) and the depth of the sorting network - * - * @param node [mpi_id_t] The current node (MPI process) - * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) - * @return [bool] True if we need ascending configuration, false otherwise - */ -bool ascending(mpi_id_t node, size_t depth); - -/*! - * Returns the node's partner for data exchange during the sorting network iterations - * of Bitonic - * - * @param node [mpi_id_t] The current node - * @param step [size_t] The step of the sorting network - * @return [mpi_id_t] The node id of the partner for data exchange - */ -mpi_id_t partner(mpi_id_t node, size_t step); - - -/*! - * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange. - * - * @param node [mpi_id_t] The node for which we check - * @param partner [mpi_id_t] The partner of the data exchange - * @param depth [size_t] The total depth of the sorting network (same for each step for a given network) - * @return [bool] True if the node should keep the small values, false otherwise - */ -bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth); - - - -/* - * ============================== Data utilities ============================== - */ - -/*! 
- * Sort a range using the build-in O(Nlog(N)) algorithm - * - * @tparam RangeT A range type with random access iterator - * - * @param data [RangeT] The data to be sorted - * @param ascending [bool] Flag to indicate the sorting order - */ -template -void fullSort(RangeT& data, bool ascending) noexcept { - // Use introsort from stdlib++ here, unless ... __gnu_parallel - if (ascending) { - __gnu_parallel::sort(data.begin(), data.end(), std::less<>()); - } - else { - __gnu_parallel::sort(data.begin(), data.end(), std::greater<>()); - } -} - -/*! - * Core functionality of sort for shadowed buffer types using - * the "elbow sort" algorithm. - * - * @note: - * This algorithm can not work "in place". - * We use the active buffer as source and the shadow as target. - * At the end we switch which buffer is active and which is the shadow. - * @note - * This is the core functionality. Use the elbowSort() function instead - * - * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. - * @tparam CompT A Comparison type for binary operation comparisons - * - * @param data [ShadowedDataT] The data to sort - * @param ascending [bool] Flag to indicate the sorting order - * @param comp [CompT] The binary operator object - */ -template -void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept { - auto& active = data.getActive(); // Get the source vector (the data to sort) - auto& shadow = data.getShadow(); // Get the target vector (the sorted data) - - size_t N = data.size(); // The total size is the same or both vectors - size_t left = std::distance( - active.begin(), - (ascending) ? - std::min_element(active.begin(), active.end()) : - std::max_element(active.begin(), active.end()) - ); // start 'left' from elbow of the bitonic - size_t right = (left == N-1) ? 
0 : left + 1; - - // Walk in opposite directions from elbow and insert-sort to target vector - for (size_t i = 0 ; i -void elbowSort(ShadowedDataT& data, bool ascending) noexcept { - if (ascending) - elbowSortCore(data, ascending, std::less<>()); - else - elbowSortCore(data, ascending, std::greater<>()); -} - - -/*! - * Takes two sequences and selects either the larger or the smaller items - * in one-to-one comparison between them. If the initial sequences are bitonic, then - * the result is a bitonic sequence too! - * - * @tparam ValueT The underlying type of the sequences - * - * @param local [ValueT*] Pointer to the local sequence - * @param remote [const ValueT*] Pointer to the remote sequence (copied locally by MPI) - * @param count [size_t] The number of items to process - * @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence - */ -template -void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSmall) noexcept { - std::transform( - local, local + count, - remote, - local, - [&keepSmall](const ValueT& a, const ValueT& b){ - return (keepSmall) ? std::min(a, b) : std::max(a, b); - }); -} - -/* - * ============================== Sort algorithms ============================== - */ - - -/*! - * A distributed version of the Bitonic sort algorithm. - * - * @note - * Each MPI process should run an instance of this function. - * - * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. 
- * - * @param data [ShadowedDataT] The local to MPI process data to sort - * @param Processes [mpi_id_t] The total number of MPI processes - * @param rank [mpi_id_t] The current process id - */ -template -void distBitonic(ShadowedDataT& data) { - // Initially sort to create a half part of a bitonic sequence - timeCall(Timer_fullSort, fullSort, data, ascending(rank, 0)); - - // Run through sort network using elbow-sort ( O(LogN * LogN) iterations ) - auto p = static_cast(std::log2(Processes)); - for (size_t depth = 1; depth <= p; ++depth) { - for (size_t step = depth; step > 0;) { - --step; - // Find out exchange configuration - auto part = partner(rank, step); - auto ks = keepSmall(rank, part, depth); - // Exchange with partner, keep nim-or-max - exchange(data, part, ks, tag); - - } - // sort - O(N) - timeCall(Timer_elbowSort, elbowSort, data, ascending(rank, depth)); - } -} - -#endif //DISTBITONIC_H_ diff --git a/homework_3/src/main.cpp b/homework_3/src/main.cpp index fc7ec15..64cba4e 100644 --- a/homework_3/src/main.cpp +++ b/homework_3/src/main.cpp @@ -14,34 +14,26 @@ #include "utils.hpp" #include "config.h" -#include "distsort.hpp" +#include "bitonicsort.hpp" // Global session data +Data_t Data; config_t config; -distBuffer_t Data; Log logger; - // Mersenne seeded from hw if possible. range: [type_min, type_max] std::random_device rd; std::mt19937 gen(rd()); //! Performance timers for each one of the "costly" functions Timing Timer_total; -Timing Timer_fullSort; -Timing Timer_exchange; -Timing Timer_minmax; -Timing Timer_elbowSort; + //! 
Init timing objects for extra rounds void measurements_init() { if (config.perf > 1) { Timer_total.init(config.perf); - Timer_fullSort.init(config.perf); - Timer_exchange.init(config.perf); - Timer_minmax.init(config.perf); - Timer_elbowSort.init(config.perf); } } @@ -49,10 +41,6 @@ void measurements_init() { void measurements_next() { if (config.perf > 1) { Timer_total.next(); - Timer_fullSort.next(); - Timer_exchange.next(); - Timer_minmax.next(); - Timer_elbowSort.next(); } } @@ -136,20 +124,14 @@ bool get_options(int argc, char* argv[]){ /*! * A simple validator for the entire distributed process * - * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. + * @tparam DataT A buffer type with random access iterator. * - * @param data [ShadowedDataT] The local to MPI process - * @param Processes [mpi_id_t] The total number of MPI processes - * @param rank [mpi_id_t] The current process id - * - * @return [bool] True if all are sorted and in total ascending order + * @param data [DataT] The data + * @return [bool] True if sorted in ascending order */ -template -bool validator(ShadowedDataT& data) { - using value_t = typename ShadowedDataT::value_type; - bool ret = true; // Have faith! - - return ret; +template +bool validator(DataT& data) { + return std::is_sorted(data.begin(), data.end()); } /*! @@ -180,15 +162,15 @@ int main(int argc, char* argv[]) try { for (size_t it = 0 ; it < config.perf ; ++it) { // Initialize local data logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; - std::uniform_int_distribution dis( - std::numeric_limits::min(), - std::numeric_limits::max() + std::uniform_int_distribution dis( + std::numeric_limits::min(), + std::numeric_limits::max() ); std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); }); // Run distributed sort logger << "Starting distributed sorting ... 
"; Timer_total.start(); - distBitonic(Data); + bitonicSort(Data); Timer_total.stop(); measurements_next(); logger << " Done." << logger.endl; @@ -196,11 +178,7 @@ int main(int argc, char* argv[]) try { // Print-outs and validation if (config.perf > 1) { - Timing::print_duration(Timer_total.median(), "Total ", 0); - Timing::print_duration(Timer_fullSort.median(), "Full-Sort ", 0); - Timing::print_duration(Timer_exchange.median(), "Exchange ", 0); - Timing::print_duration(Timer_minmax.median(), "Min-Max ", 0); - Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort", 0); + Timing::print_duration(Timer_total.median(), "Total"); } if (config.validation) { // If requested, we have the chance to fail! diff --git a/homework_3/src/utils.hpp b/homework_3/src/utils.hpp index 69b7bc1..bf06ef0 100644 --- a/homework_3/src/utils.hpp +++ b/homework_3/src/utils.hpp @@ -18,124 +18,6 @@ #include "config.h" -/*! - * @brief A std::vector wrapper with 2 vectors, an active and a shadow. - * - * This type exposes the standard vector functionality of the active vector. 
- * The shadow can be used when we need to use the vector as mutable - * data in algorithms that can not support "in-place" editing (like elbow-sort for example) - * - * @tparam Value_t the underlying data type of the vectors - */ -template -struct ShadowedVec_t { - // STL requirements - using value_type = Value_t; - using iterator = typename std::vector::iterator; - using const_iterator = typename std::vector::const_iterator; - using size_type = typename std::vector::size_type; - - // Default constructor - ShadowedVec_t() = default; - - // Constructor from an std::vector - explicit ShadowedVec_t(const std::vector& vec) - : North(vec), South(), active(north) { - South.resize(North.size()); - } - - explicit ShadowedVec_t(std::vector&& vec) - : North(std::move(vec)), South(), active(north) { - South.resize(North.size()); - } - - // Copy assignment operator - ShadowedVec_t& operator=(const ShadowedVec_t& other) { - if (this != &other) { // Avoid self-assignment - North = other.North; - South = other.South; - active = other.active; - } - return *this; - } - - // Move assignment operator - ShadowedVec_t& operator=(ShadowedVec_t&& other) noexcept { - if (this != &other) { // Avoid self-assignment - North = std::move(other.North); - South = std::move(other.South); - active = other.active; - - // There is no need to zero out other since it is valid but in a non-defined state - } - return *this; - } - - // Type accessors - std::vector& getActive() { return (active == north) ? North : South; } - std::vector& getShadow() { return (active == north) ? South : North; } - const std::vector& getActive() const { return (active == north) ? North : South; } - const std::vector& getShadow() const { return (active == north) ? South : North; } - - // Swap vectors - void switch_active() { active = (active == north) ? 
south : north; } - - // Dispatch vector functionality to active vector - Value_t& operator[](size_type index) { return getActive()[index]; } - const Value_t& operator[](size_type index) const { return getActive()[index]; } - - Value_t& at(size_type index) { return getActive().at(index); } - const Value_t& at(size_type index) const { return getActive().at(index); } - - void push_back(const Value_t& value) { getActive().push_back(value); } - void push_back(Value_t&& value) { getActive().push_back(std::move(value)); } - void pop_back() { getActive().pop_back(); } - Value_t& front() { return getActive().front(); } - Value_t& back() { return getActive().back(); } - const Value_t& front() const { return getActive().front(); } - const Value_t& back() const { return getActive().back(); } - - iterator begin() { return getActive().begin(); } - const_iterator begin() const { return getActive().begin(); } - iterator end() { return getActive().end(); } - const_iterator end() const { return getActive().end(); } - - size_type size() const { return getActive().size(); } - void resize(size_t new_size) { - North.resize(new_size); - South.resize(new_size); - } - - void reserve(size_t new_capacity) { - North.reserve(new_capacity); - South.reserve(new_capacity); - } - [[nodiscard]] size_t capacity() const { return getActive().capacity(); } - [[nodiscard]] bool empty() const { return getActive().empty(); } - - void clear() { getActive().clear(); } - void swap(std::vector& other) { getActive().swap(other); } - - // Comparisons - bool operator== (const ShadowedVec_t& other) { return getActive() == other.getActive(); } - bool operator!= (const ShadowedVec_t& other) { return getActive() != other.getActive(); } - bool operator== (const std::vector& other) { return getActive() == other; } - bool operator!= (const std::vector& other) { return getActive() != other; } - -private: - std::vector North{}; //!< Actual buffer to be used either as active or shadow - std::vector South{}; //!< Actual 
buffer to be used either as active or shadow - enum { - north, south - } active{north}; //!< Flag to select between North and South buffer -}; - -/* - * Exported data types - */ -using distBuffer_t = ShadowedVec_t; -extern distBuffer_t Data; - /*! * A Logger for entire program. */ diff --git a/homework_3/test/tests.cpp b/homework_3/test/tests.cpp index 7348e02..f1444d6 100644 --- a/homework_3/test/tests.cpp +++ b/homework_3/test/tests.cpp @@ -25,8 +25,7 @@ protected: /* - * MPI: SysTest (acceptance) - * Each process executes distBubbletonic for uin8_t [16] + * */ TEST_F(TCUDAbitonic, test1) {