
HW3: [WIP] V0 version added

hw3
Commit 1fe5ab4da7
9 changed files with 229 additions and 466 deletions
  1. homework_3/Makefile (+33, -34)
  2. homework_3/exersize.md (+33, -0)
  3. homework_3/src/bitonicsort.hpp (+145, -0)
  4. homework_3/src/config.h (+3, -2)
  5. homework_3/src/distsort.cpp (+0, -51)
  6. homework_3/src/distsort.hpp (+0, -223)
  7. homework_3/src/main.cpp (+14, -36)
  8. homework_3/src/utils.hpp (+0, -118)
  9. homework_3/test/tests.cpp (+1, -2)

homework_3/Makefile (+33, -34)

@@ -25,12 +25,12 @@ PROJECT := PDS_homework_3
TARGET := bitonic

# Source directories list(space seperated). Makefile-relative path, UNDER current directory.
SRC_DIR_LIST := src test test/gtest
SRC_DIR_LIST := src #test test/gtest

# Include directories list(space seperated). Makefile-relative path.
INC_DIR_LIST := src \
test \
test/gtest/ \
INC_DIR_LIST := src
# test \
# test/gtest/ \


# Exclude files list(space seperated). Filenames only.
@@ -45,10 +45,10 @@ OUTPUT_DIR := out

# ========== Compiler settings ==========
# Compiler flags for debug and release
DEB_CFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c11 -fopenmp
REL_CFLAGS := -Wall -Wextra -O3 -std=c11 -fopenmp
DEB_CXXFLAGS := -DDEBUG -g3 -Wall -Wextra -std=c++17 -fopenmp
REL_CXXFLAGS := -Wall -Wextra -O3 -std=c++17 -fopenmp
DEB_CFLAGS := -DDEBUG -g3 -std=c11 -Xcompiler "-Wall -Wextra"
REL_CFLAGS := -O3 -std=c11 -Xcompiler "-Wall -Wextra"
DEB_CXXFLAGS := -DDEBUG -g3 -std=c++17 -Xcompiler "-Wall -Wextra"
REL_CXXFLAGS := -O3 -std=c++17 -Xcompiler "-Wall -Wextra"

# Pre-defines
# PRE_DEFS := MYCAB=1729 SUPER_MODE
@@ -56,15 +56,15 @@ PRE_DEFS :=

# ============== Linker settings ==============
# Linker flags (example: -pthread -lm)
LDFLAGS := -pthread
LDFLAGS :=

# Map output file
MAP_FILE := output.map
MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
MAP_FILE := # output.map
MAP_FLAG := # -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)

# ============== Docker settings ==============
# We need:
# - Bind the entire project directory(the dir that icludes all the code) as volume.
# - Bind the entire project directory(the dir that includes all the code) as volume.
# - In docker instance, change to working directory(where the makefile is).
DOCKER_VOL_DIR := $(shell pwd)
DOCKER_WRK_DIR :=
@@ -85,6 +85,7 @@ CFLAGS := $(DEB_CFLAGS)
CXXFLAGS := $(DEB_CXXFLAGS)
CXX := g++ #mpic++
CC := gcc #mpicc
LINKER := g++

#
# =========== Main body and Patterns ===========
@@ -117,37 +118,37 @@ DEP := $(foreach file,$(SRC:%.cpp=%.d),$(DEP_DIR)/$(file))
# It is based on Tom Tromey's method.
#
# Invoke cpp to create makefile rules with dependencies for each source file
$(DEP_DIR)/%.d: %.c
@mkdir -p $(@D)
@$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $<
#$(DEP_DIR)/%.d: %.c
# @mkdir -p $(@D)
# @$(DOCKER) $(CC) -E $(CFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.c=.o) -MF $@ $<

# c file objects depent on .c AND dependency files, which have an empty recipe
$(OBJ_DIR)/%.o: %.c $(DEP_DIR)/%.d
$(OBJ_DIR)/%.o: %.c
@mkdir -p $(@D)
@$(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $<
$(DOCKER) $(CC) -c $(CFLAGS) $(INC) $(DEF) -o $@ $<

$(DEP_DIR)/%.d: %.cpp
@mkdir -p $(@D)
@$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $<
#$(DEP_DIR)/%.d: %.cpp
# @mkdir -p $(@D)
# @$(DOCKER) $(CXX) -E $(CXXFLAGS) $(INC) $(DEF) -MM -MT $(OBJ_DIR)/$(<:.cpp=.o) -MF $@ $<

# cpp file objects depent on .cpp AND dependency files, which have an empty recipe
$(OBJ_DIR)/%.o: %.cpp $(DEP_DIR)/%.d
# cpp file objects depend on .cpp AND dependency files, which have an empty recipe
$(OBJ_DIR)/%.o: %.cpp
@mkdir -p $(@D)
@$(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $<
$(DOCKER) $(CXX) -c $(CXXFLAGS) $(INC) $(DEF) -o $@ $<

# empty recipe for dependency files. This prevents make errors
$(DEP):
#$(DEP):

# now include all dependencies
# After all they are makefile dependency rules ;)
include $(wildcard $(DEP))
#include $(wildcard $(DEP))

# main target rule
$(BUILD_DIR)/$(TARGET): $(OBJ)
@mkdir -p $(@D)
@echo Linking to target: $(TARGET)
@echo $(DOCKER) $(CXX) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
@$(DOCKER) $(CXX) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
@echo $(DOCKER) $(LINKER) '$$(OBJ)' $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
@$(DOCKER) $(LINKER) $(OBJ) $(LDFLAGS) $(MAP_FLAG) -o $(@D)/$(TARGET)
@echo
@echo Print size information
@$(CSIZE) $(@D)/$(TARGET)
@@ -179,10 +180,12 @@ release: $(BUILD_DIR)/$(TARGET)
#


bitonic_v0: CC := nvcc
bitonic_v0: CXX := nvcc
bitonic_v0: CC := nvcc -x cu
bitonic_v0: CXX := nvcc -x cu
bitonic_v0: LINKER := nvcc
bitonic_v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=V0
bitonic_v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=V0
bitonic_v0: OUTPUT_DIR := $(OUTPUT_DIR)/v0
bitonic_v0: TARGET := bitonic_v0
bitonic_v0: $(BUILD_DIR)/$(TARGET)
@mkdir -p $(OUTPUT_DIR)
@@ -191,11 +194,7 @@ bitonic_v0: $(BUILD_DIR)/$(TARGET)

hpc-build:
make clean
make distbubbletonic
make clean
make distbitonic
make clean
make tests
make bitonic_v0


all: debug bitonic_v0


homework_3/exersize.md (+33, -0)

@@ -0,0 +1,33 @@
Parallel & Distributed Computer Systems HW3

January, 2025

Write a program that sorts $N$ integers in ascending order, using CUDA.

The program must perform the following tasks:

- The user specifies a positive integer $q$.

- Start with an array of $N = 2^q$ random integers.

- Sort all $N$ elements in ascending order.

- Check the correctness of the final result.

Your implementation should be based on the following steps:

V0. A kernel where each thread only compares and exchanges. This "eliminates" the 1:n innermost loop. Easy to write, but too many function calls and global synchronizations.

V1. Include the k inner loop in the kernel function. How do we handle the synchronization? Fewer calls, fewer global synchronizations. Faster than V0!

V2. Modify the kernel of V1 to work with local memory instead of global.

You must deliver:
- A report (about $3-4$ pages) that describes your parallel algorithm and implementation.

- Your comments on the speed of your parallel program compared to the serial sort, after trying your program on aristotelis for $q = [20:27]$.

- The source code of your program uploaded online.

Ethics: If you use code found on the web or by an LLM, you should mention your source and the changes you made. You may work in pairs; both partners must submit a single report with both names.
Deadline: 2 February, $2025$.
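For orientation, the V0 scheme above reduces to a short host-side launch loop. The following is a sketch only; it mirrors the bitonicStep kernel added by this commit and assumes q = log2(N) with Nblocks/Nthreads computed beforehand:

// V0: one kernel launch per (stage, step) pair, i.e. q*(q+1)/2 launches for N = 2^q,
// each followed by a device-wide synchronization.
for (size_t stage = 1; stage <= q; ++stage) {
    for (size_t step = stage; step > 0; ) {
        --step;
        bitonicStep<<<Nblocks, Nthreads>>>(dev_data, N, step, stage);
        cudaDeviceSynchronize();   // the "global synchronization" cost of V0
    }
}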

homework_3/src/bitonicsort.hpp (+145, -0)

@@ -0,0 +1,145 @@
/*!
* \file
* \brief Bitonic sort CUDA implementation header
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/

#ifndef BITONICSORTCUDA_H_
#define BITONICSORTCUDA_H_

#include <cuda_runtime.h>
#include <vector>
#include <cmath>
#include <cstdint>
#include <utility>

#include "utils.hpp"

/*
* Exported timers
*/
extern Timing Timer_total;

using threadId_t = size_t;


/*
* ============================== Sort utilities ==============================
*/

/*!
* Returns the ascending or descending configuration (up/down phase) of the thread id
* depending on the current stage
*
* @param tid [threadId_t] The current thread
* @param stage [size_t] The current stage of the sorting network (same for each step)
* @return [bool] True if we need ascending configuration, false otherwise
*/
__device__ inline bool ascending(threadId_t tid, size_t stage) noexcept {
return !(tid & (1 << stage));
}

/*!
* Returns the thread's partner for data exchange during the sorting network iterations
* of Bitonic
*
* @param tid [threadId_t] The current thread
* @param step [size_t] The step of the sorting network
* @return [threadId_t] The thread id of the partner for data exchange
*/
__device__ inline threadId_t partner(threadId_t tid, size_t step) noexcept {
return (tid ^ (1 << step));
}


/*!
* Predicate to check if a thread keeps the small numbers during the bitonic sort network exchange.
*
* @param tid [threadId_t] The thread for which we check
* @param partner [threadId_t] The partner of the data exchange
* @param stage [size_t] The current stage of the sorting network (same for each step)
* @return [bool] True if the thread should keep the small values, false otherwise
*/

__device__ inline bool keepSmall(threadId_t tid, threadId_t partner, size_t stage) {
return ascending(tid, stage) == (tid < partner);
}



/*
* ============================== Sort algorithms ==============================
*/


template <typename ValueT>
__device__ void cudaExchange(ValueT* data, int tid, int partner, bool keepSmall) {
if (( keepSmall && (data[tid] > data[partner])) ||
(!keepSmall && (data[tid] < data[partner])) ) {
ValueT temp = data[tid];
data[tid] = data[partner];
data[partner] = temp;
}
}


template <typename ValueT>
__global__ void bitonicStep(ValueT* data, size_t n, size_t step, size_t stage) {
threadId_t tid = threadIdx.x + blockIdx.x * blockDim.x; // Compute global thread ID
if (tid < n) {
threadId_t pid = partner(tid, step);
// Let only the lower-indexed thread of each (tid, pid) pair perform the compare-exchange,
// so the two threads of a pair do not race on the same two elements.
if (pid < n && tid < pid) {
bool keep = keepSmall(tid, pid, stage);
cudaExchange(data, tid, pid, keep);
}
}
}


/*!
* A CUDA version of the Bitonic sort algorithm.
*
* @note
* The data are copied to the device, sorted by repeated bitonicStep kernel launches
* and copied back to the host.
*
* @tparam DataT A contiguous buffer type with value_type, size() and data() (e.g. std::vector).
*
* @param data [DataT] The host data to sort
*/

template <typename DataT>
void bitonicSort(DataT& data) {
using value_t = typename DataT::value_type;

value_t* dev_data;
auto size = data.size();

cudaMalloc(&dev_data, size * sizeof(value_t));
cudaMemcpy(dev_data, data.data(), size * sizeof(value_t), cudaMemcpyHostToDevice);

int Nthreads = 1024;
int Nblocks = (size + Nthreads - 1) / Nthreads;

size_t max_depth = static_cast<size_t>(log2(size));
for (size_t stage = 1; stage <= max_depth; ++stage) {
for (size_t step = stage; step > 0; ) {
--step;
bitonicStep<<<Nblocks, Nthreads>>>(dev_data, size, step, stage);
cudaDeviceSynchronize();
}
}

cudaMemcpy(data.data(), dev_data, size * sizeof(value_t), cudaMemcpyDeviceToHost);
cudaFree(dev_data);
}





#endif //BITONICSORTCUDA_H_
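A minimal usage sketch for this header (a hypothetical standalone driver, not part of the commit; it defines the Timing Timer_total global that the header declares extern, and it must be built with nvcc -x cu as in the Makefile's bitonic_v0 target):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>
#include "bitonicsort.hpp"

Timing Timer_total;   // normally defined in main.cpp; defined here for a standalone build

int main() {
    std::vector<uint32_t> data(1u << 20);          // N must be a power of two
    std::mt19937 gen(std::random_device{}());
    std::uniform_int_distribution<uint32_t> dis;   // full uint32_t range by default
    std::generate(data.begin(), data.end(), [&]() { return dis(gen); });

    bitonicSort(data);   // copy to device, run the sorting network, copy back

    return std::is_sorted(data.begin(), data.end()) ? 0 : 1;
}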

homework_3/src/config.h (+3, -2)

@@ -35,7 +35,7 @@ static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16;


/*!
* Value type selection
* Value and Buffer type selection
*
* We support the following compiler types or the <cstdint> that translate to them:
* char - unsigned char
@@ -46,7 +46,8 @@ static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16;
* float
* double
*/
using distValue_t = uint32_t;
using Value_t = uint32_t;
using Data_t = std::vector<Value_t>;

/*!
* Session option for each invocation of the executable.


homework_3/src/distsort.cpp (+0, -51)

@@ -1,51 +0,0 @@
/*!
* \file
* \brief Distributed sort implementation
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#include "utils.hpp"
#include "distsort.hpp"


/*!
* Returns the ascending or descending configuration of the node's sequence based on
* the current node (MPI process) and the depth of the sorting network
*
* @param node [mpi_id_t] The current node (MPI process)
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if we need ascending configuration, false otherwise
*/
bool ascending(mpi_id_t node, size_t depth) noexcept {
return !(node & (1 << depth));
}

/*!
* Returns the node's partner for data exchange during the sorting network iterations
* of Bitonic
*
* @param node [mpi_id_t] The current node
* @param step [size_t] The step of the sorting network
* @return [mpi_id_t] The node id of the partner for data exchange
*/
mpi_id_t partner(mpi_id_t node, size_t step) noexcept {
return (node ^ (1 << step));
}


/*!
* Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
*
* @param node [mpi_id_t] The node for which we check
* @param partner [mpi_id_t] The partner of the data exchange
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if the node should keep the small values, false otherwise
*/

bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth) {
if (node == partner)
throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
return ascending(node, depth) == (node < partner);
}

homework_3/src/distsort.hpp (+0, -223)

@@ -1,223 +0,0 @@
/*!
* \file
* \brief Distributed sort implementation header
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/

#ifndef DISTBITONIC_H_
#define DISTBITONIC_H_

#include <vector>
#include <algorithm>
#include <parallel/algorithm>
#include <cmath>
#include <cstdint>
#if !defined DEBUG
#define NDEBUG
#endif
#include <cassert>

#include "utils.hpp"

/*
* Exported timers
*/
extern Timing Timer_total;
extern Timing Timer_fullSort;
extern Timing Timer_exchange;
extern Timing Timer_minmax;
extern Timing Timer_elbowSort;



/*
* ============================== Sort utilities ==============================
*/



/*!
* Returns the ascending or descending configuration of the node's sequence based on
* the current node (MPI process) and the depth of the sorting network
*
* @param node [mpi_id_t] The current node (MPI process)
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if we need ascending configuration, false otherwise
*/
bool ascending(mpi_id_t node, size_t depth);

/*!
* Returns the node's partner for data exchange during the sorting network iterations
* of Bitonic
*
* @param node [mpi_id_t] The current node
* @param step [size_t] The step of the sorting network
* @return [mpi_id_t] The node id of the partner for data exchange
*/
mpi_id_t partner(mpi_id_t node, size_t step);


/*!
* Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
*
* @param node [mpi_id_t] The node for which we check
* @param partner [mpi_id_t] The partner of the data exchange
* @param depth [size_t] The total depth of the sorting network (same for each step for a given network)
* @return [bool] True if the node should keep the small values, false otherwise
*/
bool keepSmall(mpi_id_t node, mpi_id_t partner, size_t depth);



/*
* ============================== Data utilities ==============================
*/

/*!
* Sort a range using the build-in O(Nlog(N)) algorithm
*
* @tparam RangeT A range type with random access iterator
*
* @param data [RangeT] The data to be sorted
* @param ascending [bool] Flag to indicate the sorting order
*/
template<typename RangeT>
void fullSort(RangeT& data, bool ascending) noexcept {
// Use introsort from stdlib++ here, unless ... __gnu_parallel
if (ascending) {
__gnu_parallel::sort(data.begin(), data.end(), std::less<>());
}
else {
__gnu_parallel::sort(data.begin(), data.end(), std::greater<>());
}
}

/*!
* Core functionality of sort for shadowed buffer types using
* the "elbow sort" algorithm.
*
* @note:
* This algorithm can not work "in place".
* We use the active buffer as source and the shadow as target.
* At the end we switch which buffer is active and which is the shadow.
* @note
* This is the core functionality. Use the elbowSort() function instead
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
* @tparam CompT A Comparison type for binary operation comparisons
*
* @param data [ShadowedDataT] The data to sort
* @param ascending [bool] Flag to indicate the sorting order
* @param comp [CompT] The binary operator object
*/
template<typename ShadowedDataT, typename CompT>
void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept {
auto& active = data.getActive(); // Get the source vector (the data to sort)
auto& shadow = data.getShadow(); // Get the target vector (the sorted data)

size_t N = data.size(); // The total size is the same or both vectors
size_t left = std::distance(
active.begin(),
(ascending) ?
std::min_element(active.begin(), active.end()) :
std::max_element(active.begin(), active.end())
); // start 'left' from elbow of the bitonic
size_t right = (left == N-1) ? 0 : left + 1;

// Walk in opposite directions from elbow and insert-sort to target vector
for (size_t i = 0 ; i<N ; ++i) {
if (comp(active[left], active[right])) {
shadow[i] = active[left];
left = (left == 0) ? N-1 : left -1; // cycle decrease
}
else {
shadow[i] = active[right];
right = (right + 1) % N; // cycle increase
}
}
data.switch_active(); // Switch active-shadow buffers
}

/*!
* Sort a shadowed buffer using the "elbow sort" algorithm.
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The data to sort
* @param ascending [bool] Flag to indicate the sorting order
*/
template<typename ShadowedDataT>
void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
if (ascending)
elbowSortCore(data, ascending, std::less<>());
else
elbowSortCore(data, ascending, std::greater<>());
}


/*!
* Takes two sequences and selects either the larger or the smaller items
* in one-to-one comparison between them. If the initial sequences are bitonic, then
* the result is a bitonic sequence too!
*
* @tparam ValueT The underlying type of the sequences
*
* @param local [ValueT*] Pointer to the local sequence
* @param remote [const ValueT*] Pointer to the remote sequence (copied locally by MPI)
* @param count [size_t] The number of items to process
* @param keepSmall [bool] Flag to indicate if we keep the small items in local sequence
*/
template<typename ValueT>
void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSmall) noexcept {
std::transform(
local, local + count,
remote,
local,
[&keepSmall](const ValueT& a, const ValueT& b){
return (keepSmall) ? std::min(a, b) : std::max(a, b);
});
}

/*
* ============================== Sort algorithms ==============================
*/


/*!
* A distributed version of the Bitonic sort algorithm.
*
* @note
* Each MPI process should run an instance of this function.
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The local to MPI process data to sort
* @param Processes [mpi_id_t] The total number of MPI processes
* @param rank [mpi_id_t] The current process id
*/
template<typename ShadowedDataT>
void distBitonic(ShadowedDataT& data) {
// Initially sort to create a half part of a bitonic sequence
timeCall(Timer_fullSort, fullSort, data, ascending(rank, 0));

// Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
auto p = static_cast<uint32_t>(std::log2(Processes));
for (size_t depth = 1; depth <= p; ++depth) {
for (size_t step = depth; step > 0;) {
--step;
// Find out exchange configuration
auto part = partner(rank, step);
auto ks = keepSmall(rank, part, depth);
// Exchange with partner, keep nim-or-max
exchange(data, part, ks, tag);

}
// sort - O(N)
timeCall(Timer_elbowSort, elbowSort, data, ascending(rank, depth));
}
}

#endif //DISTBITONIC_H_

homework_3/src/main.cpp (+14, -36)

@@ -14,34 +14,26 @@

#include "utils.hpp"
#include "config.h"
#include "distsort.hpp"
#include "bitonicsort.hpp"


// Global session data
Data_t Data;
config_t config;
distBuffer_t Data;
Log logger;


// Mersenne seeded from hw if possible. range: [type_min, type_max]
std::random_device rd;
std::mt19937 gen(rd());

//! Performance timers for each one of the "costly" functions
Timing Timer_total;
Timing Timer_fullSort;
Timing Timer_exchange;
Timing Timer_minmax;
Timing Timer_elbowSort;


//! Init timing objects for extra rounds
void measurements_init() {
if (config.perf > 1) {
Timer_total.init(config.perf);
Timer_fullSort.init(config.perf);
Timer_exchange.init(config.perf);
Timer_minmax.init(config.perf);
Timer_elbowSort.init(config.perf);
}
}

@@ -49,10 +41,6 @@ void measurements_init() {
void measurements_next() {
if (config.perf > 1) {
Timer_total.next();
Timer_fullSort.next();
Timer_exchange.next();
Timer_minmax.next();
Timer_elbowSort.next();
}
}

@@ -136,20 +124,14 @@ bool get_options(int argc, char* argv[]){
/*!
* A simple validator for the entire distributed process
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
* @tparam DataT A buffer type with random access iterator.
*
* @param data [ShadowedDataT] The local to MPI process
* @param Processes [mpi_id_t] The total number of MPI processes
* @param rank [mpi_id_t] The current process id
*
* @return [bool] True if all are sorted and in total ascending order
* @param data [DataT] The data
* @return [bool] True if sorted in ascending order
*/
template<typename ShadowedDataT>
bool validator(ShadowedDataT& data) {
using value_t = typename ShadowedDataT::value_type;
bool ret = true; // Have faith!

return ret;
template<typename DataT>
bool validator(DataT& data) {
return std::is_sorted(data.begin(), data.end());
}

/*!
@@ -180,15 +162,15 @@ int main(int argc, char* argv[]) try {
for (size_t it = 0 ; it < config.perf ; ++it) {
// Initialize local data
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
std::uniform_int_distribution<distValue_t > dis(
std::numeric_limits<distValue_t>::min(),
std::numeric_limits<distValue_t>::max()
std::uniform_int_distribution<Value_t > dis(
std::numeric_limits<Value_t>::min(),
std::numeric_limits<Value_t>::max()
);
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
// Run distributed sort
logger << "Starting distributed sorting ... ";
Timer_total.start();
distBitonic(Data);
bitonicSort(Data);
Timer_total.stop();
measurements_next();
logger << " Done." << logger.endl;
@@ -196,11 +178,7 @@ int main(int argc, char* argv[]) try {

// Print-outs and validation
if (config.perf > 1) {
Timing::print_duration(Timer_total.median(), "Total ", 0);
Timing::print_duration(Timer_fullSort.median(), "Full-Sort ", 0);
Timing::print_duration(Timer_exchange.median(), "Exchange ", 0);
Timing::print_duration(Timer_minmax.median(), "Min-Max ", 0);
Timing::print_duration(Timer_elbowSort.median(),"Elbow-Sort", 0);
Timing::print_duration(Timer_total.median(), "Total");
}
if (config.validation) {
// If requested, we have the chance to fail!


homework_3/src/utils.hpp (+0, -118)

@@ -18,124 +18,6 @@
#include "config.h"


/*!
* @brief A std::vector wrapper with 2 vectors, an active and a shadow.
*
* This type exposes the standard vector functionality of the active vector.
* The shadow can be used when we need to use the vector as mutable
* data in algorithms that can not support "in-place" editing (like elbow-sort for example)
*
* @tparam Value_t the underlying data type of the vectors
*/
template <typename Value_t>
struct ShadowedVec_t {
// STL requirements
using value_type = Value_t;
using iterator = typename std::vector<Value_t>::iterator;
using const_iterator = typename std::vector<Value_t>::const_iterator;
using size_type = typename std::vector<Value_t>::size_type;

// Default constructor
ShadowedVec_t() = default;

// Constructor from an std::vector
explicit ShadowedVec_t(const std::vector<Value_t>& vec)
: North(vec), South(), active(north) {
South.resize(North.size());
}

explicit ShadowedVec_t(std::vector<Value_t>&& vec)
: North(std::move(vec)), South(), active(north) {
South.resize(North.size());
}

// Copy assignment operator
ShadowedVec_t& operator=(const ShadowedVec_t& other) {
if (this != &other) { // Avoid self-assignment
North = other.North;
South = other.South;
active = other.active;
}
return *this;
}

// Move assignment operator
ShadowedVec_t& operator=(ShadowedVec_t&& other) noexcept {
if (this != &other) { // Avoid self-assignment
North = std::move(other.North);
South = std::move(other.South);
active = other.active;

// There is no need to zero out other since it is valid but in a non-defined state
}
return *this;
}

// Type accessors
std::vector<Value_t>& getActive() { return (active == north) ? North : South; }
std::vector<Value_t>& getShadow() { return (active == north) ? South : North; }
const std::vector<Value_t>& getActive() const { return (active == north) ? North : South; }
const std::vector<Value_t>& getShadow() const { return (active == north) ? South : North; }

// Swap vectors
void switch_active() { active = (active == north) ? south : north; }

// Dispatch vector functionality to active vector
Value_t& operator[](size_type index) { return getActive()[index]; }
const Value_t& operator[](size_type index) const { return getActive()[index]; }

Value_t& at(size_type index) { return getActive().at(index); }
const Value_t& at(size_type index) const { return getActive().at(index); }

void push_back(const Value_t& value) { getActive().push_back(value); }
void push_back(Value_t&& value) { getActive().push_back(std::move(value)); }
void pop_back() { getActive().pop_back(); }
Value_t& front() { return getActive().front(); }
Value_t& back() { return getActive().back(); }
const Value_t& front() const { return getActive().front(); }
const Value_t& back() const { return getActive().back(); }

iterator begin() { return getActive().begin(); }
const_iterator begin() const { return getActive().begin(); }
iterator end() { return getActive().end(); }
const_iterator end() const { return getActive().end(); }

size_type size() const { return getActive().size(); }
void resize(size_t new_size) {
North.resize(new_size);
South.resize(new_size);
}

void reserve(size_t new_capacity) {
North.reserve(new_capacity);
South.reserve(new_capacity);
}
[[nodiscard]] size_t capacity() const { return getActive().capacity(); }
[[nodiscard]] bool empty() const { return getActive().empty(); }

void clear() { getActive().clear(); }
void swap(std::vector<Value_t>& other) { getActive().swap(other); }

// Comparisons
bool operator== (const ShadowedVec_t& other) { return getActive() == other.getActive(); }
bool operator!= (const ShadowedVec_t& other) { return getActive() != other.getActive(); }
bool operator== (const std::vector<value_type>& other) { return getActive() == other; }
bool operator!= (const std::vector<value_type>& other) { return getActive() != other; }

private:
std::vector<Value_t> North{}; //!< Actual buffer to be used either as active or shadow
std::vector<Value_t> South{}; //!< Actual buffer to be used either as active or shadow
enum {
north, south
} active{north}; //!< Flag to select between North and South buffer
};

/*
* Exported data types
*/
using distBuffer_t = ShadowedVec_t<distValue_t>;
extern distBuffer_t Data;

/*!
* A Logger for entire program.
*/


homework_3/test/tests.cpp (+1, -2)

@@ -25,8 +25,7 @@ protected:


/*
* MPI: SysTest (acceptance)
* Each process executes distBubbletonic for uin8_t [16]
*
*/
TEST_F(TCUDAbitonic, test1) {


