HW2: A local only version of the distbitonic

3 settimane fa · 43dff95f67
--- a/homework_2/Makefile
+++ b/homework_2/Makefile
@@ -20,12 +20,17 @@
 # ============== Project settings ==============
 # Project's name
 PROJECT         := PDS_homework_2

 # Executable's name
 TARGET          := distbitonic
 TARGET          := bitonic

 # Source directories list (space separated). Makefile-relative path, UNDER current directory.
 SRC_DIR_LIST    := src
 SRC_DIR_LIST    := src test test/gtest

 # Include directories list (space separated). Makefile-relative path.
 INC_DIR_LIST    := include \
 				   test \
 				   test/gtest/ \
 				   /usr/lib/x86_64-linux-gnu/openmpi/include/ \
                   src

@@ -148,6 +153,14 @@ $(BUILD_DIR)/$(TARGET): $(OBJ)
 	@$(CSIZE) $(@D)/$(TARGET)
 	@echo Done


 #
 # ================ Default local build rules =================
 # example:
 # make debug

 .DEFAULT_GOAL := all

 .PHONY: clean
 clean:
 	@echo Cleaning build directories
@@ -155,67 +168,53 @@ clean:
 	@rm -rf $(DEP_DIR)
 	@rm -rf $(BUILD_DIR)

 #
 # ================ Local build rules =================
 # example:
 # make debug

 # Convenience goals: each one only selects a flag set through a target-specific
 # CFLAGS and then requests the executable. None of them names a real file, so
 # they are declared phony; a stray file called `debug`, `release` or `all`
 # can then never mask them.
 .PHONY: debug release all

 debug: CFLAGS := $(DEB_CFLAGS)
 debug: $(BUILD_DIR)/$(TARGET)

 release: CFLAGS := $(REL_CFLAGS)
 release: $(BUILD_DIR)/$(TARGET)

 all: release

 # Builds hpc-results/post from hpc-results/main.cpp. The source is listed as a
 # prerequisite so the binary is rebuilt whenever the source changes.
 hpc-results/post: hpc-results/main.cpp
 	$(CXX) $(CFLAGS) -o $@ $<

 # Removes the binary built above. $(RM) is `rm -f`, so the goal also succeeds
 # when the binary was never built.
 .PHONY: hpc-clean
 hpc-clean:
 	$(RM) hpc-results/post

 #
 # ================ Local (and/or) via docker build rules =================
 #
 # examples:
 # make IMAGE=hpcimage v0
 # make IMAGE=hpcimage v1_cilk
 #
 # dist_v05: compile with the MPI compiler wrappers (mpicc / mpic++) and the
 # debug flag sets, adding -DCODE_VERSION=50 to both the C and the C++ flags.
 # All assignments are target-specific: they apply to this goal and to the
 # prerequisites built on its behalf.
 dist_v05: CC := mpicc
 dist_v05: CXX := mpic++
 dist_v05: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=50
 dist_v05: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=50
 dist_v05: TARGET := dist_v05
 dist_v05: $(BUILD_DIR)/$(TARGET)

 # dist_v1: same setup as dist_v05, but with -DCODE_VERSION=100.
 dist_v1: CC := mpicc
 dist_v1: CXX := mpic++
 dist_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=100
 dist_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=100
 dist_v1: TARGET := dist_v1
 dist_v1: $(BUILD_DIR)/$(TARGET)

 #
 # ========= Inside CSAL Image build rules ===========
 #
 # 1) first jump into image (make sure you are in the directory where Makefile is):
 #    > docker run -it -v ${PWD}:/usr/src/exercise_1 -w /usr/src/exercise_1/ hpcimage
 # 2) Clean binaries first **important**
 #    > make clean
 # 3) for v4 cilk for example:
 #    > make csal_v4_cilk
 # 4) run executables from `bin/`. Examples:
 #    > ./bin/tcount_ompv3 -i mtx/NACA0015.mtx --timing -r 3 -o /dev/null
 #    > ./bin/tcount_pthv4 -i mtx/com_Youtube.mtx --timing --dynamic --print_count



 #
 # ======== Run from container =========
 #
 # examples:
 #
 # make IMAGE=hpcimage EXEC=knnsearch_v1 run
 # make IMAGE=hpcimage EXEC=knnsearch_v1 run
 #
 # ================ Build rules =================
 #

 # Local or inside HPC rules
 # The goals below are commands, not files: each one selects a compiler, a flag
 # set and an executable name through target-specific variables. Declaring them
 # phony keeps a same-named file or directory from masking them.
 .PHONY: distbubbletonic distbitonic deb_distbubbletonic deb_distbitonic tests all

 # Release builds with the MPI compiler wrappers
 distbubbletonic: CC := mpicc
 distbubbletonic: CXX := mpic++
 distbubbletonic: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=BUBBLETONIC
 distbubbletonic: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=BUBBLETONIC
 distbubbletonic: TARGET := distbubbletonic
 distbubbletonic: $(BUILD_DIR)/$(TARGET)

 distbitonic: CC := mpicc
 distbitonic: CXX := mpic++
 distbitonic: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=BITONIC
 distbitonic: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=BITONIC
 distbitonic: TARGET := distbitonic
 distbitonic: $(BUILD_DIR)/$(TARGET)

 # Debug builds with the MPI compiler wrappers (-DDEBUG added to the flags)
 deb_distbubbletonic: CC := mpicc
 deb_distbubbletonic: CXX := mpic++
 deb_distbubbletonic: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=BUBBLETONIC -DDEBUG
 deb_distbubbletonic: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=BUBBLETONIC -DDEBUG
 deb_distbubbletonic: TARGET := deb_distbubbletonic
 deb_distbubbletonic: $(BUILD_DIR)/$(TARGET)

 deb_distbitonic: CC := mpicc
 deb_distbitonic: CXX := mpic++
 deb_distbitonic: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=BITONIC -DDEBUG
 deb_distbitonic: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=BITONIC -DDEBUG
 deb_distbitonic: TARGET := deb_distbitonic
 deb_distbitonic: $(BUILD_DIR)/$(TARGET)

 # Unit-test build: default compilers, debug flags plus -DDEBUG and -DTESTING
 tests: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=BITONIC -DDEBUG -DTESTING
 tests: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=BITONIC -DDEBUG -DTESTING
 tests: TARGET := tests
 tests: $(BUILD_DIR)/$(TARGET)


 all: debug distbubbletonic distbitonic
 # Note:
 #	Add a gcc based make rule here in order for clangd to successfully scan the project files.
 #	Otherwise we do not need the gcc build.

--- a/homework_2/include/config.h
+++ b/homework_2/include/config.h
@@ -1,5 +1,5 @@
 /*!
 * \file    config,h
 * \file    config.h
 * \brief   Build configuration file.
 *
 * \author
@@ -16,13 +16,13 @@
 /*
 * Defines for the different versions of the exercise
 */
 #define  V50     (50)
 #define  V100    (100)
 #define  BITONIC        (1)
 #define  BUBBLETONIC    (2)


 // Fail-safe version selection
 #if !defined CODE_VERSION
 #define CODE_VERSION   V1
 #define CODE_VERSION   BITONIC
 #endif


@@ -30,10 +30,11 @@
 * Session option for each invocation of the executable
 */
 struct session_t {
    bool        timing          {false};
    bool        verbose         {false};    //!< Flag to enable verbose output to stdout
    bool timing{false};
    bool verbose{false};    //!< Flag to enable verbose output to stdout
 };

 extern session_t session;


 #endif /* CONFIG_H_ */
--- a/homework_2/include/distbitonic.hpp
+++ b/homework_2/include/distbitonic.hpp
@@ -0,0 +1,44 @@
 /*!
 * \file
 * \brief   Distributed bitonic implementation header
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */

 #ifndef DISTBITONIC_H_
 #define DISTBITONIC_H_

 #if !defined DEBUG
 #define NDEBUG
 #endif
 #include <cassert>

 #include <cstddef>
 #include <cstdint>
 #include <string>
 #include <vector>
 #if !defined TESTING
  #include <mpi.h>
 #endif

 using Data_t    = std::vector<uint8_t>;
 using AllData_t = std::vector<Data_t>;

 //! MPI-related information kept for the running process.
 struct mpi_t {
    size_t      world_size = 0;     //!< Number of processes
    size_t      world_rank = 0;     //!< Rank of this process
    std::string processor_name;     //!< Name of the processor, empty until set
 };

 extern mpi_t mpi;


 //! True when \p node holds an ascending sequence at sorting-network depth \p depth.
 bool ascending(size_t node, size_t depth) noexcept;
 //! Id of the node that \p node exchanges data with at sorting-network step \p step.
 size_t partner(size_t node, size_t step) noexcept;
 //! True when \p node keeps the small values of its exchange with \p partner at depth \p depth.
 bool keepsmall(size_t node, size_t partner, size_t depth) noexcept;

 //! Data-exchange hook between \p node and \p partner; the two ids must differ.
 void exchange(size_t node, size_t partner);
 //! Element-wise compare-exchange of data[node] and data[partner]; \p keepsmall selects whether node keeps the minima.
 void minmax(AllData_t& data, size_t node, size_t partner, bool keepsmall);
 //! Runs the steps depth-1 .. 0 of the sorting network of depth \p depth over \p nodes nodes.
 void sort_network(AllData_t& data, size_t nodes, size_t depth);
 //! Bitonic sort of the first \p P sequences of \p data, in place.
 void distbitonic(size_t P, AllData_t& data);

 #endif //DISTBITONIC_H_
--- a/homework_2/include/impl.hpp
+++ b/homework_2/include/impl.hpp
@@ -0,0 +1,14 @@
 /*!
 * \file
 * \brief   The distributed bitonic implementation header
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */

 #ifndef IMPL_H_
 #define IMPL_H_


 #endif //IMPL_H_
--- a/homework_2/include/utils.hpp
+++ b/homework_2/include/utils.hpp
@@ -19,32 +19,36 @@
 /*!
 * A Logger for entire program.
 */

 struct Log {
   struct Endl {} endl;    //!< a tag object to to use it as a new line request.
    struct Endl {
    } endl;    //!< a tag object to to use it as a new line request.

    //! We provide logging via << operator
    template<typename T>
    Log &operator<<(T &&t) {
        if (session.verbose) {
            if (line_) {
                std::cout << "[Log]: " << t;
                line_ = false;
            } else
                std::cout << t;
        }
        return *this;
    }

    // overload for special end line handling
    Log &operator<<(Endl e) {
        (void) e;
        if (session.verbose) {
            std::cout << '\n';
            line_ = true;
        }
        return *this;
    }

   //! We provide logging via << operator
   template<typename T>
   Log& operator<< (T&& t) {
      if (session.verbose) {
         if (line_) {
            std::cout << "[Log]: " << t;
            line_ = false;
         }
         else
            std::cout << t;
      }
      return *this;
   }
   // overload for special end line handling
   Log& operator<< (Endl e) { (void)e;
      if (session.verbose) {
         std::cout << '\n';
         line_ = true;
      }
      return *this;
   }
 private:
   bool line_ {true};
    bool line_{true};
 };

 extern Log logger;
@@ -52,36 +56,41 @@ extern Log logger;
 /*!
 * A small timing utility based on chrono.
 */
 struct Timing{
   using Tpoint = std::chrono::steady_clock::time_point;
   using microseconds = std::chrono::microseconds;
   using milliseconds = std::chrono::milliseconds;
   using seconds = std::chrono::seconds;
 struct Timing {
    using Tpoint = std::chrono::steady_clock::time_point;
    using microseconds = std::chrono::microseconds;
    using milliseconds = std::chrono::milliseconds;
    using seconds = std::chrono::seconds;

    //! tool to mark the starting point
    Tpoint start() noexcept { return start_ = std::chrono::steady_clock::now(); }

   //! tool to mark the starting point
   Tpoint start () noexcept { return start_ = std::chrono::steady_clock::now(); }
   //! tool to mark the ending point
   Tpoint stop ()  noexcept { return stop_ = std::chrono::steady_clock::now(); }
    //! tool to mark the ending point
    Tpoint stop() noexcept { return stop_ = std::chrono::steady_clock::now(); }

    auto dt() noexcept {
        return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
    }

    //! tool to print the time interval
    void print_dt(const char *what) noexcept {
        if (session.timing) {
            auto t = stop_ - start_;
            if (std::chrono::duration_cast<microseconds>(t).count() < 10000)
                std::cout << "[Timing]: " << what << ": "
                          << std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
            else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
                std::cout << "[Timing]: " << what << ": "
                          << std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
            else
                std::cout << "[Timing]: " << what << ": "
                          << std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
        }
    }

   auto dt () noexcept {
      return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
   }
   //! tool to print the time interval
   void print_dt (const char* what) noexcept {
      if (session.timing) {
         auto t = stop_ - start_;
         if       (std::chrono::duration_cast<microseconds>(t).count() < 10000)
            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
         else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
         else
            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
      }
   }
 private:
   Tpoint start_;
   Tpoint stop_;
    Tpoint start_;
    Tpoint stop_;
 };


 #endif /* UTILS_HPP_ */
--- a/homework_2/src/distbitonic.cpp
+++ b/homework_2/src/distbitonic.cpp
@@ -0,0 +1,106 @@
 /*!
 * \file
 * \brief   Distributed bitonic implementation.
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */

 #include <vector>
 #include <algorithm>
 #include <cmath>
 #include <cassert>
 #include "distbitonic.hpp"

 /*!
 * Returns the ascending or descending configuration of the node's sequence based on
 * the current node (MPI process) and the depth of the sorting network.
 *
 * The answer is bit number \p depth of \p node: a clear bit means ascending,
 * a set bit means descending.
 *
 * @param node      The current node (MPI process)
 * @param depth     The total depth of the sorting network (same for each step for a given network).
 *                  Must be smaller than the bit width of size_t.
 *
 * @return          True if we need ascending configuration, false otherwise
 */
 bool ascending(size_t node, size_t depth) noexcept {
    // The mask is built from a size_t one. A plain `1` is an int, so shifting it
    // by 31 or more gives a wrong (sign-extended) mask or undefined behaviour.
    const size_t mask = size_t{1} << depth;
    return (node & mask) == 0;
 }

 /*!
 * Returns the node's partner for data exchange during the sorting network iterations.
 *
 * The partner id is the node id with bit number \p step flipped, so the relation
 * is symmetric: partner(partner(n, s), s) == n.
 *
 * @param node      The current node
 * @param step      The step of the sorting network. Must be smaller than the bit width of size_t.
 * @return          The node id of the partner for data exchange
 */
 size_t partner(size_t node, size_t step) noexcept {
    // Shift a size_t one; an int `1` cannot reach bit 31 and above.
    return node ^ (size_t{1} << step);
 }

 /*!
 * Predicate to check if a node keeps the small numbers during the bitonic
 * sort network exchange.
 *
 * @param node      The node for which we check
 * @param partner   The partner of the data exchange
 * @param depth     The total depth of the sorting network (same for each step for a given network)
 * @return          True if the node should keep the small values, false otherwise
 */
 bool keepsmall(size_t node, size_t partner, size_t depth) noexcept {
    assert(node != partner);
    return ascending(node, depth) == (node < partner);
 }

 /*!
 * Data-exchange hook between a node and its partner.
 * No data is moved here; the call only checks that the two ids differ.
 *
 * @param node      The current node
 * @param partner   The partner of the data exchange, must differ from node
 */
 void exchange(size_t node, size_t partner) {
    assert(node != partner);
    // With NDEBUG the assert vanishes; these casts keep such builds free of
    // unused-parameter warnings.
    (void) node;
    (void) partner;
 }

 /*!
 * Element-wise compare-exchange between the sequences of a node and its partner.
 *
 * For every index the smaller value goes to the node and the larger to the partner
 * when \p keepsmall is true, and the other way round when it is false.
 *
 * @param data      The sequences of all nodes
 * @param node      The node whose sequence receives the selected values
 * @param partner   The node holding the other sequence of the exchange
 * @param keepsmall True if \p node keeps the small values, false if it keeps the large ones
 */
 void minmax(AllData_t& data, size_t node, size_t partner, bool keepsmall) {
    auto& mine   = data[node];
    auto& theirs = data[partner];

    // Both sequences are expected to have the same length. The loop is still
    // bounded by the shorter one so a mismatch can never index past the end.
    assert(mine.size() == theirs.size());
    const size_t count = std::min(mine.size(), theirs.size());

    for (size_t i = 0; i < count; ++i) {
        const bool misplaced = keepsmall ? (mine[i] > theirs[i])
                                         : (mine[i] < theirs[i]);
        if (misplaced)
            std::swap(mine[i], theirs[i]);
    }
 }

 /*!
 * Runs one sorting network of the given depth over all nodes.
 * The steps are visited from depth-1 down to 0 and every node pair is processed
 * once, from the side of its lower-numbered node.
 *
 * @param data      The sequences of all nodes
 * @param nodes     The number of nodes
 * @param depth     The total depth of the sorting network
 */
 void sort_network(AllData_t& data, size_t nodes, size_t depth) {
    for (size_t remaining = depth; remaining != 0; --remaining) {
        const size_t step = remaining - 1;
        for (size_t node = 0; node != nodes; ++node) {
            const size_t peer = partner(node, step);
            if (node >= peer)
                continue;   // the pair is handled when we visit its lower node
            exchange(node, peer);
            minmax(data, node, peer, keepsmall(node, peer, depth));
        }
    }
 }

 void distbitonic(size_t P, AllData_t& data) {
    auto p = static_cast<uint32_t>(std::log2(P));

    for (size_t node = 0 ; node < P ; ++node) { // Currently we do all nodes here!
        // Initially sort to create the half part of a bitonic
        if (ascending(node, 0))
            std::sort(data[node].begin(), data[node].end(), std::less<>());
        else
            std::sort(data[node].begin(), data[node].end(), std::greater<>());
    }

    // Run through sort network using elbow-sort
    for (size_t depth = 1; depth <= p; ++depth) {
        sort_network(data, P, depth);

        for (size_t node = 0 ; node < P ; ++node) { // Currently we do all nodes here!
            // elbow-sort here
            if (ascending(node, depth))
                std::sort(data[node].begin(), data[node].end(), std::less<>());
            else
                std::sort(data[node].begin(), data[node].end(), std::greater<>());
        }

    }
 }
--- a/homework_2/src/main.cpp
+++ b/homework_2/src/main.cpp
@@ -1,22 +1,43 @@
 /*!
 * \file    main.cpp
 * \file
 * \brief   Main application file for PDS HW2 (MPI)
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */

 #include <exception>
 #include <iostream>
 #include <algorithm>  // rand/srand
 //#include <ctime>      // rand/srand
 #if !defined TESTING
  #include <mpi.h>
 #endif

 #include <mpi.h>

 #include "matrix.hpp"
 #include "distbitonic.hpp"
 #include "utils.hpp"
 #include "config.h"
 //#include "matrix.hpp"

 // Global session data
 session_t   session;
 session_t  session;
 mpi_t      mpi;

 /*
 * Sorting data for up to 8 processes: 8 sequences of 8 elements each
 */
 AllData_t Data (8, Data_t (8));


 /*!
 * A small command line argument parser
@@ -64,6 +85,9 @@ bool get_options(int argc, char* argv[]){
    return status;
 }



 #if !defined TESTING
 int main(int argc, char* argv[]) try {
    // try to read command line
    if (!get_options(argc, argv))
@@ -72,22 +96,49 @@ int main(int argc, char* argv[]) try {
    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    #if defined DEBUG
        /*
         * In case of a debug build we will wait here until sleep_wait
         * will reset via debugger. In order to do that the user must attach
         * debugger to all processes. For example:
         *  $> mpirun -np 2 ./<program path>
         *  $> ps aux | grep <program>
         *  $> gdb <program> <PID1>
         *  $> gdb <program> <PID2>
         */
    #if defined TESTING
        volatile bool sleep_wait = false;
    #else
        volatile bool sleep_wait = true;
    #endif
        while (sleep_wait)
            sleep(1);
    #endif

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_size(MPI_COMM_WORLD, reinterpret_cast<int *>(&mpi.world_size));

    // Get the rank of the process
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_rank(MPI_COMM_WORLD, reinterpret_cast<int *>(&mpi.world_rank));

    // Get the name of the processor
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);
    mpi.processor_name = std::string (processor_name, name_len);

    // Print off a hello world message
    printf("Hello world from processor %s, rank %d out of %d processors\n",
           processor_name, world_rank, world_size);
    std::cout   << "Hello world from processor: " << mpi.processor_name
                << " rank " << mpi.world_rank
                << " out of " << mpi.world_size << " processors\n";

 //    std::srand(unsigned(std::time(nullptr)));
 //    for (auto& v : Data) {
 //        std::generate(v.begin(), v.end(), std::rand);
 //    }
 //
 //    distbitonic (2, Data);
 //    distbitonic (4, Data);

    // Finalize the MPI environment.
    MPI_Finalize();
@@ -97,4 +148,19 @@ catch (std::exception& e) {
    //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
    std::cerr << "Error: " << e.what() << '\n';
    exit(1);
 }
 }

 #else

 #include <gtest/gtest.h>
 #include <exception>

 /*!
 * Entry point of the test build. Hands the command line to GoogleTest and runs all tests.
 *
 * @return The value of RUN_ALL_TESTS(), or 1 when a std::exception escapes
 */
 GTEST_API_ int main(int argc, char **argv) try {
    testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
 }
 catch (std::exception& e) {
    std::cout << "Exception: " << e.what() << '\n';
    // Flowing off the end of main's handler counts as `return 0`, which would
    // report a crashed test run as a success.
    return 1;
 }

 #endif
--- a/homework_2/test/gtest/gtest/gtest-all.cpp
+++ b/homework_2/test/gtest/gtest/gtest-all.cpp
--- a/homework_2/test/gtest/gtest/gtest.h
+++ b/homework_2/test/gtest/gtest/gtest.h
--- a/homework_2/test/tests.cpp
+++ b/homework_2/test/tests.cpp
@@ -0,0 +1,384 @@
 /**
 * \file
 * \brief   PDS HW2 tests
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */

 #include <gtest/gtest.h>

 #include <algorithm>  // rand/srand
 #include <ctime>      // rand/srand
 #include "distbitonic.hpp"


 /*
 * ==========================================
 */

 /*
 * bool ascending(size_t node, size_t depth);
 * depth 0: directions alternate node by node, starting with ascending
 */
 TEST(Tdistbitonic_UT, ascending_test1) {
    // Hand-written expectations for the first eight nodes
    const bool ts_expected[] = {true, false, true, false, true, false, true, false};
    for (size_t node = 0 ; node < 8 ; ++node) {
        EXPECT_EQ(ascending(node, 0), ts_expected[node]);
    }

    // Even nodes ascend, odd nodes descend
    for (size_t node = 0 ; node < 256 ; ++node) {
        const bool even = (node % 2) == 0;
        EXPECT_EQ(ascending(node, 0), even);
    }
 }

 /*
 * bool ascending(size_t node, size_t depth);
 * depth 1: directions alternate in pairs of nodes, starting with ascending
 */
 TEST(Tdistbitonic_UT, ascending_test2) {
    // Hand-written expectations for the first eight nodes
    const bool ts_expected[] = {true, true, false, false, true, true, false, false};
    for (size_t node = 0 ; node < 8 ; ++node) {
        EXPECT_EQ(ascending(node, 1), ts_expected[node]);
    }

    // Both nodes of pair number `pair` share the direction: even pairs ascend
    for (size_t pair = 0 ; pair < 256 ; ++pair) {
        const bool even_pair = (pair % 2) == 0;
        EXPECT_EQ(ascending(2*pair,   1), even_pair);
        EXPECT_EQ(ascending(2*pair+1, 1), even_pair);
    }
 }

 /*
 * bool ascending(size_t node, size_t depth);
 * depths 3, 4 and 8: directions alternate in runs of 2^depth nodes
 */
 TEST(Tdistbitonic_UT, ascending_test3) {
    const size_t ts_depths[] = {3, 4, 8};

    for (size_t ts_depth : ts_depths) {
        const size_t run_len = 1UL << ts_depth;

        // Four consecutive runs: ascending, descending, ascending, descending
        for (size_t run = 0 ; run < 4 ; ++run) {
            const bool expected = (run % 2) == 0;
            for (size_t n = run*run_len ; n < (run+1)*run_len ; ++n)
                EXPECT_EQ(ascending(n, ts_depth), expected);
        }
    }
 }

 /*
 * size_t partner(size_t node, size_t step);
 * step = 0: neighbours are paired (0-1, 2-3, ...)
 */
 TEST(Tdistbitonic_UT, partner_test1) {
    // Hand-written expectations for the first eight nodes
    const size_t ts_expected[] = {1UL, 0UL, 3UL, 2UL, 5UL, 4UL, 7UL, 6UL};
    for (size_t node = 0 ; node < 8 ; ++node) {
        EXPECT_EQ(partner(node, 0), ts_expected[node]);
    }

    // Odd nodes pair with the node below, even nodes with the node above
    for (size_t node = 0 ; node < 256 ; ++node) {
        const bool odd = (node % 2) != 0;
        EXPECT_EQ(partner(node, 0), odd ? node-1 : node+1);
    }
 }

 /*
 * size_t partner(size_t node, size_t step);
 * step = 1: partners are two nodes apart
 */
 TEST(Tdistbitonic_UT, partner_test2) {
    // Hand-written expectations for the first eight nodes
    const size_t ts_expected[] = {2UL, 3UL, 0UL, 1UL, 6UL, 7UL, 4UL, 5UL};
    for (size_t node = 0 ; node < 8 ; ++node) {
        EXPECT_EQ(partner(node, 1), ts_expected[node]);
    }

    // Walk the nodes in groups of two; the group base decides the direction
    for (size_t base = 0 ; base < 256 ; base += 2) {
        const bool upper_group = (base % 4) != 0;
        for (size_t offset = 0 ; offset < 2 ; ++offset) {
            const size_t node = base + offset;
            EXPECT_EQ(partner(node, 1), (upper_group ? node-2 : node+2));
        }
    }
 }

 /*
 * size_t partner(size_t node, size_t step);
 * steps 2, 3 and 4: partners are 2^step nodes apart
 */
 TEST(Tdistbitonic_UT, partner_test3) {
    const size_t ts_steps[] = {2, 3, 4};

    for (size_t ts_step : ts_steps) {
        const size_t distance = 1UL << ts_step;

        // Walk the nodes in groups of `distance`; the group base decides
        // whether the partner lies above or below.
        for (size_t base = 0 ; base < 256 ; base += distance) {
            const bool upper_group = (base % (2*distance)) != 0;
            for (size_t offset = 0 ; offset < distance ; ++offset) {
                const size_t node = base + offset;
                EXPECT_EQ(partner(node, ts_step), (upper_group ? node-distance : node+distance));
            }
        }
    }
 }


 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 * Assertion check: a node paired with itself must trip the assert
 */
 TEST(Tdistbitonic_UT, keepsmall_test1) {
    // Each row: { node (used as partner too), depth }
    const size_t ts_cases[][2] = { {0, 0}, {1, 42}, {7, 42} };

    for (const auto& ts_case : ts_cases) {
        ASSERT_DEATH(keepsmall(ts_case[0], ts_case[0], ts_case[1]), "");
    }
 }

 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 *
 * depth: 1 | step: 0 | partner: [1, 0, 3, 2, 5, 4, 7, 6] | keepsmall: Bool[1, 0, 0, 1, 1, 0, 0, 1]
 */
 TEST(Tdistbitonic_UT, keepsmall_test2) {
    size_t ts_depth   = 1UL;
    size_t ts_partner[]  = { 1, 0, 3, 2, 5, 4, 7, 6};
    bool   ts_expected[] = {1, 0, 0, 1, 1, 0, 0, 1};

    for (size_t node = 0 ; node < 8UL ; ++node ) {
        EXPECT_EQ(ts_expected[node], keepsmall(node, ts_partner[node], ts_depth));
    }
 }

 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 *
 * depth: 2 | step: 1 | partner: [2, 3, 0, 1, 6, 7, 4, 5] | keepsmall: Bool[1, 1, 0, 0, 0, 0, 1, 1]
 */
 TEST(Tdistbitonic_UT, keepsmall_test3) {
    size_t ts_depth   = 2UL;
    size_t ts_partner[]  = { 2, 3, 0, 1, 6, 7, 4, 5};
    bool   ts_expected[] = {1, 1, 0, 0, 0, 0, 1, 1};

    for (size_t node = 0 ; node < 8UL ; ++node ) {
        EXPECT_EQ(ts_expected[node], keepsmall(node, ts_partner[node], ts_depth));
    }
 }

 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 *
 * depth: 2 | step: 0 | partner: [1, 0, 3, 2, 5, 4, 7, 6] | keepsmall: Bool[1, 0, 1, 0, 0, 1, 0, 1]
 */
 TEST(Tdistbitonic_UT, keepsmall_test4) {
    size_t ts_depth   = 2UL;
    size_t ts_partner[]  = { 1, 0, 3, 2, 5, 4, 7, 6};
    bool   ts_expected[] = {1, 0, 1, 0, 0, 1, 0, 1};

    for (size_t node = 0 ; node < 8UL ; ++node ) {
        EXPECT_EQ(ts_expected[node], keepsmall(node, ts_partner[node], ts_depth));
    }
 }

 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 *
 * depth: 3 | step: 2 | partner: [4, 5, 6, 7, 0, 1, 2, 3] | keepsmall: Bool[1, 1, 1, 1, 0, 0, 0, 0]
 */
 TEST(Tdistbitonic_UT, keepsmall_test5) {
    size_t ts_depth   = 3UL;
    size_t ts_partner[]  = { 4, 5, 6, 7, 0, 1, 2, 3};
    bool   ts_expected[] = {1, 1, 1, 1, 0, 0, 0, 0};

    for (size_t node = 0 ; node < 8UL ; ++node ) {
        EXPECT_EQ(ts_expected[node], keepsmall(node, ts_partner[node], ts_depth));
    }
 }

 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 *
 * depth: 3 | step: 1 | partner: [2, 3, 0, 1, 6, 7, 4, 5] | keepsmall: Bool[1, 1, 0, 0, 1, 1, 0, 0]
 */
 TEST(Tdistbitonic_UT, keepsmall_test6) {
    size_t ts_depth   = 3UL;
    size_t ts_partner[]  = { 2, 3, 0, 1, 6, 7, 4, 5};
    bool   ts_expected[] = {1, 1, 0, 0, 1, 1, 0, 0};

    for (size_t node = 0 ; node < 8UL ; ++node ) {
        EXPECT_EQ(ts_expected[node], keepsmall(node, ts_partner[node], ts_depth));
    }
 }

 /*
 * bool keepsmall(size_t node, size_t partner, size_t depth);
 *
 * depth: 3 | step: 0 | partner: [1, 0, 3, 2, 5, 4, 7, 6] | keepsmall: Bool[1, 0, 1, 0, 1, 0, 1, 0]
 */
 TEST(Tdistbitonic_UT, keepsmall_test7) {
    size_t ts_depth   = 3UL;
    size_t ts_partner[]  = { 1, 0, 3, 2, 5, 4, 7, 6};
    bool   ts_expected[] = {1, 0, 1, 0, 1, 0, 1, 0};

    for (size_t node = 0 ; node < 8UL ; ++node ) {
        EXPECT_EQ(ts_expected[node], keepsmall(node, ts_partner[node], ts_depth));
    }
 }

 TEST(Tdistbitonic_UT, distbitonic_test1) {
    AllData_t ts_Data {
            Data_t (8), Data_t (8)
    };

    std::srand(unsigned(std::time(nullptr)));
    for (auto& v : ts_Data) {
        std::generate(v.begin(), v.end(), std::rand);
    }

    distbitonic(2, ts_Data);

    auto max = std::numeric_limits<Data_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
        max = v.back();
    }
 }

 TEST(Tdistbitonic_UT, distbitonic_test2) {
    AllData_t ts_Data {
            Data_t (8), Data_t (8), Data_t (8), Data_t (8)
    };

    std::srand(unsigned(std::time(nullptr)));
    for (auto& v : ts_Data) {
        std::generate(v.begin(), v.end(), std::rand);
    }

    distbitonic(4, ts_Data);

    auto max = std::numeric_limits<Data_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
        max = v.back();
    }
 }

 TEST(Tdistbitonic_UT, distbitonic_test3) {
    AllData_t ts_Data {
            Data_t (32), Data_t (32), Data_t (32), Data_t (32),
            Data_t (32), Data_t (32), Data_t (32), Data_t (32)
    };

    std::srand(unsigned(std::time(nullptr)));
    for (auto& v : ts_Data) {
        std::generate(v.begin(), v.end(), std::rand);
    }

    distbitonic(8, ts_Data);

    auto max = std::numeric_limits<Data_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
        max = v.back();
    }
 }