HW2: (WIP) Checkpoint with code re-arrangement and elbowSort

2024-12-30 14:23:13 +02:00 · 2024-12-30 14:23:13 +02:00 · 4d1d7502aa
commit 4d1d7502aa
parent 1d6b271d2a
11 changed files with 545 additions and 1193 deletions
--- a/homework_2/include/config.h
+++ b/homework_2/include/config.h
@ -10,8 +10,7 @@
 #ifndef CONFIG_H_
 #define CONFIG_H_

-#include <iostream>
-#include <string>
+#include <cstdint>

 /*
 * Defines for different version of the exercise
@ -25,11 +24,15 @@
 #define CODE_VERSION   BITONIC
 #endif

+// Value type selection
+using   distValue_t = uint8_t;

 /*!
 * Session option for each invocation of the executable
 */
 struct session_t {
+    size_t  arraySize{0};
+    bool    ndebug{false};
    bool    timing{false};
    bool    verbose{false};    //!< Flag to enable verbose output to stdout
 };
--- a/homework_2/include/distbitonic.hpp
+++ b/homework_2/include/distbitonic.hpp
@ -1,73 +0,0 @@
-/*!
- * \file
- * \brief   Distributed bitonic implementation header
- *
- * \author
- *    Christos Choutouridis AEM:8997
- *    <cchoutou@ece.auth.gr>
- */
-
-#ifndef DISTBITONIC_H_
-#define DISTBITONIC_H_
-
-#include <cstdint>
-#include "utils.hpp"
-
-
-/*!
- * Enumerator for the different versions of the sorting method
- */
-enum class SortMode {
-    Bubbletonic,    //!< The v0.5 of the algorithm where we use a bubble-sort like approach
-    Bitonic         //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach
-};
-
-using Data_t    = std::vector<uint8_t>;
-using AllData_t = std::vector<Data_t>;
-
-/*
- * ============================== Sort utilities ==============================
- */
-
-/*!
- * The primary function template of ascending(). It is DISABLED since , it is explicitly specialized
- * for each of the \c SortMode
- */
-template <SortMode Mode> bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
-template <> bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept;
-template <> bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept;
-
-/*!
- * The primary function template of partner(). It is DISABLED since , it is explicitly specialized
- * for each of the \c SortMode
- */
-template <SortMode Mode> mpi_id_t partner(mpi_id_t, size_t) noexcept = delete;
-template <> mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept;
-template <> mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept;
-
-/*!
- * The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized
- * for each of the \c SortMode
- */
-template<SortMode Mode> bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
-template<> bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) noexcept;
-template<> bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) noexcept;
-
-bool isActive(mpi_id_t node, mpi_id_t nodes) noexcept;
-
-/*
- * ============================== Data utilities ==============================
- */
-void exchange(mpi_id_t node, mpi_id_t partner);
-void minmax(AllData_t& data, mpi_id_t node, mpi_id_t partner, bool keepsmall);
-
-/*
- * ============================== Sort algorithms ==============================
- */
-void bubbletonic_network(AllData_t& data, mpi_id_t nodes);
-void distBubbletonic(mpi_id_t P, AllData_t& data);
-
-void bitonic_network(AllData_t& data, mpi_id_t nodes, mpi_id_t depth);
-void distBitonic(mpi_id_t P, AllData_t& data);
-
-#endif //DISTBITONIC_H_
--- a/homework_2/include/distsort.hpp
+++ b/homework_2/include/distsort.hpp
@ -0,0 +1,288 @@
+/*!
+ * \file
+ * \brief   Distributed sort implementation header
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#ifndef DISTBITONIC_H_
+#define DISTBITONIC_H_
+
+#include <vector>
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#if !defined DEBUG
+#define NDEBUG
+#endif
+#include <cassert>
+
+#include "utils.hpp"
+#include "config.h"
+
+
+/*!
+ * Enumerator for the different versions of the sorting method
+ */
+enum class SortMode {
+    Bubbletonic,    //!< The v0.5 of the algorithm where we use a bubble-sort like approach
+    Bitonic         //!< The v1.0 of the algorithm where we use the bitonic data-exchange approach
+};
+
+/*
+ * ============================== Sort utilities ==============================
+ */
+
+/*!
+ * The primary function template of ascending(). It is DISABLED since , it is explicitly specialized
+ * for each of the \c SortMode
+ */
+template <SortMode Mode> inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
+
+/*!
+ * Returns the ascending or descending configuration of the node's sequence based on
+ * the current node (MPI process) and the depth of the sorting network
+ *
+ * @param node      The current node (MPI process)
+ * @return          True if we need ascending configuration, false otherwise
+ */
+template <> inline
+bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept {
+    return (node % 2) == 0;
+}
+
+/*!
+ * Returns the ascending or descending configuration of the node's sequence based on
+ * the current node (MPI process) and the depth of the sorting network
+ *
+ * @param node      The current node (MPI process)
+ * @param depth     The total depth of the sorting network (same for each step for a given network)
+ *
+ * @return          True if we need ascending configuration, false otherwise
+ */
+template <> inline
+bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept {
+    return !(node & (1 << depth));
+}
+
+/*!
+ * The primary function template of partner(). It is DISABLED since , it is explicitly specialized
+ * for each of the \c SortMode
+ */
+template <SortMode Mode> inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete;
+
+/*!
+ * Returns the node's partner for data exchange during the sorting network iterations
+ * of Bubbletonic
+ *
+ * @param node      The current node
+ * @param step      The step of the sorting network
+ * @return          The node id of the partner for data exchange
+ */
+template <> inline
+mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept {
+    //return (node % 2 == step % 2) ? node + 1 : node - 1;
+    return (((node+step) % 2) == 0) ? node + 1 : node - 1;
+}
+
+/*!
+ * Returns the node's partner for data exchange during the sorting network iterations
+ * of Bitonic
+ *
+ * @param node      The current node
+ * @param step      The step of the sorting network
+ * @return          The node id of the partner for data exchange
+ */
+template <> inline
+mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept {
+    return (node ^ (1 << step));
+}
+
+
+/*!
+ * The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized
+ * for each of the \c SortMode
+ */
+template<SortMode Mode> inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
+
+/*!
+ * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange.
+ *
+ * @param node      The node for which we check
+ * @param partner   The partner of the data exchange
+ * @return          True if the node should keep the small values, false otherwise
+ */
+template <> inline
+bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) noexcept {
+    assert(node != partner);
+    return (node < partner);
+}
+
+/*!
+ * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
+ *
+ * @param node      The node for which we check
+ * @param partner   The partner of the data exchange
+ * @param depth     The total depth of the sorting network (same for each step for a given network)
+ * @return          True if the node should keep the small values, false otherwise
+ */
+template <> inline
+bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) noexcept {
+    assert(node != partner);
+    return ascending<SortMode::Bitonic>(node, depth) == (node < partner);
+}
+
+/*!
+ * Predicate to check if the node is active in the current iteration of the bubbletonic
+ * sort exchange.
+ *
+ * @param node      The node to check
+ * @param nodes     The total number of nodes
+ * @return          True if the node is active, false otherwise
+ */
+bool isActive(mpi_id_t node, size_t nodes) noexcept;
+
+/*
+ * ============================== Data utilities ==============================
+ */
+
+/*!
+ *
+ * @tparam RangeT
+ * @param data
+ * @param ascending
+ */
+template<typename RangeT>
+void fullSort(RangeT& data, bool ascending) {
+    // Use introsort from stdlib++ here, unless ...
+    if (ascending)
+        std::sort(data.begin(), data.end(), std::less<>());
+    else
+        std::sort(data.begin(), data.end(), std::greater<>());
+}
+
+/*!
+ *
+ * @tparam ShadowedT
+ * @tparam CompT
+ * @param data
+ * @param comp
+ */
+template<typename ShadowedT, typename CompT>
+void elbowSortCore(ShadowedT& data, CompT comp) {
+    size_t N = data.size();
+    auto active = data.getActive();
+    auto shadow = data.getShadow();
+    size_t left = std::distance(
+            active.begin(),
+            std::min_element(active.begin(), active.end())
+    );
+    size_t right = (left == N-1) ? 0 : left + 1;
+
+    for (size_t i = 0 ; i<N ; ++i) {
+        if (comp(active[left], active[right])) {
+            shadow[i] = active[left];
+            left = (left == 0) ? N-1 : left -1;
+        }
+        else {
+            shadow[i] = active[right];
+            right = (right + 1) % N;
+        }
+    }
+    data.switch_active();
+}
+
+/*!
+ *
+ * @tparam ShadowedT
+ * @param data
+ * @param ascending
+ */
+template<typename ShadowedT>
+void elbowSort(ShadowedT& data, bool ascending) {
+    if (ascending)
+        elbowSortCore(data, std::less<>());
+    else
+        elbowSortCore(data, std::greater<>());
+}
+
+/*!
+ *
+ * @tparam RangeT
+ * @param local
+ * @param remote
+ * @param keepsmall
+ */
+template<typename RangeT>
+void minmax(RangeT& local, RangeT& remote, bool keepsmall) {
+    using value_t = typename RangeT::value_type;
+    std::transform(
+            local.begin(), local.end(),
+            remote.begin(),
+            local.begin(),
+            [keepsmall](const value_t& a, const value_t& b){
+                return (keepsmall) ? std::min(a, b) : std::max(a, b);
+            });
+}
+
+/*
+ * ============================== Sort algorithms ==============================
+ */
+
+/*!
+ *
+ * @tparam ShadowedT
+ * @param data
+ * @param Processes
+ */
+template<typename ShadowedT>
+void distBubbletonic(ShadowedT& data, mpi_id_t Processes) {
+    // Initially sort to create a half part of a bitonic sequence
+    fullSort(data, ascending<SortMode::Bubbletonic>(mpi.rank(), 0));
+
+    // Sort network
+    for (size_t step = 0; step < Processes-1; ++step) {
+        auto part = partner<SortMode::Bubbletonic>(mpi.rank(), step);
+        auto ks = keepSmall<SortMode::Bubbletonic>(mpi.rank(), part, Processes);
+        if (isActive(mpi.rank(), Processes)) {
+            mpi.exchange(part, data.getActive(), data.getShadow(), step);
+            minmax(data.getActive(), data.getShadow(), ks);
+            elbowSort(data, ascending<SortMode::Bubbletonic>(mpi.rank(), Processes));
+        }
+    }
+
+    if (!ascending<SortMode::Bubbletonic>(mpi.rank(), 0))
+        elbowSort(data, true);
+
+}
+
+
+/*!
+ *
+ * @tparam ShadowedT
+ * @param data
+ * @param Processes
+ */
+template<typename ShadowedT>
+void distBitonic(ShadowedT& data, mpi_id_t Processes) {
+    auto p = static_cast<uint32_t>(std::log2(Processes));
+
+    // Initially sort to create a half part of a bitonic sequence
+    fullSort(data, ascending<SortMode::Bitonic>(mpi.rank(), 0));
+
+    // Run through sort network using elbow-sort
+    for (size_t depth = 1; depth <= p; ++depth) {
+        for (size_t step = depth; step > 0;) {
+            --step;
+            auto part = partner<SortMode::Bitonic>(mpi.rank(), step);
+            auto ks = keepSmall<SortMode::Bitonic>(mpi.rank(), part, depth);
+            mpi.exchange(part, data.getActive(), data.getShadow(), (depth << 8) | step);
+            minmax(data.getActive(), data.getShadow(), ks);
+        }
+        elbowSort (data, ascending<SortMode::Bitonic>(mpi.rank(), depth));
+    }
+}
+
+#endif //DISTBITONIC_H_
--- a/homework_2/include/impl.hpp
+++ b/homework_2/include/impl.hpp
@ -1,14 +0,0 @@
-/*!
- * \file
- * \brief   The distributed bitonic implementation header
- *
- * \author
- *    Christos Choutouridis AEM:8997
- *    <cchoutou@ece.auth.gr>
- */
-
-#ifndef IMPL_H_
-#define IMPL_H_
-
-
-#endif //IMPL_H_
--- a/homework_2/include/matrix.hpp
+++ b/homework_2/include/matrix.hpp
@ -1,804 +0,0 @@
-/**
- * \file    matrix.hpp
- * \brief   A matrix abstraction implementation
- *
- * \author
- *    Christos Choutouridis AEM:8997
- *    <cchoutou@ece.auth.gr>
- */
-#ifndef MATRIX_HPP_
-#define MATRIX_HPP_
-
-#include <type_traits>
-#include <utility>
-#include <algorithm>
-#include <vector>
-#include <tuple>
-
-namespace mtx {
-
-using std::size_t;
-
-/*
- * Small helper to strip types
- */
-template<typename T>
-struct remove_cvref {
-    typedef std::remove_cv_t<std::remove_reference_t<T>> type;
-};
-template<typename T>
-using remove_cvref_t = typename remove_cvref<T>::type;
-
-/*!
- * Enumerator to denote the storage type of the array to use.
- */
-enum class MatrixType {
-   DENSE,      /*!< Matrix is dense */
-   SPARSE,     /*!< Matrix is sparse */
-};
-
-/*!
- * Enumerator to denote the storage type of the array to use.
- */
-enum class MatrixOrder {
-   COLMAJOR,     /*!< Matrix is column major */
-   ROWMAJOR,     /*!< Matrix is row major */
-};
-
-/*
- * Forward type declarations
- */
-
-template<typename MatrixType> struct MatCol;
-template<typename MatrixType> struct MatRow;
-template<typename MatrixType> struct MatVal;
-
- /*!
- * A 2-D matrix functionality over a 1-D array
- *
- * This is a very thin abstraction layer over a native array.
- * This is tested using compiler explorer and our template produce
- * almost identical assembly.
- *
- * The penalty hit we have is due to the fact that we use a one dimension array
- * and we have to calculate the actual position from an (i,j) pair.
- * The use of 1D array was our intention from the beginning, so the penalty
- * was pretty much unavoidable.
- *
- * \tparam DataType  The underling data type of the array
- * \tparam IndexType The underling type for the index variables and sizes
- * \tparam Type      The storage type of the array
- *    \arg  FULL              For full matrix
- *    \arg  SYMMETRIC         For symmetric matrix (we use only the lower part)
- */
-template<typename DataType,
-         typename IndexType = size_t,
-         MatrixType Type    = MatrixType::DENSE,
-         MatrixOrder Order  = MatrixOrder::ROWMAJOR,
-         bool Symmetric     = false>
-struct Matrix {
-
-   using dataType    = DataType;                   //!< meta:export of underling data type
-   using indexType   = IndexType;                  //!< meta:export of underling index type
-   static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
-   static constexpr MatrixType matrixType = Type;  //!< meta:export of array type
-   static constexpr bool symmetric = Symmetric;    //!< meta:export symmetric flag
-
-   /*!
-    * \name Obj lifetime
-    */
-   //! @{
-
-   //! Construct an empty matrix with dimensions rows x columns
-   Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept
-       : vector_storage_(capacity(rows, columns)),
-         raw_storage_(nullptr),
-         use_vector_(true),
-         rows_(rows),
-         cols_(columns) {
-       data_ = vector_storage_.data();
-    }
-
-   //! Construct a matrix by copying existing data with dimensions rows x columns
-   Matrix(DataType* data, IndexType major_start, IndexType major_length, IndexType minor_length) noexcept
-      :  vector_storage_(),
-         raw_storage_ (data + major_start * minor_length),
-         use_vector_ (false) {
-      if constexpr (Order == MatrixOrder::ROWMAJOR) {
-         rows_ = major_length;
-         cols_ = minor_length;
-      }
-      else {
-         rows_ = minor_length;
-         cols_ = major_length;
-      }
-      data_ = raw_storage_;
-   }
-
-   //! Construct a matrix using an initializer list
-   Matrix(IndexType rows, IndexType columns, std::initializer_list<DataType> list)
-      : vector_storage_(list),
-        raw_storage_(nullptr),
-        use_vector_(true),
-        rows_(rows),
-        cols_(columns) {
-      if (list.size() != capacity(rows, columns)) {
-          throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
-      }
-      data_ = vector_storage_.data();
-   }
-
-   //! move ctor
-   Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
-   //! move
-   Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
-   Matrix(const Matrix& m)             = delete;  //!< No copy ctor
-   Matrix& operator=(const Matrix& m)  = delete;  //!< No copy
-   //Matrix(const Matrix& m);
-   //Matrix& operator=(const Matrix& m) { copy(m); }
-
-   //! @}
-
-   //! \name Data exposure
-   //! @{
-
-
-   //! Get/Set the size of each dimension
-   IndexType rows() const noexcept { return rows_; }
-   IndexType columns() const noexcept { return cols_; }
-
-   //! Get the interface size of the Matrix (what appears to be the size)
-   IndexType size() const {
-      return rows_ * cols_;
-   }
-   //! Set the interface size of the Matrix (what appears to be the size)
-   IndexType resize(IndexType rows, IndexType columns) {
-      if (use_vector_) {
-         rows_ = rows;
-         cols_ = columns;
-         vector_storage_.resize(capacity(rows_, cols_));
-         data_ = vector_storage_.data();
-      }
-      return capacity(rows_, cols_);
-   }
-
-   //! Actual memory capacity of the symmetric matrix
-   static constexpr IndexType capacity(IndexType M, IndexType N) {
-      if constexpr (Symmetric)
-         return (M+1)*N/2;
-      else
-         return M*N;
-   }
-
-   /*
-    * virtual 2D accessors
-    */
-   DataType get (IndexType i, IndexType j) {
-      if constexpr (Symmetric) {
-         auto T = [](size_t i)->size_t { return i*(i+1)/2; };          // Triangular number of i
-         if constexpr (Order == MatrixOrder::COLMAJOR) {
-            // In column major we use the lower triangle of the matrix
-            if (i>=j)   return data_[j*rows_ - T(j) + i];  // Lower, use our notation
-            else        return data_[i*rows_ - T(i) + j];  // Upper, use opposite index
-         }
-         else {
-            // In row major we use the upper triangle of the matrix
-            if (i<=j)   return data_[i*cols_ - T(i) + j];  // Upper, use our notation
-            else        return data_[j*cols_ - T(j) + i];  // Lower, use opposite index
-         }
-      }
-      else {
-         if constexpr (Order == MatrixOrder::COLMAJOR)
-            return data_[i + j*rows_];
-         else
-            return data_[i*cols_ + j];
-      }
-   }
-
-   /*!
-    * \fn DataType set(DataType, IndexType, IndexType)
-    * \param v
-    * \param i
-    * \param j
-    * \return
-    */
-   DataType set (DataType v, IndexType i, IndexType j) {
-      if constexpr (Symmetric) {
-         auto T = [](size_t i)->size_t { return i*(i+1)/2; };          // Triangular number of i
-         if constexpr (Order == MatrixOrder::COLMAJOR) {
-            // In column major we use the lower triangle of the matrix
-            if (i>=j)   return data_[j*rows_ - T(j) + i] = v;  // Lower, use our notation
-            else        return data_[i*rows_ - T(i) + j] = v;  // Upper, use opposite index
-         }
-         else {
-            // In row major we use the upper triangle of the matrix
-            if (i<=j)   return data_[i*cols_ - T(i) + j] = v;  // Upper, use our notation
-            else        return data_[j*cols_ - T(j) + i] = v;  // Lower, use opposite index
-         }
-      }
-      else {
-         if constexpr (Order == MatrixOrder::COLMAJOR)
-            return data_[i + j*rows_] = v;
-         else
-            return data_[i*cols_ + j] = v;
-      }
-   }
-//   DataType operator()(IndexType i, IndexType j) { return get(i, j); }
-   /*!
-    * Return a proxy MatVal object with read and write capabilities.
-    * @param i    The row number
-    * @param j    The column number
-    * @return     tHE MatVal object
-    */
-   MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
-      return MatVal<Matrix>(this, get(i, j), i, j);
-   }
-
-   // a basic serial iterator support
-   DataType* data() noexcept { return data_; }
-   DataType* begin() noexcept { return data_; }
-   const DataType* begin() const noexcept { return data_; }
-   DataType* end() noexcept { return data_ + capacity(rows_, cols_); }
-   const DataType* end() const noexcept { return data_ + capacity(rows_, cols_); }
-
-//   IndexType begin_idx() noexcept { return 0; }
-//   IndexType end_idx()   noexcept { return capacity(rows_, cols_); }
-
-   const DataType* data() const noexcept { return data_; }
-   const IndexType begin_idx() const noexcept { return 0; }
-   const IndexType end_idx()   const noexcept { return capacity(rows_, cols_); }
-   //! @}
-
-   /*!
-    * \name Safe iteration API
-    *
-    * This api automates the iteration over the array based on
-    * MatrixType
-    */
-   //! @{
-   template<typename F, typename... Args>
-   void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
-      for (IndexType it=begin ; it<end ; ++it) {
-         std::forward<F>(lambda)(std::forward<Args>(args)..., it);
-      }
-   }
-   //! @}
-
-   //
-   void swap(Matrix& src) noexcept {
-      std::swap(vector_storage_, src.vector_storage_);
-      std::swap(raw_storage_, src.raw_storage_);
-      std::swap(data_, src.data_);
-      std::swap(use_vector_, src.use_vector_);
-      std::swap(rows_, src.rows_);
-      std::swap(cols_, src.cols_);
-   }
-
-private:
-   //! move helper
-   void moves(Matrix&& src) noexcept {
-      vector_storage_ = std::move(src.vector_storage_);
-      raw_storage_    = std::move(src.raw_storage_);
-      data_           = std::move(src.data_);
-      use_vector_     = std::move(src.use_vector_);
-      rows_           = std::move(src.rows_);
-      cols_           = std::move(src.cols_);
-   }
-
-   // Storage
-   std::vector<DataType>
-               vector_storage_;  //!< Internal storage (if used).
-   DataType*   raw_storage_;     //!< External storage (if used).
-   DataType*   data_;            //!< Pointer to active storage.
-   bool        use_vector_;      //!< True if using vector storage, false for raw pointer.
-   IndexType   rows_{};          //!< the virtual size of rows.
-   IndexType   cols_{};          //!< the virtual size of columns.
-};
-
-
-/**
- * A simple sparse matrix specialization.
- *
- * We use CSC format and provide get/set functionalities for each (i,j) item
- * on the matrix. We also provide a () overload using a proxy MatVal object.
- * This way the user can:
- * \code
- *    auto v = A(3,4);
- *    A(3, 4) = 7;
- * \endcode
- *
- * We also provide getCol() and getRow() functions witch return a viewer/iterator to rows and
- * columns of the matrix. In the case of a symmetric matrix instead of a row we return the
- * equivalent column. This way we gain speed due to CSC format nature.
- *
- * @tparam DataType  The type for values
- * @tparam IndexType The type for indexes
- * @tparam Type      The Matrix type (FULL or SYMMETRIC)
- */
-template<typename DataType, typename IndexType,
-         MatrixOrder Order,
-         bool Symmetric>
-struct Matrix<DataType, IndexType, MatrixType::SPARSE, Order, Symmetric> {
-
-   using dataType    = DataType;                   //!< meta:export of underling data type
-   using indexType   = IndexType;                  //!< meta:export of underling index type
-   static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
-   static constexpr MatrixType matrixType = MatrixType::SPARSE;  //!< meta:export of array type
-   static constexpr bool symmetric = Symmetric;    //!< meta:export symmetric flag
-
-   friend struct MatCol<Matrix>;
-   friend struct MatRow<Matrix>;
-   friend struct MatVal<Matrix>;
-
-   /*!
-    * \name Obj lifetime
-    */
-   //! @{
-
-   //! Default ctor with optional memory allocations
-   Matrix(IndexType n=IndexType{}) noexcept:
-      values{},
-      rows{},
-      col_ptr((n)? n+1:2, IndexType{}),
-      N(n),
-      NNZ(0) { }
-
-   //! A ctor using csc array data
-   Matrix(IndexType n, IndexType nnz, const IndexType* row, const IndexType* col) noexcept:
-      values(nnz, 1),
-      rows(row, row+nnz),
-      col_ptr(col, col+n+1),
-      N(n),
-      NNZ(nnz) { }
-
-   //! ctor using csc array data with value array
-   Matrix(IndexType n, IndexType nnz, const DataType* v, const IndexType* row, const IndexType* col) noexcept:
-      values(v, v+nnz),
-      rows(row, row+nnz),
-      col_ptr(col, col+n+1),
-      N(n),
-      NNZ(nnz) { }
-
-   //! ctor vectors of row/col and default value for values array
-   Matrix(IndexType n, IndexType nnz, const DataType v,
-          const std::vector<IndexType>& row, const std::vector<IndexType>& col) noexcept:
-      values(nnz, v),
-      rows (row),
-      col_ptr(col),
-      N(n),
-      NNZ(nnz) { }
-
-   //! move ctor
-   Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
-   //! move
-   Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
-   Matrix(const Matrix& m)             = delete;  //!< make sure there are no copies
-   Matrix& operator=(const Matrix& m)  = delete;  //!< make sure there are no copies
-   //! @}
-
-   //! \name Data exposure
-   //! @{
-
-   //! \return the dimension of the matrix
-   IndexType size()     noexcept { return N; }
-   //! After construction size configuration tool
-   IndexType resize(IndexType n) {
-      col_ptr.resize(n+1);
-      return N = n;
-   }
-   //! \return the NNZ of the matrix
-   IndexType capacity() noexcept { return NNZ; }
-   //! After construction NNZ size configuration tool
-   IndexType capacity(IndexType nnz) noexcept {
-      values.reserve(nnz);
-      rows.reserve(nnz);
-      return NNZ;
-   }
-   // getters for row arrays of the struct (unused)
-   std::vector<DataType>&  getValues() noexcept { return values; }
-   std::vector<IndexType>& getRows() noexcept { return rows; }
-   std::vector<IndexType>& getCols() noexcept { return col_ptr; }
-
-   /*!
-    * Return a proxy MatVal object with read and write capabilities.
-    * @param i    The row number
-    * @param j    The column number
-    * @return     tHE MatVal object
-    */
-   MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
-      return MatVal<Matrix>(this, get(i, j), i, j);
-   }
-
-   /*!
-    * A read item functionality using binary search to find the correct row
-    *
-    * @param i    The row number
-    * @param j    The column number
-    * @return     The value of the item or DataType{} if is not present.
-    */
-   DataType get(IndexType i, IndexType j) noexcept {
-      IndexType idx; bool found;
-      std::tie(idx, found) =find_idx(rows, col_ptr[j], col_ptr[j+1], i);
-      return (found) ? values[idx] : 0;
-   }
-
-   /*!
-    * A write item functionality.
-    *
-    * First we search if the matrix has already a value in (i, j) position.
-    * If so we just change it to a new value. If not we add the item on the matrix.
-    *
-    * @note
-    *    When change a value, we don't increase the NNZ value of the struct. We expect the user has already
-    *    change the NNZ value to the right one using @see capacity() function. When adding a value we
-    *    increase the NNZ.
-    *
-    * @param i    The row number
-    * @param j    The column number
-    * @return     The new value of the item .
-    */
-   DataType set(DataType v, IndexType i, IndexType j) {
-      IndexType idx; bool found;
-      std::tie(idx, found) = find_idx(rows, col_ptr[j], col_ptr[j+1], i);
-      if (found)
-         return values[idx] = v;    // we don't change NNZ even if we write "0"
-      else {
-         values.insert(values.begin()+idx, v);
-         rows.insert(rows.begin()+idx, i);
-         std::transform(col_ptr.begin()+j+1, col_ptr.end(), col_ptr.begin()+j+1, [](IndexType it) {
-            return ++it;
-         });
-         ++NNZ;                     // we increase the NNZ even if we write "0"
-         return v;
-      }
-   }
-
-   /*!
-    * Get a view of a CSC column
-    * @param j    The column to get
-    * @return     The MatCol object @see MatCol
-    */
-   MatCol<Matrix> getCol(IndexType j) noexcept {
-      return MatCol<Matrix>(this, col_ptr[j], col_ptr[j+1]);
-   }
-
-   /*!
-    * Get a view of a CSC row
-    *
-    * In case of a SYMMETRIC matrix we can return a column instead.
-    *
-    * @param j    The row to get
-    * @return     On symmetric matrix MatCol otherwise a MatRow
-    */
-
-   MatCol<Matrix> getRow(IndexType i) noexcept {
-      if constexpr (Symmetric)
-         return getCol(i);
-      else
-         return MatRow<Matrix>(this, i);
-   }
-
-   // values only iterator support
-   DataType* begin() noexcept { return values.begin(); }
-   DataType* end()   noexcept { return values.end(); }
-   //! @}
-
-   //! A small iteration helper
-   template<typename F, typename... Args>
-   void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
-      for (IndexType it=begin ; it<end ; ++it) {
-         std::forward<F>(lambda)(std::forward<Args>(args)..., it);
-      }
-   }
-
-private:
-   /*!
-    * A small binary search implementation using index for begin-end instead of iterators.
-    *
-    * \param   v     Reference to vector to search
-    * \param   begin The vector's index to begin
-    * \param   end   The vector's index to end
-    * \param   match What to search
-    * \return  An <index, status> pair.
-    *                index    is the index of the item or end if not found
-    *                status   is true if found, false otherwise
-    */
-   std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
-      if (v.capacity() != 0 && begin < end) {
-         IndexType b = begin, e = end-1;
-         while (b <= e) {
-            IndexType m = (b+e)/2;
-            if       (v[m] == match)   return  std::make_pair(m, true);
-            else if  (b >= e)          return  std::make_pair(end, false);
-            else {
-               if    (v[m] <  match)   b = m +1;
-               else                    e = m -1;
-            }
-         }
-      }
-      return std::make_pair(end, false);
-   }
-
-   // move helper
-   void moves(Matrix&& src) noexcept {
-      values  = std::move(src.values);
-      rows    = std::move(src.rows);
-      col_ptr = std::move(src.col_ptr);
-      N       = std::move(src.N);   // redundant for primitives
-      NNZ     = std::move(src.NNZ); //
-   }
-   //! \name Data
-   //! @{
-   std::vector<DataType>   values {};     //!< vector to store the values of the matrix
-   std::vector<IndexType>  rows{};        //!< vector to store the row information
-   std::vector<IndexType>  col_ptr{1,0};  //!< vector to store the column pointers
-   IndexType   N{0};                      //!< The dimension of the matrix (square)
-   IndexType   NNZ{0};                    //!< The NNZ (capacity of the matrix)
-   //! @}
-};
-
-
-/*!
- * A view/iterator hybrid object for Matrix columns.
- *
- * This object provides access to a column of a Matrix. The public functionalities
- * allow data access using indexes instead of iterators. We prefer indexes over iterators
- * because we can apply the same index to different inner vector of Matrix without conversion.
- *
- * @tparam DataType
- * @tparam IndexType
- */
-template<typename MatrixType>
-struct MatCol {
-   using owner_t = MatrixType;
-
-   using DataType  = typename MatrixType::dataType;
-   using IndexType = typename MatrixType::indexType;
-
-   /*!
-    * ctor using column pointers for begin-end. own is pointer to Matrix.
-    */
-   MatCol(owner_t* own, const IndexType begin, const IndexType end) noexcept :
-      owner_(own), index_(begin), begin_(begin), end_(end) {
-      vindex_ = vIndexCalc(index_);
-   }
-   MatCol()                        = default;
-   MatCol(const MatCol&)           = delete;   //!< make sure there are no copies
-   MatCol& operator=(const MatCol&)= delete;   //!< make sure there are no copies
-   MatCol(MatCol&&)                = default;
-   MatCol& operator=(MatCol&&)     = default;
-
-   //! a simple dereference operator, like an iterator
-   DataType operator* () {
-      return get();
-   }
-   //! Increment operator acts on index(), like an iterator
-   MatCol& operator++ ()    { advance(); return *this; }
-   MatCol& operator++ (int) { MatCol& p = *this; advance(); return p; }
-
-   //! () operator acts as member access (like a view)
-   DataType operator()(IndexType x) {
-      return (x == index())? get() : DataType{};
-   }
-   //! = operator acts as member assignment (like a view)
-   DataType operator= (DataType v) { return owner_->values[index_] = v; }
-   // iterator like handlers
-   // these return a virtual index value based on the items position on the full matrix
-   // but the move of the index is just a ++ away.
-   IndexType       index()       noexcept { return vindex_; }
-   const IndexType index() const noexcept { return vindex_; }
-   IndexType       begin()       noexcept { return vIndexCalc(begin_); }
-   const IndexType begin() const noexcept { return vIndexCalc(begin_); }
-   IndexType       end()         noexcept { return owner_->N; }
-   const IndexType end()   const noexcept { return owner_->N; }
-
-   /*!
-    * Multiplication operator
-    *
-    * We follow only the non-zero values and multiply only the common indexes.
-    *
-    * @tparam C   Universal reference for the type right half site column
-    *
-    * @param c    The right hand site matrix
-    * @return     The value of the inner product of two vectors
-    * @note       The time complexity is \$ O(nnz1+nnz2) \$.
-    *             Where the nnz is the max NNZ elements of the column of the matrix
-    */
-   template <typename C>
-   DataType operator* (C&& c) {
-      static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
-      DataType v{};
-      while (index() != end() && c.index() != c.end()) {
-         if      (index() < c.index())  advance(); // advance me
-         else if (index() > c.index())  ++c;       // advance other
-         else { //index() == c.index()
-            v += get() * *c;                       // multiply and advance both
-            ++c;
-            advance();
-         }
-      }
-      return v;
-   }
-
-private:
-   //! small tool to increase the index pointers to Matrix
-   void advance() noexcept {
-      ++index_;
-      vindex_ = vIndexCalc(index_);
-   }
-   //! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
-   IndexType vIndexCalc(IndexType idx) {
-      return (idx < end_) ? owner_->rows[idx] : end();
-   }
-   //! small get tool
-   DataType get() { return owner_->values[index_]; }
-
-   owner_t*    owner_   {nullptr};     //!< Pointer to owner Matrix. MatCol is just a view
-   IndexType   vindex_  {IndexType{}}; //!< Virtual index of full matrix
-   IndexType   index_   {IndexType{}}; //!< index to Matrix::rows
-   IndexType   begin_   {IndexType{}}; //!< beginning index of the column in Matrix::rows
-   IndexType   end_     {IndexType{}}; //!< ending index of the column in Matrix::rows
-};
-
-/*!
- * A view/iterator hybrid object for Matrix rows.
- *
- * This object provides access to a column of a Matrix. The public functionalities
- * allow data access using indexes instead of iterators. We prefer indexes over iterators
- * because we can apply the same index to different inner vector of Matrix without conversion.
- *
- * @tparam DataType
- * @tparam IndexType
- */
-template<typename MatrixType>
-struct MatRow {
-   using owner_t = MatrixType;
-
-   using DataType  = typename MatrixType::dataType;
-   using IndexType = typename MatrixType::indexType;
-
-   /*!
-    * ctor using virtual full matrix row index. own is pointer to Matrix.
-    */
-   MatRow(owner_t* own, const IndexType row) noexcept :
-      owner_(own), vindex_(IndexType{}), row_(row), index_(IndexType{}),
-      begin_(IndexType{}), end_(owner_->NNZ) {
-      // place begin
-      while(begin_ != end_ && owner_->rows[begin_] != row_)
-         ++begin_;
-      // place index_ and vindex_
-      if (owner_->rows[index_] != row_)
-         advance();
-   }
-   MatRow()                        = default;
-   MatRow(const MatRow&)           = delete;   //!< make sure there are no copies
-   MatRow& operator=(const MatRow&)= delete;   //!< make sure there are no copies
-   MatRow(MatRow&&)                = default;
-   MatRow& operator=(MatRow&&)     = default;
-
-   //! a simple dereference operator, like an iterator
-   DataType operator* () {
-      return get();
-   }
-   //! Increment operator acts on index(), like an iterator
-   //! here the increment is a O(N) process.
-   MatRow& operator++ ()    { advance(); return *this; }
-   MatRow& operator++ (int) { MatRow& p = *this; advance(); return p; }
-
-   //! () operator acts as member access (like a view)
-   DataType operator()(IndexType x) {
-      return (x == index())? get() : DataType{};
-   }
-   //! = operator acts as member assignment (like a view)
-   DataType operator= (DataType v) { return owner_->values[index_] = v; }
-   // iterator like handlers
-   // these return a virtual index value based on the items position on the full matrix
-   // but the move of the index is just a ++ away.
-   IndexType       index()       noexcept { return vindex_; }
-   const IndexType index() const noexcept { return vindex_; }
-   IndexType       begin()       noexcept { return vIndexCalc(begin_); }
-   const IndexType begin() const noexcept { return vIndexCalc(begin_); }
-   IndexType       end()         noexcept { return owner_->N; }
-   const IndexType end()   const noexcept { return owner_->N; }
-
-   /*!
-    * Multiplication operator
-    *
-    * We follow only the non-zero values and multiply only the common indexes.
-    *
-    * @tparam C   Universal reference for the type right half site column
-    *
-    * @param c    The right hand site matrix
-    * @return     The value of the inner product of two vectors
-    * @note       The time complexity is \$ O(N+nnz2) \$ and way heavier the ColxCol multiplication.
-    *             Where the nnz is the max NNZ elements of the column of the matrix
-    */
-   template <typename C>
-   DataType operator* (C&& c) {
-      static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
-      DataType v{};
-      while (index() != end() && c.index() != c.end()) {
-         if      (index() < c.index())  advance(); // advance me
-         else if (index() > c.index())  ++c;       // advance other
-         else { //index() == c.index()
-            v += get() * *c;                       // multiply and advance both
-            ++c;
-            advance();
-         }
-      }
-      return v;
-   }
-private:
-   //! small tool to increase the index pointers to Matrix matrix
-   //! We have to search the entire rows vector in Matrix to find the next
-   //! virtual row position.
-   //! time complexity O(N)
-   void advance() noexcept {
-      do
-         ++index_;
-      while(index_ != end_ && owner_->rows[index_] != row_);
-      vindex_ = vIndexCalc(index_);
-   }
-   //! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
-   IndexType vIndexCalc(IndexType idx) {
-      for(IndexType i =0 ; i<(owner_->N+1) ; ++i)
-         if (idx < owner_->col_ptr[i])
-            return i-1;
-      return end();
-   }
-   //! small get tool
-   DataType get() { return owner_->values[index_]; }
-
-   owner_t*    owner_   {nullptr};     //!< Pointer to owner Matrix. MatCol is just a view
-   IndexType   vindex_  {IndexType{}}; //!< Virtual index of full matrix
-   IndexType   row_     {IndexType{}}; //!< The virtual full matrix row of the object
-   IndexType   index_   {IndexType{}}; //!< index to Matrix::rows
-   IndexType   begin_   {IndexType{}}; //!< beginning index of the column in Matrix::rows
-   IndexType   end_     {IndexType{}}; //!< ending index of the column in Matrix::rows
-};
-
-/*!
- * A proxy Matrix value object/view.
- *
- * This object acts as proxy to provide read/write access to an Matrix item.
- *
- * @tparam DataType     The type of the values of the Matrix matrix
- * @tparam IndexType    The type of the indexes of the Matrix matrix
- */
-template<typename MatrixType>
-struct MatVal {
-   using owner_t = MatrixType;
-
-   using DataType  = typename MatrixType::dataType;
-   using IndexType = typename MatrixType::indexType;
-
-   //!< ctor using all value-row-column data, plus a pointer to owner Matrix object
-   MatVal(owner_t* own, DataType v, IndexType i, IndexType j) :
-      owner_(own), v_(v), i_(i), j_(j) { }
-   MatVal()                         = default;
-   MatVal(const MatVal&)            = delete;  //!< make sure there are no copies
-   MatVal& operator=(const MatVal&) = delete;  //!< make sure there are no copies
-   MatVal(MatVal&&)                 = default;
-   MatVal& operator=(MatVal&&)      = default;
-
-   //! Operator to return the DataType value implicitly
-   operator DataType() { return v_; }
-   //! Operator to write back to owner the assigned value
-   //! for ex: A(2,3) = 5;
-   MatVal& operator=(DataType v) {
-      v_ = v;
-      owner_->set(v_, i_, j_);
-      return *this;
-   }
-private:
-   owner_t*    owner_{nullptr};  //!< Pointer to owner Matrix. MatVal is just a view.
-   DataType    v_{DataType{}};   //!< The value of the row-column pair (for speed)
-   IndexType   i_{IndexType{}};  //!< The row
-   IndexType   j_{IndexType{}};  //!< the column
-};
-
-
-} // namespace mtx
-
-
-#endif /* MATRIX_HPP_ */
--- a/homework_2/include/utils.hpp
+++ b/homework_2/include/utils.hpp
@ -1,5 +1,5 @@
 /**
- * \file    utils.hpp
+ * \file
 * \brief   Utilities header
 *
 * \author
@ -9,6 +9,7 @@
 #ifndef UTILS_HPP_
 #define UTILS_HPP_

+#include <vector>
 #include <iostream>
 #include <chrono>
 #include <unistd.h>
@ -17,6 +18,18 @@
 //#include "matrix.hpp"
 #include "config.h"

+template <typename T> struct MPI_TypeMapper;
+
+// Specializations for supported types
+template <> struct MPI_TypeMapper<char>          { static MPI_Datatype getType() { return MPI_CHAR; } };
+template <> struct MPI_TypeMapper<unsigned char> { static MPI_Datatype getType() { return MPI_UNSIGNED_CHAR; } };
+template <> struct MPI_TypeMapper<short>         { static MPI_Datatype getType() { return MPI_SHORT; } };
+template <> struct MPI_TypeMapper<int>           { static MPI_Datatype getType() { return MPI_INT; } };
+template <> struct MPI_TypeMapper<long>          { static MPI_Datatype getType() { return MPI_LONG; } };
+template <> struct MPI_TypeMapper<long long>     { static MPI_Datatype getType() { return MPI_LONG_LONG; } };
+template <> struct MPI_TypeMapper<unsigned short>{ static MPI_Datatype getType() { return MPI_UNSIGNED_SHORT; } };
+template <> struct MPI_TypeMapper<unsigned long> { static MPI_Datatype getType() { return MPI_UNSIGNED_LONG; } };
+template <> struct MPI_TypeMapper<unsigned long long> { static MPI_Datatype getType() { return MPI_UNSIGNED_LONG_LONG; } };

 template<typename TID = int>
 struct MPI_t {
@ -28,8 +41,10 @@ struct MPI_t {

        // Get the number of processes
        int size_value, rank_value;
-        size_ = static_cast<ID_t>(MPI_Comm_size(MPI_COMM_WORLD, &size_value));
-        rank_ = static_cast<ID_t>(MPI_Comm_rank(MPI_COMM_WORLD, &rank_value));
+        MPI_Comm_size(MPI_COMM_WORLD, &size_value);
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank_value);
+        size_ = static_cast<ID_t>(size_value);
+        rank_ = static_cast<ID_t>(rank_value);

        // Get the name of the processor
        char processor_name[MPI_MAX_PROCESSOR_NAME];
@ -43,18 +58,24 @@ struct MPI_t {
        MPI_Finalize();
    }

-    bool exchange(ID_t partner, const void *send_data, void *recv_data, int data_count, MPI_Datatype datatype) {
-        bool ret = true;
+    template<typename T>
+    void exchange(ID_t partner, const std::vector<T>& send_data, std::vector<T>& recv_data, int tag) {
+        using namespace std::string_literals;
+
        MPI_Status status;
-        MPI_Sendrecv(
-                send_data, data_count, datatype, partner, 0,
-                recv_data, data_count, datatype, partner, 0,
+        MPI_Datatype datatype = MPI_TypeMapper<T>::getType();
+        int send_count = static_cast<int>(send_data.size());
+        int err = MPI_Sendrecv(
+                send_data.data(), send_count, datatype, partner, tag,
+                recv_data.data(), send_count, datatype, partner, tag,
                MPI_COMM_WORLD, &status
        );
-        if (status.MPI_ERROR != MPI_SUCCESS)
-            ret = false;
-
-        return ret;
+        if (err != MPI_SUCCESS) {
+            char err_msg[MPI_MAX_ERROR_STRING];
+            int msg_len;
+            MPI_Error_string(err, err_msg, &msg_len);
+            throw std::runtime_error("(MPI) MPI_Sendrecv() - " +  std::string (err_msg) + '\n');
+        }
    }

    // Accessors
@ -71,12 +92,84 @@ private:
 extern MPI_t<>  mpi;
 using mpi_id_t = MPI_t<>::ID_t;

+template <typename Value_t>
+struct ShadowedVec_t {
+    // STL requirements
+    using value_type = Value_t;
+    using iterator = typename std::vector<Value_t>::iterator;
+    using const_iterator = typename std::vector<Value_t>::const_iterator;
+    using size_type = typename std::vector<Value_t>::size_type;
+
+    // Dispatch to active vector
+    Value_t& operator[](size_type index) { return getActive()[index]; }
+    const Value_t& operator[](size_type index) const { return getActive()[index]; }
+
+    Value_t& at(size_type index) { return getActive().at(index); }
+    const Value_t& at(size_type index) const { return getActive().at(index); }
+
+    void push_back(const Value_t& value) { getActive().push_back(value); }
+    void push_back(Value_t&& value) { getActive().push_back(std::move(value)); }
+    void pop_back() { getActive().pop_back(); }
+    Value_t& front() { return getActive().front(); }
+    const Value_t& front() const { return getActive().front(); }
+    Value_t& back() { return getActive().back(); }
+    const Value_t& back() const { return getActive().back(); }
+
+    iterator begin() { return getActive().begin(); }
+    const_iterator begin() const { return getActive().begin(); }
+    iterator end() { return getActive().end(); }
+    const_iterator end() const { return getActive().end(); }
+
+    size_type size() const { return getActive().size(); }
+    void resize(size_t new_size) {
+        North.resize(new_size);
+        South.resize(new_size);
+    }
+
+    void reserve(size_t new_capacity) {
+        North.reserve(new_capacity);
+        South.reserve(new_capacity);
+    }
+    [[nodiscard]] size_t capacity() const { return getActive().capacity(); }
+    [[nodiscard]] bool empty() const { return getActive().empty(); }
+
+    void clear() { getActive().clear(); }
+
+    void swap(std::vector<Value_t>& other) { getActive().swap(other); }
+
+    // Switching vectors
+    void switch_active() { active = (active == north) ? south : north; }
+
+    // Accessors
+    const std::vector<Value_t>& getNorth() const { return North; }
+    const std::vector<Value_t>& getSouth() const { return South; }
+    std::vector<Value_t>& getActive() {
+        return (active == north) ? North : South;
+    }
+    const std::vector<Value_t>& getActive() const {
+        return (active == north) ? North : South;
+    }
+    std::vector<Value_t>& getShadow() {
+        return (active == north) ? South : North;
+    }
+    const std::vector<Value_t>& getShadow() const {
+        return (active == north) ? South : North;
+    }
+private:
+    enum { north, south } active{north};
+    std::vector<Value_t> North{};
+    std::vector<Value_t> South{};
+};
+
+using distBuffer_t = ShadowedVec_t<distValue_t>;
+
+extern distBuffer_t Data;
+
 /*!
 * A Logger for entire program.
 */
 struct Log {
-    struct Endl {
-    } endl;    //!< a tag object to to use it as a new line request.
+    struct Endl {} endl;    //!< a tag object to to use it as a new line request.

    //! We provide logging via << operator
    template<typename T>
--- a/homework_2/src/distbitonic.cpp
+++ b/homework_2/src/distbitonic.cpp
@ -1,212 +0,0 @@
-/*!
- * \file
- * \brief   Distributed bitonic implementation.
- *
- * \author
- *    Christos Choutouridis AEM:8997
- *    <cchoutou@ece.auth.gr>
- */
-
-#include <vector>
-#include <algorithm>
-#include <cmath>
-#if !defined DEBUG
-#define NDEBUG
-#endif
-#include <cassert>
-
-#include "distbitonic.hpp"
-
-
-
-/*!
- * Returns the ascending or descending configuration of the node's sequence based on
- * the current node (MPI process) and the depth of the sorting network
- *
- * @param node      The current node (MPI process)
- * @return          True if we need ascending configuration, false otherwise
- */
-template <>
-bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept {
-    return (node % 2) == 0;
-}
-
-/*!
- * Returns the ascending or descending configuration of the node's sequence based on
- * the current node (MPI process) and the depth of the sorting network
- *
- * @param node      The current node (MPI process)
- * @param depth     The total depth of the sorting network (same for each step for a given network)
- *
- * @return          True if we need ascending configuration, false otherwise
- */
-template <>
-bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept {
-    return !(node & (1 << depth));
-}
-
-/*!
- * Returns the node's partner for data exchange during the sorting network iterations
- * of Bubbletonic
- *
- * @param node      The current node
- * @param step      The step of the sorting network
- * @return          The node id of the partner for data exchange
- */
-template <>
-mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept {
-//    return (node % 2 == step % 2) ? node + 1 : node - 1;
-    return (((node+step) % 2) == 0) ? node + 1 : node - 1;
-}
-
-/*!
- * Returns the node's partner for data exchange during the sorting network iterations
- * of Bitonic
- *
- * @param node      The current node
- * @param step      The step of the sorting network
- * @return          The node id of the partner for data exchange
- */
-template <>
-mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept {
-    return (node ^ (1 << step));
-}
-
-
-/*!
- * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange.
- *
- * @param node      The node for which we check
- * @param partner   The partner of the data exchange
- * @return          True if the node should keep the small values, false otherwise
- */
-template <>
-bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) noexcept {
-    assert(node != partner);
-    return (node < partner);
-}
-
-/*!
- * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
- *
- * @param node      The node for which we check
- * @param partner   The partner of the data exchange
- * @param depth     The total depth of the sorting network (same for each step for a given network)
- * @return          True if the node should keep the small values, false otherwise
- */
-template <>
-bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) noexcept {
-    assert(node != partner);
-    return ascending<SortMode::Bitonic>(node, depth) == (node < partner);
-}
-
-/*!
- * Predicate to check if the node is active in the current iteration of the bubbletonic
- * sort exchange.
- *
- * @param node      The node to check
- * @param nodes     The total number of nodes
- * @return          True if the node is active, false otherwise
- */
-bool isActive(mpi_id_t node, mpi_id_t nodes) noexcept {
-    return (node >= 0) && (node < (nodes-1));
-}
-
-
-void exchange(mpi_id_t node, mpi_id_t partner) {
-    assert(node != partner);
-}
-
-void minmax(AllData_t& data, mpi_id_t node, mpi_id_t partner, bool keepsmall) {
-    for (size_t i = 0; i < data[node].size(); ++i) {
-        if (keepsmall && data[node][i] > data[partner][i])
-            std::swap(data[node][i], data[partner][i]);
-        if (!keepsmall && data[node][i] < data[partner][i])
-            std::swap(data[node][i], data[partner][i]);
-    }
-}
-
-
-
-
-void bubbletonic_network(AllData_t& data, mpi_id_t nodes, size_t depth) {
-    for (mpi_id_t node = 0 ; node < nodes ; ++node) { // Currently we do all nodes here!
-        auto part = partner<SortMode::Bubbletonic>(node, depth);
-        auto ks = keepSmall<SortMode::Bubbletonic>(node, part, 0);
-        if (isActive(node, nodes) && node < part) {
-            exchange(node, part);
-            minmax(data, node, part, ks);
-            // elbow-sort here
-            if (ascending<SortMode::Bubbletonic>(node, 0))
-                std::sort(data[node].begin(), data[node].end(), std::less<>());
-            else
-                std::sort(data[node].begin(), data[node].end(), std::greater<>());
-
-            if (ascending<SortMode::Bubbletonic>(part, 0))
-                std::sort(data[part].begin(), data[part].end(), std::less<>());
-            else
-                std::sort(data[part].begin(), data[part].end(), std::greater<>());
-        }
-    }
-}
-
-void distBubbletonic(mpi_id_t P, AllData_t& data) {
-    for (mpi_id_t node = 0 ; node < P ; ++node) { // Currently we do all nodes here!
-        // Initially sort to create the half part of a bitonic
-        if (ascending<SortMode::Bubbletonic>(node, 0))
-            std::sort(data[node].begin(), data[node].end(), std::less<>());
-        else
-            std::sort(data[node].begin(), data[node].end(), std::greater<>());
-    }
-
-    for (size_t depth = 0; depth < P-1; ++depth) {
-        bubbletonic_network(data, P, depth);
-    }
-
-    // Invert the descending ones
-    for (mpi_id_t node = 0 ; node < P ; ++node) { // Currently we do all nodes here!
-        if (!ascending<SortMode::Bubbletonic>(node, 0))
-            std::sort(data[node].begin(), data[node].end(), std::less<>());
-    }
-}
-
-
-void bitonic_network(AllData_t& data, mpi_id_t nodes, size_t depth) {
-    for (size_t step = depth; step > 0;) {
-        --step;
-        for (mpi_id_t node = 0; node < nodes; ++node) { // Currently we do all nodes here!
-            auto part = partner<SortMode::Bitonic>(node, step);
-            auto ks = keepSmall<SortMode::Bitonic>(node, part, depth);
-            if (node < part) {
-                exchange(node, part);
-                minmax(data, node, part, ks);
-            }
-        }
-    }
-}
-
-void distBitonic(mpi_id_t P, AllData_t& data) {
-    auto p = static_cast<uint32_t>(std::log2(P));
-
-    for (mpi_id_t node = 0 ; node < P ; ++node) { // Currently we do all nodes here!
-        // Initially sort to create the half part of a bitonic
-        if (ascending<SortMode::Bitonic>(node, 0))
-            std::sort(data[node].begin(), data[node].end(), std::less<>());
-        else
-            std::sort(data[node].begin(), data[node].end(), std::greater<>());
-    }
-
-    // Run through sort network using elbow-sort
-    for (size_t depth = 1; depth <= p; ++depth) {
-        bitonic_network(data, P, depth);
-
-        for (mpi_id_t node = 0 ; node < P ; ++node) { // Currently we do all nodes here!
-            // elbow-sort here
-            if (ascending<SortMode::Bitonic>(node, depth))
-                std::sort(data[node].begin(), data[node].end(), std::less<>());
-            else
-                std::sort(data[node].begin(), data[node].end(), std::greater<>());
-        }
-
-    }
-}
--- a/homework_2/src/distsort.cpp
+++ b/homework_2/src/distsort.cpp
@ -0,0 +1,31 @@
+/*!
+ * \file
+ * \brief   Distributed sort implementation.
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#if !defined DEBUG
+#define NDEBUG
+#endif
+#include <cassert>
+
+#include "utils.hpp"
+#include "distsort.hpp"
+
+
+/*!
+ * Predicate to check if the node is active in the current iteration of the bubbletonic
+ * sort exchange.
+ *
+ * @param node      The node to check
+ * @param nodes     The total number of nodes
+ * @return          True if the node is active, false otherwise
+ */
+bool isActive(mpi_id_t node, size_t nodes) noexcept {
+    assert(nodes > 0);
+    return (node >= 0) && (node < (nodes-1));
+}
+
--- a/homework_2/src/main.cpp
+++ b/homework_2/src/main.cpp
@ -9,15 +9,19 @@

 #include <exception>
 #include <iostream>
+#include <algorithm>

 #include "utils.hpp"
 #include "config.h"
+#include "distsort.hpp"


 // Global session data
 session_t       session;
 MPI_t<>         mpi;
-
+distBuffer_t    Data;
+Log             logger;
+Timing          timer;

 /*!
 * A small command line argument parser
@ -30,29 +34,46 @@ bool get_options(int argc, char* argv[]){
    for (int i=1 ; i<argc ; ++i) {
        std::string arg(argv[i]);     // get current argument

-        if (arg == "-x" || arg == "--xxxxx") {
-            if (i+2 < argc) {
-//                session.corpusMtxFile = std::string(argv[++i]);
-//                session.corpusDataSet = std::string(argv[++i]);
+        if (arg == "-q" || arg == "--array-size") {
+            if (i+1 < argc) {
+                session.arraySize = 1 << atoi(argv[++i]);
            }
-            else
+            else {
                status = false;
            }
-
-        else if (arg == "-v" || arg == "--verbose")
+        }
+        else if (arg == "--ndebug") {
+            session.ndebug = true;
+        }
+        else if (arg == "-t" || arg == "--timing") {
+            session.timing = true;
+        }
+        else if (arg == "-v" || arg == "--verbose") {
            session.verbose = true;
+        }
        else if (arg == "-h" || arg == "--help") {
-            std::cout << "distBitonic - A distributed bitonic sort\n\n";
-            std::cout << "distBitonic -x <> [-v]\n";
+            std::cout << "distbitonic/distbubbletonic - A distributed bitonic sort\n\n";
+            std::cout << "distbitonic -q <> [--ndebug] [-v]\n";
+            std::cout << "distbitonic -h\n";
+            std::cout << "distbubbletonic -q <> [--ndebug] [-v]\n";
+            std::cout << "distbubbletonic -h\n";
            std::cout << '\n';
            std::cout << "Options:\n\n";
+            std::cout << "   -q | --array-size <size>\n";
+            std::cout << "      Selects the array size according to size = 2^q\n\n";
+            std::cout << "   --ndebug\n";
+            std::cout << "      Skip debug breakpoint when on debug build.\n\n";
+            std::cout << "   -t | --timing\n";
+            std::cout << "      Request timing measurements output to stdout.\n\n";
            std::cout << "   -v | --verbose\n";
            std::cout << "      Request a more verbose output to stdout.\n\n";
            std::cout << "   -h | --help\n";
            std::cout << "      Prints this and exit.\n\n";
            std::cout << "Examples:\n\n";
-            std::cout << "   ...Example case...:\n";
-            std::cout << "   > distBitonic -x <xxxxx>  \n\n";
+            std::cout << "   mpirun -np 4 distbitonic -q 24\n";
+            std::cout << "      Runs distbitonic in 4 MPI processes with 2^24 array points each\n\n";
+            std::cout << "   mpirun -np 16 distbubbletonic -q 20\n";
+            std::cout << "      Runs distbubbletonic in 16 MPI processes with 2^20 array points each\n\n";

            exit(0);
        }
@ -66,7 +87,6 @@ bool get_options(int argc, char* argv[]){
 }


-
 #if !defined TESTING
 int main(int argc, char* argv[]) try {
    // Initialize MPI environment
@ -76,7 +96,13 @@ int main(int argc, char* argv[]) try {
    if (!get_options(argc, argv))
        exit(1);

+    logger << "MPI environment initialized." <<
+              " Rank: " << mpi.rank() <<
+              " Size: " << mpi.size() <<
+              logger.endl;
+
 #if defined DEBUG
+#if defined TESTING
    /*
     * In case of a debug build we will wait here until sleep_wait
     * will reset via debugger. In order to do that the user must attach
@ -86,24 +112,34 @@ int main(int argc, char* argv[]) try {
     *  $> gdb <program> <PID1>
     *  $> gdb <program> <PID2>
     */
-    #if defined TESTING
     volatile bool sleep_wait = false;
 #else
    volatile bool sleep_wait = true;
 #endif
-    while (sleep_wait)
+    while (sleep_wait && !session.ndebug)
        sleep(1);
 #endif

-    // Print off a hello world message
-//    std::cout   << "Hello world from processor: " << mpi.processor_name
-//                << " rank " << mpi.world_rank
-//                << " out of " << mpi.world_size << " processors\n";
+    logger << "Initialize local array of " << session.arraySize << " elements" << logger.endl;
+    std::srand(unsigned(std::time(nullptr)));
+    Data.resize(session.arraySize);
+    std::generate(Data.begin(), Data.end(), std::rand);

+    if (mpi.rank() == 0)
+        logger << "Starting distributed sorting ... ";
+    timer.start();
+    #if CODE_VERSION == BUBBLETONIC
+      distBubbletonic(Data, mpi.size());
+    #else
+      distBitonic (Data, mpi.size());
+    #endif
+    timer.stop();
+    if (mpi.rank() == 0)
+        logger << " Done." << logger.endl;
+    std::string timeMsg = "rank " + std::to_string(mpi.rank());
+    timer.print_dt(timeMsg.c_str());

-//    distBitonic (2, Data);
-//    distBitonic (4, Data);
-
+    std::cout << "[Data]: Rank " << mpi.rank() << ": [" << (int)Data.front() << " .. " << (int)Data.back() << "]" << std::endl;
    mpi.finalize();
    return 0;
 }
--- a/homework_2/test/tests_Bitonic.cpp
+++ b/homework_2/test/tests_Bitonic.cpp
@ -11,7 +11,7 @@

 #include <algorithm>  // rand/srand
 #include <ctime>      // rand/srand
-#include "distbitonic.hpp"
+#include "distsort.hpp"



@ -328,19 +328,20 @@ TEST(TdistBitonic_UT, keepsmall_test7) {
    }
 }

+#if 0
 TEST(TdistBitonic_UT, distBitonic_test1) {
    AllData_t ts_Data {
-            Data_t (8), Data_t (8)
+            ShadowedVec_t (8), ShadowedVec_t (8)
    };
-
-    std::srand(unsigned(std::time(nullptr)));
-    for (auto& v : ts_Data) {
+(unsigned(std::time(nullptr)));
+    for (auto
+    std::srand& v : ts_Data) {
        std::generate(v.begin(), v.end(), std::rand);
    }

    distBitonic(2, ts_Data);

-    auto max = std::numeric_limits<Data_t::value_type>::min();
+    auto max = std::numeric_limits<ShadowedVec_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
@ -350,7 +351,7 @@ TEST(TdistBitonic_UT, distBitonic_test1) {

 TEST(TdistBitonic_UT, distBitonic_test2) {
    AllData_t ts_Data {
-            Data_t (8), Data_t (8), Data_t (8), Data_t (8)
+            ShadowedVec_t (8), ShadowedVec_t (8), ShadowedVec_t (8), ShadowedVec_t (8)
    };

    std::srand(unsigned(std::time(nullptr)));
@ -360,7 +361,7 @@ TEST(TdistBitonic_UT, distBitonic_test2) {

    distBitonic(4, ts_Data);

-    auto max = std::numeric_limits<Data_t::value_type>::min();
+    auto max = std::numeric_limits<ShadowedVec_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
@ -370,8 +371,8 @@ TEST(TdistBitonic_UT, distBitonic_test2) {

 TEST(TdistBitonic_UT, distBitonic_test3) {
    AllData_t ts_Data {
-            Data_t (32), Data_t (32), Data_t (32), Data_t (32),
-            Data_t (32), Data_t (32), Data_t (32), Data_t (32)
+            ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32),
+            ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32)
    };

    std::srand(unsigned(std::time(nullptr)));
@ -381,10 +382,12 @@ TEST(TdistBitonic_UT, distBitonic_test3) {

    distBitonic(8, ts_Data);

-    auto max = std::numeric_limits<Data_t::value_type>::min();
+    auto max = std::numeric_limits<ShadowedVec_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
        max = v.back();
    }
 }
+
+#endif
--- a/homework_2/test/tests_Bubbletonic.cpp
+++ b/homework_2/test/tests_Bubbletonic.cpp
@ -11,7 +11,7 @@

 #include <algorithm>  // rand/srand
 #include <ctime>      // rand/srand
-#include "distbitonic.hpp"
+#include "distsort.hpp"



@ -120,10 +120,10 @@ TEST(TdistBubbletonic_UT, keepsmall_test2) {
 }


-
+#if 0
 TEST(TdistBubbletonic_UT, distBubbletonic_test1) {
    AllData_t ts_Data {
-            Data_t (8), Data_t (8)
+            ShadowedVec_t (8), ShadowedVec_t (8)
    };

    std::srand(unsigned(std::time(nullptr)));
@ -133,7 +133,7 @@ TEST(TdistBubbletonic_UT, distBubbletonic_test1) {

    distBubbletonic(2, ts_Data);

-    auto max = std::numeric_limits<Data_t::value_type>::min();
+    auto max = std::numeric_limits<ShadowedVec_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
@ -144,7 +144,7 @@ TEST(TdistBubbletonic_UT, distBubbletonic_test1) {

 TEST(TdistBubbletonic_UT, distBubbletonic_test2) {
    AllData_t ts_Data {
-            Data_t (8), Data_t (8), Data_t (8), Data_t (8)
+            ShadowedVec_t (8), ShadowedVec_t (8), ShadowedVec_t (8), ShadowedVec_t (8)
    };

    std::srand(unsigned(std::time(nullptr)));
@ -154,7 +154,7 @@ TEST(TdistBubbletonic_UT, distBubbletonic_test2) {

    distBubbletonic(4, ts_Data);

-    auto max = std::numeric_limits<Data_t::value_type>::min();
+    auto max = std::numeric_limits<ShadowedVec_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
@ -164,8 +164,8 @@ TEST(TdistBubbletonic_UT, distBubbletonic_test2) {

 TEST(TdistBubbletonic_UT, distBubbletonic_test3) {
    AllData_t ts_Data {
-            Data_t (32), Data_t (32), Data_t (32), Data_t (32),
-            Data_t (32), Data_t (32), Data_t (32), Data_t (32)
+            ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32),
+            ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32), ShadowedVec_t (32)
    };

    std::srand(unsigned(std::time(nullptr)));
@ -175,10 +175,11 @@ TEST(TdistBubbletonic_UT, distBubbletonic_test3) {

    distBubbletonic(8, ts_Data);

-    auto max = std::numeric_limits<Data_t::value_type>::min();
+    auto max = std::numeric_limits<ShadowedVec_t::value_type>::min();
    for (auto& v : ts_Data) {
        EXPECT_EQ((max <= v[0]), true);
        EXPECT_EQ(std::is_sorted(v.begin(), v.end()), true);
        max = v.back();
    }
 }
+#endif