A V0 implementation tested against matlab

Supports: - HDF5 file load and store - Precision timing of the process to stdout - logging (verbose mode) to stdout - Command line arguments and help
vor 3 Monaten · f591127e16
--- a/homework_1/.gitignore
+++ b/homework_1/.gitignore
@@ -3,6 +3,7 @@ bin/
 out/
 mat/
 mtx/
 .unused/

 # hpc
 exclude
@@ -13,6 +14,3 @@ hpc_auth_sync.sh
 .cproject
 .settings/

 # matlab
 *.m~

--- a/homework_1/Libs/matrix/include/matrix.hpp
+++ b/homework_1/Libs/matrix/include/matrix.hpp
@@ -0,0 +1,750 @@
 /**
 * \file    matrix.hpp
 * \brief   A matrix abstraction implementation
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
 #ifndef MATRIX_HPP_
 #define MATRIX_HPP_

 #include <type_traits>
 #include <utility>
 #include <algorithm>
 #include <vector>
 #include <tuple>

 namespace mtx {

 using std::size_t;

 /*
 * Small helper to strip types
 */
 template<typename T>
 struct remove_cvref {
    typedef std::remove_cv_t<std::remove_reference_t<T>> type;
 };
 template<typename T>
 using remove_cvref_t = typename remove_cvref<T>::type;

 /*!
 * Enumerator to denote the storage type of the array to use.
 */
 enum class MatrixType {
   DENSE,      /*!< Matrix is dense */
   SPARSE,     /*!< Matrix is sparse */
 };

 /*!
 * Enumerator to denote the storage type of the array to use.
 */
 enum class MatrixOrder {
   COLMAJOR,     /*!< Matrix is column major */
   ROWMAJOR,     /*!< Matrix is row major */
 };

 /*
 * Forward type declarations
 */

 template<typename MatrixType> struct MatCol;
 template<typename MatrixType> struct MatRow;
 template<typename MatrixType> struct MatVal;

 /*!
 * A 2-D matrix functionality over a 1-D array
 *
 * This is a very thin abstraction layer over a native array.
 * This is tested using compiler explorer and our template produce
 * almost identical assembly.
 *
 * The penalty hit we have is due to the fact that we use a one dimension array
 * and we have to calculate the actual position from an (i,j) pair.
 * The use of 1D array was our intention from the beginning, so the penalty
 * was pretty much unavoidable.
 *
 * \tparam DataType  The underling data type of the array
 * \tparam IndexType The underling type for the index variables and sizes
 * \tparam Type      The storage type of the array
 *    \arg  FULL              For full matrix
 *    \arg  SYMMETRIC         For symmetric matrix (we use only the lower part)
 */
 template<typename DataType,
         typename IndexType = size_t,
         MatrixType Type    = MatrixType::DENSE,
         MatrixOrder Order  = MatrixOrder::ROWMAJOR,
         bool Symmetric     = false>
 struct Matrix {

   using dataType    = DataType;                   //!< meta:export of underling data type
   using indexType   = IndexType;                  //!< meta:export of underling index type
   static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
   static constexpr MatrixType matrixType = Type;  //!< meta:export of array type
   static constexpr bool symmetric = Symmetric;    //!< meta:export symmetric flag
   /*!
    * \name Obj lifetime
    */
   //! @{

   //! Construct an empty matrix with dimensions rows x columns
   Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept :
      m_(capacity(rows, columns)), rows_(rows), cols_(columns) { }

   //! Construct a matrix by copying existing data with dimensions rows x columns
   Matrix(DataType* data, IndexType rows, IndexType columns) noexcept :
      m_(data, data + capacity(rows, columns)), rows_(rows), cols_(columns) { }

   //! Construct a matrix using an initializer list
   Matrix(IndexType rows, IndexType columns, std::initializer_list<DataType> list)
      : m_(list), rows_(rows), cols_(columns) {
      if (list.size() != capacity(rows, columns)) {
         throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
      }
   }

   //! move ctor
   Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
   //! move
   Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
   Matrix(const Matrix& m)             = delete;  //!< No copy ctor
   Matrix& operator=(const Matrix& m)  = delete;  //!< No copy
   //! @}

   //! \name Data exposure
   //! @{


   //! Get/Set the size of each dimension
   IndexType rows() const noexcept { return rows_; }
   IndexType columns() const noexcept { return cols_; }

   //! Get the interface size of the Matrix (what appears to be the size)
   IndexType size() const {
      return rows_ * cols_;
   }
   //! Set the interface size of the Matrix (what appears to be the size)
   IndexType resize(IndexType rows, IndexType columns) {
      rows_ = rows;
      cols_ = columns;
      m_.reserve(capacity(rows_, cols_));
      return capacity(rows_, cols_);
   }

   //! Actual memory capacity of the symmetric matrix
   static constexpr IndexType capacity(IndexType M, IndexType N) {
      if constexpr (Symmetric)
         return (M+1)*N/2;
      else
         return M*N;
   }

   /*
    * virtual 2D accessors
    */
   DataType get (IndexType i, IndexType j) {
      if constexpr (Symmetric) {
         auto T = [](size_t i)->size_t { return i*(i+1)/2; };          // Triangular number of i
         if constexpr (Order == MatrixOrder::COLMAJOR) {
            // In column major we use the lower triangle of the matrix
            if (i>=j)   return m_[j*rows_ - T(j) + i];  // Lower, use our notation
            else        return m_[i*rows_ - T(i) + j];  // Upper, use opposite index
         }
         else {
            // In row major we use the upper triangle of the matrix
            if (i<=j)   return m_[i*cols_ - T(i) + j];  // Upper, use our notation
            else        return m_[j*cols_ - T(j) + i];  // Lower, use opposite index
         }
      }
      else {
         if constexpr (Order == MatrixOrder::COLMAJOR)
            return m_[i + j*rows_];
         else
            return m_[i*cols_ + j];
      }
   }

   DataType set (DataType v, IndexType i, IndexType j) {
      if constexpr (Symmetric) {
         auto T = [](size_t i)->size_t { return i*(i+1)/2; };          // Triangular number of i
         if constexpr (Order == MatrixOrder::COLMAJOR) {
            // In column major we use the lower triangle of the matrix
            if (i>=j)   return m_[j*rows_ - T(j) + i] = v;  // Lower, use our notation
            else        return m_[i*rows_ - T(i) + j] = v;  // Upper, use opposite index
         }
         else {
            // In row major we use the upper triangle of the matrix
            if (i<=j)   return m_[i*cols_ - T(i) + j] = v;  // Upper, use our notation
            else        return m_[j*cols_ - T(j) + i] = v;  // Lower, use opposite index
         }
      }
      else {
         if constexpr (Order == MatrixOrder::COLMAJOR)
            return m_[i + j*rows_] = v;
         else
            return m_[i*cols_ + j] = v;
      }
   }
 //   DataType operator()(IndexType i, IndexType j) { return get(i, j); }
   /*!
    * Return a proxy MatVal object with read and write capabilities.
    * @param i    The row number
    * @param j    The column number
    * @return     tHE MatVal object
    */
   MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
      return MatVal<Matrix>(this, get(i, j), i, j);
   }

   // a basic serial iterator support
   DataType* data() noexcept { return m_.data(); }
 //   IndexType begin_idx() noexcept { return 0; }
 //   IndexType end_idx()   noexcept { return capacity(rows_, cols_); }

   const DataType* data() const noexcept { return m_.data(); }
   const IndexType begin_idx() const noexcept { return 0; }
   const IndexType end_idx()   const noexcept { return capacity(rows_, cols_); }
   //! @}

   /*!
    * \name Safe iteration API
    *
    * This api automates the iteration over the array based on
    * MatrixType
    */
   //! @{
   template<typename F, typename... Args>
   void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
      for (IndexType it=begin ; it<end ; ++it) {
         std::forward<F>(lambda)(std::forward<Args>(args)..., it);
      }
   }
   //! @}

   //
   void swap(Matrix& src) noexcept {
      std::swap(m_, src.m_);
      std::swap(rows_, src.rows_);
      std::swap(cols_, src.cols_);
   }
 private:
   //! move helper
   void moves(Matrix&& src) noexcept {
      m_    = std::move(src.m_);
      rows_ = std::move(src.rows_);
      cols_ = std::move(src.cols_);
   }

   std::vector<DataType> m_ {};  //!< Pointer to actual data.
   IndexType   rows_{};          //!< the virtual size of rows.
   IndexType   cols_{};          //!< the virtual size of columns.
 };


 /**
 * A simple sparse matrix specialization.
 *
 * We use CSC format and provide get/set functionalities for each (i,j) item
 * on the matrix. We also provide a () overload using a proxy MatVal object.
 * This way the user can:
 * \code
 *    auto v = A(3,4);
 *    A(3, 4) = 7;
 * \endcode
 *
 * We also provide getCol() and getRow() functions witch return a viewer/iterator to rows and
 * columns of the matrix. In the case of a symmetric matrix instead of a row we return the
 * equivalent column. This way we gain speed due to CSC format nature.
 *
 * @tparam DataType  The type for values
 * @tparam IndexType The type for indexes
 * @tparam Type      The Matrix type (FULL or SYMMETRIC)
 */
 template<typename DataType, typename IndexType,
         MatrixOrder Order,
         bool Symmetric>
 struct Matrix<DataType, IndexType, MatrixType::SPARSE, Order, Symmetric> {

   using dataType    = DataType;                   //!< meta:export of underling data type
   using indexType   = IndexType;                  //!< meta:export of underling index type
   static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
   static constexpr MatrixType matrixType = MatrixType::SPARSE;  //!< meta:export of array type
   static constexpr bool symmetric = Symmetric;    //!< meta:export symmetric flag

   friend struct MatCol<Matrix>;
   friend struct MatRow<Matrix>;
   friend struct MatVal<Matrix>;

   /*!
    * \name Obj lifetime
    */
   //! @{

   //! Default ctor with optional memory allocations
   Matrix(IndexType n=IndexType{}) noexcept:
      values{},
      rows{},
      col_ptr((n)? n+1:2, IndexType{}),
      N(n),
      NNZ(0) { }

   //! A ctor using csc array data
   Matrix(IndexType n, IndexType nnz, const IndexType* row, const IndexType* col) noexcept:
      values(nnz, 1),
      rows(row, row+nnz),
      col_ptr(col, col+n+1),
      N(n),
      NNZ(nnz) { }

   //! ctor using csc array data with value array
   Matrix(IndexType n, IndexType nnz, const DataType* v, const IndexType* row, const IndexType* col) noexcept:
      values(v, v+nnz),
      rows(row, row+nnz),
      col_ptr(col, col+n+1),
      N(n),
      NNZ(nnz) { }

   //! ctor vectors of row/col and default value for values array
   Matrix(IndexType n, IndexType nnz, const DataType v,
          const std::vector<IndexType>& row, const std::vector<IndexType>& col) noexcept:
      values(nnz, v),
      rows (row),
      col_ptr(col),
      N(n),
      NNZ(nnz) { }

   //! move ctor
   Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
   //! move
   Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
   Matrix(const Matrix& m)             = delete;  //!< make sure there are no copies
   Matrix& operator=(const Matrix& m)  = delete;  //!< make sure there are no copies
   //! @}

   //! \name Data exposure
   //! @{

   //! \return the dimension of the matrix
   IndexType size()     noexcept { return N; }
   //! After construction size configuration tool
   IndexType resize(IndexType n) {
      col_ptr.resize(n+1);
      return N = n;
   }
   //! \return the NNZ of the matrix
   IndexType capacity() noexcept { return NNZ; }
   //! After construction NNZ size configuration tool
   IndexType capacity(IndexType nnz) noexcept {
      values.reserve(nnz);
      rows.reserve(nnz);
      return NNZ;
   }
   // getters for row arrays of the struct (unused)
   std::vector<DataType>&  getValues() noexcept { return values; }
   std::vector<IndexType>& getRows() noexcept { return rows; }
   std::vector<IndexType>& getCols() noexcept { return col_ptr; }

   /*!
    * Return a proxy MatVal object with read and write capabilities.
    * @param i    The row number
    * @param j    The column number
    * @return     tHE MatVal object
    */
   MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
      return MatVal<Matrix>(this, get(i, j), i, j);
   }

   /*!
    * A read item functionality using binary search to find the correct row
    *
    * @param i    The row number
    * @param j    The column number
    * @return     The value of the item or DataType{} if is not present.
    */
   DataType get(IndexType i, IndexType j) noexcept {
      IndexType idx; bool found;
      std::tie(idx, found) =find_idx(rows, col_ptr[j], col_ptr[j+1], i);
      return (found) ? values[idx] : 0;
   }

   /*!
    * A write item functionality.
    *
    * First we search if the matrix has already a value in (i, j) position.
    * If so we just change it to a new value. If not we add the item on the matrix.
    *
    * @note
    *    When change a value, we don't increase the NNZ value of the struct. We expect the user has already
    *    change the NNZ value to the right one using @see capacity() function. When adding a value we
    *    increase the NNZ.
    *
    * @param i    The row number
    * @param j    The column number
    * @return     The new value of the item .
    */
   DataType set(DataType v, IndexType i, IndexType j) {
      IndexType idx; bool found;
      std::tie(idx, found) = find_idx(rows, col_ptr[j], col_ptr[j+1], i);
      if (found)
         return values[idx] = v;    // we don't change NNZ even if we write "0"
      else {
         values.insert(values.begin()+idx, v);
         rows.insert(rows.begin()+idx, i);
         std::transform(col_ptr.begin()+j+1, col_ptr.end(), col_ptr.begin()+j+1, [](IndexType it) {
            return ++it;
         });
         ++NNZ;                     // we increase the NNZ even if we write "0"
         return v;
      }
   }

   /*!
    * Get a view of a CSC column
    * @param j    The column to get
    * @return     The MatCol object @see MatCol
    */
   MatCol<Matrix> getCol(IndexType j) noexcept {
      return MatCol<Matrix>(this, col_ptr[j], col_ptr[j+1]);
   }

   /*!
    * Get a view of a CSC row
    *
    * In case of a SYMMETRIC matrix we can return a column instead.
    *
    * @param j    The row to get
    * @return     On symmetric matrix MatCol otherwise a MatRow
    */

   MatCol<Matrix> getRow(IndexType i) noexcept {
      if constexpr (Symmetric)
         return getCol(i);
      else
         return MatRow<Matrix>(this, i);
   }

   // values only iterator support
   DataType* begin() noexcept { return values.begin(); }
   DataType* end()   noexcept { return values.end(); }
   //! @}

   //! A small iteration helper
   template<typename F, typename... Args>
   void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
      for (IndexType it=begin ; it<end ; ++it) {
         std::forward<F>(lambda)(std::forward<Args>(args)..., it);
      }
   }

 private:
   /*!
    * A small binary search implementation using index for begin-end instead of iterators.
    *
    * \param   v     Reference to vector to search
    * \param   begin The vector's index to begin
    * \param   end   The vector's index to end
    * \param   match What to search
    * \return  An <index, status> pair.
    *                index    is the index of the item or end if not found
    *                status   is true if found, false otherwise
    */
   std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
      if (v.capacity() != 0 && begin < end) {
         IndexType b = begin, e = end-1;
         while (b <= e) {
            IndexType m = (b+e)/2;
            if       (v[m] == match)   return  std::make_pair(m, true);
            else if  (b >= e)          return  std::make_pair(end, false);
            else {
               if    (v[m] <  match)   b = m +1;
               else                    e = m -1;
            }
         }
      }
      return std::make_pair(end, false);
   }

   // move helper
   void moves(Matrix&& src) noexcept {
      values  = std::move(src.values);
      rows    = std::move(src.rows);
      col_ptr = std::move(src.col_ptr);
      N       = std::move(src.N);   // redundant for primitives
      NNZ     = std::move(src.NNZ); //
   }
   //! \name Data
   //! @{
   std::vector<DataType>   values {};     //!< vector to store the values of the matrix
   std::vector<IndexType>  rows{};        //!< vector to store the row information
   std::vector<IndexType>  col_ptr{1,0};  //!< vector to store the column pointers
   IndexType   N{0};                      //!< The dimension of the matrix (square)
   IndexType   NNZ{0};                    //!< The NNZ (capacity of the matrix)
   //! @}
 };


 /*!
 * A view/iterator hybrid object for Matrix columns.
 *
 * This object provides access to a column of a Matrix. The public functionalities
 * allow data access using indexes instead of iterators. We prefer indexes over iterators
 * because we can apply the same index to different inner vector of Matrix without conversion.
 *
 * @tparam DataType
 * @tparam IndexType
 */
 template<typename MatrixType>
 struct MatCol {
   using owner_t = MatrixType;

   using DataType  = typename MatrixType::dataType;
   using IndexType = typename MatrixType::indexType;

   /*!
    * ctor using column pointers for begin-end. own is pointer to Matrix.
    */
   MatCol(owner_t* own, const IndexType begin, const IndexType end) noexcept :
      owner_(own), index_(begin), begin_(begin), end_(end) {
      vindex_ = vIndexCalc(index_);
   }
   MatCol()                        = default;
   MatCol(const MatCol&)           = delete;   //!< make sure there are no copies
   MatCol& operator=(const MatCol&)= delete;   //!< make sure there are no copies
   MatCol(MatCol&&)                = default;
   MatCol& operator=(MatCol&&)     = default;

   //! a simple dereference operator, like an iterator
   DataType operator* () {
      return get();
   }
   //! Increment operator acts on index(), like an iterator
   MatCol& operator++ ()    { advance(); return *this; }
   MatCol& operator++ (int) { MatCol& p = *this; advance(); return p; }

   //! () operator acts as member access (like a view)
   DataType operator()(IndexType x) {
      return (x == index())? get() : DataType{};
   }
   //! = operator acts as member assignment (like a view)
   DataType operator= (DataType v) { return owner_->values[index_] = v; }
   // iterator like handlers
   // these return a virtual index value based on the items position on the full matrix
   // but the move of the index is just a ++ away.
   IndexType       index()       noexcept { return vindex_; }
   const IndexType index() const noexcept { return vindex_; }
   IndexType       begin()       noexcept { return vIndexCalc(begin_); }
   const IndexType begin() const noexcept { return vIndexCalc(begin_); }
   IndexType       end()         noexcept { return owner_->N; }
   const IndexType end()   const noexcept { return owner_->N; }

   /*!
    * Multiplication operator
    *
    * We follow only the non-zero values and multiply only the common indexes.
    *
    * @tparam C   Universal reference for the type right half site column
    *
    * @param c    The right hand site matrix
    * @return     The value of the inner product of two vectors
    * @note       The time complexity is \$ O(nnz1+nnz2) \$.
    *             Where the nnz is the max NNZ elements of the column of the matrix
    */
   template <typename C>
   DataType operator* (C&& c) {
      static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
      DataType v{};
      while (index() != end() && c.index() != c.end()) {
         if      (index() < c.index())  advance(); // advance me
         else if (index() > c.index())  ++c;       // advance other
         else { //index() == c.index()
            v += get() * *c;                       // multiply and advance both
            ++c;
            advance();
         }
      }
      return v;
   }

 private:
   //! small tool to increase the index pointers to Matrix
   void advance() noexcept {
      ++index_;
      vindex_ = vIndexCalc(index_);
   }
   //! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
   IndexType vIndexCalc(IndexType idx) {
      return (idx < end_) ? owner_->rows[idx] : end();
   }
   //! small get tool
   DataType get() { return owner_->values[index_]; }

   owner_t*    owner_   {nullptr};     //!< Pointer to owner Matrix. MatCol is just a view
   IndexType   vindex_  {IndexType{}}; //!< Virtual index of full matrix
   IndexType   index_   {IndexType{}}; //!< index to Matrix::rows
   IndexType   begin_   {IndexType{}}; //!< beginning index of the column in Matrix::rows
   IndexType   end_     {IndexType{}}; //!< ending index of the column in Matrix::rows
 };

 /*!
 * A view/iterator hybrid object for Matrix rows.
 *
 * This object provides access to a column of a Matrix. The public functionalities
 * allow data access using indexes instead of iterators. We prefer indexes over iterators
 * because we can apply the same index to different inner vector of Matrix without conversion.
 *
 * @tparam DataType
 * @tparam IndexType
 */
 template<typename MatrixType>
 struct MatRow {
   using owner_t = MatrixType;

   using DataType  = typename MatrixType::dataType;
   using IndexType = typename MatrixType::indexType;

   /*!
    * ctor using virtual full matrix row index. own is pointer to Matrix.
    */
   MatRow(owner_t* own, const IndexType row) noexcept :
      owner_(own), vindex_(IndexType{}), row_(row), index_(IndexType{}),
      begin_(IndexType{}), end_(owner_->NNZ) {
      // place begin
      while(begin_ != end_ && owner_->rows[begin_] != row_)
         ++begin_;
      // place index_ and vindex_
      if (owner_->rows[index_] != row_)
         advance();
   }
   MatRow()                        = default;
   MatRow(const MatRow&)           = delete;   //!< make sure there are no copies
   MatRow& operator=(const MatRow&)= delete;   //!< make sure there are no copies
   MatRow(MatRow&&)                = default;
   MatRow& operator=(MatRow&&)     = default;

   //! a simple dereference operator, like an iterator
   DataType operator* () {
      return get();
   }
   //! Increment operator acts on index(), like an iterator
   //! here the increment is a O(N) process.
   MatRow& operator++ ()    { advance(); return *this; }
   MatRow& operator++ (int) { MatRow& p = *this; advance(); return p; }

   //! () operator acts as member access (like a view)
   DataType operator()(IndexType x) {
      return (x == index())? get() : DataType{};
   }
   //! = operator acts as member assignment (like a view)
   DataType operator= (DataType v) { return owner_->values[index_] = v; }
   // iterator like handlers
   // these return a virtual index value based on the items position on the full matrix
   // but the move of the index is just a ++ away.
   IndexType       index()       noexcept { return vindex_; }
   const IndexType index() const noexcept { return vindex_; }
   IndexType       begin()       noexcept { return vIndexCalc(begin_); }
   const IndexType begin() const noexcept { return vIndexCalc(begin_); }
   IndexType       end()         noexcept { return owner_->N; }
   const IndexType end()   const noexcept { return owner_->N; }

   /*!
    * Multiplication operator
    *
    * We follow only the non-zero values and multiply only the common indexes.
    *
    * @tparam C   Universal reference for the type right half site column
    *
    * @param c    The right hand site matrix
    * @return     The value of the inner product of two vectors
    * @note       The time complexity is \$ O(N+nnz2) \$ and way heavier the ColxCol multiplication.
    *             Where the nnz is the max NNZ elements of the column of the matrix
    */
   template <typename C>
   DataType operator* (C&& c) {
      static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
      DataType v{};
      while (index() != end() && c.index() != c.end()) {
         if      (index() < c.index())  advance(); // advance me
         else if (index() > c.index())  ++c;       // advance other
         else { //index() == c.index()
            v += get() * *c;                       // multiply and advance both
            ++c;
            advance();
         }
      }
      return v;
   }
 private:
   //! small tool to increase the index pointers to Matrix matrix
   //! We have to search the entire rows vector in Matrix to find the next
   //! virtual row position.
   //! time complexity O(N)
   void advance() noexcept {
      do
         ++index_;
      while(index_ != end_ && owner_->rows[index_] != row_);
      vindex_ = vIndexCalc(index_);
   }
   //! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
   IndexType vIndexCalc(IndexType idx) {
      for(IndexType i =0 ; i<(owner_->N+1) ; ++i)
         if (idx < owner_->col_ptr[i])
            return i-1;
      return end();
   }
   //! small get tool
   DataType get() { return owner_->values[index_]; }

   owner_t*    owner_   {nullptr};     //!< Pointer to owner Matrix. MatCol is just a view
   IndexType   vindex_  {IndexType{}}; //!< Virtual index of full matrix
   IndexType   row_     {IndexType{}}; //!< The virtual full matrix row of the object
   IndexType   index_   {IndexType{}}; //!< index to Matrix::rows
   IndexType   begin_   {IndexType{}}; //!< beginning index of the column in Matrix::rows
   IndexType   end_     {IndexType{}}; //!< ending index of the column in Matrix::rows
 };

 /*!
 * A proxy Matrix value object/view.
 *
 * This object acts as proxy to provide read/write access to an Matrix item.
 *
 * @tparam DataType     The type of the values of the Matrix matrix
 * @tparam IndexType    The type of the indexes of the Matrix matrix
 */
 template<typename MatrixType>
 struct MatVal {
   using owner_t = MatrixType;

   using DataType  = typename MatrixType::dataType;
   using IndexType = typename MatrixType::indexType;

   //!< ctor using all value-row-column data, plus a pointer to owner Matrix object
   MatVal(owner_t* own, DataType v, IndexType i, IndexType j) :
      owner_(own), v_(v), i_(i), j_(j) { }
   MatVal()                         = default;
   MatVal(const MatVal&)            = delete;  //!< make sure there are no copies
   MatVal& operator=(const MatVal&) = delete;  //!< make sure there are no copies
   MatVal(MatVal&&)                 = default;
   MatVal& operator=(MatVal&&)      = default;

   //! Operator to return the DataType value implicitly
   operator DataType() { return v_; }
   //! Operator to write back to owner the assigned value
   //! for ex: A(2,3) = 5;
   MatVal& operator=(DataType v) {
      v_ = v;
      owner_->set(v_, i_, j_);
      return *this;
   }
 private:
   owner_t*    owner_{nullptr};  //!< Pointer to owner Matrix. MatVal is just a view.
   DataType    v_{DataType{}};   //!< The value of the row-column pair (for speed)
   IndexType   i_{IndexType{}};  //!< The row
   IndexType   j_{IndexType{}};  //!< the column
 };


 } // namespace mtx


 #endif /* MATRIX_HPP_ */
--- a/homework_1/Makefile
+++ b/homework_1/Makefile
@@ -26,7 +26,11 @@ TARGET          := knnsearch
 SRC_DIR_LIST    := src
 # Include directories list(space seperated). Makefile-relative path.
 INC_DIR_LIST    := inc \
                   src
                   src \
                   Libs/matrix/include/ \
                   /usr/include/hdf5/serial/
 #                   Libs/MATLAB/R2019b/include/ \

 # Exclude files list(space seperated). Filenames only.
 # EXC_FILE_LIST := bad.cpp old.cpp

@@ -49,7 +53,11 @@ PRE_DEFS		:=

 # ============== Linker settings ==============
 # Linker flags (example: -pthread -lm)
 LDFLAGS         := -pthread -lopenblas
 LDFLAGS         := -pthread -lopenblas \
                   -L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5
 #                   -LLibs/MATLAB/R2019b/bin/ -lmat -lmx -Wl,-rpath,Libs/MATLAB/R2019b/bin/ \
 #                   -Wl,-rpath,Libs/unwind/bin/

 # Map output file
 MAP_FILE        := output.map
 MAP_FLAG        := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
@@ -184,6 +192,12 @@ local_v0: TARGET := local_v0
 local_v0: $(BUILD_DIR)/$(TARGET)
 	cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

 local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0
 local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0
 local_v0_opt: TARGET := local_v0_opt
 local_v0_opt: $(BUILD_DIR)/$(TARGET)
 	cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
 	
 local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=4
 local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=4
 local_v1: TARGET := local_v1
--- a/homework_1/inc/config.h
+++ b/homework_1/inc/config.h
@@ -0,0 +1,75 @@
 /*!
 * \file    config,h
 * \brief   Build configuration file.
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */

 #ifndef CONFIG_H_
 #define CONFIG_H_

 #include <iostream>
 #include <string>

 #include <matrix.hpp>

 // HDF5 supported types
 enum class HDF5_type {
   SCHAR, CHAR, SHORT, USHORT, INT, UINT, LONG, ULONG, LLONG, ULLONG, FLOAT, DOUBLE
 };

 /*
 * Defines for different version of the exercise
 */
 #define  V0    0
 #define  V1    1


 // Fail-safe version selection
 #if !defined CODE_VERSION
 #define CODE_VERSION   V1
 #endif

 // matrix alias template dispatcher based on pre-define flag from compiler (see Makefile)
 #if   CODE_VERSION == V0
   #define NAMESPACE_VERSION using namespace v0
   using MatrixDst = mtx::Matrix<double>;
   using MatrixIdx = mtx::Matrix<uint32_t>;
   static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
   static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
 #elif CODE_VERSION == V1
   #define NAMESPACE_VERSION using namespace v1
   using MatrixDst = mtx::Matrix<double>;
   using MatrixIdx = mtx::Matrix<uint32_t>;
   static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
   static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
 #endif



 //! enumerator for output handling
 enum class StdOutputMode{ STD, FILE };

 /*!
 * Session option for each invocation of the executable
 */
 struct session_t {
   std::string   corpusMtxFile  {};                        //!< corpus matrix file name in HDF5 format
   std::string   corpusDataSet  {};                        //!< corpus dataset name in HDF5 matrix file
   std::string   queryMtxFile   {};                        //!< optional query matrix file name in HDF5 format
   std::string   queryDataSet   {};                        //!< optional query dataset name in HDF5 matrix file
   bool          queryMtx       {false};                   //!< Flag to indicate that there is a separate query matrix
   size_t        k              {1};                       //!< The number of nearest neighbors to find
   std::string   outMtxFile     {"out.hdf5"};              //!< output matrix file name in HDF5 format
   std::string   outMtxIdxDataSet {"/Idx"};                //!< Index output dataset name in HDF5 matrix file
   std::string   outMtxDstDataSet {"/Dst"};                //!< Distance output dataset name in HDF5 matrix file
   std::size_t   max_threads    {};                        //!< Maximum threads to use
   bool          timing         {false};                   //!< Enable timing prints of the program
   bool          verbose        {false};                   //!< Flag to enable verbose output to stdout
 };

 extern session_t session;

 #endif /* CONFIG_H_ */
--- a/homework_1/inc/utils.hpp
+++ b/homework_1/inc/utils.hpp
@@ -0,0 +1,224 @@
 /**
 * \file    utils.hpp
 * \brief   Utilities header
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
 #ifndef UTILS_HPP_
 #define UTILS_HPP_

 #include <iostream>
 #include <chrono>
 #include <unistd.h>
 #include <hdf5.h>

 #include <matrix.hpp>
 #include <config.h>

 /*!
 * A Logger for entire program.
 */
 struct Log {
   struct Endl {} endl;    //!< a tag object to to use it as a new line request.

   //! We provide logging via << operator
   template<typename T>
   Log& operator<< (T&& t) {
      if (session.verbose) {
         if (line_) {
            std::cout << "[Log]: " << t;
            line_ = false;
         }
         else
            std::cout << t;
      }
      return *this;
   }
   // overload for special end line handling
   Log& operator<< (Endl e) { (void)e;
      if (session.verbose) {
         std::cout << '\n';
         line_ = true;
      }
      return *this;
   }
 private:
   bool line_ {true};
 };

 extern Log logger;

 /*!
 * A small timing utility based on chrono.
 */
 struct Timing{
   using Tpoint = std::chrono::steady_clock::time_point;
   using microseconds = std::chrono::microseconds;
   using milliseconds = std::chrono::milliseconds;
   using seconds = std::chrono::seconds;

   //! tool to mark the starting point
   Tpoint start () noexcept { return start_ = std::chrono::steady_clock::now(); }
   //! tool to mark the ending point
   Tpoint stop ()  noexcept { return stop_ = std::chrono::steady_clock::now(); }

   auto dt () noexcept {
      return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
   }
   //! tool to print the time interval
   void print_dt (const char* what) noexcept {
      if (session.timing) {
         auto t = stop_ - start_;
         if       (std::chrono::duration_cast<microseconds>(t).count() < 10000)
            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
         else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
         else
            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
      }
   }
 private:
   Tpoint start_;
   Tpoint stop_;
 };



 struct Mtx {

   template<typename MatrixType, HDF5_type Type>
   static void load(const std::string& filename, const std::string& dataset, MatrixType& matrix) {

      hid_t file_id{}, dataset_id{}, dataspace_id{};
      herr_t read_st;
      do {
         // Open file
         logger << "Load HDF5 file: " << filename << " Dataset: " << dataset << "...";
         if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
            break;

         // Open dataset
         if ((dataset_id = H5Dopen2(file_id, dataset.c_str(), H5P_DEFAULT)) < 0)
            break;

         // Get dataspace and allocate memory for read buffer
         if ((dataspace_id = H5Dget_space(dataset_id)) < 0)
            break;
         hsize_t dims[2];
         H5Sget_simple_extent_dims(dataspace_id, dims, NULL);
         matrix.resize(dims[0], dims[1]);

         // Read the dataset
         // ToDo: Come up with a better way to do this
         if constexpr (Type == HDF5_type::DOUBLE) {
            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
               break;
         }
         else if (Type == HDF5_type::FLOAT) {
            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
               break;
         }
         else if (Type == HDF5_type::UINT) {
            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
               break;
         }
         else if (Type == HDF5_type::INT) {
            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
               break;
         }
         // Done
         H5Dclose(dataset_id);
         H5Sclose(dataspace_id);
         H5Fclose(file_id);
         logger << " Done" << logger.endl;
         return;
      } while (0);

      // Error: close everything (if possible) and return false
      H5Dclose(dataset_id);
      H5Sclose(dataspace_id);
      H5Fclose(file_id);
      throw std::runtime_error("Cannot store to " + filename + " dataset:" + dataset + '\n');
   }


   template<typename MatrixType, HDF5_type Type>
   static void store(const std::string& filename, const std::string& dataset, MatrixType& matrix) {

      hid_t file_id{}, dataset_id{}, dataspace_id{};
      herr_t write_st;
      do {
         // Try to open the file in read-write mode
         logger << "Store HDF5 file: " << filename << " Dataset: " << dataset << "...";
         if (access(session.outMtxFile.c_str(), F_OK) == 0){
            if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT)) < 0)
               break;
         }
         else {
            if ((file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
               break;
         }

         // Create the dataspace for the dataset
         hsize_t dims[] = { matrix.rows(), matrix.columns() };
         if ((dataspace_id = H5Screate_simple(2, dims, NULL)) < 0)
            break;

         // ToDo: Come up with a better way to do this
         if constexpr (Type == HDF5_type::DOUBLE) {
            // Create the dataset with default properties
            if ((dataset_id = H5Dcreate2(
                  file_id, dataset.c_str(), H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
               break;
            // Write the data to the dataset
            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
               break;
         }
         else if (Type == HDF5_type::FLOAT) {
            // Create the dataset with default properties
            if ((dataset_id = H5Dcreate2(
                  file_id, dataset.c_str(), H5T_NATIVE_FLOAT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
               break;
            // Write the data to the dataset
            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
               break;
         }
         else if (Type == HDF5_type::UINT) {
            // Create the dataset with default properties
            if ((dataset_id = H5Dcreate2(
                  file_id, dataset.c_str(), H5T_NATIVE_UINT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
               break;
            // Write the data to the dataset
            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
               break;
         }
         else if (Type == HDF5_type::INT) {
            // Create the dataset with default properties
            if ((dataset_id = H5Dcreate2(
                  file_id, dataset.c_str(), H5T_NATIVE_INT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
               break;
            // Write the data to the dataset
            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
               break;
         }
         // Close the dataset, dataspace, and file
         H5Dclose(dataset_id);
         H5Sclose(dataspace_id);
         H5Fclose(file_id);
         logger << " Done" << logger.endl;
         return;
      } while (0);

      // Error: close everything (if possible) and return false
      H5Dclose(dataset_id);
      H5Sclose(dataspace_id);
      H5Fclose(file_id);
      throw std::runtime_error("Cannot store " + filename + " with dataset:" + dataset +'\n');
   }
 };



 #endif /* UTILS_HPP_ */
--- a/homework_1/inc/v0.hpp
+++ b/homework_1/inc/v0.hpp
@@ -0,0 +1,119 @@
 /**
 * \file    v0.hpp
 * \brief
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
 #ifndef V0_HPP_
 #define V0_HPP_

 #include <cblas.h>
 #include <cmath>
 #include <vector>
 #include <algorithm>

 #include <matrix.hpp>
 #include <config.h>

 namespace v0 {

 /*!
 * Function to compute squared Euclidean distances
 *
 * \fn void pdist2(const double*, const double*, double*, int, int, int)
 * \param X    m x d matrix (Column major)
 * \param Y    n x d matrix (Column major)
 * \param D2   m x n matrix to store distances (Column major)
 * \param m    number of rows in X
 * \param n    number of rows in Y
 * \param d    number of columns in both X and Y
 */
 template<typename DataType>
 void pdist2(const mtx::Matrix<DataType>& X, const mtx::Matrix<DataType>& Y, mtx::Matrix<DataType>& D2) {
   int M = X.rows();
   int N = Y.rows();
   int d = X.columns();

   // Compute the squared norms of each row in X and Y
   std::vector<DataType> X_norms(M), Y_norms(N);
   for (int i = 0; i < M ; ++i) {
      X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
   }
   for (int j = 0; j < N ; ++j) {
      Y_norms[j] = cblas_ddot(d, Y.data() + j * d, 1, Y.data() + j * d, 1);
   }

   // Compute -2 * X * Y'
   cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, d, -2.0, X.data(), d, Y.data(), d, 0.0, D2.data(), N);

   // Step 3: Add the squared norms to each entry in D2
   for (int i = 0; i < M ; ++i) {
      for (int j = 0; j < N; ++j) {
         D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
         //D2.set(std::max(D2.get(i, j), 0.0),            i, j); // Ensure non-negative
         D2.set(std::sqrt(D2.get(i, j)),                i, j); // Take the square root of each
      }
   }
 }

 template<typename DataType, typename IndexType>
 void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
   std::nth_element(
      vec.begin(),
      vec.begin() + k,
      vec.end(),
      [](const std::pair<DataType, IndexType>& a, const std::pair<DataType, IndexType>& b) {
         return a.first < b.first;
   });
   vec.resize(k);  // Keep only the k smallest elements
 }

 /*!
 * \param C    Is a MxD matrix (Corpus)
 * \param Q    Is a NxD matrix (Query)
 * \param k    The number of nearest neighbors needed
 * \param idx  Is the Nxk matrix with the k indexes of the C points, that are
 *             neighbors of the nth point of Q
 * \param dst  Is the Nxk matrix with the k distances to the C points of the nth
 *             point of Q
 */
 template<typename DataType, typename IndexType>
 void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, int k,
               mtx::Matrix<IndexType>& idx,
               mtx::Matrix<DataType>& dst) {

   int M = C.rows();
   int N = Q.rows();

   mtx::Matrix<DataType> D(M, N);

   pdist2(C, Q, D);

   idx.resize(N, k);
   dst.resize(N, k);

   for (int j = 0; j < N; ++j) {
      // Create a vector of pairs (distance, index) for the j-th query
      std::vector<std::pair<DataType, IndexType>> dst_idx(M);
      for (int i = 0; i < M; ++i) {
        dst_idx[i] = {D.data()[i * N + j], i};
      }
      // Find the k smallest distances using quickSelectKSmallest
      quickselect(dst_idx, k);

      // Sort the k smallest results by distance for consistency
      std::sort(dst_idx.begin(), dst_idx.end());

      // Store the indices and distances
      for (int i = 0; i < k; ++i) {
         idx(j, i) = dst_idx[i].second;
         dst(j, i) = dst_idx[i].first;
      }
   }
 }

 }

 #endif /* V0_HPP_ */
--- a/homework_1/inc/v1.hpp
+++ b/homework_1/inc/v1.hpp
@@ -0,0 +1,16 @@
 /**
 * \file    v0.hpp
 * \brief
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
 #ifndef V1_HPP_
 #define V1_HPP_





 #endif /* V1_HPP_ */
--- a/homework_1/matlab/.gitignore
+++ b/homework_1/matlab/.gitignore
@@ -0,0 +1,2 @@
 # matlab
 *.m~
--- a/homework_1/matlab/dist2.m
+++ b/homework_1/matlab/dist2.m
@@ -1,10 +1,12 @@
 function [D2] = dist2(X, Y)
 % Calculates the squares of the distances of X and Y
 %
 % X: A mxd array with m d-dimentional points
 % Y: A nxd array with n d-dimentional points
 % X: A Mxd array with m d-dimentional points
 % Y: A Nxd array with n d-dimentional points
 % d: Must be the same

 %
 % D2: The MxN matrix with the distances
 %
    [~, d1] = size(X);
    [~, d2] = size(Y);
    if d1 ~= d2
--- a/homework_1/matlab/knnsearch2.m
+++ b/homework_1/matlab/knnsearch2.m
@@ -1,8 +1,12 @@
 function [idx, dst] = knnsearch2(C, Q, k)
 % C: Is a mxd matrix (Corpus)
 % Q: Is a nxd matrix (Query)
 % k: The number of nearest neighbors needded

 % C:   Is a MxD matrix (Corpus)
 % Q:   Is a NxD matrix (Query)
 % k:   The number of nearest neighbors needded
 % idx: Is the Nxk matrix with the k indexes of the C points, that are
 %      neighbors of the nth point of Q
 % dst: Is the Nxk matrix with the k distances to the C points of the nth
 %      point of Q
 %
    % Calculate the distance matrix between C and Q
    % D is an m x n matrix where each element D(i, j) is the distance 
    % between the i-th point in C and the j-th point in Q.
--- a/homework_1/src/main.cpp
+++ b/homework_1/src/main.cpp
@@ -1,107 +1,161 @@
 #include <iostream>
 #include <cblas.h>
 #include <cmath>
 #include <vector>
 #include <algorithm>
 #include <queue>

 /*!
 * Function to compute squared Euclidean distances
 * \file    main.cpp
 * \brief   Main application file
 *
 * \fn void pdist2(const double*, const double*, double*, int, int, int)
 * \param X    m x d matrix
 * \param Y    n x d matrix
 * \param D2   m x n matrix to store distances
 * \param m    number of rows in X
 * \param n    number of rows in Y
 * \param d    number of columns in both X and Y
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
 void pdist2(const double* X, const double* Y, double* D2, int m, int n, int d){
   // Compute the squared norms of each row in X and Y
   std::vector<double> X_norms(m), Y_norms(n);
   for (int i = 0; i < m; ++i) {
      X_norms[i] = cblas_ddot(d, X + i * d, 1, X + i * d, 1);
   }
   for (int j = 0; j < n; ++j) {
      Y_norms[j] = cblas_ddot(d, Y + j * d, 1, Y + j * d, 1);
   }

   // Compute -2 * X * Y'
   cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, d, -2.0, X, d, Y, d, 0.0, D2, n);
 #include <iostream>
 #include <string>
 #include <exception>
 #include <unistd.h>
 #include <cstdio>

 #include <v0.hpp>
 #include <v1.hpp>
 #include <matrix.hpp>
 #include <utils.hpp>
 #include <config.h>

 // Global session data
 session_t   session;
 Log         logger;
 Timing      timer;

   // Step 3: Add the squared norms to each entry in D2
   for (int i = 0; i < m; ++i) {
      for (int j = 0; j < n; ++j) {
         D2[i * n + j] += X_norms[i] + Y_norms[j];
         D2[i * n + j] = std::max(D2[i * n + j], 0.0); // Ensure non-negative
         D2[i * n + j] = std::sqrt(D2[i * n + j]);     // Take the square root of each
 /*!
 * A small command line argument parser
 * \return  The status of the operation
 */
 bool get_options(int argc, char* argv[]){
   bool status =true;

   // iterate over the passed arguments
   for (int i=1 ; i<argc ; ++i) {
      std::string arg(argv[i]);     // get current argument

      if (arg == "-c" || arg == "--corpus") {
         if (i+2 < argc) {
            session.corpusMtxFile = std::string(argv[++i]);
            session.corpusDataSet = std::string(argv[++i]);
         }
         else
            status = false;
      }
      else if (arg == "-o" || arg == "--output") {
         if (i+3 < argc) {
            session.outMtxFile = std::string(argv[++i]);
            session.outMtxIdxDataSet = std::string(argv[++i]);
            session.outMtxDstDataSet = std::string(argv[++i]);
         }
         else
            status = false;
      }
      else if (arg == "-q" || arg == "--query") {
         if (i+2 < argc) {
            session.queryMtxFile = std::string(argv[++i]);
            session.queryDataSet = std::string(argv[++i]);
            session.queryMtx = true;
         }
         else
            status = false;
      }
      else if (arg == "-k") {
         session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
      }
      else if (arg == "-n" || arg == "--max_trheads") {
         session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
      }
      else if (arg == "-t" || arg == "--timing")
         session.timing = true;
      else if (arg == "-v" || arg == "--verbose")
         session.verbose = true;
      else if (arg == "-h" || arg == "--help") {
         std::cout << "annsearch - an aproximation knnsearch utility\n\n";
         std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
         std::cout << '\n';
         std::cout << "Options:\n\n";
         std::cout << "   -c | --corpus <file> <dataset>\n";
         std::cout << "      Path to hdf5 file to open and name of the dataset to load\n\n";
         std::cout << "   -o | --output <file> <idx-dataset> <dst-dataset> \n";
         std::cout << "      Path to <file> to store the data and the names of the datasets.\n\n";
         std::cout << "   -q | --query <file> <dataset>\n";
         std::cout << "      Path to hdf5 file to open and name of the dataset to load\n";
         std::cout << "      If not defined, the corpus is used\n\n";
         std::cout << "   -k <number>\n";
         std::cout << "      Set the number of closest neighbors to find. \n\n";
         std::cout << "   -n | --max_trheads <threads>\n";
         std::cout << "      Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n";
         std::cout << "   -t | --timing\n";
         std::cout << "      Request timing measurements output to stdout.\n\n";
         std::cout << "   -v | --verbose\n";
         std::cout << "      Request a more verbose output to stdout.\n\n";
         std::cout << "   -h | --help <size>\n";
         std::cout << "      Prints this and exit.\n\n";
         std::cout << "Examples:\n\n";
         std::cout << "   ...Example case...:\n";
         std::cout << "   > ./annsearch -i <MFILE> ... \n\n";

         exit(0);
      }
      else {   // parse error
         std::cout << "Invocation error. Try -h for details.\n";
         status = false;
      }
   }
 }

 void quickselect(std::vector<std::pair<double, int>>& vec, int k) {
   std::nth_element(
      vec.begin(),
      vec.begin() + k,
      vec.end(),
      [](const std::pair<double, int>& a, const std::pair<double, int>& b) {
         return a.first < b.first;
   });
   vec.resize(k);  // Keep only the k smallest elements
 }

 // K-nearest neighbor search function
 void knnsearch(const double* C, const double* Q, int m, int n, int d, int k,
               std::vector<std::vector<int>>& idx, std::vector<std::vector<double>>& dst) {
    std::vector<double> D(m * n);
    pdist2(C, Q, D.data(), m, n, d);

    idx.resize(n, std::vector<int>(k));
    dst.resize(n, std::vector<double>(k));

    for (int j = 0; j < n; ++j) {
        // Create a vector of pairs (distance, index) for the j-th query
        std::vector<std::pair<double, int>> dst_idx(m);
        for (int i = 0; i < m; ++i) {
           dst_idx[i] = {D[i * n + j], i};
        }

        // Find the k smallest distances using quickSelectKSmallest
        quickselect(dst_idx, k);

        // Sort the k smallest results by distance for consistency
        std::sort(dst_idx.begin(), dst_idx.end());

        // Store the indices and distances
        for (int i = 0; i < k; ++i) {
            idx[j][i] = dst_idx[i].second;
            dst[j][i] = dst_idx[i].first;
        }
    }
   return status;
 }

 int main(){

   int m = 5; // Number of points in C (corpus)
   int n = 3; // Number of points in Q (query)
   int d = 2; // Dimensions
   int k = 2; // Number of nearest neighbors to find

   double C[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // m x d matrix
   double Q[] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5};                     // n x d matrix

   std::vector<std::vector<int>> idx;
   std::vector<std::vector<double>> dst;

   knnsearch(C, Q, m, n, d, k, idx, dst);

   // Print results
   for (int i = 0; i < n; ++i) {
      std::cout << "Query point " << i << ":\n";
       for (int j = 0; j < k; ++j) {
          std::cout << "  Neighbor " << j <<": Index = " << idx[i][j] <<", Distance = " << dst[i][j] << '\n';
       }
   }
 NAMESPACE_VERSION;

 int main(int argc, char* argv[]) try {
   // Instantiate matrixes
   MatrixDst Corpus;
   MatrixDst Query;
   MatrixIdx Idx;
   MatrixDst Dst;

   // try to read command line
   if (!get_options(argc, argv))
      exit(1);

   if (access(session.outMtxFile.c_str(), F_OK) == 0)
      std::remove(session.outMtxFile.c_str());

   // Load data
   timer.start();
   Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
   if (session.queryMtx)
      Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
   timer.stop();
   timer.print_dt("Load hdf5 files");

   logger << "Start knnsearch ...";
   timer.start();
   if (session.queryMtx)
      knnsearch(Corpus, Query, session.k, Idx, Dst);
   else
      knnsearch(Corpus, Corpus, session.k, Idx, Dst);
   timer.stop();
   logger << " Done" << logger.endl;
   timer.print_dt("knnsearch");


   // Store data
   timer.start();
   Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
   Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
   timer.stop();
   timer.print_dt("Store hdf5 files");

   return 0;
 }
 catch (std::exception& e) {
   //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
   std::cerr << "Error: " << e.what() << '\n';
   exit(1);
 }