A V0 implementation tested against matlab

Supports: - HDF5 file load and store - Precision timing of the process to stdout - logging (verbose mode) to stdout - Command line arguments and help
2024-11-18 00:29:04 +02:00 · 2024-11-18 00:29:04 +02:00 · f591127e16
commit f591127e16
parent 7c88260900
11 changed files with 1357 additions and 99 deletions
--- a/homework_1/.gitignore
+++ b/homework_1/.gitignore
@ -3,6 +3,7 @@ bin/
 out/
 mat/
 mtx/
+.unused/

 # hpc
 exclude
@ -13,6 +14,3 @@ hpc_auth_sync.sh
 .cproject
 .settings/

-# matlab
-*.m~
-
--- a/homework_1/Libs/matrix/include/matrix.hpp
+++ b/homework_1/Libs/matrix/include/matrix.hpp
@ -0,0 +1,750 @@
+/**
+ * \file    matrix.hpp
+ * \brief   A matrix abstraction implementation
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+#ifndef MATRIX_HPP_
+#define MATRIX_HPP_
+
+#include <type_traits>
+#include <utility>
+#include <algorithm>
+#include <vector>
+#include <tuple>
+
+namespace mtx {
+
+using std::size_t;
+
+/*
+ * Small helper to strip types
+ */
+template<typename T>
+struct remove_cvref {
+    typedef std::remove_cv_t<std::remove_reference_t<T>> type;
+};
+template<typename T>
+using remove_cvref_t = typename remove_cvref<T>::type;
+
+/*!
+ * Enumerator to denote the storage type of the array to use.
+ */
+enum class MatrixType {
+   DENSE,      /*!< Matrix is dense */
+   SPARSE,     /*!< Matrix is sparse */
+};
+
+/*!
+ * Enumerator to denote the storage type of the array to use.
+ */
+enum class MatrixOrder {
+   COLMAJOR,     /*!< Matrix is column major */
+   ROWMAJOR,     /*!< Matrix is row major */
+};
+
+/*
+ * Forward type declarations
+ */
+
+template<typename MatrixType> struct MatCol;
+template<typename MatrixType> struct MatRow;
+template<typename MatrixType> struct MatVal;
+
+ /*!
+ * A 2-D matrix functionality over a 1-D array
+ *
+ * This is a very thin abstraction layer over a native array.
+ * This is tested using compiler explorer and our template produce
+ * almost identical assembly.
+ *
+ * The penalty hit we have is due to the fact that we use a one dimension array
+ * and we have to calculate the actual position from an (i,j) pair.
+ * The use of 1D array was our intention from the beginning, so the penalty
+ * was pretty much unavoidable.
+ *
+ * \tparam DataType  The underling data type of the array
+ * \tparam IndexType The underling type for the index variables and sizes
+ * \tparam Type      The storage type of the array
+ *    \arg  FULL              For full matrix
+ *    \arg  SYMMETRIC         For symmetric matrix (we use only the lower part)
+ */
+template<typename DataType,
+         typename IndexType = size_t,
+         MatrixType Type    = MatrixType::DENSE,
+         MatrixOrder Order  = MatrixOrder::ROWMAJOR,
+         bool Symmetric     = false>
+struct Matrix {
+
+   using dataType    = DataType;                   //!< meta:export of underling data type
+   using indexType   = IndexType;                  //!< meta:export of underling index type
+   static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
+   static constexpr MatrixType matrixType = Type;  //!< meta:export of array type
+   static constexpr bool symmetric = Symmetric;    //!< meta:export symmetric flag
+   /*!
+    * \name Obj lifetime
+    */
+   //! @{
+
+   //! Construct an empty matrix with dimensions rows x columns
+   Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept :
+      m_(capacity(rows, columns)), rows_(rows), cols_(columns) { }
+
+   //! Construct a matrix by copying existing data with dimensions rows x columns
+   Matrix(DataType* data, IndexType rows, IndexType columns) noexcept :
+      m_(data, data + capacity(rows, columns)), rows_(rows), cols_(columns) { }
+
+   //! Construct a matrix using an initializer list
+   Matrix(IndexType rows, IndexType columns, std::initializer_list<DataType> list)
+      : m_(list), rows_(rows), cols_(columns) {
+      if (list.size() != capacity(rows, columns)) {
+         throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
+      }
+   }
+
+   //! move ctor
+   Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
+   //! move
+   Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
+   Matrix(const Matrix& m)             = delete;  //!< No copy ctor
+   Matrix& operator=(const Matrix& m)  = delete;  //!< No copy
+   //! @}
+
+   //! \name Data exposure
+   //! @{
+
+
+   //! Get/Set the size of each dimension
+   IndexType rows() const noexcept { return rows_; }
+   IndexType columns() const noexcept { return cols_; }
+
+   //! Get the interface size of the Matrix (what appears to be the size)
+   IndexType size() const {
+      return rows_ * cols_;
+   }
+   //! Set the interface size of the Matrix (what appears to be the size)
+   IndexType resize(IndexType rows, IndexType columns) {
+      rows_ = rows;
+      cols_ = columns;
+      m_.reserve(capacity(rows_, cols_));
+      return capacity(rows_, cols_);
+   }
+
+   //! Actual memory capacity of the symmetric matrix
+   static constexpr IndexType capacity(IndexType M, IndexType N) {
+      if constexpr (Symmetric)
+         return (M+1)*N/2;
+      else
+         return M*N;
+   }
+
+   /*
+    * virtual 2D accessors
+    */
+   DataType get (IndexType i, IndexType j) {
+      if constexpr (Symmetric) {
+         auto T = [](size_t i)->size_t { return i*(i+1)/2; };          // Triangular number of i
+         if constexpr (Order == MatrixOrder::COLMAJOR) {
+            // In column major we use the lower triangle of the matrix
+            if (i>=j)   return m_[j*rows_ - T(j) + i];  // Lower, use our notation
+            else        return m_[i*rows_ - T(i) + j];  // Upper, use opposite index
+         }
+         else {
+            // In row major we use the upper triangle of the matrix
+            if (i<=j)   return m_[i*cols_ - T(i) + j];  // Upper, use our notation
+            else        return m_[j*cols_ - T(j) + i];  // Lower, use opposite index
+         }
+      }
+      else {
+         if constexpr (Order == MatrixOrder::COLMAJOR)
+            return m_[i + j*rows_];
+         else
+            return m_[i*cols_ + j];
+      }
+   }
+
+   DataType set (DataType v, IndexType i, IndexType j) {
+      if constexpr (Symmetric) {
+         auto T = [](size_t i)->size_t { return i*(i+1)/2; };          // Triangular number of i
+         if constexpr (Order == MatrixOrder::COLMAJOR) {
+            // In column major we use the lower triangle of the matrix
+            if (i>=j)   return m_[j*rows_ - T(j) + i] = v;  // Lower, use our notation
+            else        return m_[i*rows_ - T(i) + j] = v;  // Upper, use opposite index
+         }
+         else {
+            // In row major we use the upper triangle of the matrix
+            if (i<=j)   return m_[i*cols_ - T(i) + j] = v;  // Upper, use our notation
+            else        return m_[j*cols_ - T(j) + i] = v;  // Lower, use opposite index
+         }
+      }
+      else {
+         if constexpr (Order == MatrixOrder::COLMAJOR)
+            return m_[i + j*rows_] = v;
+         else
+            return m_[i*cols_ + j] = v;
+      }
+   }
+//   DataType operator()(IndexType i, IndexType j) { return get(i, j); }
+   /*!
+    * Return a proxy MatVal object with read and write capabilities.
+    * @param i    The row number
+    * @param j    The column number
+    * @return     tHE MatVal object
+    */
+   MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
+      return MatVal<Matrix>(this, get(i, j), i, j);
+   }
+
+   // a basic serial iterator support
+   DataType* data() noexcept { return m_.data(); }
+//   IndexType begin_idx() noexcept { return 0; }
+//   IndexType end_idx()   noexcept { return capacity(rows_, cols_); }
+
+   const DataType* data() const noexcept { return m_.data(); }
+   const IndexType begin_idx() const noexcept { return 0; }
+   const IndexType end_idx()   const noexcept { return capacity(rows_, cols_); }
+   //! @}
+
+   /*!
+    * \name Safe iteration API
+    *
+    * This api automates the iteration over the array based on
+    * MatrixType
+    */
+   //! @{
+   template<typename F, typename... Args>
+   void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
+      for (IndexType it=begin ; it<end ; ++it) {
+         std::forward<F>(lambda)(std::forward<Args>(args)..., it);
+      }
+   }
+   //! @}
+
+   //
+   void swap(Matrix& src) noexcept {
+      std::swap(m_, src.m_);
+      std::swap(rows_, src.rows_);
+      std::swap(cols_, src.cols_);
+   }
+private:
+   //! move helper
+   void moves(Matrix&& src) noexcept {
+      m_    = std::move(src.m_);
+      rows_ = std::move(src.rows_);
+      cols_ = std::move(src.cols_);
+   }
+
+   std::vector<DataType> m_ {};  //!< Pointer to actual data.
+   IndexType   rows_{};          //!< the virtual size of rows.
+   IndexType   cols_{};          //!< the virtual size of columns.
+};
+
+
+/**
+ * A simple sparse matrix specialization.
+ *
+ * We use CSC format and provide get/set functionalities for each (i,j) item
+ * on the matrix. We also provide a () overload using a proxy MatVal object.
+ * This way the user can:
+ * \code
+ *    auto v = A(3,4);
+ *    A(3, 4) = 7;
+ * \endcode
+ *
+ * We also provide getCol() and getRow() functions witch return a viewer/iterator to rows and
+ * columns of the matrix. In the case of a symmetric matrix instead of a row we return the
+ * equivalent column. This way we gain speed due to CSC format nature.
+ *
+ * @tparam DataType  The type for values
+ * @tparam IndexType The type for indexes
+ * @tparam Type      The Matrix type (FULL or SYMMETRIC)
+ */
+template<typename DataType, typename IndexType,
+         MatrixOrder Order,
+         bool Symmetric>
+struct Matrix<DataType, IndexType, MatrixType::SPARSE, Order, Symmetric> {
+
+   using dataType    = DataType;                   //!< meta:export of underling data type
+   using indexType   = IndexType;                  //!< meta:export of underling index type
+   static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
+   static constexpr MatrixType matrixType = MatrixType::SPARSE;  //!< meta:export of array type
+   static constexpr bool symmetric = Symmetric;    //!< meta:export symmetric flag
+
+   friend struct MatCol<Matrix>;
+   friend struct MatRow<Matrix>;
+   friend struct MatVal<Matrix>;
+
+   /*!
+    * \name Obj lifetime
+    */
+   //! @{
+
+   //! Default ctor with optional memory allocations
+   Matrix(IndexType n=IndexType{}) noexcept:
+      values{},
+      rows{},
+      col_ptr((n)? n+1:2, IndexType{}),
+      N(n),
+      NNZ(0) { }
+
+   //! A ctor using csc array data
+   Matrix(IndexType n, IndexType nnz, const IndexType* row, const IndexType* col) noexcept:
+      values(nnz, 1),
+      rows(row, row+nnz),
+      col_ptr(col, col+n+1),
+      N(n),
+      NNZ(nnz) { }
+
+   //! ctor using csc array data with value array
+   Matrix(IndexType n, IndexType nnz, const DataType* v, const IndexType* row, const IndexType* col) noexcept:
+      values(v, v+nnz),
+      rows(row, row+nnz),
+      col_ptr(col, col+n+1),
+      N(n),
+      NNZ(nnz) { }
+
+   //! ctor vectors of row/col and default value for values array
+   Matrix(IndexType n, IndexType nnz, const DataType v,
+          const std::vector<IndexType>& row, const std::vector<IndexType>& col) noexcept:
+      values(nnz, v),
+      rows (row),
+      col_ptr(col),
+      N(n),
+      NNZ(nnz) { }
+
+   //! move ctor
+   Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
+   //! move
+   Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
+   Matrix(const Matrix& m)             = delete;  //!< make sure there are no copies
+   Matrix& operator=(const Matrix& m)  = delete;  //!< make sure there are no copies
+   //! @}
+
+   //! \name Data exposure
+   //! @{
+
+   //! \return the dimension of the matrix
+   IndexType size()     noexcept { return N; }
+   //! After construction size configuration tool
+   IndexType resize(IndexType n) {
+      col_ptr.resize(n+1);
+      return N = n;
+   }
+   //! \return the NNZ of the matrix
+   IndexType capacity() noexcept { return NNZ; }
+   //! After construction NNZ size configuration tool
+   IndexType capacity(IndexType nnz) noexcept {
+      values.reserve(nnz);
+      rows.reserve(nnz);
+      return NNZ;
+   }
+   // getters for row arrays of the struct (unused)
+   std::vector<DataType>&  getValues() noexcept { return values; }
+   std::vector<IndexType>& getRows() noexcept { return rows; }
+   std::vector<IndexType>& getCols() noexcept { return col_ptr; }
+
+   /*!
+    * Return a proxy MatVal object with read and write capabilities.
+    * @param i    The row number
+    * @param j    The column number
+    * @return     tHE MatVal object
+    */
+   MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
+      return MatVal<Matrix>(this, get(i, j), i, j);
+   }
+
+   /*!
+    * A read item functionality using binary search to find the correct row
+    *
+    * @param i    The row number
+    * @param j    The column number
+    * @return     The value of the item or DataType{} if is not present.
+    */
+   DataType get(IndexType i, IndexType j) noexcept {
+      IndexType idx; bool found;
+      std::tie(idx, found) =find_idx(rows, col_ptr[j], col_ptr[j+1], i);
+      return (found) ? values[idx] : 0;
+   }
+
+   /*!
+    * A write item functionality.
+    *
+    * First we search if the matrix has already a value in (i, j) position.
+    * If so we just change it to a new value. If not we add the item on the matrix.
+    *
+    * @note
+    *    When change a value, we don't increase the NNZ value of the struct. We expect the user has already
+    *    change the NNZ value to the right one using @see capacity() function. When adding a value we
+    *    increase the NNZ.
+    *
+    * @param i    The row number
+    * @param j    The column number
+    * @return     The new value of the item .
+    */
+   DataType set(DataType v, IndexType i, IndexType j) {
+      IndexType idx; bool found;
+      std::tie(idx, found) = find_idx(rows, col_ptr[j], col_ptr[j+1], i);
+      if (found)
+         return values[idx] = v;    // we don't change NNZ even if we write "0"
+      else {
+         values.insert(values.begin()+idx, v);
+         rows.insert(rows.begin()+idx, i);
+         std::transform(col_ptr.begin()+j+1, col_ptr.end(), col_ptr.begin()+j+1, [](IndexType it) {
+            return ++it;
+         });
+         ++NNZ;                     // we increase the NNZ even if we write "0"
+         return v;
+      }
+   }
+
+   /*!
+    * Get a view of a CSC column
+    * @param j    The column to get
+    * @return     The MatCol object @see MatCol
+    */
+   MatCol<Matrix> getCol(IndexType j) noexcept {
+      return MatCol<Matrix>(this, col_ptr[j], col_ptr[j+1]);
+   }
+
+   /*!
+    * Get a view of a CSC row
+    *
+    * In case of a SYMMETRIC matrix we can return a column instead.
+    *
+    * @param j    The row to get
+    * @return     On symmetric matrix MatCol otherwise a MatRow
+    */
+
+   MatCol<Matrix> getRow(IndexType i) noexcept {
+      if constexpr (Symmetric)
+         return getCol(i);
+      else
+         return MatRow<Matrix>(this, i);
+   }
+
+   // values only iterator support
+   DataType* begin() noexcept { return values.begin(); }
+   DataType* end()   noexcept { return values.end(); }
+   //! @}
+
+   //! A small iteration helper
+   template<typename F, typename... Args>
+   void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
+      for (IndexType it=begin ; it<end ; ++it) {
+         std::forward<F>(lambda)(std::forward<Args>(args)..., it);
+      }
+   }
+
+private:
+   /*!
+    * A small binary search implementation using index for begin-end instead of iterators.
+    *
+    * \param   v     Reference to vector to search
+    * \param   begin The vector's index to begin
+    * \param   end   The vector's index to end
+    * \param   match What to search
+    * \return  An <index, status> pair.
+    *                index    is the index of the item or end if not found
+    *                status   is true if found, false otherwise
+    */
+   std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
+      if (v.capacity() != 0 && begin < end) {
+         IndexType b = begin, e = end-1;
+         while (b <= e) {
+            IndexType m = (b+e)/2;
+            if       (v[m] == match)   return  std::make_pair(m, true);
+            else if  (b >= e)          return  std::make_pair(end, false);
+            else {
+               if    (v[m] <  match)   b = m +1;
+               else                    e = m -1;
+            }
+         }
+      }
+      return std::make_pair(end, false);
+   }
+
+   // move helper
+   void moves(Matrix&& src) noexcept {
+      values  = std::move(src.values);
+      rows    = std::move(src.rows);
+      col_ptr = std::move(src.col_ptr);
+      N       = std::move(src.N);   // redundant for primitives
+      NNZ     = std::move(src.NNZ); //
+   }
+   //! \name Data
+   //! @{
+   std::vector<DataType>   values {};     //!< vector to store the values of the matrix
+   std::vector<IndexType>  rows{};        //!< vector to store the row information
+   std::vector<IndexType>  col_ptr{1,0};  //!< vector to store the column pointers
+   IndexType   N{0};                      //!< The dimension of the matrix (square)
+   IndexType   NNZ{0};                    //!< The NNZ (capacity of the matrix)
+   //! @}
+};
+
+
+/*!
+ * A view/iterator hybrid object for Matrix columns.
+ *
+ * This object provides access to a column of a Matrix. The public functionalities
+ * allow data access using indexes instead of iterators. We prefer indexes over iterators
+ * because we can apply the same index to different inner vector of Matrix without conversion.
+ *
+ * @tparam DataType
+ * @tparam IndexType
+ */
+template<typename MatrixType>
+struct MatCol {
+   using owner_t = MatrixType;
+
+   using DataType  = typename MatrixType::dataType;
+   using IndexType = typename MatrixType::indexType;
+
+   /*!
+    * ctor using column pointers for begin-end. own is pointer to Matrix.
+    */
+   MatCol(owner_t* own, const IndexType begin, const IndexType end) noexcept :
+      owner_(own), index_(begin), begin_(begin), end_(end) {
+      vindex_ = vIndexCalc(index_);
+   }
+   MatCol()                        = default;
+   MatCol(const MatCol&)           = delete;   //!< make sure there are no copies
+   MatCol& operator=(const MatCol&)= delete;   //!< make sure there are no copies
+   MatCol(MatCol&&)                = default;
+   MatCol& operator=(MatCol&&)     = default;
+
+   //! a simple dereference operator, like an iterator
+   DataType operator* () {
+      return get();
+   }
+   //! Increment operator acts on index(), like an iterator
+   MatCol& operator++ ()    { advance(); return *this; }
+   MatCol& operator++ (int) { MatCol& p = *this; advance(); return p; }
+
+   //! () operator acts as member access (like a view)
+   DataType operator()(IndexType x) {
+      return (x == index())? get() : DataType{};
+   }
+   //! = operator acts as member assignment (like a view)
+   DataType operator= (DataType v) { return owner_->values[index_] = v; }
+   // iterator like handlers
+   // these return a virtual index value based on the items position on the full matrix
+   // but the move of the index is just a ++ away.
+   IndexType       index()       noexcept { return vindex_; }
+   const IndexType index() const noexcept { return vindex_; }
+   IndexType       begin()       noexcept { return vIndexCalc(begin_); }
+   const IndexType begin() const noexcept { return vIndexCalc(begin_); }
+   IndexType       end()         noexcept { return owner_->N; }
+   const IndexType end()   const noexcept { return owner_->N; }
+
+   /*!
+    * Multiplication operator
+    *
+    * We follow only the non-zero values and multiply only the common indexes.
+    *
+    * @tparam C   Universal reference for the type right half site column
+    *
+    * @param c    The right hand site matrix
+    * @return     The value of the inner product of two vectors
+    * @note       The time complexity is \$ O(nnz1+nnz2) \$.
+    *             Where the nnz is the max NNZ elements of the column of the matrix
+    */
+   template <typename C>
+   DataType operator* (C&& c) {
+      static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
+      DataType v{};
+      while (index() != end() && c.index() != c.end()) {
+         if      (index() < c.index())  advance(); // advance me
+         else if (index() > c.index())  ++c;       // advance other
+         else { //index() == c.index()
+            v += get() * *c;                       // multiply and advance both
+            ++c;
+            advance();
+         }
+      }
+      return v;
+   }
+
+private:
+   //! small tool to increase the index pointers to Matrix
+   void advance() noexcept {
+      ++index_;
+      vindex_ = vIndexCalc(index_);
+   }
+   //! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
+   IndexType vIndexCalc(IndexType idx) {
+      return (idx < end_) ? owner_->rows[idx] : end();
+   }
+   //! small get tool
+   DataType get() { return owner_->values[index_]; }
+
+   owner_t*    owner_   {nullptr};     //!< Pointer to owner Matrix. MatCol is just a view
+   IndexType   vindex_  {IndexType{}}; //!< Virtual index of full matrix
+   IndexType   index_   {IndexType{}}; //!< index to Matrix::rows
+   IndexType   begin_   {IndexType{}}; //!< beginning index of the column in Matrix::rows
+   IndexType   end_     {IndexType{}}; //!< ending index of the column in Matrix::rows
+};
+
+/*!
+ * A view/iterator hybrid object for Matrix rows.
+ *
+ * This object provides access to a column of a Matrix. The public functionalities
+ * allow data access using indexes instead of iterators. We prefer indexes over iterators
+ * because we can apply the same index to different inner vector of Matrix without conversion.
+ *
+ * @tparam DataType
+ * @tparam IndexType
+ */
+template<typename MatrixType>
+struct MatRow {
+   using owner_t = MatrixType;
+
+   using DataType  = typename MatrixType::dataType;
+   using IndexType = typename MatrixType::indexType;
+
+   /*!
+    * ctor using virtual full matrix row index. own is pointer to Matrix.
+    */
+   MatRow(owner_t* own, const IndexType row) noexcept :
+      owner_(own), vindex_(IndexType{}), row_(row), index_(IndexType{}),
+      begin_(IndexType{}), end_(owner_->NNZ) {
+      // place begin
+      while(begin_ != end_ && owner_->rows[begin_] != row_)
+         ++begin_;
+      // place index_ and vindex_
+      if (owner_->rows[index_] != row_)
+         advance();
+   }
+   MatRow()                        = default;
+   MatRow(const MatRow&)           = delete;   //!< make sure there are no copies
+   MatRow& operator=(const MatRow&)= delete;   //!< make sure there are no copies
+   MatRow(MatRow&&)                = default;
+   MatRow& operator=(MatRow&&)     = default;
+
+   //! a simple dereference operator, like an iterator
+   DataType operator* () {
+      return get();
+   }
+   //! Increment operator acts on index(), like an iterator
+   //! here the increment is a O(N) process.
+   MatRow& operator++ ()    { advance(); return *this; }
+   MatRow& operator++ (int) { MatRow& p = *this; advance(); return p; }
+
+   //! () operator acts as member access (like a view)
+   DataType operator()(IndexType x) {
+      return (x == index())? get() : DataType{};
+   }
+   //! = operator acts as member assignment (like a view)
+   DataType operator= (DataType v) { return owner_->values[index_] = v; }
+   // iterator like handlers
+   // these return a virtual index value based on the items position on the full matrix
+   // but the move of the index is just a ++ away.
+   IndexType       index()       noexcept { return vindex_; }
+   const IndexType index() const noexcept { return vindex_; }
+   IndexType       begin()       noexcept { return vIndexCalc(begin_); }
+   const IndexType begin() const noexcept { return vIndexCalc(begin_); }
+   IndexType       end()         noexcept { return owner_->N; }
+   const IndexType end()   const noexcept { return owner_->N; }
+
+   /*!
+    * Multiplication operator
+    *
+    * We follow only the non-zero values and multiply only the common indexes.
+    *
+    * @tparam C   Universal reference for the type right half site column
+    *
+    * @param c    The right hand site matrix
+    * @return     The value of the inner product of two vectors
+    * @note       The time complexity is \$ O(N+nnz2) \$ and way heavier the ColxCol multiplication.
+    *             Where the nnz is the max NNZ elements of the column of the matrix
+    */
+   template <typename C>
+   DataType operator* (C&& c) {
+      static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
+      DataType v{};
+      while (index() != end() && c.index() != c.end()) {
+         if      (index() < c.index())  advance(); // advance me
+         else if (index() > c.index())  ++c;       // advance other
+         else { //index() == c.index()
+            v += get() * *c;                       // multiply and advance both
+            ++c;
+            advance();
+         }
+      }
+      return v;
+   }
+private:
+   //! small tool to increase the index pointers to Matrix matrix
+   //! We have to search the entire rows vector in Matrix to find the next
+   //! virtual row position.
+   //! time complexity O(N)
+   void advance() noexcept {
+      do
+         ++index_;
+      while(index_ != end_ && owner_->rows[index_] != row_);
+      vindex_ = vIndexCalc(index_);
+   }
+   //! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
+   IndexType vIndexCalc(IndexType idx) {
+      for(IndexType i =0 ; i<(owner_->N+1) ; ++i)
+         if (idx < owner_->col_ptr[i])
+            return i-1;
+      return end();
+   }
+   //! small get tool
+   DataType get() { return owner_->values[index_]; }
+
+   owner_t*    owner_   {nullptr};     //!< Pointer to owner Matrix. MatCol is just a view
+   IndexType   vindex_  {IndexType{}}; //!< Virtual index of full matrix
+   IndexType   row_     {IndexType{}}; //!< The virtual full matrix row of the object
+   IndexType   index_   {IndexType{}}; //!< index to Matrix::rows
+   IndexType   begin_   {IndexType{}}; //!< beginning index of the column in Matrix::rows
+   IndexType   end_     {IndexType{}}; //!< ending index of the column in Matrix::rows
+};
+
+/*!
+ * A proxy Matrix value object/view.
+ *
+ * This object acts as proxy to provide read/write access to an Matrix item.
+ *
+ * @tparam DataType     The type of the values of the Matrix matrix
+ * @tparam IndexType    The type of the indexes of the Matrix matrix
+ */
+template<typename MatrixType>
+struct MatVal {
+   using owner_t = MatrixType;
+
+   using DataType  = typename MatrixType::dataType;
+   using IndexType = typename MatrixType::indexType;
+
+   //!< ctor using all value-row-column data, plus a pointer to owner Matrix object
+   MatVal(owner_t* own, DataType v, IndexType i, IndexType j) :
+      owner_(own), v_(v), i_(i), j_(j) { }
+   MatVal()                         = default;
+   MatVal(const MatVal&)            = delete;  //!< make sure there are no copies
+   MatVal& operator=(const MatVal&) = delete;  //!< make sure there are no copies
+   MatVal(MatVal&&)                 = default;
+   MatVal& operator=(MatVal&&)      = default;
+
+   //! Operator to return the DataType value implicitly
+   operator DataType() { return v_; }
+   //! Operator to write back to owner the assigned value
+   //! for ex: A(2,3) = 5;
+   MatVal& operator=(DataType v) {
+      v_ = v;
+      owner_->set(v_, i_, j_);
+      return *this;
+   }
+private:
+   owner_t*    owner_{nullptr};  //!< Pointer to owner Matrix. MatVal is just a view.
+   DataType    v_{DataType{}};   //!< The value of the row-column pair (for speed)
+   IndexType   i_{IndexType{}};  //!< The row
+   IndexType   j_{IndexType{}};  //!< the column
+};
+
+
+} // namespace mtx
+
+
+#endif /* MATRIX_HPP_ */
--- a/homework_1/Makefile
+++ b/homework_1/Makefile
@ -26,7 +26,11 @@ TARGET          := knnsearch
 SRC_DIR_LIST    := src
 # Include directories list(space seperated). Makefile-relative path.
 INC_DIR_LIST    := inc \
-                   src
+                   src \
+                   Libs/matrix/include/ \
+                   /usr/include/hdf5/serial/
+#                   Libs/MATLAB/R2019b/include/ \
+
 # Exclude files list(space seperated). Filenames only.
 # EXC_FILE_LIST := bad.cpp old.cpp

@ -49,7 +53,11 @@ PRE_DEFS		:=

 # ============== Linker settings ==============
 # Linker flags (example: -pthread -lm)
-LDFLAGS         := -pthread -lopenblas
+LDFLAGS         := -pthread -lopenblas \
+                   -L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5
+#                   -LLibs/MATLAB/R2019b/bin/ -lmat -lmx -Wl,-rpath,Libs/MATLAB/R2019b/bin/ \
+#                   -Wl,-rpath,Libs/unwind/bin/
+
 # Map output file
 MAP_FILE        := output.map
 MAP_FLAG        := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
@ -184,6 +192,12 @@ local_v0: TARGET := local_v0
 local_v0: $(BUILD_DIR)/$(TARGET)
 	cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

+local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0
+local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0
+local_v0_opt: TARGET := local_v0_opt
+local_v0_opt: $(BUILD_DIR)/$(TARGET)
+	cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
+	
 local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=4
 local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=4
 local_v1: TARGET := local_v1
--- a/homework_1/inc/config.h
+++ b/homework_1/inc/config.h
@ -0,0 +1,75 @@
+/*!
+ * \file    config,h
+ * \brief   Build configuration file.
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
+#ifndef CONFIG_H_
+#define CONFIG_H_
+
+#include <iostream>
+#include <string>
+
+#include <matrix.hpp>
+
+// HDF5 supported types
+enum class HDF5_type {
+   SCHAR, CHAR, SHORT, USHORT, INT, UINT, LONG, ULONG, LLONG, ULLONG, FLOAT, DOUBLE
+};
+
+/*
+ * Defines for different version of the exercise
+ */
+#define  V0    0
+#define  V1    1
+
+
+// Fail-safe version selection
+#if !defined CODE_VERSION
+ #define CODE_VERSION   V1
+#endif
+
+// matrix alias template dispatcher based on pre-define flag from compiler (see Makefile)
+#if   CODE_VERSION == V0
+   #define NAMESPACE_VERSION using namespace v0
+   using MatrixDst = mtx::Matrix<double>;
+   using MatrixIdx = mtx::Matrix<uint32_t>;
+   static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
+   static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
+#elif CODE_VERSION == V1
+   #define NAMESPACE_VERSION using namespace v1
+   using MatrixDst = mtx::Matrix<double>;
+   using MatrixIdx = mtx::Matrix<uint32_t>;
+   static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
+   static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
+#endif
+
+
+
+//! enumerator for output handling
+enum class StdOutputMode{ STD, FILE };
+
+/*!
+ * Session option for each invocation of the executable
+ */
+struct session_t {
+   std::string   corpusMtxFile  {};                        //!< corpus matrix file name in HDF5 format
+   std::string   corpusDataSet  {};                        //!< corpus dataset name in HDF5 matrix file
+   std::string   queryMtxFile   {};                        //!< optional query matrix file name in HDF5 format
+   std::string   queryDataSet   {};                        //!< optional query dataset name in HDF5 matrix file
+   bool          queryMtx       {false};                   //!< Flag to indicate that there is a separate query matrix
+   size_t        k              {1};                       //!< The number of nearest neighbors to find
+   std::string   outMtxFile     {"out.hdf5"};              //!< output matrix file name in HDF5 format
+   std::string   outMtxIdxDataSet {"/Idx"};                //!< Index output dataset name in HDF5 matrix file
+   std::string   outMtxDstDataSet {"/Dst"};                //!< Distance output dataset name in HDF5 matrix file
+   std::size_t   max_threads    {};                        //!< Maximum threads to use
+   bool          timing         {false};                   //!< Enable timing prints of the program
+   bool          verbose        {false};                   //!< Flag to enable verbose output to stdout
+};
+
+extern session_t session;
+
+#endif /* CONFIG_H_ */
--- a/homework_1/inc/utils.hpp
+++ b/homework_1/inc/utils.hpp
@ -0,0 +1,224 @@
+/**
+ * \file    utils.hpp
+ * \brief   Utilities header
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+#ifndef UTILS_HPP_
+#define UTILS_HPP_
+
+#include <iostream>
+#include <chrono>
+#include <unistd.h>
+#include <hdf5.h>
+
+#include <matrix.hpp>
+#include <config.h>
+
+/*!
+ * A Logger for entire program.
+ */
+struct Log {
+   struct Endl {} endl;    //!< a tag object to to use it as a new line request.
+
+   //! We provide logging via << operator
+   template<typename T>
+   Log& operator<< (T&& t) {
+      if (session.verbose) {
+         if (line_) {
+            std::cout << "[Log]: " << t;
+            line_ = false;
+         }
+         else
+            std::cout << t;
+      }
+      return *this;
+   }
+   // overload for special end line handling
+   Log& operator<< (Endl e) { (void)e;
+      if (session.verbose) {
+         std::cout << '\n';
+         line_ = true;
+      }
+      return *this;
+   }
+private:
+   bool line_ {true};
+};
+
+extern Log logger;
+
+/*!
+ * A small timing utility based on chrono.
+ */
+struct Timing{
+   using Tpoint = std::chrono::steady_clock::time_point;
+   using microseconds = std::chrono::microseconds;
+   using milliseconds = std::chrono::milliseconds;
+   using seconds = std::chrono::seconds;
+
+   //! tool to mark the starting point
+   Tpoint start () noexcept { return start_ = std::chrono::steady_clock::now(); }
+   //! tool to mark the ending point
+   Tpoint stop ()  noexcept { return stop_ = std::chrono::steady_clock::now(); }
+
+   auto dt () noexcept {
+      return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
+   }
+   //! tool to print the time interval
+   void print_dt (const char* what) noexcept {
+      if (session.timing) {
+         auto t = stop_ - start_;
+         if       (std::chrono::duration_cast<microseconds>(t).count() < 10000)
+            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
+         else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
+            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
+         else
+            std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
+      }
+   }
+private:
+   Tpoint start_;
+   Tpoint stop_;
+};
+
+
+
+struct Mtx {
+
+   template<typename MatrixType, HDF5_type Type>
+   static void load(const std::string& filename, const std::string& dataset, MatrixType& matrix) {
+
+      hid_t file_id{}, dataset_id{}, dataspace_id{};
+      herr_t read_st;
+      do {
+         // Open file
+         logger << "Load HDF5 file: " << filename << " Dataset: " << dataset << "...";
+         if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
+            break;
+
+         // Open dataset
+         if ((dataset_id = H5Dopen2(file_id, dataset.c_str(), H5P_DEFAULT)) < 0)
+            break;
+
+         // Get dataspace and allocate memory for read buffer
+         if ((dataspace_id = H5Dget_space(dataset_id)) < 0)
+            break;
+         hsize_t dims[2];
+         H5Sget_simple_extent_dims(dataspace_id, dims, NULL);
+         matrix.resize(dims[0], dims[1]);
+
+         // Read the dataset
+         // ToDo: Come up with a better way to do this
+         if constexpr (Type == HDF5_type::DOUBLE) {
+            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
+               break;
+         }
+         else if (Type == HDF5_type::FLOAT) {
+            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
+               break;
+         }
+         else if (Type == HDF5_type::UINT) {
+            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
+               break;
+         }
+         else if (Type == HDF5_type::INT) {
+            if ((read_st = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
+               break;
+         }
+         // Done
+         H5Dclose(dataset_id);
+         H5Sclose(dataspace_id);
+         H5Fclose(file_id);
+         logger << " Done" << logger.endl;
+         return;
+      } while (0);
+
+      // Error: close everything (if possible) and return false
+      H5Dclose(dataset_id);
+      H5Sclose(dataspace_id);
+      H5Fclose(file_id);
+      throw std::runtime_error("Cannot store to " + filename + " dataset:" + dataset + '\n');
+   }
+
+
+   template<typename MatrixType, HDF5_type Type>
+   static void store(const std::string& filename, const std::string& dataset, MatrixType& matrix) {
+
+      hid_t file_id{}, dataset_id{}, dataspace_id{};
+      herr_t write_st;
+      do {
+         // Try to open the file in read-write mode
+         logger << "Store HDF5 file: " << filename << " Dataset: " << dataset << "...";
+         if (access(session.outMtxFile.c_str(), F_OK) == 0){
+            if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT)) < 0)
+               break;
+         }
+         else {
+            if ((file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
+               break;
+         }
+
+         // Create the dataspace for the dataset
+         hsize_t dims[] = { matrix.rows(), matrix.columns() };
+         if ((dataspace_id = H5Screate_simple(2, dims, NULL)) < 0)
+            break;
+
+         // ToDo: Come up with a better way to do this
+         if constexpr (Type == HDF5_type::DOUBLE) {
+            // Create the dataset with default properties
+            if ((dataset_id = H5Dcreate2(
+                  file_id, dataset.c_str(), H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
+               break;
+            // Write the data to the dataset
+            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
+               break;
+         }
+         else if (Type == HDF5_type::FLOAT) {
+            // Create the dataset with default properties
+            if ((dataset_id = H5Dcreate2(
+                  file_id, dataset.c_str(), H5T_NATIVE_FLOAT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
+               break;
+            // Write the data to the dataset
+            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
+               break;
+         }
+         else if (Type == HDF5_type::UINT) {
+            // Create the dataset with default properties
+            if ((dataset_id = H5Dcreate2(
+                  file_id, dataset.c_str(), H5T_NATIVE_UINT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
+               break;
+            // Write the data to the dataset
+            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
+               break;
+         }
+         else if (Type == HDF5_type::INT) {
+            // Create the dataset with default properties
+            if ((dataset_id = H5Dcreate2(
+                  file_id, dataset.c_str(), H5T_NATIVE_INT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
+               break;
+            // Write the data to the dataset
+            if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
+               break;
+         }
+         // Close the dataset, dataspace, and file
+         H5Dclose(dataset_id);
+         H5Sclose(dataspace_id);
+         H5Fclose(file_id);
+         logger << " Done" << logger.endl;
+         return;
+      } while (0);
+
+      // Error: close everything (if possible) and return false
+      H5Dclose(dataset_id);
+      H5Sclose(dataspace_id);
+      H5Fclose(file_id);
+      throw std::runtime_error("Cannot store " + filename + " with dataset:" + dataset +'\n');
+   }
+};
+
+
+
+#endif /* UTILS_HPP_ */
--- a/homework_1/inc/v0.hpp
+++ b/homework_1/inc/v0.hpp
@ -0,0 +1,119 @@
+/**
+ * \file    v0.hpp
+ * \brief
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+#ifndef V0_HPP_
+#define V0_HPP_
+
+#include <cblas.h>
+#include <cmath>
+#include <vector>
+#include <algorithm>
+
+#include <matrix.hpp>
+#include <config.h>
+
+namespace v0 {
+
+/*!
+ * Function to compute squared Euclidean distances
+ *
+ * \fn void pdist2(const double*, const double*, double*, int, int, int)
+ * \param X    m x d matrix (Column major)
+ * \param Y    n x d matrix (Column major)
+ * \param D2   m x n matrix to store distances (Column major)
+ * \param m    number of rows in X
+ * \param n    number of rows in Y
+ * \param d    number of columns in both X and Y
+ */
+template<typename DataType>
+void pdist2(const mtx::Matrix<DataType>& X, const mtx::Matrix<DataType>& Y, mtx::Matrix<DataType>& D2) {
+   int M = X.rows();
+   int N = Y.rows();
+   int d = X.columns();
+
+   // Compute the squared norms of each row in X and Y
+   std::vector<DataType> X_norms(M), Y_norms(N);
+   for (int i = 0; i < M ; ++i) {
+      X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
+   }
+   for (int j = 0; j < N ; ++j) {
+      Y_norms[j] = cblas_ddot(d, Y.data() + j * d, 1, Y.data() + j * d, 1);
+   }
+
+   // Compute -2 * X * Y'
+   cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, d, -2.0, X.data(), d, Y.data(), d, 0.0, D2.data(), N);
+
+   // Step 3: Add the squared norms to each entry in D2
+   for (int i = 0; i < M ; ++i) {
+      for (int j = 0; j < N; ++j) {
+         D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
+         //D2.set(std::max(D2.get(i, j), 0.0),            i, j); // Ensure non-negative
+         D2.set(std::sqrt(D2.get(i, j)),                i, j); // Take the square root of each
+      }
+   }
+}
+
+template<typename DataType, typename IndexType>
+void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
+   std::nth_element(
+      vec.begin(),
+      vec.begin() + k,
+      vec.end(),
+      [](const std::pair<DataType, IndexType>& a, const std::pair<DataType, IndexType>& b) {
+         return a.first < b.first;
+   });
+   vec.resize(k);  // Keep only the k smallest elements
+}
+
+/*!
+ * \param C    Is a MxD matrix (Corpus)
+ * \param Q    Is a NxD matrix (Query)
+ * \param k    The number of nearest neighbors needed
+ * \param idx  Is the Nxk matrix with the k indexes of the C points, that are
+ *             neighbors of the nth point of Q
+ * \param dst  Is the Nxk matrix with the k distances to the C points of the nth
+ *             point of Q
+ */
+template<typename DataType, typename IndexType>
+void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, int k,
+               mtx::Matrix<IndexType>& idx,
+               mtx::Matrix<DataType>& dst) {
+
+   int M = C.rows();
+   int N = Q.rows();
+
+   mtx::Matrix<DataType> D(M, N);
+
+   pdist2(C, Q, D);
+
+   idx.resize(N, k);
+   dst.resize(N, k);
+
+   for (int j = 0; j < N; ++j) {
+      // Create a vector of pairs (distance, index) for the j-th query
+      std::vector<std::pair<DataType, IndexType>> dst_idx(M);
+      for (int i = 0; i < M; ++i) {
+        dst_idx[i] = {D.data()[i * N + j], i};
+      }
+      // Find the k smallest distances using quickSelectKSmallest
+      quickselect(dst_idx, k);
+
+      // Sort the k smallest results by distance for consistency
+      std::sort(dst_idx.begin(), dst_idx.end());
+
+      // Store the indices and distances
+      for (int i = 0; i < k; ++i) {
+         idx(j, i) = dst_idx[i].second;
+         dst(j, i) = dst_idx[i].first;
+      }
+   }
+}
+
+}
+
+#endif /* V0_HPP_ */
--- a/homework_1/inc/v1.hpp
+++ b/homework_1/inc/v1.hpp
@ -0,0 +1,16 @@
+/**
+ * \file    v0.hpp
+ * \brief
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+#ifndef V1_HPP_
+#define V1_HPP_
+
+
+
+
+
+#endif /* V1_HPP_ */
--- a/homework_1/matlab/.gitignore
+++ b/homework_1/matlab/.gitignore
@ -0,0 +1,2 @@
+# matlab
+*.m~
--- a/homework_1/matlab/dist2.m
+++ b/homework_1/matlab/dist2.m
@ -1,10 +1,12 @@
 function [D2] = dist2(X, Y)
 % Calculates the squares of the distances of X and Y
 %
-% X: A mxd array with m d-dimentional points
-% Y: A nxd array with n d-dimentional points
+% X: A Mxd array with m d-dimentional points
+% Y: A Nxd array with n d-dimentional points
 % d: Must be the same
-
+%
+% D2: The MxN matrix with the distances
+%
    [~, d1] = size(X);
    [~, d2] = size(Y);
    if d1 ~= d2
--- a/homework_1/matlab/knnsearch2.m
+++ b/homework_1/matlab/knnsearch2.m
@ -1,8 +1,12 @@
 function [idx, dst] = knnsearch2(C, Q, k)
-% C: Is a mxd matrix (Corpus)
-% Q: Is a nxd matrix (Query)
-% k: The number of nearest neighbors needded
-
+% C:   Is a MxD matrix (Corpus)
+% Q:   Is a NxD matrix (Query)
+% k:   The number of nearest neighbors needded
+% idx: Is the Nxk matrix with the k indexes of the C points, that are
+%      neighbors of the nth point of Q
+% dst: Is the Nxk matrix with the k distances to the C points of the nth
+%      point of Q
+%
    % Calculate the distance matrix between C and Q
    % D is an m x n matrix where each element D(i, j) is the distance 
    % between the i-th point in C and the j-th point in Q.
--- a/homework_1/src/main.cpp
+++ b/homework_1/src/main.cpp
@ -1,107 +1,161 @@
+/*!
+ * \file    main.cpp
+ * \brief   Main application file
+ *
+ * \author
+ *    Christos Choutouridis AEM:8997
+ *    <cchoutou@ece.auth.gr>
+ */
+
 #include <iostream>
-#include <cblas.h>
-#include <cmath>
-#include <vector>
-#include <algorithm>
-#include <queue>
+#include <string>
+#include <exception>
+#include <unistd.h>
+#include <cstdio>
+
+#include <v0.hpp>
+#include <v1.hpp>
+#include <matrix.hpp>
+#include <utils.hpp>
+#include <config.h>
+
+// Global session data
+session_t   session;
+Log         logger;
+Timing      timer;

 /*!
- * Function to compute squared Euclidean distances
- *
- * \fn void pdist2(const double*, const double*, double*, int, int, int)
- * \param X    m x d matrix
- * \param Y    n x d matrix
- * \param D2   m x n matrix to store distances
- * \param m    number of rows in X
- * \param n    number of rows in Y
- * \param d    number of columns in both X and Y
+ * A small command line argument parser
+ * \return  The status of the operation
 */
-void pdist2(const double* X, const double* Y, double* D2, int m, int n, int d){
-   // Compute the squared norms of each row in X and Y
-   std::vector<double> X_norms(m), Y_norms(n);
-   for (int i = 0; i < m; ++i) {
-      X_norms[i] = cblas_ddot(d, X + i * d, 1, X + i * d, 1);
-   }
-   for (int j = 0; j < n; ++j) {
-      Y_norms[j] = cblas_ddot(d, Y + j * d, 1, Y + j * d, 1);
-   }
+bool get_options(int argc, char* argv[]){
+   bool status =true;

-   // Compute -2 * X * Y'
-   cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, d, -2.0, X, d, Y, d, 0.0, D2, n);
+   // iterate over the passed arguments
+   for (int i=1 ; i<argc ; ++i) {
+      std::string arg(argv[i]);     // get current argument

-   // Step 3: Add the squared norms to each entry in D2
-   for (int i = 0; i < m; ++i) {
-      for (int j = 0; j < n; ++j) {
-         D2[i * n + j] += X_norms[i] + Y_norms[j];
-         D2[i * n + j] = std::max(D2[i * n + j], 0.0); // Ensure non-negative
-         D2[i * n + j] = std::sqrt(D2[i * n + j]);     // Take the square root of each
+      if (arg == "-c" || arg == "--corpus") {
+         if (i+2 < argc) {
+            session.corpusMtxFile = std::string(argv[++i]);
+            session.corpusDataSet = std::string(argv[++i]);
+         }
+         else
+            status = false;
+      }
+      else if (arg == "-o" || arg == "--output") {
+         if (i+3 < argc) {
+            session.outMtxFile = std::string(argv[++i]);
+            session.outMtxIdxDataSet = std::string(argv[++i]);
+            session.outMtxDstDataSet = std::string(argv[++i]);
+         }
+         else
+            status = false;
+      }
+      else if (arg == "-q" || arg == "--query") {
+         if (i+2 < argc) {
+            session.queryMtxFile = std::string(argv[++i]);
+            session.queryDataSet = std::string(argv[++i]);
+            session.queryMtx = true;
+         }
+         else
+            status = false;
+      }
+      else if (arg == "-k") {
+         session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
+      }
+      else if (arg == "-n" || arg == "--max_trheads") {
+         session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
+      }
+      else if (arg == "-t" || arg == "--timing")
+         session.timing = true;
+      else if (arg == "-v" || arg == "--verbose")
+         session.verbose = true;
+      else if (arg == "-h" || arg == "--help") {
+         std::cout << "annsearch - an aproximation knnsearch utility\n\n";
+         std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
+         std::cout << '\n';
+         std::cout << "Options:\n\n";
+         std::cout << "   -c | --corpus <file> <dataset>\n";
+         std::cout << "      Path to hdf5 file to open and name of the dataset to load\n\n";
+         std::cout << "   -o | --output <file> <idx-dataset> <dst-dataset> \n";
+         std::cout << "      Path to <file> to store the data and the names of the datasets.\n\n";
+         std::cout << "   -q | --query <file> <dataset>\n";
+         std::cout << "      Path to hdf5 file to open and name of the dataset to load\n";
+         std::cout << "      If not defined, the corpus is used\n\n";
+         std::cout << "   -k <number>\n";
+         std::cout << "      Set the number of closest neighbors to find. \n\n";
+         std::cout << "   -n | --max_trheads <threads>\n";
+         std::cout << "      Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n";
+         std::cout << "   -t | --timing\n";
+         std::cout << "      Request timing measurements output to stdout.\n\n";
+         std::cout << "   -v | --verbose\n";
+         std::cout << "      Request a more verbose output to stdout.\n\n";
+         std::cout << "   -h | --help <size>\n";
+         std::cout << "      Prints this and exit.\n\n";
+         std::cout << "Examples:\n\n";
+         std::cout << "   ...Example case...:\n";
+         std::cout << "   > ./annsearch -i <MFILE> ... \n\n";
+
+         exit(0);
+      }
+      else {   // parse error
+         std::cout << "Invocation error. Try -h for details.\n";
+         status = false;
      }
   }
+
+   return status;
 }

-void quickselect(std::vector<std::pair<double, int>>& vec, int k) {
-   std::nth_element(
-      vec.begin(),
-      vec.begin() + k,
-      vec.end(),
-      [](const std::pair<double, int>& a, const std::pair<double, int>& b) {
-         return a.first < b.first;
-   });
-   vec.resize(k);  // Keep only the k smallest elements
-}
+NAMESPACE_VERSION;

-// K-nearest neighbor search function
-void knnsearch(const double* C, const double* Q, int m, int n, int d, int k,
-               std::vector<std::vector<int>>& idx, std::vector<std::vector<double>>& dst) {
-    std::vector<double> D(m * n);
-    pdist2(C, Q, D.data(), m, n, d);
+int main(int argc, char* argv[]) try {
+   // Instantiate matrixes
+   MatrixDst Corpus;
+   MatrixDst Query;
+   MatrixIdx Idx;
+   MatrixDst Dst;

-    idx.resize(n, std::vector<int>(k));
-    dst.resize(n, std::vector<double>(k));
+   // try to read command line
+   if (!get_options(argc, argv))
+      exit(1);

-    for (int j = 0; j < n; ++j) {
-        // Create a vector of pairs (distance, index) for the j-th query
-        std::vector<std::pair<double, int>> dst_idx(m);
-        for (int i = 0; i < m; ++i) {
-           dst_idx[i] = {D[i * n + j], i};
-        }
+   if (access(session.outMtxFile.c_str(), F_OK) == 0)
+      std::remove(session.outMtxFile.c_str());

-        // Find the k smallest distances using quickSelectKSmallest
-        quickselect(dst_idx, k);
+   // Load data
+   timer.start();
+   Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
+   if (session.queryMtx)
+      Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
+   timer.stop();
+   timer.print_dt("Load hdf5 files");

-        // Sort the k smallest results by distance for consistency
-        std::sort(dst_idx.begin(), dst_idx.end());
+   logger << "Start knnsearch ...";
+   timer.start();
+   if (session.queryMtx)
+      knnsearch(Corpus, Query, session.k, Idx, Dst);
+   else
+      knnsearch(Corpus, Corpus, session.k, Idx, Dst);
+   timer.stop();
+   logger << " Done" << logger.endl;
+   timer.print_dt("knnsearch");

-        // Store the indices and distances
-        for (int i = 0; i < k; ++i) {
-            idx[j][i] = dst_idx[i].second;
-            dst[j][i] = dst_idx[i].first;
-        }
-    }
-}

-int main(){
-
-   int m = 5; // Number of points in C (corpus)
-   int n = 3; // Number of points in Q (query)
-   int d = 2; // Dimensions
-   int k = 2; // Number of nearest neighbors to find
-
-   double C[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // m x d matrix
-   double Q[] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5};                     // n x d matrix
-
-   std::vector<std::vector<int>> idx;
-   std::vector<std::vector<double>> dst;
-
-   knnsearch(C, Q, m, n, d, k, idx, dst);
-
-   // Print results
-   for (int i = 0; i < n; ++i) {
-      std::cout << "Query point " << i << ":\n";
-       for (int j = 0; j < k; ++j) {
-          std::cout << "  Neighbor " << j <<": Index = " << idx[i][j] <<", Distance = " << dst[i][j] << '\n';
-       }
-   }
+   // Store data
+   timer.start();
+   Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
+   Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
+   timer.stop();
+   timer.print_dt("Store hdf5 files");

   return 0;
 }
+catch (std::exception& e) {
+   //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
+   std::cerr << "Error: " << e.what() << '\n';
+   exit(1);
+}
+
+