A V0 implementation tested against matlab
Supports: - HDF5 file load and store - Precision timing of the process to stdout - logging (verbose mode) to stdout - Command line arguments and help
This commit is contained in:
parent
7c88260900
commit
f591127e16
4
.gitignore → homework_1/.gitignore
vendored
4
.gitignore → homework_1/.gitignore
vendored
@ -3,6 +3,7 @@ bin/
|
||||
out/
|
||||
mat/
|
||||
mtx/
|
||||
.unused/
|
||||
|
||||
# hpc
|
||||
exclude
|
||||
@ -13,6 +14,3 @@ hpc_auth_sync.sh
|
||||
.cproject
|
||||
.settings/
|
||||
|
||||
# matlab
|
||||
*.m~
|
||||
|
750
homework_1/Libs/matrix/include/matrix.hpp
Normal file
750
homework_1/Libs/matrix/include/matrix.hpp
Normal file
@ -0,0 +1,750 @@
|
||||
/**
|
||||
* \file matrix.hpp
|
||||
* \brief A matrix abstraction implementation
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
*/
|
||||
#ifndef MATRIX_HPP_
|
||||
#define MATRIX_HPP_
|
||||
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
namespace mtx {
|
||||
|
||||
using std::size_t;
|
||||
|
||||
/*
|
||||
* Small helper to strip types
|
||||
*/
|
||||
template<typename T>
|
||||
struct remove_cvref {
|
||||
typedef std::remove_cv_t<std::remove_reference_t<T>> type;
|
||||
};
|
||||
template<typename T>
|
||||
using remove_cvref_t = typename remove_cvref<T>::type;
|
||||
|
||||
/*!
|
||||
* Enumerator to denote the storage type of the array to use.
|
||||
*/
|
||||
enum class MatrixType {
|
||||
DENSE, /*!< Matrix is dense */
|
||||
SPARSE, /*!< Matrix is sparse */
|
||||
};
|
||||
|
||||
/*!
|
||||
* Enumerator to denote the storage type of the array to use.
|
||||
*/
|
||||
enum class MatrixOrder {
|
||||
COLMAJOR, /*!< Matrix is column major */
|
||||
ROWMAJOR, /*!< Matrix is row major */
|
||||
};
|
||||
|
||||
/*
|
||||
* Forward type declarations
|
||||
*/
|
||||
|
||||
template<typename MatrixType> struct MatCol;
|
||||
template<typename MatrixType> struct MatRow;
|
||||
template<typename MatrixType> struct MatVal;
|
||||
|
||||
/*!
|
||||
* A 2-D matrix functionality over a 1-D array
|
||||
*
|
||||
* This is a very thin abstraction layer over a native array.
|
||||
* This is tested using compiler explorer and our template produce
|
||||
* almost identical assembly.
|
||||
*
|
||||
* The penalty hit we have is due to the fact that we use a one dimension array
|
||||
* and we have to calculate the actual position from an (i,j) pair.
|
||||
* The use of 1D array was our intention from the beginning, so the penalty
|
||||
* was pretty much unavoidable.
|
||||
*
|
||||
* \tparam DataType The underling data type of the array
|
||||
* \tparam IndexType The underling type for the index variables and sizes
|
||||
* \tparam Type The storage type of the array
|
||||
* \arg FULL For full matrix
|
||||
* \arg SYMMETRIC For symmetric matrix (we use only the lower part)
|
||||
*/
|
||||
template<typename DataType,
|
||||
typename IndexType = size_t,
|
||||
MatrixType Type = MatrixType::DENSE,
|
||||
MatrixOrder Order = MatrixOrder::ROWMAJOR,
|
||||
bool Symmetric = false>
|
||||
struct Matrix {
|
||||
|
||||
using dataType = DataType; //!< meta:export of underling data type
|
||||
using indexType = IndexType; //!< meta:export of underling index type
|
||||
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
|
||||
static constexpr MatrixType matrixType = Type; //!< meta:export of array type
|
||||
static constexpr bool symmetric = Symmetric; //!< meta:export symmetric flag
|
||||
/*!
|
||||
* \name Obj lifetime
|
||||
*/
|
||||
//! @{
|
||||
|
||||
//! Construct an empty matrix with dimensions rows x columns
|
||||
Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept :
|
||||
m_(capacity(rows, columns)), rows_(rows), cols_(columns) { }
|
||||
|
||||
//! Construct a matrix by copying existing data with dimensions rows x columns
|
||||
Matrix(DataType* data, IndexType rows, IndexType columns) noexcept :
|
||||
m_(data, data + capacity(rows, columns)), rows_(rows), cols_(columns) { }
|
||||
|
||||
//! Construct a matrix using an initializer list
|
||||
Matrix(IndexType rows, IndexType columns, std::initializer_list<DataType> list)
|
||||
: m_(list), rows_(rows), cols_(columns) {
|
||||
if (list.size() != capacity(rows, columns)) {
|
||||
throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
|
||||
}
|
||||
}
|
||||
|
||||
//! move ctor
|
||||
Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
|
||||
//! move
|
||||
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
|
||||
Matrix(const Matrix& m) = delete; //!< No copy ctor
|
||||
Matrix& operator=(const Matrix& m) = delete; //!< No copy
|
||||
//! @}
|
||||
|
||||
//! \name Data exposure
|
||||
//! @{
|
||||
|
||||
|
||||
//! Get/Set the size of each dimension
|
||||
IndexType rows() const noexcept { return rows_; }
|
||||
IndexType columns() const noexcept { return cols_; }
|
||||
|
||||
//! Get the interface size of the Matrix (what appears to be the size)
|
||||
IndexType size() const {
|
||||
return rows_ * cols_;
|
||||
}
|
||||
//! Set the interface size of the Matrix (what appears to be the size)
|
||||
IndexType resize(IndexType rows, IndexType columns) {
|
||||
rows_ = rows;
|
||||
cols_ = columns;
|
||||
m_.reserve(capacity(rows_, cols_));
|
||||
return capacity(rows_, cols_);
|
||||
}
|
||||
|
||||
//! Actual memory capacity of the symmetric matrix
|
||||
static constexpr IndexType capacity(IndexType M, IndexType N) {
|
||||
if constexpr (Symmetric)
|
||||
return (M+1)*N/2;
|
||||
else
|
||||
return M*N;
|
||||
}
|
||||
|
||||
/*
|
||||
* virtual 2D accessors
|
||||
*/
|
||||
DataType get (IndexType i, IndexType j) {
|
||||
if constexpr (Symmetric) {
|
||||
auto T = [](size_t i)->size_t { return i*(i+1)/2; }; // Triangular number of i
|
||||
if constexpr (Order == MatrixOrder::COLMAJOR) {
|
||||
// In column major we use the lower triangle of the matrix
|
||||
if (i>=j) return m_[j*rows_ - T(j) + i]; // Lower, use our notation
|
||||
else return m_[i*rows_ - T(i) + j]; // Upper, use opposite index
|
||||
}
|
||||
else {
|
||||
// In row major we use the upper triangle of the matrix
|
||||
if (i<=j) return m_[i*cols_ - T(i) + j]; // Upper, use our notation
|
||||
else return m_[j*cols_ - T(j) + i]; // Lower, use opposite index
|
||||
}
|
||||
}
|
||||
else {
|
||||
if constexpr (Order == MatrixOrder::COLMAJOR)
|
||||
return m_[i + j*rows_];
|
||||
else
|
||||
return m_[i*cols_ + j];
|
||||
}
|
||||
}
|
||||
|
||||
DataType set (DataType v, IndexType i, IndexType j) {
|
||||
if constexpr (Symmetric) {
|
||||
auto T = [](size_t i)->size_t { return i*(i+1)/2; }; // Triangular number of i
|
||||
if constexpr (Order == MatrixOrder::COLMAJOR) {
|
||||
// In column major we use the lower triangle of the matrix
|
||||
if (i>=j) return m_[j*rows_ - T(j) + i] = v; // Lower, use our notation
|
||||
else return m_[i*rows_ - T(i) + j] = v; // Upper, use opposite index
|
||||
}
|
||||
else {
|
||||
// In row major we use the upper triangle of the matrix
|
||||
if (i<=j) return m_[i*cols_ - T(i) + j] = v; // Upper, use our notation
|
||||
else return m_[j*cols_ - T(j) + i] = v; // Lower, use opposite index
|
||||
}
|
||||
}
|
||||
else {
|
||||
if constexpr (Order == MatrixOrder::COLMAJOR)
|
||||
return m_[i + j*rows_] = v;
|
||||
else
|
||||
return m_[i*cols_ + j] = v;
|
||||
}
|
||||
}
|
||||
// DataType operator()(IndexType i, IndexType j) { return get(i, j); }
|
||||
/*!
|
||||
* Return a proxy MatVal object with read and write capabilities.
|
||||
* @param i The row number
|
||||
* @param j The column number
|
||||
* @return tHE MatVal object
|
||||
*/
|
||||
MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
|
||||
return MatVal<Matrix>(this, get(i, j), i, j);
|
||||
}
|
||||
|
||||
// a basic serial iterator support
|
||||
DataType* data() noexcept { return m_.data(); }
|
||||
// IndexType begin_idx() noexcept { return 0; }
|
||||
// IndexType end_idx() noexcept { return capacity(rows_, cols_); }
|
||||
|
||||
const DataType* data() const noexcept { return m_.data(); }
|
||||
const IndexType begin_idx() const noexcept { return 0; }
|
||||
const IndexType end_idx() const noexcept { return capacity(rows_, cols_); }
|
||||
//! @}
|
||||
|
||||
/*!
|
||||
* \name Safe iteration API
|
||||
*
|
||||
* This api automates the iteration over the array based on
|
||||
* MatrixType
|
||||
*/
|
||||
//! @{
|
||||
template<typename F, typename... Args>
|
||||
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
|
||||
for (IndexType it=begin ; it<end ; ++it) {
|
||||
std::forward<F>(lambda)(std::forward<Args>(args)..., it);
|
||||
}
|
||||
}
|
||||
//! @}
|
||||
|
||||
//
|
||||
void swap(Matrix& src) noexcept {
|
||||
std::swap(m_, src.m_);
|
||||
std::swap(rows_, src.rows_);
|
||||
std::swap(cols_, src.cols_);
|
||||
}
|
||||
private:
|
||||
//! move helper
|
||||
void moves(Matrix&& src) noexcept {
|
||||
m_ = std::move(src.m_);
|
||||
rows_ = std::move(src.rows_);
|
||||
cols_ = std::move(src.cols_);
|
||||
}
|
||||
|
||||
std::vector<DataType> m_ {}; //!< Pointer to actual data.
|
||||
IndexType rows_{}; //!< the virtual size of rows.
|
||||
IndexType cols_{}; //!< the virtual size of columns.
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A simple sparse matrix specialization.
|
||||
*
|
||||
* We use CSC format and provide get/set functionalities for each (i,j) item
|
||||
* on the matrix. We also provide a () overload using a proxy MatVal object.
|
||||
* This way the user can:
|
||||
* \code
|
||||
* auto v = A(3,4);
|
||||
* A(3, 4) = 7;
|
||||
* \endcode
|
||||
*
|
||||
* We also provide getCol() and getRow() functions witch return a viewer/iterator to rows and
|
||||
* columns of the matrix. In the case of a symmetric matrix instead of a row we return the
|
||||
* equivalent column. This way we gain speed due to CSC format nature.
|
||||
*
|
||||
* @tparam DataType The type for values
|
||||
* @tparam IndexType The type for indexes
|
||||
* @tparam Type The Matrix type (FULL or SYMMETRIC)
|
||||
*/
|
||||
template<typename DataType, typename IndexType,
|
||||
MatrixOrder Order,
|
||||
bool Symmetric>
|
||||
struct Matrix<DataType, IndexType, MatrixType::SPARSE, Order, Symmetric> {
|
||||
|
||||
using dataType = DataType; //!< meta:export of underling data type
|
||||
using indexType = IndexType; //!< meta:export of underling index type
|
||||
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
|
||||
static constexpr MatrixType matrixType = MatrixType::SPARSE; //!< meta:export of array type
|
||||
static constexpr bool symmetric = Symmetric; //!< meta:export symmetric flag
|
||||
|
||||
friend struct MatCol<Matrix>;
|
||||
friend struct MatRow<Matrix>;
|
||||
friend struct MatVal<Matrix>;
|
||||
|
||||
/*!
|
||||
* \name Obj lifetime
|
||||
*/
|
||||
//! @{
|
||||
|
||||
//! Default ctor with optional memory allocations
|
||||
Matrix(IndexType n=IndexType{}) noexcept:
|
||||
values{},
|
||||
rows{},
|
||||
col_ptr((n)? n+1:2, IndexType{}),
|
||||
N(n),
|
||||
NNZ(0) { }
|
||||
|
||||
//! A ctor using csc array data
|
||||
Matrix(IndexType n, IndexType nnz, const IndexType* row, const IndexType* col) noexcept:
|
||||
values(nnz, 1),
|
||||
rows(row, row+nnz),
|
||||
col_ptr(col, col+n+1),
|
||||
N(n),
|
||||
NNZ(nnz) { }
|
||||
|
||||
//! ctor using csc array data with value array
|
||||
Matrix(IndexType n, IndexType nnz, const DataType* v, const IndexType* row, const IndexType* col) noexcept:
|
||||
values(v, v+nnz),
|
||||
rows(row, row+nnz),
|
||||
col_ptr(col, col+n+1),
|
||||
N(n),
|
||||
NNZ(nnz) { }
|
||||
|
||||
//! ctor vectors of row/col and default value for values array
|
||||
Matrix(IndexType n, IndexType nnz, const DataType v,
|
||||
const std::vector<IndexType>& row, const std::vector<IndexType>& col) noexcept:
|
||||
values(nnz, v),
|
||||
rows (row),
|
||||
col_ptr(col),
|
||||
N(n),
|
||||
NNZ(nnz) { }
|
||||
|
||||
//! move ctor
|
||||
Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
|
||||
//! move
|
||||
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
|
||||
Matrix(const Matrix& m) = delete; //!< make sure there are no copies
|
||||
Matrix& operator=(const Matrix& m) = delete; //!< make sure there are no copies
|
||||
//! @}
|
||||
|
||||
//! \name Data exposure
|
||||
//! @{
|
||||
|
||||
//! \return the dimension of the matrix
|
||||
IndexType size() noexcept { return N; }
|
||||
//! After construction size configuration tool
|
||||
IndexType resize(IndexType n) {
|
||||
col_ptr.resize(n+1);
|
||||
return N = n;
|
||||
}
|
||||
//! \return the NNZ of the matrix
|
||||
IndexType capacity() noexcept { return NNZ; }
|
||||
//! After construction NNZ size configuration tool
|
||||
IndexType capacity(IndexType nnz) noexcept {
|
||||
values.reserve(nnz);
|
||||
rows.reserve(nnz);
|
||||
return NNZ;
|
||||
}
|
||||
// getters for row arrays of the struct (unused)
|
||||
std::vector<DataType>& getValues() noexcept { return values; }
|
||||
std::vector<IndexType>& getRows() noexcept { return rows; }
|
||||
std::vector<IndexType>& getCols() noexcept { return col_ptr; }
|
||||
|
||||
/*!
|
||||
* Return a proxy MatVal object with read and write capabilities.
|
||||
* @param i The row number
|
||||
* @param j The column number
|
||||
* @return tHE MatVal object
|
||||
*/
|
||||
MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
|
||||
return MatVal<Matrix>(this, get(i, j), i, j);
|
||||
}
|
||||
|
||||
/*!
|
||||
* A read item functionality using binary search to find the correct row
|
||||
*
|
||||
* @param i The row number
|
||||
* @param j The column number
|
||||
* @return The value of the item or DataType{} if is not present.
|
||||
*/
|
||||
DataType get(IndexType i, IndexType j) noexcept {
|
||||
IndexType idx; bool found;
|
||||
std::tie(idx, found) =find_idx(rows, col_ptr[j], col_ptr[j+1], i);
|
||||
return (found) ? values[idx] : 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
* A write item functionality.
|
||||
*
|
||||
* First we search if the matrix has already a value in (i, j) position.
|
||||
* If so we just change it to a new value. If not we add the item on the matrix.
|
||||
*
|
||||
* @note
|
||||
* When change a value, we don't increase the NNZ value of the struct. We expect the user has already
|
||||
* change the NNZ value to the right one using @see capacity() function. When adding a value we
|
||||
* increase the NNZ.
|
||||
*
|
||||
* @param i The row number
|
||||
* @param j The column number
|
||||
* @return The new value of the item .
|
||||
*/
|
||||
DataType set(DataType v, IndexType i, IndexType j) {
|
||||
IndexType idx; bool found;
|
||||
std::tie(idx, found) = find_idx(rows, col_ptr[j], col_ptr[j+1], i);
|
||||
if (found)
|
||||
return values[idx] = v; // we don't change NNZ even if we write "0"
|
||||
else {
|
||||
values.insert(values.begin()+idx, v);
|
||||
rows.insert(rows.begin()+idx, i);
|
||||
std::transform(col_ptr.begin()+j+1, col_ptr.end(), col_ptr.begin()+j+1, [](IndexType it) {
|
||||
return ++it;
|
||||
});
|
||||
++NNZ; // we increase the NNZ even if we write "0"
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get a view of a CSC column
|
||||
* @param j The column to get
|
||||
* @return The MatCol object @see MatCol
|
||||
*/
|
||||
MatCol<Matrix> getCol(IndexType j) noexcept {
|
||||
return MatCol<Matrix>(this, col_ptr[j], col_ptr[j+1]);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get a view of a CSC row
|
||||
*
|
||||
* In case of a SYMMETRIC matrix we can return a column instead.
|
||||
*
|
||||
* @param j The row to get
|
||||
* @return On symmetric matrix MatCol otherwise a MatRow
|
||||
*/
|
||||
|
||||
MatCol<Matrix> getRow(IndexType i) noexcept {
|
||||
if constexpr (Symmetric)
|
||||
return getCol(i);
|
||||
else
|
||||
return MatRow<Matrix>(this, i);
|
||||
}
|
||||
|
||||
// values only iterator support
|
||||
DataType* begin() noexcept { return values.begin(); }
|
||||
DataType* end() noexcept { return values.end(); }
|
||||
//! @}
|
||||
|
||||
//! A small iteration helper
|
||||
template<typename F, typename... Args>
|
||||
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
|
||||
for (IndexType it=begin ; it<end ; ++it) {
|
||||
std::forward<F>(lambda)(std::forward<Args>(args)..., it);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/*!
|
||||
* A small binary search implementation using index for begin-end instead of iterators.
|
||||
*
|
||||
* \param v Reference to vector to search
|
||||
* \param begin The vector's index to begin
|
||||
* \param end The vector's index to end
|
||||
* \param match What to search
|
||||
* \return An <index, status> pair.
|
||||
* index is the index of the item or end if not found
|
||||
* status is true if found, false otherwise
|
||||
*/
|
||||
std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
|
||||
if (v.capacity() != 0 && begin < end) {
|
||||
IndexType b = begin, e = end-1;
|
||||
while (b <= e) {
|
||||
IndexType m = (b+e)/2;
|
||||
if (v[m] == match) return std::make_pair(m, true);
|
||||
else if (b >= e) return std::make_pair(end, false);
|
||||
else {
|
||||
if (v[m] < match) b = m +1;
|
||||
else e = m -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::make_pair(end, false);
|
||||
}
|
||||
|
||||
// move helper
|
||||
void moves(Matrix&& src) noexcept {
|
||||
values = std::move(src.values);
|
||||
rows = std::move(src.rows);
|
||||
col_ptr = std::move(src.col_ptr);
|
||||
N = std::move(src.N); // redundant for primitives
|
||||
NNZ = std::move(src.NNZ); //
|
||||
}
|
||||
//! \name Data
|
||||
//! @{
|
||||
std::vector<DataType> values {}; //!< vector to store the values of the matrix
|
||||
std::vector<IndexType> rows{}; //!< vector to store the row information
|
||||
std::vector<IndexType> col_ptr{1,0}; //!< vector to store the column pointers
|
||||
IndexType N{0}; //!< The dimension of the matrix (square)
|
||||
IndexType NNZ{0}; //!< The NNZ (capacity of the matrix)
|
||||
//! @}
|
||||
};
|
||||
|
||||
|
||||
/*!
|
||||
* A view/iterator hybrid object for Matrix columns.
|
||||
*
|
||||
* This object provides access to a column of a Matrix. The public functionalities
|
||||
* allow data access using indexes instead of iterators. We prefer indexes over iterators
|
||||
* because we can apply the same index to different inner vector of Matrix without conversion.
|
||||
*
|
||||
* @tparam DataType
|
||||
* @tparam IndexType
|
||||
*/
|
||||
template<typename MatrixType>
|
||||
struct MatCol {
|
||||
using owner_t = MatrixType;
|
||||
|
||||
using DataType = typename MatrixType::dataType;
|
||||
using IndexType = typename MatrixType::indexType;
|
||||
|
||||
/*!
|
||||
* ctor using column pointers for begin-end. own is pointer to Matrix.
|
||||
*/
|
||||
MatCol(owner_t* own, const IndexType begin, const IndexType end) noexcept :
|
||||
owner_(own), index_(begin), begin_(begin), end_(end) {
|
||||
vindex_ = vIndexCalc(index_);
|
||||
}
|
||||
MatCol() = default;
|
||||
MatCol(const MatCol&) = delete; //!< make sure there are no copies
|
||||
MatCol& operator=(const MatCol&)= delete; //!< make sure there are no copies
|
||||
MatCol(MatCol&&) = default;
|
||||
MatCol& operator=(MatCol&&) = default;
|
||||
|
||||
//! a simple dereference operator, like an iterator
|
||||
DataType operator* () {
|
||||
return get();
|
||||
}
|
||||
//! Increment operator acts on index(), like an iterator
|
||||
MatCol& operator++ () { advance(); return *this; }
|
||||
MatCol& operator++ (int) { MatCol& p = *this; advance(); return p; }
|
||||
|
||||
//! () operator acts as member access (like a view)
|
||||
DataType operator()(IndexType x) {
|
||||
return (x == index())? get() : DataType{};
|
||||
}
|
||||
//! = operator acts as member assignment (like a view)
|
||||
DataType operator= (DataType v) { return owner_->values[index_] = v; }
|
||||
// iterator like handlers
|
||||
// these return a virtual index value based on the items position on the full matrix
|
||||
// but the move of the index is just a ++ away.
|
||||
IndexType index() noexcept { return vindex_; }
|
||||
const IndexType index() const noexcept { return vindex_; }
|
||||
IndexType begin() noexcept { return vIndexCalc(begin_); }
|
||||
const IndexType begin() const noexcept { return vIndexCalc(begin_); }
|
||||
IndexType end() noexcept { return owner_->N; }
|
||||
const IndexType end() const noexcept { return owner_->N; }
|
||||
|
||||
/*!
|
||||
* Multiplication operator
|
||||
*
|
||||
* We follow only the non-zero values and multiply only the common indexes.
|
||||
*
|
||||
* @tparam C Universal reference for the type right half site column
|
||||
*
|
||||
* @param c The right hand site matrix
|
||||
* @return The value of the inner product of two vectors
|
||||
* @note The time complexity is \$ O(nnz1+nnz2) \$.
|
||||
* Where the nnz is the max NNZ elements of the column of the matrix
|
||||
*/
|
||||
template <typename C>
|
||||
DataType operator* (C&& c) {
|
||||
static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
|
||||
DataType v{};
|
||||
while (index() != end() && c.index() != c.end()) {
|
||||
if (index() < c.index()) advance(); // advance me
|
||||
else if (index() > c.index()) ++c; // advance other
|
||||
else { //index() == c.index()
|
||||
v += get() * *c; // multiply and advance both
|
||||
++c;
|
||||
advance();
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
private:
|
||||
//! small tool to increase the index pointers to Matrix
|
||||
void advance() noexcept {
|
||||
++index_;
|
||||
vindex_ = vIndexCalc(index_);
|
||||
}
|
||||
//! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
|
||||
IndexType vIndexCalc(IndexType idx) {
|
||||
return (idx < end_) ? owner_->rows[idx] : end();
|
||||
}
|
||||
//! small get tool
|
||||
DataType get() { return owner_->values[index_]; }
|
||||
|
||||
owner_t* owner_ {nullptr}; //!< Pointer to owner Matrix. MatCol is just a view
|
||||
IndexType vindex_ {IndexType{}}; //!< Virtual index of full matrix
|
||||
IndexType index_ {IndexType{}}; //!< index to Matrix::rows
|
||||
IndexType begin_ {IndexType{}}; //!< beginning index of the column in Matrix::rows
|
||||
IndexType end_ {IndexType{}}; //!< ending index of the column in Matrix::rows
|
||||
};
|
||||
|
||||
/*!
|
||||
* A view/iterator hybrid object for Matrix rows.
|
||||
*
|
||||
* This object provides access to a column of a Matrix. The public functionalities
|
||||
* allow data access using indexes instead of iterators. We prefer indexes over iterators
|
||||
* because we can apply the same index to different inner vector of Matrix without conversion.
|
||||
*
|
||||
* @tparam DataType
|
||||
* @tparam IndexType
|
||||
*/
|
||||
template<typename MatrixType>
|
||||
struct MatRow {
|
||||
using owner_t = MatrixType;
|
||||
|
||||
using DataType = typename MatrixType::dataType;
|
||||
using IndexType = typename MatrixType::indexType;
|
||||
|
||||
/*!
|
||||
* ctor using virtual full matrix row index. own is pointer to Matrix.
|
||||
*/
|
||||
MatRow(owner_t* own, const IndexType row) noexcept :
|
||||
owner_(own), vindex_(IndexType{}), row_(row), index_(IndexType{}),
|
||||
begin_(IndexType{}), end_(owner_->NNZ) {
|
||||
// place begin
|
||||
while(begin_ != end_ && owner_->rows[begin_] != row_)
|
||||
++begin_;
|
||||
// place index_ and vindex_
|
||||
if (owner_->rows[index_] != row_)
|
||||
advance();
|
||||
}
|
||||
MatRow() = default;
|
||||
MatRow(const MatRow&) = delete; //!< make sure there are no copies
|
||||
MatRow& operator=(const MatRow&)= delete; //!< make sure there are no copies
|
||||
MatRow(MatRow&&) = default;
|
||||
MatRow& operator=(MatRow&&) = default;
|
||||
|
||||
//! a simple dereference operator, like an iterator
|
||||
DataType operator* () {
|
||||
return get();
|
||||
}
|
||||
//! Increment operator acts on index(), like an iterator
|
||||
//! here the increment is a O(N) process.
|
||||
MatRow& operator++ () { advance(); return *this; }
|
||||
MatRow& operator++ (int) { MatRow& p = *this; advance(); return p; }
|
||||
|
||||
//! () operator acts as member access (like a view)
|
||||
DataType operator()(IndexType x) {
|
||||
return (x == index())? get() : DataType{};
|
||||
}
|
||||
//! = operator acts as member assignment (like a view)
|
||||
DataType operator= (DataType v) { return owner_->values[index_] = v; }
|
||||
// iterator like handlers
|
||||
// these return a virtual index value based on the items position on the full matrix
|
||||
// but the move of the index is just a ++ away.
|
||||
IndexType index() noexcept { return vindex_; }
|
||||
const IndexType index() const noexcept { return vindex_; }
|
||||
IndexType begin() noexcept { return vIndexCalc(begin_); }
|
||||
const IndexType begin() const noexcept { return vIndexCalc(begin_); }
|
||||
IndexType end() noexcept { return owner_->N; }
|
||||
const IndexType end() const noexcept { return owner_->N; }
|
||||
|
||||
/*!
|
||||
* Multiplication operator
|
||||
*
|
||||
* We follow only the non-zero values and multiply only the common indexes.
|
||||
*
|
||||
* @tparam C Universal reference for the type right half site column
|
||||
*
|
||||
* @param c The right hand site matrix
|
||||
* @return The value of the inner product of two vectors
|
||||
* @note The time complexity is \$ O(N+nnz2) \$ and way heavier the ColxCol multiplication.
|
||||
* Where the nnz is the max NNZ elements of the column of the matrix
|
||||
*/
|
||||
template <typename C>
|
||||
DataType operator* (C&& c) {
|
||||
static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
|
||||
DataType v{};
|
||||
while (index() != end() && c.index() != c.end()) {
|
||||
if (index() < c.index()) advance(); // advance me
|
||||
else if (index() > c.index()) ++c; // advance other
|
||||
else { //index() == c.index()
|
||||
v += get() * *c; // multiply and advance both
|
||||
++c;
|
||||
advance();
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
private:
|
||||
//! small tool to increase the index pointers to Matrix matrix
|
||||
//! We have to search the entire rows vector in Matrix to find the next
|
||||
//! virtual row position.
|
||||
//! time complexity O(N)
|
||||
void advance() noexcept {
|
||||
do
|
||||
++index_;
|
||||
while(index_ != end_ && owner_->rows[index_] != row_);
|
||||
vindex_ = vIndexCalc(index_);
|
||||
}
|
||||
//! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
|
||||
IndexType vIndexCalc(IndexType idx) {
|
||||
for(IndexType i =0 ; i<(owner_->N+1) ; ++i)
|
||||
if (idx < owner_->col_ptr[i])
|
||||
return i-1;
|
||||
return end();
|
||||
}
|
||||
//! small get tool
|
||||
DataType get() { return owner_->values[index_]; }
|
||||
|
||||
owner_t* owner_ {nullptr}; //!< Pointer to owner Matrix. MatCol is just a view
|
||||
IndexType vindex_ {IndexType{}}; //!< Virtual index of full matrix
|
||||
IndexType row_ {IndexType{}}; //!< The virtual full matrix row of the object
|
||||
IndexType index_ {IndexType{}}; //!< index to Matrix::rows
|
||||
IndexType begin_ {IndexType{}}; //!< beginning index of the column in Matrix::rows
|
||||
IndexType end_ {IndexType{}}; //!< ending index of the column in Matrix::rows
|
||||
};
|
||||
|
||||
/*!
|
||||
* A proxy Matrix value object/view.
|
||||
*
|
||||
* This object acts as proxy to provide read/write access to an Matrix item.
|
||||
*
|
||||
* @tparam DataType The type of the values of the Matrix matrix
|
||||
* @tparam IndexType The type of the indexes of the Matrix matrix
|
||||
*/
|
||||
template<typename MatrixType>
|
||||
struct MatVal {
|
||||
using owner_t = MatrixType;
|
||||
|
||||
using DataType = typename MatrixType::dataType;
|
||||
using IndexType = typename MatrixType::indexType;
|
||||
|
||||
//!< ctor using all value-row-column data, plus a pointer to owner Matrix object
|
||||
MatVal(owner_t* own, DataType v, IndexType i, IndexType j) :
|
||||
owner_(own), v_(v), i_(i), j_(j) { }
|
||||
MatVal() = default;
|
||||
MatVal(const MatVal&) = delete; //!< make sure there are no copies
|
||||
MatVal& operator=(const MatVal&) = delete; //!< make sure there are no copies
|
||||
MatVal(MatVal&&) = default;
|
||||
MatVal& operator=(MatVal&&) = default;
|
||||
|
||||
//! Operator to return the DataType value implicitly
|
||||
operator DataType() { return v_; }
|
||||
//! Operator to write back to owner the assigned value
|
||||
//! for ex: A(2,3) = 5;
|
||||
MatVal& operator=(DataType v) {
|
||||
v_ = v;
|
||||
owner_->set(v_, i_, j_);
|
||||
return *this;
|
||||
}
|
||||
private:
|
||||
owner_t* owner_{nullptr}; //!< Pointer to owner Matrix. MatVal is just a view.
|
||||
DataType v_{DataType{}}; //!< The value of the row-column pair (for speed)
|
||||
IndexType i_{IndexType{}}; //!< The row
|
||||
IndexType j_{IndexType{}}; //!< the column
|
||||
};
|
||||
|
||||
|
||||
} // namespace mtx
|
||||
|
||||
|
||||
#endif /* MATRIX_HPP_ */
|
@ -26,7 +26,11 @@ TARGET := knnsearch
|
||||
SRC_DIR_LIST := src
|
||||
# Include directories list(space seperated). Makefile-relative path.
|
||||
INC_DIR_LIST := inc \
|
||||
src
|
||||
src \
|
||||
Libs/matrix/include/ \
|
||||
/usr/include/hdf5/serial/
|
||||
# Libs/MATLAB/R2019b/include/ \
|
||||
|
||||
# Exclude files list(space seperated). Filenames only.
|
||||
# EXC_FILE_LIST := bad.cpp old.cpp
|
||||
|
||||
@ -49,7 +53,11 @@ PRE_DEFS :=
|
||||
|
||||
# ============== Linker settings ==============
|
||||
# Linker flags (example: -pthread -lm)
|
||||
LDFLAGS := -pthread -lopenblas
|
||||
LDFLAGS := -pthread -lopenblas \
|
||||
-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5
|
||||
# -LLibs/MATLAB/R2019b/bin/ -lmat -lmx -Wl,-rpath,Libs/MATLAB/R2019b/bin/ \
|
||||
# -Wl,-rpath,Libs/unwind/bin/
|
||||
|
||||
# Map output file
|
||||
MAP_FILE := output.map
|
||||
MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
|
||||
@ -184,6 +192,12 @@ local_v0: TARGET := local_v0
|
||||
local_v0: $(BUILD_DIR)/$(TARGET)
|
||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
|
||||
|
||||
local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0
|
||||
local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0
|
||||
local_v0_opt: TARGET := local_v0_opt
|
||||
local_v0_opt: $(BUILD_DIR)/$(TARGET)
|
||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
|
||||
|
||||
local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=4
|
||||
local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=4
|
||||
local_v1: TARGET := local_v1
|
||||
|
75
homework_1/inc/config.h
Normal file
75
homework_1/inc/config.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*!
|
||||
* \file config,h
|
||||
* \brief Build configuration file.
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_H_
|
||||
#define CONFIG_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include <matrix.hpp>
|
||||
|
||||
// HDF5 supported types
|
||||
enum class HDF5_type {
|
||||
SCHAR, CHAR, SHORT, USHORT, INT, UINT, LONG, ULONG, LLONG, ULLONG, FLOAT, DOUBLE
|
||||
};
|
||||
|
||||
/*
|
||||
* Defines for different version of the exercise
|
||||
*/
|
||||
#define V0 0
|
||||
#define V1 1
|
||||
|
||||
|
||||
// Fail-safe version selection
|
||||
#if !defined CODE_VERSION
|
||||
#define CODE_VERSION V1
|
||||
#endif
|
||||
|
||||
// matrix alias template dispatcher based on pre-define flag from compiler (see Makefile)
|
||||
#if CODE_VERSION == V0
|
||||
#define NAMESPACE_VERSION using namespace v0
|
||||
using MatrixDst = mtx::Matrix<double>;
|
||||
using MatrixIdx = mtx::Matrix<uint32_t>;
|
||||
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
|
||||
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
|
||||
#elif CODE_VERSION == V1
|
||||
#define NAMESPACE_VERSION using namespace v1
|
||||
using MatrixDst = mtx::Matrix<double>;
|
||||
using MatrixIdx = mtx::Matrix<uint32_t>;
|
||||
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
|
||||
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//! enumerator for output handling
|
||||
enum class StdOutputMode{ STD, FILE };
|
||||
|
||||
/*!
|
||||
* Session option for each invocation of the executable
|
||||
*/
|
||||
struct session_t {
|
||||
std::string corpusMtxFile {}; //!< corpus matrix file name in HDF5 format
|
||||
std::string corpusDataSet {}; //!< corpus dataset name in HDF5 matrix file
|
||||
std::string queryMtxFile {}; //!< optional query matrix file name in HDF5 format
|
||||
std::string queryDataSet {}; //!< optional query dataset name in HDF5 matrix file
|
||||
bool queryMtx {false}; //!< Flag to indicate that there is a separate query matrix
|
||||
size_t k {1}; //!< The number of nearest neighbors to find
|
||||
std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format
|
||||
std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file
|
||||
std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file
|
||||
std::size_t max_threads {}; //!< Maximum threads to use
|
||||
bool timing {false}; //!< Enable timing prints of the program
|
||||
bool verbose {false}; //!< Flag to enable verbose output to stdout
|
||||
};
|
||||
|
||||
extern session_t session;
|
||||
|
||||
#endif /* CONFIG_H_ */
|
224
homework_1/inc/utils.hpp
Normal file
224
homework_1/inc/utils.hpp
Normal file
@ -0,0 +1,224 @@
|
||||
/**
|
||||
* \file utils.hpp
|
||||
* \brief Utilities header
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
*/
|
||||
#ifndef UTILS_HPP_
|
||||
#define UTILS_HPP_
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <unistd.h>
|
||||
#include <hdf5.h>
|
||||
|
||||
#include <matrix.hpp>
|
||||
#include <config.h>
|
||||
|
||||
/*!
|
||||
* A Logger for entire program.
|
||||
*/
|
||||
struct Log {
|
||||
struct Endl {} endl; //!< a tag object to to use it as a new line request.
|
||||
|
||||
//! We provide logging via << operator
|
||||
template<typename T>
|
||||
Log& operator<< (T&& t) {
|
||||
if (session.verbose) {
|
||||
if (line_) {
|
||||
std::cout << "[Log]: " << t;
|
||||
line_ = false;
|
||||
}
|
||||
else
|
||||
std::cout << t;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
// overload for special end line handling
|
||||
Log& operator<< (Endl e) { (void)e;
|
||||
if (session.verbose) {
|
||||
std::cout << '\n';
|
||||
line_ = true;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
private:
|
||||
bool line_ {true};
|
||||
};
|
||||
|
||||
extern Log logger;
|
||||
|
||||
/*!
|
||||
* A small timing utility based on chrono.
|
||||
*/
|
||||
struct Timing{
|
||||
using Tpoint = std::chrono::steady_clock::time_point;
|
||||
using microseconds = std::chrono::microseconds;
|
||||
using milliseconds = std::chrono::milliseconds;
|
||||
using seconds = std::chrono::seconds;
|
||||
|
||||
//! tool to mark the starting point
|
||||
Tpoint start () noexcept { return start_ = std::chrono::steady_clock::now(); }
|
||||
//! tool to mark the ending point
|
||||
Tpoint stop () noexcept { return stop_ = std::chrono::steady_clock::now(); }
|
||||
|
||||
auto dt () noexcept {
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
|
||||
}
|
||||
//! tool to print the time interval
|
||||
void print_dt (const char* what) noexcept {
|
||||
if (session.timing) {
|
||||
auto t = stop_ - start_;
|
||||
if (std::chrono::duration_cast<microseconds>(t).count() < 10000)
|
||||
std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
|
||||
else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
|
||||
std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
|
||||
else
|
||||
std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
|
||||
}
|
||||
}
|
||||
private:
|
||||
Tpoint start_;
|
||||
Tpoint stop_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct Mtx {
|
||||
|
||||
template<typename MatrixType, HDF5_type Type>
|
||||
static void load(const std::string& filename, const std::string& dataset, MatrixType& matrix) {
|
||||
|
||||
hid_t file_id{}, dataset_id{}, dataspace_id{};
|
||||
herr_t read_st;
|
||||
do {
|
||||
// Open file
|
||||
logger << "Load HDF5 file: " << filename << " Dataset: " << dataset << "...";
|
||||
if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
|
||||
// Open dataset
|
||||
if ((dataset_id = H5Dopen2(file_id, dataset.c_str(), H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
|
||||
// Get dataspace and allocate memory for read buffer
|
||||
if ((dataspace_id = H5Dget_space(dataset_id)) < 0)
|
||||
break;
|
||||
hsize_t dims[2];
|
||||
H5Sget_simple_extent_dims(dataspace_id, dims, NULL);
|
||||
matrix.resize(dims[0], dims[1]);
|
||||
|
||||
// Read the dataset
|
||||
// ToDo: Come up with a better way to do this
|
||||
if constexpr (Type == HDF5_type::DOUBLE) {
|
||||
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
|
||||
break;
|
||||
}
|
||||
else if (Type == HDF5_type::FLOAT) {
|
||||
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
|
||||
break;
|
||||
}
|
||||
else if (Type == HDF5_type::UINT) {
|
||||
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
|
||||
break;
|
||||
}
|
||||
else if (Type == HDF5_type::INT) {
|
||||
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
|
||||
break;
|
||||
}
|
||||
// Done
|
||||
H5Dclose(dataset_id);
|
||||
H5Sclose(dataspace_id);
|
||||
H5Fclose(file_id);
|
||||
logger << " Done" << logger.endl;
|
||||
return;
|
||||
} while (0);
|
||||
|
||||
// Error: close everything (if possible) and return false
|
||||
H5Dclose(dataset_id);
|
||||
H5Sclose(dataspace_id);
|
||||
H5Fclose(file_id);
|
||||
throw std::runtime_error("Cannot store to " + filename + " dataset:" + dataset + '\n');
|
||||
}
|
||||
|
||||
|
||||
template<typename MatrixType, HDF5_type Type>
|
||||
static void store(const std::string& filename, const std::string& dataset, MatrixType& matrix) {
|
||||
|
||||
hid_t file_id{}, dataset_id{}, dataspace_id{};
|
||||
herr_t write_st;
|
||||
do {
|
||||
// Try to open the file in read-write mode
|
||||
logger << "Store HDF5 file: " << filename << " Dataset: " << dataset << "...";
|
||||
if (access(session.outMtxFile.c_str(), F_OK) == 0){
|
||||
if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if ((file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
// Create the dataspace for the dataset
|
||||
hsize_t dims[] = { matrix.rows(), matrix.columns() };
|
||||
if ((dataspace_id = H5Screate_simple(2, dims, NULL)) < 0)
|
||||
break;
|
||||
|
||||
// ToDo: Come up with a better way to do this
|
||||
if constexpr (Type == HDF5_type::DOUBLE) {
|
||||
// Create the dataset with default properties
|
||||
if ((dataset_id = H5Dcreate2(
|
||||
file_id, dataset.c_str(), H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
// Write the data to the dataset
|
||||
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
|
||||
break;
|
||||
}
|
||||
else if (Type == HDF5_type::FLOAT) {
|
||||
// Create the dataset with default properties
|
||||
if ((dataset_id = H5Dcreate2(
|
||||
file_id, dataset.c_str(), H5T_NATIVE_FLOAT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
// Write the data to the dataset
|
||||
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
|
||||
break;
|
||||
}
|
||||
else if (Type == HDF5_type::UINT) {
|
||||
// Create the dataset with default properties
|
||||
if ((dataset_id = H5Dcreate2(
|
||||
file_id, dataset.c_str(), H5T_NATIVE_UINT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
// Write the data to the dataset
|
||||
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
|
||||
break;
|
||||
}
|
||||
else if (Type == HDF5_type::INT) {
|
||||
// Create the dataset with default properties
|
||||
if ((dataset_id = H5Dcreate2(
|
||||
file_id, dataset.c_str(), H5T_NATIVE_INT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
|
||||
break;
|
||||
// Write the data to the dataset
|
||||
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
|
||||
break;
|
||||
}
|
||||
// Close the dataset, dataspace, and file
|
||||
H5Dclose(dataset_id);
|
||||
H5Sclose(dataspace_id);
|
||||
H5Fclose(file_id);
|
||||
logger << " Done" << logger.endl;
|
||||
return;
|
||||
} while (0);
|
||||
|
||||
// Error: close everything (if possible) and return false
|
||||
H5Dclose(dataset_id);
|
||||
H5Sclose(dataspace_id);
|
||||
H5Fclose(file_id);
|
||||
throw std::runtime_error("Cannot store " + filename + " with dataset:" + dataset +'\n');
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif /* UTILS_HPP_ */
|
119
homework_1/inc/v0.hpp
Normal file
119
homework_1/inc/v0.hpp
Normal file
@ -0,0 +1,119 @@
|
||||
/**
|
||||
* \file v0.hpp
|
||||
* \brief
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
*/
|
||||
#ifndef V0_HPP_
|
||||
#define V0_HPP_
|
||||
|
||||
#include <cblas.h>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <matrix.hpp>
|
||||
#include <config.h>
|
||||
|
||||
namespace v0 {
|
||||
|
||||
/*!
|
||||
* Function to compute squared Euclidean distances
|
||||
*
|
||||
* \fn void pdist2(const double*, const double*, double*, int, int, int)
|
||||
* \param X m x d matrix (Column major)
|
||||
* \param Y n x d matrix (Column major)
|
||||
* \param D2 m x n matrix to store distances (Column major)
|
||||
* \param m number of rows in X
|
||||
* \param n number of rows in Y
|
||||
* \param d number of columns in both X and Y
|
||||
*/
|
||||
template<typename DataType>
|
||||
void pdist2(const mtx::Matrix<DataType>& X, const mtx::Matrix<DataType>& Y, mtx::Matrix<DataType>& D2) {
|
||||
int M = X.rows();
|
||||
int N = Y.rows();
|
||||
int d = X.columns();
|
||||
|
||||
// Compute the squared norms of each row in X and Y
|
||||
std::vector<DataType> X_norms(M), Y_norms(N);
|
||||
for (int i = 0; i < M ; ++i) {
|
||||
X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
|
||||
}
|
||||
for (int j = 0; j < N ; ++j) {
|
||||
Y_norms[j] = cblas_ddot(d, Y.data() + j * d, 1, Y.data() + j * d, 1);
|
||||
}
|
||||
|
||||
// Compute -2 * X * Y'
|
||||
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, d, -2.0, X.data(), d, Y.data(), d, 0.0, D2.data(), N);
|
||||
|
||||
// Step 3: Add the squared norms to each entry in D2
|
||||
for (int i = 0; i < M ; ++i) {
|
||||
for (int j = 0; j < N; ++j) {
|
||||
D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
|
||||
//D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative
|
||||
D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename DataType, typename IndexType>
|
||||
void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
|
||||
std::nth_element(
|
||||
vec.begin(),
|
||||
vec.begin() + k,
|
||||
vec.end(),
|
||||
[](const std::pair<DataType, IndexType>& a, const std::pair<DataType, IndexType>& b) {
|
||||
return a.first < b.first;
|
||||
});
|
||||
vec.resize(k); // Keep only the k smallest elements
|
||||
}
|
||||
|
||||
/*!
|
||||
* \param C Is a MxD matrix (Corpus)
|
||||
* \param Q Is a NxD matrix (Query)
|
||||
* \param k The number of nearest neighbors needed
|
||||
* \param idx Is the Nxk matrix with the k indexes of the C points, that are
|
||||
* neighbors of the nth point of Q
|
||||
* \param dst Is the Nxk matrix with the k distances to the C points of the nth
|
||||
* point of Q
|
||||
*/
|
||||
template<typename DataType, typename IndexType>
|
||||
void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, int k,
|
||||
mtx::Matrix<IndexType>& idx,
|
||||
mtx::Matrix<DataType>& dst) {
|
||||
|
||||
int M = C.rows();
|
||||
int N = Q.rows();
|
||||
|
||||
mtx::Matrix<DataType> D(M, N);
|
||||
|
||||
pdist2(C, Q, D);
|
||||
|
||||
idx.resize(N, k);
|
||||
dst.resize(N, k);
|
||||
|
||||
for (int j = 0; j < N; ++j) {
|
||||
// Create a vector of pairs (distance, index) for the j-th query
|
||||
std::vector<std::pair<DataType, IndexType>> dst_idx(M);
|
||||
for (int i = 0; i < M; ++i) {
|
||||
dst_idx[i] = {D.data()[i * N + j], i};
|
||||
}
|
||||
// Find the k smallest distances using quickSelectKSmallest
|
||||
quickselect(dst_idx, k);
|
||||
|
||||
// Sort the k smallest results by distance for consistency
|
||||
std::sort(dst_idx.begin(), dst_idx.end());
|
||||
|
||||
// Store the indices and distances
|
||||
for (int i = 0; i < k; ++i) {
|
||||
idx(j, i) = dst_idx[i].second;
|
||||
dst(j, i) = dst_idx[i].first;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* V0_HPP_ */
|
16
homework_1/inc/v1.hpp
Normal file
16
homework_1/inc/v1.hpp
Normal file
@ -0,0 +1,16 @@
|
||||
/**
|
||||
* \file v0.hpp
|
||||
* \brief
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
*/
|
||||
#ifndef V1_HPP_
|
||||
#define V1_HPP_
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* V1_HPP_ */
|
2
homework_1/matlab/.gitignore
vendored
Normal file
2
homework_1/matlab/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
# matlab
|
||||
*.m~
|
@ -1,10 +1,12 @@
|
||||
function [D2] = dist2(X, Y)
|
||||
% Calculates the squares of the distances of X and Y
|
||||
%
|
||||
% X: A mxd array with m d-dimentional points
|
||||
% Y: A nxd array with n d-dimentional points
|
||||
% X: A Mxd array with m d-dimentional points
|
||||
% Y: A Nxd array with n d-dimentional points
|
||||
% d: Must be the same
|
||||
|
||||
%
|
||||
% D2: The MxN matrix with the distances
|
||||
%
|
||||
[~, d1] = size(X);
|
||||
[~, d2] = size(Y);
|
||||
if d1 ~= d2
|
||||
|
@ -1,8 +1,12 @@
|
||||
function [idx, dst] = knnsearch2(C, Q, k)
|
||||
% C: Is a mxd matrix (Corpus)
|
||||
% Q: Is a nxd matrix (Query)
|
||||
% k: The number of nearest neighbors needded
|
||||
|
||||
% C: Is a MxD matrix (Corpus)
|
||||
% Q: Is a NxD matrix (Query)
|
||||
% k: The number of nearest neighbors needded
|
||||
% idx: Is the Nxk matrix with the k indexes of the C points, that are
|
||||
% neighbors of the nth point of Q
|
||||
% dst: Is the Nxk matrix with the k distances to the C points of the nth
|
||||
% point of Q
|
||||
%
|
||||
% Calculate the distance matrix between C and Q
|
||||
% D is an m x n matrix where each element D(i, j) is the distance
|
||||
% between the i-th point in C and the j-th point in Q.
|
||||
|
@ -1,107 +1,161 @@
|
||||
/*!
|
||||
* \file main.cpp
|
||||
* \brief Main application file
|
||||
*
|
||||
* \author
|
||||
* Christos Choutouridis AEM:8997
|
||||
* <cchoutou@ece.auth.gr>
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <cblas.h>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <exception>
|
||||
#include <unistd.h>
|
||||
#include <cstdio>
|
||||
|
||||
#include <v0.hpp>
|
||||
#include <v1.hpp>
|
||||
#include <matrix.hpp>
|
||||
#include <utils.hpp>
|
||||
#include <config.h>
|
||||
|
||||
// Global session data
|
||||
session_t session;
|
||||
Log logger;
|
||||
Timing timer;
|
||||
|
||||
/*!
|
||||
* Function to compute squared Euclidean distances
|
||||
*
|
||||
* \fn void pdist2(const double*, const double*, double*, int, int, int)
|
||||
* \param X m x d matrix
|
||||
* \param Y n x d matrix
|
||||
* \param D2 m x n matrix to store distances
|
||||
* \param m number of rows in X
|
||||
* \param n number of rows in Y
|
||||
* \param d number of columns in both X and Y
|
||||
* A small command line argument parser
|
||||
* \return The status of the operation
|
||||
*/
|
||||
void pdist2(const double* X, const double* Y, double* D2, int m, int n, int d){
|
||||
// Compute the squared norms of each row in X and Y
|
||||
std::vector<double> X_norms(m), Y_norms(n);
|
||||
for (int i = 0; i < m; ++i) {
|
||||
X_norms[i] = cblas_ddot(d, X + i * d, 1, X + i * d, 1);
|
||||
}
|
||||
for (int j = 0; j < n; ++j) {
|
||||
Y_norms[j] = cblas_ddot(d, Y + j * d, 1, Y + j * d, 1);
|
||||
}
|
||||
bool get_options(int argc, char* argv[]){
|
||||
bool status =true;
|
||||
|
||||
// Compute -2 * X * Y'
|
||||
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, d, -2.0, X, d, Y, d, 0.0, D2, n);
|
||||
// iterate over the passed arguments
|
||||
for (int i=1 ; i<argc ; ++i) {
|
||||
std::string arg(argv[i]); // get current argument
|
||||
|
||||
// Step 3: Add the squared norms to each entry in D2
|
||||
for (int i = 0; i < m; ++i) {
|
||||
for (int j = 0; j < n; ++j) {
|
||||
D2[i * n + j] += X_norms[i] + Y_norms[j];
|
||||
D2[i * n + j] = std::max(D2[i * n + j], 0.0); // Ensure non-negative
|
||||
D2[i * n + j] = std::sqrt(D2[i * n + j]); // Take the square root of each
|
||||
if (arg == "-c" || arg == "--corpus") {
|
||||
if (i+2 < argc) {
|
||||
session.corpusMtxFile = std::string(argv[++i]);
|
||||
session.corpusDataSet = std::string(argv[++i]);
|
||||
}
|
||||
else
|
||||
status = false;
|
||||
}
|
||||
else if (arg == "-o" || arg == "--output") {
|
||||
if (i+3 < argc) {
|
||||
session.outMtxFile = std::string(argv[++i]);
|
||||
session.outMtxIdxDataSet = std::string(argv[++i]);
|
||||
session.outMtxDstDataSet = std::string(argv[++i]);
|
||||
}
|
||||
else
|
||||
status = false;
|
||||
}
|
||||
else if (arg == "-q" || arg == "--query") {
|
||||
if (i+2 < argc) {
|
||||
session.queryMtxFile = std::string(argv[++i]);
|
||||
session.queryDataSet = std::string(argv[++i]);
|
||||
session.queryMtx = true;
|
||||
}
|
||||
else
|
||||
status = false;
|
||||
}
|
||||
else if (arg == "-k") {
|
||||
session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
|
||||
}
|
||||
else if (arg == "-n" || arg == "--max_trheads") {
|
||||
session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
|
||||
}
|
||||
else if (arg == "-t" || arg == "--timing")
|
||||
session.timing = true;
|
||||
else if (arg == "-v" || arg == "--verbose")
|
||||
session.verbose = true;
|
||||
else if (arg == "-h" || arg == "--help") {
|
||||
std::cout << "annsearch - an aproximation knnsearch utility\n\n";
|
||||
std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
|
||||
std::cout << '\n';
|
||||
std::cout << "Options:\n\n";
|
||||
std::cout << " -c | --corpus <file> <dataset>\n";
|
||||
std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n";
|
||||
std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n";
|
||||
std::cout << " Path to <file> to store the data and the names of the datasets.\n\n";
|
||||
std::cout << " -q | --query <file> <dataset>\n";
|
||||
std::cout << " Path to hdf5 file to open and name of the dataset to load\n";
|
||||
std::cout << " If not defined, the corpus is used\n\n";
|
||||
std::cout << " -k <number>\n";
|
||||
std::cout << " Set the number of closest neighbors to find. \n\n";
|
||||
std::cout << " -n | --max_trheads <threads>\n";
|
||||
std::cout << " Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n";
|
||||
std::cout << " -t | --timing\n";
|
||||
std::cout << " Request timing measurements output to stdout.\n\n";
|
||||
std::cout << " -v | --verbose\n";
|
||||
std::cout << " Request a more verbose output to stdout.\n\n";
|
||||
std::cout << " -h | --help <size>\n";
|
||||
std::cout << " Prints this and exit.\n\n";
|
||||
std::cout << "Examples:\n\n";
|
||||
std::cout << " ...Example case...:\n";
|
||||
std::cout << " > ./annsearch -i <MFILE> ... \n\n";
|
||||
|
||||
exit(0);
|
||||
}
|
||||
else { // parse error
|
||||
std::cout << "Invocation error. Try -h for details.\n";
|
||||
status = false;
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void quickselect(std::vector<std::pair<double, int>>& vec, int k) {
|
||||
std::nth_element(
|
||||
vec.begin(),
|
||||
vec.begin() + k,
|
||||
vec.end(),
|
||||
[](const std::pair<double, int>& a, const std::pair<double, int>& b) {
|
||||
return a.first < b.first;
|
||||
});
|
||||
vec.resize(k); // Keep only the k smallest elements
|
||||
}
|
||||
NAMESPACE_VERSION;
|
||||
|
||||
// K-nearest neighbor search function
|
||||
void knnsearch(const double* C, const double* Q, int m, int n, int d, int k,
|
||||
std::vector<std::vector<int>>& idx, std::vector<std::vector<double>>& dst) {
|
||||
std::vector<double> D(m * n);
|
||||
pdist2(C, Q, D.data(), m, n, d);
|
||||
int main(int argc, char* argv[]) try {
|
||||
// Instantiate matrixes
|
||||
MatrixDst Corpus;
|
||||
MatrixDst Query;
|
||||
MatrixIdx Idx;
|
||||
MatrixDst Dst;
|
||||
|
||||
idx.resize(n, std::vector<int>(k));
|
||||
dst.resize(n, std::vector<double>(k));
|
||||
// try to read command line
|
||||
if (!get_options(argc, argv))
|
||||
exit(1);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
// Create a vector of pairs (distance, index) for the j-th query
|
||||
std::vector<std::pair<double, int>> dst_idx(m);
|
||||
for (int i = 0; i < m; ++i) {
|
||||
dst_idx[i] = {D[i * n + j], i};
|
||||
}
|
||||
if (access(session.outMtxFile.c_str(), F_OK) == 0)
|
||||
std::remove(session.outMtxFile.c_str());
|
||||
|
||||
// Find the k smallest distances using quickSelectKSmallest
|
||||
quickselect(dst_idx, k);
|
||||
// Load data
|
||||
timer.start();
|
||||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
|
||||
if (session.queryMtx)
|
||||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
|
||||
timer.stop();
|
||||
timer.print_dt("Load hdf5 files");
|
||||
|
||||
// Sort the k smallest results by distance for consistency
|
||||
std::sort(dst_idx.begin(), dst_idx.end());
|
||||
logger << "Start knnsearch ...";
|
||||
timer.start();
|
||||
if (session.queryMtx)
|
||||
knnsearch(Corpus, Query, session.k, Idx, Dst);
|
||||
else
|
||||
knnsearch(Corpus, Corpus, session.k, Idx, Dst);
|
||||
timer.stop();
|
||||
logger << " Done" << logger.endl;
|
||||
timer.print_dt("knnsearch");
|
||||
|
||||
// Store the indices and distances
|
||||
for (int i = 0; i < k; ++i) {
|
||||
idx[j][i] = dst_idx[i].second;
|
||||
dst[j][i] = dst_idx[i].first;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
int m = 5; // Number of points in C (corpus)
|
||||
int n = 3; // Number of points in Q (query)
|
||||
int d = 2; // Dimensions
|
||||
int k = 2; // Number of nearest neighbors to find
|
||||
|
||||
double C[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // m x d matrix
|
||||
double Q[] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5}; // n x d matrix
|
||||
|
||||
std::vector<std::vector<int>> idx;
|
||||
std::vector<std::vector<double>> dst;
|
||||
|
||||
knnsearch(C, Q, m, n, d, k, idx, dst);
|
||||
|
||||
// Print results
|
||||
for (int i = 0; i < n; ++i) {
|
||||
std::cout << "Query point " << i << ":\n";
|
||||
for (int j = 0; j < k; ++j) {
|
||||
std::cout << " Neighbor " << j <<": Index = " << idx[i][j] <<", Distance = " << dst[i][j] << '\n';
|
||||
}
|
||||
}
|
||||
// Store data
|
||||
timer.start();
|
||||
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
|
||||
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
|
||||
timer.stop();
|
||||
timer.print_dt("Store hdf5 files");
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
//we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
|
||||
std::cerr << "Error: " << e.what() << '\n';
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user