Quellcode durchsuchen

A V0 implementation tested against matlab

Supports:
 - HDF5 file load and store
 - Precision timing of the process to stdout
 - logging (verbose mode) to stdout
 - Command line arguments and help
master
Ursprung
Commit
f591127e16
11 geänderte Dateien mit 1364 neuen und 106 gelöschten Zeilen
  1. +1
    -3
      homework_1/.gitignore
  2. +750
    -0
      homework_1/Libs/matrix/include/matrix.hpp
  3. +16
    -2
      homework_1/Makefile
  4. +75
    -0
      homework_1/inc/config.h
  5. +224
    -0
      homework_1/inc/utils.hpp
  6. +119
    -0
      homework_1/inc/v0.hpp
  7. +16
    -0
      homework_1/inc/v1.hpp
  8. +2
    -0
      homework_1/matlab/.gitignore
  9. +5
    -3
      homework_1/matlab/dist2.m
  10. +8
    -4
      homework_1/matlab/knnsearch2.m
  11. +148
    -94
      homework_1/src/main.cpp

.gitignore → homework_1/.gitignore Datei anzeigen

@@ -3,6 +3,7 @@ bin/
out/
mat/
mtx/
.unused/

# hpc
exclude
@@ -13,6 +14,3 @@ hpc_auth_sync.sh
.cproject
.settings/

# matlab
*.m~


+ 750
- 0
homework_1/Libs/matrix/include/matrix.hpp Datei anzeigen

@@ -0,0 +1,750 @@
/**
* \file matrix.hpp
* \brief A matrix abstraction implementation
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef MATRIX_HPP_
#define MATRIX_HPP_

#include <type_traits>
#include <utility>
#include <algorithm>
#include <vector>
#include <tuple>

namespace mtx {

using std::size_t;

/*
* Small helper to strip types
*/
template<typename T>
struct remove_cvref {
typedef std::remove_cv_t<std::remove_reference_t<T>> type;
};
template<typename T>
using remove_cvref_t = typename remove_cvref<T>::type;

/*!
* Enumerator to denote the storage type of the array to use.
*/
enum class MatrixType {
DENSE, /*!< Matrix is dense */
SPARSE, /*!< Matrix is sparse */
};

/*!
* Enumerator to denote the storage type of the array to use.
*/
enum class MatrixOrder {
COLMAJOR, /*!< Matrix is column major */
ROWMAJOR, /*!< Matrix is row major */
};

/*
* Forward type declarations
*/

template<typename MatrixType> struct MatCol;
template<typename MatrixType> struct MatRow;
template<typename MatrixType> struct MatVal;

/*!
* A 2-D matrix functionality over a 1-D array
*
* This is a very thin abstraction layer over a native array.
* This is tested using compiler explorer and our template produce
* almost identical assembly.
*
* The penalty hit we have is due to the fact that we use a one dimension array
* and we have to calculate the actual position from an (i,j) pair.
* The use of 1D array was our intention from the beginning, so the penalty
* was pretty much unavoidable.
*
* \tparam DataType The underling data type of the array
* \tparam IndexType The underling type for the index variables and sizes
* \tparam Type The storage type of the array
* \arg FULL For full matrix
* \arg SYMMETRIC For symmetric matrix (we use only the lower part)
*/
template<typename DataType,
typename IndexType = size_t,
MatrixType Type = MatrixType::DENSE,
MatrixOrder Order = MatrixOrder::ROWMAJOR,
bool Symmetric = false>
struct Matrix {

using dataType = DataType; //!< meta:export of underling data type
using indexType = IndexType; //!< meta:export of underling index type
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
static constexpr MatrixType matrixType = Type; //!< meta:export of array type
static constexpr bool symmetric = Symmetric; //!< meta:export symmetric flag
/*!
* \name Obj lifetime
*/
//! @{

//! Construct an empty matrix with dimensions rows x columns
Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept :
m_(capacity(rows, columns)), rows_(rows), cols_(columns) { }

//! Construct a matrix by copying existing data with dimensions rows x columns
Matrix(DataType* data, IndexType rows, IndexType columns) noexcept :
m_(data, data + capacity(rows, columns)), rows_(rows), cols_(columns) { }

//! Construct a matrix using an initializer list
Matrix(IndexType rows, IndexType columns, std::initializer_list<DataType> list)
: m_(list), rows_(rows), cols_(columns) {
if (list.size() != capacity(rows, columns)) {
throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
}
}

//! move ctor
Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
//! move
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
Matrix(const Matrix& m) = delete; //!< No copy ctor
Matrix& operator=(const Matrix& m) = delete; //!< No copy
//! @}

//! \name Data exposure
//! @{


//! Get/Set the size of each dimension
IndexType rows() const noexcept { return rows_; }
IndexType columns() const noexcept { return cols_; }

//! Get the interface size of the Matrix (what appears to be the size)
IndexType size() const {
return rows_ * cols_;
}
//! Set the interface size of the Matrix (what appears to be the size)
IndexType resize(IndexType rows, IndexType columns) {
rows_ = rows;
cols_ = columns;
m_.reserve(capacity(rows_, cols_));
return capacity(rows_, cols_);
}

//! Actual memory capacity of the symmetric matrix
static constexpr IndexType capacity(IndexType M, IndexType N) {
if constexpr (Symmetric)
return (M+1)*N/2;
else
return M*N;
}

/*
* virtual 2D accessors
*/
DataType get (IndexType i, IndexType j) {
if constexpr (Symmetric) {
auto T = [](size_t i)->size_t { return i*(i+1)/2; }; // Triangular number of i
if constexpr (Order == MatrixOrder::COLMAJOR) {
// In column major we use the lower triangle of the matrix
if (i>=j) return m_[j*rows_ - T(j) + i]; // Lower, use our notation
else return m_[i*rows_ - T(i) + j]; // Upper, use opposite index
}
else {
// In row major we use the upper triangle of the matrix
if (i<=j) return m_[i*cols_ - T(i) + j]; // Upper, use our notation
else return m_[j*cols_ - T(j) + i]; // Lower, use opposite index
}
}
else {
if constexpr (Order == MatrixOrder::COLMAJOR)
return m_[i + j*rows_];
else
return m_[i*cols_ + j];
}
}

DataType set (DataType v, IndexType i, IndexType j) {
if constexpr (Symmetric) {
auto T = [](size_t i)->size_t { return i*(i+1)/2; }; // Triangular number of i
if constexpr (Order == MatrixOrder::COLMAJOR) {
// In column major we use the lower triangle of the matrix
if (i>=j) return m_[j*rows_ - T(j) + i] = v; // Lower, use our notation
else return m_[i*rows_ - T(i) + j] = v; // Upper, use opposite index
}
else {
// In row major we use the upper triangle of the matrix
if (i<=j) return m_[i*cols_ - T(i) + j] = v; // Upper, use our notation
else return m_[j*cols_ - T(j) + i] = v; // Lower, use opposite index
}
}
else {
if constexpr (Order == MatrixOrder::COLMAJOR)
return m_[i + j*rows_] = v;
else
return m_[i*cols_ + j] = v;
}
}
// DataType operator()(IndexType i, IndexType j) { return get(i, j); }
/*!
* Return a proxy MatVal object with read and write capabilities.
* @param i The row number
* @param j The column number
* @return tHE MatVal object
*/
MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
return MatVal<Matrix>(this, get(i, j), i, j);
}

// a basic serial iterator support
DataType* data() noexcept { return m_.data(); }
// IndexType begin_idx() noexcept { return 0; }
// IndexType end_idx() noexcept { return capacity(rows_, cols_); }

const DataType* data() const noexcept { return m_.data(); }
const IndexType begin_idx() const noexcept { return 0; }
const IndexType end_idx() const noexcept { return capacity(rows_, cols_); }
//! @}

/*!
* \name Safe iteration API
*
* This api automates the iteration over the array based on
* MatrixType
*/
//! @{
template<typename F, typename... Args>
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
for (IndexType it=begin ; it<end ; ++it) {
std::forward<F>(lambda)(std::forward<Args>(args)..., it);
}
}
//! @}

//
void swap(Matrix& src) noexcept {
std::swap(m_, src.m_);
std::swap(rows_, src.rows_);
std::swap(cols_, src.cols_);
}
private:
//! move helper
void moves(Matrix&& src) noexcept {
m_ = std::move(src.m_);
rows_ = std::move(src.rows_);
cols_ = std::move(src.cols_);
}

std::vector<DataType> m_ {}; //!< Pointer to actual data.
IndexType rows_{}; //!< the virtual size of rows.
IndexType cols_{}; //!< the virtual size of columns.
};


/**
* A simple sparse matrix specialization.
*
* We use CSC format and provide get/set functionalities for each (i,j) item
* on the matrix. We also provide a () overload using a proxy MatVal object.
* This way the user can:
* \code
* auto v = A(3,4);
* A(3, 4) = 7;
* \endcode
*
* We also provide getCol() and getRow() functions witch return a viewer/iterator to rows and
* columns of the matrix. In the case of a symmetric matrix instead of a row we return the
* equivalent column. This way we gain speed due to CSC format nature.
*
* @tparam DataType The type for values
* @tparam IndexType The type for indexes
* @tparam Type The Matrix type (FULL or SYMMETRIC)
*/
template<typename DataType, typename IndexType,
MatrixOrder Order,
bool Symmetric>
struct Matrix<DataType, IndexType, MatrixType::SPARSE, Order, Symmetric> {

using dataType = DataType; //!< meta:export of underling data type
using indexType = IndexType; //!< meta:export of underling index type
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
static constexpr MatrixType matrixType = MatrixType::SPARSE; //!< meta:export of array type
static constexpr bool symmetric = Symmetric; //!< meta:export symmetric flag

friend struct MatCol<Matrix>;
friend struct MatRow<Matrix>;
friend struct MatVal<Matrix>;

/*!
* \name Obj lifetime
*/
//! @{

//! Default ctor with optional memory allocations
Matrix(IndexType n=IndexType{}) noexcept:
values{},
rows{},
col_ptr((n)? n+1:2, IndexType{}),
N(n),
NNZ(0) { }

//! A ctor using csc array data
Matrix(IndexType n, IndexType nnz, const IndexType* row, const IndexType* col) noexcept:
values(nnz, 1),
rows(row, row+nnz),
col_ptr(col, col+n+1),
N(n),
NNZ(nnz) { }

//! ctor using csc array data with value array
Matrix(IndexType n, IndexType nnz, const DataType* v, const IndexType* row, const IndexType* col) noexcept:
values(v, v+nnz),
rows(row, row+nnz),
col_ptr(col, col+n+1),
N(n),
NNZ(nnz) { }

//! ctor vectors of row/col and default value for values array
Matrix(IndexType n, IndexType nnz, const DataType v,
const std::vector<IndexType>& row, const std::vector<IndexType>& col) noexcept:
values(nnz, v),
rows (row),
col_ptr(col),
N(n),
NNZ(nnz) { }

//! move ctor
Matrix(Matrix&& m) noexcept { moves(std::move(m)); }
//! move
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
Matrix(const Matrix& m) = delete; //!< make sure there are no copies
Matrix& operator=(const Matrix& m) = delete; //!< make sure there are no copies
//! @}

//! \name Data exposure
//! @{

//! \return the dimension of the matrix
IndexType size() noexcept { return N; }
//! After construction size configuration tool
IndexType resize(IndexType n) {
col_ptr.resize(n+1);
return N = n;
}
//! \return the NNZ of the matrix
IndexType capacity() noexcept { return NNZ; }
//! After construction NNZ size configuration tool
IndexType capacity(IndexType nnz) noexcept {
values.reserve(nnz);
rows.reserve(nnz);
return NNZ;
}
// getters for row arrays of the struct (unused)
std::vector<DataType>& getValues() noexcept { return values; }
std::vector<IndexType>& getRows() noexcept { return rows; }
std::vector<IndexType>& getCols() noexcept { return col_ptr; }

/*!
* Return a proxy MatVal object with read and write capabilities.
* @param i The row number
* @param j The column number
* @return tHE MatVal object
*/
MatVal<Matrix> operator()(IndexType i, IndexType j) noexcept {
return MatVal<Matrix>(this, get(i, j), i, j);
}

/*!
* A read item functionality using binary search to find the correct row
*
* @param i The row number
* @param j The column number
* @return The value of the item or DataType{} if is not present.
*/
DataType get(IndexType i, IndexType j) noexcept {
IndexType idx; bool found;
std::tie(idx, found) =find_idx(rows, col_ptr[j], col_ptr[j+1], i);
return (found) ? values[idx] : 0;
}

/*!
* A write item functionality.
*
* First we search if the matrix has already a value in (i, j) position.
* If so we just change it to a new value. If not we add the item on the matrix.
*
* @note
* When change a value, we don't increase the NNZ value of the struct. We expect the user has already
* change the NNZ value to the right one using @see capacity() function. When adding a value we
* increase the NNZ.
*
* @param i The row number
* @param j The column number
* @return The new value of the item .
*/
DataType set(DataType v, IndexType i, IndexType j) {
IndexType idx; bool found;
std::tie(idx, found) = find_idx(rows, col_ptr[j], col_ptr[j+1], i);
if (found)
return values[idx] = v; // we don't change NNZ even if we write "0"
else {
values.insert(values.begin()+idx, v);
rows.insert(rows.begin()+idx, i);
std::transform(col_ptr.begin()+j+1, col_ptr.end(), col_ptr.begin()+j+1, [](IndexType it) {
return ++it;
});
++NNZ; // we increase the NNZ even if we write "0"
return v;
}
}

/*!
* Get a view of a CSC column
* @param j The column to get
* @return The MatCol object @see MatCol
*/
MatCol<Matrix> getCol(IndexType j) noexcept {
return MatCol<Matrix>(this, col_ptr[j], col_ptr[j+1]);
}

/*!
* Get a view of a CSC row
*
* In case of a SYMMETRIC matrix we can return a column instead.
*
* @param j The row to get
* @return On symmetric matrix MatCol otherwise a MatRow
*/

MatCol<Matrix> getRow(IndexType i) noexcept {
if constexpr (Symmetric)
return getCol(i);
else
return MatRow<Matrix>(this, i);
}

// values only iterator support
DataType* begin() noexcept { return values.begin(); }
DataType* end() noexcept { return values.end(); }
//! @}

//! A small iteration helper
template<typename F, typename... Args>
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
for (IndexType it=begin ; it<end ; ++it) {
std::forward<F>(lambda)(std::forward<Args>(args)..., it);
}
}

private:
/*!
* A small binary search implementation using index for begin-end instead of iterators.
*
* \param v Reference to vector to search
* \param begin The vector's index to begin
* \param end The vector's index to end
* \param match What to search
* \return An <index, status> pair.
* index is the index of the item or end if not found
* status is true if found, false otherwise
*/
std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
if (v.capacity() != 0 && begin < end) {
IndexType b = begin, e = end-1;
while (b <= e) {
IndexType m = (b+e)/2;
if (v[m] == match) return std::make_pair(m, true);
else if (b >= e) return std::make_pair(end, false);
else {
if (v[m] < match) b = m +1;
else e = m -1;
}
}
}
return std::make_pair(end, false);
}

// move helper
void moves(Matrix&& src) noexcept {
values = std::move(src.values);
rows = std::move(src.rows);
col_ptr = std::move(src.col_ptr);
N = std::move(src.N); // redundant for primitives
NNZ = std::move(src.NNZ); //
}
//! \name Data
//! @{
std::vector<DataType> values {}; //!< vector to store the values of the matrix
std::vector<IndexType> rows{}; //!< vector to store the row information
std::vector<IndexType> col_ptr{1,0}; //!< vector to store the column pointers
IndexType N{0}; //!< The dimension of the matrix (square)
IndexType NNZ{0}; //!< The NNZ (capacity of the matrix)
//! @}
};


/*!
* A view/iterator hybrid object for Matrix columns.
*
* This object provides access to a column of a Matrix. The public functionalities
* allow data access using indexes instead of iterators. We prefer indexes over iterators
* because we can apply the same index to different inner vector of Matrix without conversion.
*
* @tparam DataType
* @tparam IndexType
*/
template<typename MatrixType>
struct MatCol {
using owner_t = MatrixType;

using DataType = typename MatrixType::dataType;
using IndexType = typename MatrixType::indexType;

/*!
* ctor using column pointers for begin-end. own is pointer to Matrix.
*/
MatCol(owner_t* own, const IndexType begin, const IndexType end) noexcept :
owner_(own), index_(begin), begin_(begin), end_(end) {
vindex_ = vIndexCalc(index_);
}
MatCol() = default;
MatCol(const MatCol&) = delete; //!< make sure there are no copies
MatCol& operator=(const MatCol&)= delete; //!< make sure there are no copies
MatCol(MatCol&&) = default;
MatCol& operator=(MatCol&&) = default;

//! a simple dereference operator, like an iterator
DataType operator* () {
return get();
}
//! Increment operator acts on index(), like an iterator
MatCol& operator++ () { advance(); return *this; }
MatCol& operator++ (int) { MatCol& p = *this; advance(); return p; }

//! () operator acts as member access (like a view)
DataType operator()(IndexType x) {
return (x == index())? get() : DataType{};
}
//! = operator acts as member assignment (like a view)
DataType operator= (DataType v) { return owner_->values[index_] = v; }
// iterator like handlers
// these return a virtual index value based on the items position on the full matrix
// but the move of the index is just a ++ away.
IndexType index() noexcept { return vindex_; }
const IndexType index() const noexcept { return vindex_; }
IndexType begin() noexcept { return vIndexCalc(begin_); }
const IndexType begin() const noexcept { return vIndexCalc(begin_); }
IndexType end() noexcept { return owner_->N; }
const IndexType end() const noexcept { return owner_->N; }

/*!
* Multiplication operator
*
* We follow only the non-zero values and multiply only the common indexes.
*
* @tparam C Universal reference for the type right half site column
*
* @param c The right hand site matrix
* @return The value of the inner product of two vectors
* @note The time complexity is \$ O(nnz1+nnz2) \$.
* Where the nnz is the max NNZ elements of the column of the matrix
*/
template <typename C>
DataType operator* (C&& c) {
static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
DataType v{};
while (index() != end() && c.index() != c.end()) {
if (index() < c.index()) advance(); // advance me
else if (index() > c.index()) ++c; // advance other
else { //index() == c.index()
v += get() * *c; // multiply and advance both
++c;
advance();
}
}
return v;
}

private:
//! small tool to increase the index pointers to Matrix
void advance() noexcept {
++index_;
vindex_ = vIndexCalc(index_);
}
//! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
IndexType vIndexCalc(IndexType idx) {
return (idx < end_) ? owner_->rows[idx] : end();
}
//! small get tool
DataType get() { return owner_->values[index_]; }

owner_t* owner_ {nullptr}; //!< Pointer to owner Matrix. MatCol is just a view
IndexType vindex_ {IndexType{}}; //!< Virtual index of full matrix
IndexType index_ {IndexType{}}; //!< index to Matrix::rows
IndexType begin_ {IndexType{}}; //!< beginning index of the column in Matrix::rows
IndexType end_ {IndexType{}}; //!< ending index of the column in Matrix::rows
};

/*!
* A view/iterator hybrid object for Matrix rows.
*
* This object provides access to a column of a Matrix. The public functionalities
* allow data access using indexes instead of iterators. We prefer indexes over iterators
* because we can apply the same index to different inner vector of Matrix without conversion.
*
* @tparam DataType
* @tparam IndexType
*/
template<typename MatrixType>
struct MatRow {
using owner_t = MatrixType;

using DataType = typename MatrixType::dataType;
using IndexType = typename MatrixType::indexType;

/*!
* ctor using virtual full matrix row index. own is pointer to Matrix.
*/
MatRow(owner_t* own, const IndexType row) noexcept :
owner_(own), vindex_(IndexType{}), row_(row), index_(IndexType{}),
begin_(IndexType{}), end_(owner_->NNZ) {
// place begin
while(begin_ != end_ && owner_->rows[begin_] != row_)
++begin_;
// place index_ and vindex_
if (owner_->rows[index_] != row_)
advance();
}
MatRow() = default;
MatRow(const MatRow&) = delete; //!< make sure there are no copies
MatRow& operator=(const MatRow&)= delete; //!< make sure there are no copies
MatRow(MatRow&&) = default;
MatRow& operator=(MatRow&&) = default;

//! a simple dereference operator, like an iterator
DataType operator* () {
return get();
}
//! Increment operator acts on index(), like an iterator
//! here the increment is a O(N) process.
MatRow& operator++ () { advance(); return *this; }
MatRow& operator++ (int) { MatRow& p = *this; advance(); return p; }

//! () operator acts as member access (like a view)
DataType operator()(IndexType x) {
return (x == index())? get() : DataType{};
}
//! = operator acts as member assignment (like a view)
DataType operator= (DataType v) { return owner_->values[index_] = v; }
// iterator like handlers
// these return a virtual index value based on the items position on the full matrix
// but the move of the index is just a ++ away.
IndexType index() noexcept { return vindex_; }
const IndexType index() const noexcept { return vindex_; }
IndexType begin() noexcept { return vIndexCalc(begin_); }
const IndexType begin() const noexcept { return vIndexCalc(begin_); }
IndexType end() noexcept { return owner_->N; }
const IndexType end() const noexcept { return owner_->N; }

/*!
* Multiplication operator
*
* We follow only the non-zero values and multiply only the common indexes.
*
* @tparam C Universal reference for the type right half site column
*
* @param c The right hand site matrix
* @return The value of the inner product of two vectors
* @note The time complexity is \$ O(N+nnz2) \$ and way heavier the ColxCol multiplication.
* Where the nnz is the max NNZ elements of the column of the matrix
*/
template <typename C>
DataType operator* (C&& c) {
static_assert(std::is_same<remove_cvref_t<C>, MatCol<MatrixType>>(), "");
DataType v{};
while (index() != end() && c.index() != c.end()) {
if (index() < c.index()) advance(); // advance me
else if (index() > c.index()) ++c; // advance other
else { //index() == c.index()
v += get() * *c; // multiply and advance both
++c;
advance();
}
}
return v;
}
private:
//! small tool to increase the index pointers to Matrix matrix
//! We have to search the entire rows vector in Matrix to find the next
//! virtual row position.
//! time complexity O(N)
void advance() noexcept {
do
++index_;
while(index_ != end_ && owner_->rows[index_] != row_);
vindex_ = vIndexCalc(index_);
}
//! tool to translate between col_ptr indexes and Matrix "virtual" full matrix indexes
IndexType vIndexCalc(IndexType idx) {
for(IndexType i =0 ; i<(owner_->N+1) ; ++i)
if (idx < owner_->col_ptr[i])
return i-1;
return end();
}
//! small get tool
DataType get() { return owner_->values[index_]; }

owner_t* owner_ {nullptr}; //!< Pointer to owner Matrix. MatCol is just a view
IndexType vindex_ {IndexType{}}; //!< Virtual index of full matrix
IndexType row_ {IndexType{}}; //!< The virtual full matrix row of the object
IndexType index_ {IndexType{}}; //!< index to Matrix::rows
IndexType begin_ {IndexType{}}; //!< beginning index of the column in Matrix::rows
IndexType end_ {IndexType{}}; //!< ending index of the column in Matrix::rows
};

/*!
* A proxy Matrix value object/view.
*
* This object acts as proxy to provide read/write access to an Matrix item.
*
* @tparam DataType The type of the values of the Matrix matrix
* @tparam IndexType The type of the indexes of the Matrix matrix
*/
template<typename MatrixType>
struct MatVal {
using owner_t = MatrixType;

using DataType = typename MatrixType::dataType;
using IndexType = typename MatrixType::indexType;

//!< ctor using all value-row-column data, plus a pointer to owner Matrix object
MatVal(owner_t* own, DataType v, IndexType i, IndexType j) :
owner_(own), v_(v), i_(i), j_(j) { }
MatVal() = default;
MatVal(const MatVal&) = delete; //!< make sure there are no copies
MatVal& operator=(const MatVal&) = delete; //!< make sure there are no copies
MatVal(MatVal&&) = default;
MatVal& operator=(MatVal&&) = default;

//! Operator to return the DataType value implicitly
operator DataType() { return v_; }
//! Operator to write back to owner the assigned value
//! for ex: A(2,3) = 5;
MatVal& operator=(DataType v) {
v_ = v;
owner_->set(v_, i_, j_);
return *this;
}
private:
owner_t* owner_{nullptr}; //!< Pointer to owner Matrix. MatVal is just a view.
DataType v_{DataType{}}; //!< The value of the row-column pair (for speed)
IndexType i_{IndexType{}}; //!< The row
IndexType j_{IndexType{}}; //!< the column
};


} // namespace mtx


#endif /* MATRIX_HPP_ */

+ 16
- 2
homework_1/Makefile Datei anzeigen

@@ -26,7 +26,11 @@ TARGET := knnsearch
SRC_DIR_LIST := src
# Include directories list(space seperated). Makefile-relative path.
INC_DIR_LIST := inc \
src
src \
Libs/matrix/include/ \
/usr/include/hdf5/serial/
# Libs/MATLAB/R2019b/include/ \

# Exclude files list(space seperated). Filenames only.
# EXC_FILE_LIST := bad.cpp old.cpp

@@ -49,7 +53,11 @@ PRE_DEFS :=

# ============== Linker settings ==============
# Linker flags (example: -pthread -lm)
LDFLAGS := -pthread -lopenblas
LDFLAGS := -pthread -lopenblas \
-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5
# -LLibs/MATLAB/R2019b/bin/ -lmat -lmx -Wl,-rpath,Libs/MATLAB/R2019b/bin/ \
# -Wl,-rpath,Libs/unwind/bin/

# Map output file
MAP_FILE := output.map
MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE)
@@ -184,6 +192,12 @@ local_v0: TARGET := local_v0
local_v0: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0
local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0
local_v0_opt: TARGET := local_v0_opt
local_v0_opt: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=4
local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=4
local_v1: TARGET := local_v1


+ 75
- 0
homework_1/inc/config.h Datei anzeigen

@@ -0,0 +1,75 @@
/*!
* \file config,h
* \brief Build configuration file.
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/

#ifndef CONFIG_H_
#define CONFIG_H_

#include <iostream>
#include <string>

#include <matrix.hpp>

// HDF5 supported types
enum class HDF5_type {
SCHAR, CHAR, SHORT, USHORT, INT, UINT, LONG, ULONG, LLONG, ULLONG, FLOAT, DOUBLE
};

/*
* Defines for different version of the exercise
*/
#define V0 0
#define V1 1


// Fail-safe version selection
#if !defined CODE_VERSION
#define CODE_VERSION V1
#endif

// matrix alias template dispatcher based on pre-define flag from compiler (see Makefile)
#if CODE_VERSION == V0
#define NAMESPACE_VERSION using namespace v0
using MatrixDst = mtx::Matrix<double>;
using MatrixIdx = mtx::Matrix<uint32_t>;
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
#elif CODE_VERSION == V1
#define NAMESPACE_VERSION using namespace v1
using MatrixDst = mtx::Matrix<double>;
using MatrixIdx = mtx::Matrix<uint32_t>;
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
#endif



//! enumerator for output handling
enum class StdOutputMode{ STD, FILE };

/*!
* Session option for each invocation of the executable
*/
struct session_t {
std::string corpusMtxFile {}; //!< corpus matrix file name in HDF5 format
std::string corpusDataSet {}; //!< corpus dataset name in HDF5 matrix file
std::string queryMtxFile {}; //!< optional query matrix file name in HDF5 format
std::string queryDataSet {}; //!< optional query dataset name in HDF5 matrix file
bool queryMtx {false}; //!< Flag to indicate that there is a separate query matrix
size_t k {1}; //!< The number of nearest neighbors to find
std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format
std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file
std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file
std::size_t max_threads {}; //!< Maximum threads to use
bool timing {false}; //!< Enable timing prints of the program
bool verbose {false}; //!< Flag to enable verbose output to stdout
};

extern session_t session;

#endif /* CONFIG_H_ */

+ 224
- 0
homework_1/inc/utils.hpp Datei anzeigen

@@ -0,0 +1,224 @@
/**
* \file utils.hpp
* \brief Utilities header
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef UTILS_HPP_
#define UTILS_HPP_

#include <iostream>
#include <chrono>
#include <unistd.h>
#include <hdf5.h>

#include <matrix.hpp>
#include <config.h>

/*!
* A Logger for entire program.
*/
struct Log {
struct Endl {} endl; //!< a tag object to to use it as a new line request.

//! We provide logging via << operator
template<typename T>
Log& operator<< (T&& t) {
if (session.verbose) {
if (line_) {
std::cout << "[Log]: " << t;
line_ = false;
}
else
std::cout << t;
}
return *this;
}
// overload for special end line handling
Log& operator<< (Endl e) { (void)e;
if (session.verbose) {
std::cout << '\n';
line_ = true;
}
return *this;
}
private:
bool line_ {true};
};

extern Log logger;

/*!
* A small timing utility based on chrono.
*/
struct Timing{
using Tpoint = std::chrono::steady_clock::time_point;
using microseconds = std::chrono::microseconds;
using milliseconds = std::chrono::milliseconds;
using seconds = std::chrono::seconds;

//! tool to mark the starting point
Tpoint start () noexcept { return start_ = std::chrono::steady_clock::now(); }
//! tool to mark the ending point
Tpoint stop () noexcept { return stop_ = std::chrono::steady_clock::now(); }

auto dt () noexcept {
return std::chrono::duration_cast<std::chrono::microseconds>(stop_ - start_).count();
}
//! tool to print the time interval
void print_dt (const char* what) noexcept {
if (session.timing) {
auto t = stop_ - start_;
if (std::chrono::duration_cast<microseconds>(t).count() < 10000)
std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<microseconds>(t).count()) << " [usec]\n";
else if (std::chrono::duration_cast<milliseconds>(t).count() < 10000)
std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<milliseconds>(t).count()) << " [msec]\n";
else
std::cout << "[Timing]: " << what << ": " << std::to_string(std::chrono::duration_cast<seconds>(t).count()) << " [sec]\n";
}
}
private:
Tpoint start_;
Tpoint stop_;
};



struct Mtx {

template<typename MatrixType, HDF5_type Type>
static void load(const std::string& filename, const std::string& dataset, MatrixType& matrix) {

hid_t file_id{}, dataset_id{}, dataspace_id{};
herr_t read_st;
do {
// Open file
logger << "Load HDF5 file: " << filename << " Dataset: " << dataset << "...";
if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
break;

// Open dataset
if ((dataset_id = H5Dopen2(file_id, dataset.c_str(), H5P_DEFAULT)) < 0)
break;

// Get dataspace and allocate memory for read buffer
if ((dataspace_id = H5Dget_space(dataset_id)) < 0)
break;
hsize_t dims[2];
H5Sget_simple_extent_dims(dataspace_id, dims, NULL);
matrix.resize(dims[0], dims[1]);

// Read the dataset
// ToDo: Come up with a better way to do this
if constexpr (Type == HDF5_type::DOUBLE) {
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
break;
}
else if (Type == HDF5_type::FLOAT) {
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
break;
}
else if (Type == HDF5_type::UINT) {
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
break;
}
else if (Type == HDF5_type::INT) {
if ((read_st = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) < 0)
break;
}
// Done
H5Dclose(dataset_id);
H5Sclose(dataspace_id);
H5Fclose(file_id);
logger << " Done" << logger.endl;
return;
} while (0);

// Error: close everything (if possible) and return false
H5Dclose(dataset_id);
H5Sclose(dataspace_id);
H5Fclose(file_id);
throw std::runtime_error("Cannot store to " + filename + " dataset:" + dataset + '\n');
}


template<typename MatrixType, HDF5_type Type>
static void store(const std::string& filename, const std::string& dataset, MatrixType& matrix) {

hid_t file_id{}, dataset_id{}, dataspace_id{};
herr_t write_st;
do {
// Try to open the file in read-write mode
logger << "Store HDF5 file: " << filename << " Dataset: " << dataset << "...";
if (access(session.outMtxFile.c_str(), F_OK) == 0){
if ((file_id = H5Fopen(filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT)) < 0)
break;
}
else {
if ((file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
break;
}

// Create the dataspace for the dataset
hsize_t dims[] = { matrix.rows(), matrix.columns() };
if ((dataspace_id = H5Screate_simple(2, dims, NULL)) < 0)
break;

// ToDo: Come up with a better way to do this
if constexpr (Type == HDF5_type::DOUBLE) {
// Create the dataset with default properties
if ((dataset_id = H5Dcreate2(
file_id, dataset.c_str(), H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
break;
// Write the data to the dataset
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
break;
}
else if (Type == HDF5_type::FLOAT) {
// Create the dataset with default properties
if ((dataset_id = H5Dcreate2(
file_id, dataset.c_str(), H5T_NATIVE_FLOAT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
break;
// Write the data to the dataset
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
break;
}
else if (Type == HDF5_type::UINT) {
// Create the dataset with default properties
if ((dataset_id = H5Dcreate2(
file_id, dataset.c_str(), H5T_NATIVE_UINT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
break;
// Write the data to the dataset
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_UINT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
break;
}
else if (Type == HDF5_type::INT) {
// Create the dataset with default properties
if ((dataset_id = H5Dcreate2(
file_id, dataset.c_str(), H5T_NATIVE_INT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
break;
// Write the data to the dataset
if ((write_st = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix.data())) <0 )
break;
}
// Close the dataset, dataspace, and file
H5Dclose(dataset_id);
H5Sclose(dataspace_id);
H5Fclose(file_id);
logger << " Done" << logger.endl;
return;
} while (0);

// Error: close everything (if possible) and return false
H5Dclose(dataset_id);
H5Sclose(dataspace_id);
H5Fclose(file_id);
throw std::runtime_error("Cannot store " + filename + " with dataset:" + dataset +'\n');
}
};



#endif /* UTILS_HPP_ */

+ 119
- 0
homework_1/inc/v0.hpp Datei anzeigen

@@ -0,0 +1,119 @@
/**
* \file v0.hpp
* \brief
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef V0_HPP_
#define V0_HPP_

#include <cblas.h>
#include <cmath>
#include <vector>
#include <algorithm>

#include <matrix.hpp>
#include <config.h>

namespace v0 {

/*!
* Function to compute squared Euclidean distances
*
* \fn void pdist2(const double*, const double*, double*, int, int, int)
* \param X m x d matrix (Column major)
* \param Y n x d matrix (Column major)
* \param D2 m x n matrix to store distances (Column major)
* \param m number of rows in X
* \param n number of rows in Y
* \param d number of columns in both X and Y
*/
template<typename DataType>
void pdist2(const mtx::Matrix<DataType>& X, const mtx::Matrix<DataType>& Y, mtx::Matrix<DataType>& D2) {
int M = X.rows();
int N = Y.rows();
int d = X.columns();

// Compute the squared norms of each row in X and Y
std::vector<DataType> X_norms(M), Y_norms(N);
for (int i = 0; i < M ; ++i) {
X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
}
for (int j = 0; j < N ; ++j) {
Y_norms[j] = cblas_ddot(d, Y.data() + j * d, 1, Y.data() + j * d, 1);
}

// Compute -2 * X * Y'
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, d, -2.0, X.data(), d, Y.data(), d, 0.0, D2.data(), N);

// Step 3: Add the squared norms to each entry in D2
for (int i = 0; i < M ; ++i) {
for (int j = 0; j < N; ++j) {
D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
//D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative
D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each
}
}
}

template<typename DataType, typename IndexType>
void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
std::nth_element(
vec.begin(),
vec.begin() + k,
vec.end(),
[](const std::pair<DataType, IndexType>& a, const std::pair<DataType, IndexType>& b) {
return a.first < b.first;
});
vec.resize(k); // Keep only the k smallest elements
}

/*!
* \param C Is a MxD matrix (Corpus)
* \param Q Is a NxD matrix (Query)
* \param k The number of nearest neighbors needed
* \param idx Is the Nxk matrix with the k indexes of the C points, that are
* neighbors of the nth point of Q
* \param dst Is the Nxk matrix with the k distances to the C points of the nth
* point of Q
*/
template<typename DataType, typename IndexType>
void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, int k,
mtx::Matrix<IndexType>& idx,
mtx::Matrix<DataType>& dst) {

int M = C.rows();
int N = Q.rows();

mtx::Matrix<DataType> D(M, N);

pdist2(C, Q, D);

idx.resize(N, k);
dst.resize(N, k);

for (int j = 0; j < N; ++j) {
// Create a vector of pairs (distance, index) for the j-th query
std::vector<std::pair<DataType, IndexType>> dst_idx(M);
for (int i = 0; i < M; ++i) {
dst_idx[i] = {D.data()[i * N + j], i};
}
// Find the k smallest distances using quickSelectKSmallest
quickselect(dst_idx, k);

// Sort the k smallest results by distance for consistency
std::sort(dst_idx.begin(), dst_idx.end());

// Store the indices and distances
for (int i = 0; i < k; ++i) {
idx(j, i) = dst_idx[i].second;
dst(j, i) = dst_idx[i].first;
}
}
}

}

#endif /* V0_HPP_ */

+ 16
- 0
homework_1/inc/v1.hpp Datei anzeigen

@@ -0,0 +1,16 @@
/**
* \file v0.hpp
* \brief
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef V1_HPP_
#define V1_HPP_





#endif /* V1_HPP_ */

+ 2
- 0
homework_1/matlab/.gitignore Datei anzeigen

@@ -0,0 +1,2 @@
# matlab
*.m~

+ 5
- 3
homework_1/matlab/dist2.m Datei anzeigen

@@ -1,10 +1,12 @@
function [D2] = dist2(X, Y)
% Calculates the squares of the distances of X and Y
%
% X: A mxd array with m d-dimentional points
% Y: A nxd array with n d-dimentional points
% X: A Mxd array with m d-dimentional points
% Y: A Nxd array with n d-dimentional points
% d: Must be the same

%
% D2: The MxN matrix with the distances
%
[~, d1] = size(X);
[~, d2] = size(Y);
if d1 ~= d2


+ 8
- 4
homework_1/matlab/knnsearch2.m Datei anzeigen

@@ -1,8 +1,12 @@
function [idx, dst] = knnsearch2(C, Q, k)
% C: Is a mxd matrix (Corpus)
% Q: Is a nxd matrix (Query)
% k: The number of nearest neighbors needded

% C: Is a MxD matrix (Corpus)
% Q: Is a NxD matrix (Query)
% k: The number of nearest neighbors needded
% idx: Is the Nxk matrix with the k indexes of the C points, that are
% neighbors of the nth point of Q
% dst: Is the Nxk matrix with the k distances to the C points of the nth
% point of Q
%
% Calculate the distance matrix between C and Q
% D is an m x n matrix where each element D(i, j) is the distance
% between the i-th point in C and the j-th point in Q.


+ 148
- 94
homework_1/src/main.cpp Datei anzeigen

@@ -1,107 +1,161 @@
#include <iostream>
#include <cblas.h>
#include <cmath>
#include <vector>
#include <algorithm>
#include <queue>

/*!
* Function to compute squared Euclidean distances
* \file main.cpp
* \brief Main application file
*
* \fn void pdist2(const double*, const double*, double*, int, int, int)
* \param X m x d matrix
* \param Y n x d matrix
* \param D2 m x n matrix to store distances
* \param m number of rows in X
* \param n number of rows in Y
* \param d number of columns in both X and Y
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
void pdist2(const double* X, const double* Y, double* D2, int m, int n, int d){
// Compute the squared norms of each row in X and Y
std::vector<double> X_norms(m), Y_norms(n);
for (int i = 0; i < m; ++i) {
X_norms[i] = cblas_ddot(d, X + i * d, 1, X + i * d, 1);
}
for (int j = 0; j < n; ++j) {
Y_norms[j] = cblas_ddot(d, Y + j * d, 1, Y + j * d, 1);
}

// Compute -2 * X * Y'
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, d, -2.0, X, d, Y, d, 0.0, D2, n);
#include <iostream>
#include <string>
#include <exception>
#include <unistd.h>
#include <cstdio>

#include <v0.hpp>
#include <v1.hpp>
#include <matrix.hpp>
#include <utils.hpp>
#include <config.h>

// Global session data
session_t session;
Log logger;
Timing timer;

// Step 3: Add the squared norms to each entry in D2
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
D2[i * n + j] += X_norms[i] + Y_norms[j];
D2[i * n + j] = std::max(D2[i * n + j], 0.0); // Ensure non-negative
D2[i * n + j] = std::sqrt(D2[i * n + j]); // Take the square root of each
/*!
* A small command line argument parser
* \return The status of the operation
*/
bool get_options(int argc, char* argv[]){
bool status =true;

// iterate over the passed arguments
for (int i=1 ; i<argc ; ++i) {
std::string arg(argv[i]); // get current argument

if (arg == "-c" || arg == "--corpus") {
if (i+2 < argc) {
session.corpusMtxFile = std::string(argv[++i]);
session.corpusDataSet = std::string(argv[++i]);
}
else
status = false;
}
else if (arg == "-o" || arg == "--output") {
if (i+3 < argc) {
session.outMtxFile = std::string(argv[++i]);
session.outMtxIdxDataSet = std::string(argv[++i]);
session.outMtxDstDataSet = std::string(argv[++i]);
}
else
status = false;
}
else if (arg == "-q" || arg == "--query") {
if (i+2 < argc) {
session.queryMtxFile = std::string(argv[++i]);
session.queryDataSet = std::string(argv[++i]);
session.queryMtx = true;
}
else
status = false;
}
else if (arg == "-k") {
session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
}
else if (arg == "-n" || arg == "--max_trheads") {
session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
}
else if (arg == "-t" || arg == "--timing")
session.timing = true;
else if (arg == "-v" || arg == "--verbose")
session.verbose = true;
else if (arg == "-h" || arg == "--help") {
std::cout << "annsearch - an aproximation knnsearch utility\n\n";
std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
std::cout << '\n';
std::cout << "Options:\n\n";
std::cout << " -c | --corpus <file> <dataset>\n";
std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n";
std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n";
std::cout << " Path to <file> to store the data and the names of the datasets.\n\n";
std::cout << " -q | --query <file> <dataset>\n";
std::cout << " Path to hdf5 file to open and name of the dataset to load\n";
std::cout << " If not defined, the corpus is used\n\n";
std::cout << " -k <number>\n";
std::cout << " Set the number of closest neighbors to find. \n\n";
std::cout << " -n | --max_trheads <threads>\n";
std::cout << " Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n";
std::cout << " -t | --timing\n";
std::cout << " Request timing measurements output to stdout.\n\n";
std::cout << " -v | --verbose\n";
std::cout << " Request a more verbose output to stdout.\n\n";
std::cout << " -h | --help <size>\n";
std::cout << " Prints this and exit.\n\n";
std::cout << "Examples:\n\n";
std::cout << " ...Example case...:\n";
std::cout << " > ./annsearch -i <MFILE> ... \n\n";

exit(0);
}
else { // parse error
std::cout << "Invocation error. Try -h for details.\n";
status = false;
}
}
}

void quickselect(std::vector<std::pair<double, int>>& vec, int k) {
std::nth_element(
vec.begin(),
vec.begin() + k,
vec.end(),
[](const std::pair<double, int>& a, const std::pair<double, int>& b) {
return a.first < b.first;
});
vec.resize(k); // Keep only the k smallest elements
}

// K-nearest neighbor search function
void knnsearch(const double* C, const double* Q, int m, int n, int d, int k,
std::vector<std::vector<int>>& idx, std::vector<std::vector<double>>& dst) {
std::vector<double> D(m * n);
pdist2(C, Q, D.data(), m, n, d);

idx.resize(n, std::vector<int>(k));
dst.resize(n, std::vector<double>(k));

for (int j = 0; j < n; ++j) {
// Create a vector of pairs (distance, index) for the j-th query
std::vector<std::pair<double, int>> dst_idx(m);
for (int i = 0; i < m; ++i) {
dst_idx[i] = {D[i * n + j], i};
}

// Find the k smallest distances using quickSelectKSmallest
quickselect(dst_idx, k);

// Sort the k smallest results by distance for consistency
std::sort(dst_idx.begin(), dst_idx.end());

// Store the indices and distances
for (int i = 0; i < k; ++i) {
idx[j][i] = dst_idx[i].second;
dst[j][i] = dst_idx[i].first;
}
}
return status;
}

int main(){

int m = 5; // Number of points in C (corpus)
int n = 3; // Number of points in Q (query)
int d = 2; // Dimensions
int k = 2; // Number of nearest neighbors to find

double C[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // m x d matrix
double Q[] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5}; // n x d matrix

std::vector<std::vector<int>> idx;
std::vector<std::vector<double>> dst;

knnsearch(C, Q, m, n, d, k, idx, dst);

// Print results
for (int i = 0; i < n; ++i) {
std::cout << "Query point " << i << ":\n";
for (int j = 0; j < k; ++j) {
std::cout << " Neighbor " << j <<": Index = " << idx[i][j] <<", Distance = " << dst[i][j] << '\n';
}
}
NAMESPACE_VERSION;

int main(int argc, char* argv[]) try {
// Instantiate matrixes
MatrixDst Corpus;
MatrixDst Query;
MatrixIdx Idx;
MatrixDst Dst;

// try to read command line
if (!get_options(argc, argv))
exit(1);

if (access(session.outMtxFile.c_str(), F_OK) == 0)
std::remove(session.outMtxFile.c_str());

// Load data
timer.start();
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
if (session.queryMtx)
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
timer.stop();
timer.print_dt("Load hdf5 files");

logger << "Start knnsearch ...";
timer.start();
if (session.queryMtx)
knnsearch(Corpus, Query, session.k, Idx, Dst);
else
knnsearch(Corpus, Corpus, session.k, Idx, Dst);
timer.stop();
logger << " Done" << logger.endl;
timer.print_dt("knnsearch");


// Store data
timer.start();
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
timer.stop();
timer.print_dt("Store hdf5 files");

return 0;
}
catch (std::exception& e) {
//we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
std::cerr << "Error: " << e.what() << '\n';
exit(1);
}



Laden…
Abbrechen
Speichern