Преглед на файлове

WIP: v1 recursive

Christos Choutouridis преди 3 месеца
променени са 16 файла, в които са добавени 29579 реда и са изтрити 106 реда
  1. +12
  2. +11673
  3. +17103
  4. +6
  5. +189
  6. +2
  7. +21
  8. +92
  9. +11
  10. +19
  11. +46
  12. +32
  13. +23
  14. +0
  15. +33
  16. +317

+ 12
- 7
homework_1/Makefile Целия файл

@@ -23,11 +23,11 @@ PROJECT := PDS_homework_1
# Executable's name
TARGET := knnsearch
# Source directories list (space separated). Makefile-relative path, UNDER current directory.
SRC_DIR_LIST := src gtest
# Include directories list (space separated). Makefile-relative path.
INC_DIR_LIST := inc \
src \
Libs/matrix/include/ \
gtest \
# Libs/MATLAB/R2019b/include/ \

@@ -56,7 +56,6 @@ PRE_DEFS :=
LDFLAGS := -pthread -lopenblas \
-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5
# -LLibs/MATLAB/R2019b/bin/ -lmat -lmx -Wl,-rpath,Libs/MATLAB/R2019b/bin/ \
# -Wl,-rpath,Libs/unwind/bin/

# Map output file
MAP_FILE := output.map
@@ -198,8 +197,8 @@ local_v0_opt: TARGET := local_v0_opt
local_v0_opt: $(BUILD_DIR)/$(TARGET)
local_v1: TARGET := local_v1
local_v1: $(BUILD_DIR)/$(TARGET)
@@ -235,7 +234,13 @@ v1: TARGET := knnsearch_v1

tests: TARGET := tests
tests: $(BUILD_DIR)/$(TARGET)
# ========= Inside CSAL Image build rules ===========
@@ -280,6 +285,6 @@ csal_v1: $(BUILD_DIR)/$(TARGET)
# make IMAGE=hpcimage EXEC=knnsearch_v1 run
# make IMAGE=hpcimage EXEC=knnsearch_v1 run
$(DOCKER) ./out/$(EXEC)

+ 11673
- 0
Файловите разлики са ограничени, защото са твърде много
Целия файл

+ 17103
- 0
Файловите разлики са ограничени, защото са твърде много
Целия файл

+ 6
- 16
homework_1/inc/config.h Целия файл

@@ -13,7 +13,7 @@
#include <iostream>
#include <string>

#include <matrix.hpp>
#include "matrix.hpp"

// HDF5 supported types
enum class HDF5_type {
@@ -32,21 +32,11 @@ enum class HDF5_type {

// matrix alias template dispatcher based on pre-define flag from compiler (see Makefile)
#define NAMESPACE_VERSION using namespace v0
using MatrixDst = mtx::Matrix<double>;
using MatrixIdx = mtx::Matrix<uint32_t>;
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;
#elif CODE_VERSION == V1
#define NAMESPACE_VERSION using namespace v1
using MatrixDst = mtx::Matrix<double>;
using MatrixIdx = mtx::Matrix<uint32_t>;
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;

// matrix alias template dispatcher
using MatrixDst = mtx::Matrix<double>;
using MatrixIdx = mtx::Matrix<uint32_t>;
static constexpr HDF5_type DstHDF5Type = HDF5_type::DOUBLE;
static constexpr HDF5_type IdxHDF5Type = HDF5_type::INT;

//! enumerator for output handling

homework_1/Libs/matrix/include/matrix.hpp → homework_1/inc/matrix.hpp Целия файл

@@ -83,25 +83,49 @@ struct Matrix {
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
static constexpr MatrixType matrixType = Type; //!< meta:export of array type
static constexpr bool symmetric = Symmetric; //!< meta:export symmetric flag

* \name Obj lifetime
//! @{

//! Construct an empty matrix with dimensions rows x columns
Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept :
m_(capacity(rows, columns)), rows_(rows), cols_(columns) { }
Matrix(IndexType rows = IndexType{}, IndexType columns = IndexType{}) noexcept
: vector_storage_(capacity(rows, columns)),
cols_(columns) {
data_ = vector_storage_.data();

//! Construct a matrix by copying existing data with dimensions rows x columns
Matrix(DataType* data, IndexType rows, IndexType columns) noexcept :
m_(data, data + capacity(rows, columns)), rows_(rows), cols_(columns) { }
Matrix(DataType* data, IndexType major_start, IndexType major_length, IndexType minor_length) noexcept
: vector_storage_(),
raw_storage_ (data + major_start * minor_length),
use_vector_ (false) {
if constexpr (Order == MatrixOrder::ROWMAJOR) {
rows_ = major_length;
cols_ = minor_length;
else {
rows_ = minor_length;
cols_ = major_length;
data_ = raw_storage_;

//! Construct a matrix using an initializer list
Matrix(IndexType rows, IndexType columns, std::initializer_list<DataType> list)
: m_(list), rows_(rows), cols_(columns) {
: vector_storage_(list),
cols_(columns) {
if (list.size() != capacity(rows, columns)) {
throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
throw std::invalid_argument("Matrix initializer list size does not match matrix dimensions.");
data_ = vector_storage_.data();

//! move ctor
@@ -126,9 +150,12 @@ struct Matrix {
//! Set the interface size of the Matrix (what appears to be the size)
IndexType resize(IndexType rows, IndexType columns) {
rows_ = rows;
cols_ = columns;
m_.reserve(capacity(rows_, cols_));
if (use_vector_) {
rows_ = rows;
cols_ = columns;
vector_storage_.resize(capacity(rows_, cols_));
data_ = vector_storage_.data();
return capacity(rows_, cols_);

@@ -148,42 +175,49 @@ struct Matrix {
auto T = [](size_t i)->size_t { return i*(i+1)/2; }; // Triangular number of i
if constexpr (Order == MatrixOrder::COLMAJOR) {
// In column major we use the lower triangle of the matrix
if (i>=j) return m_[j*rows_ - T(j) + i]; // Lower, use our notation
else return m_[i*rows_ - T(i) + j]; // Upper, use opposite index
if (i>=j) return data_[j*rows_ - T(j) + i]; // Lower, use our notation
else return data_[i*rows_ - T(i) + j]; // Upper, use opposite index
else {
// In row major we use the upper triangle of the matrix
if (i<=j) return m_[i*cols_ - T(i) + j]; // Upper, use our notation
else return m_[j*cols_ - T(j) + i]; // Lower, use opposite index
if (i<=j) return data_[i*cols_ - T(i) + j]; // Upper, use our notation
else return data_[j*cols_ - T(j) + i]; // Lower, use opposite index
else {
if constexpr (Order == MatrixOrder::COLMAJOR)
return m_[i + j*rows_];
return data_[i + j*rows_];
return m_[i*cols_ + j];
return data_[i*cols_ + j];

* \fn DataType set(DataType, IndexType, IndexType)
* \param v
* \param i
* \param j
* \return
DataType set (DataType v, IndexType i, IndexType j) {
if constexpr (Symmetric) {
auto T = [](size_t i)->size_t { return i*(i+1)/2; }; // Triangular number of i
if constexpr (Order == MatrixOrder::COLMAJOR) {
// In column major we use the lower triangle of the matrix
if (i>=j) return m_[j*rows_ - T(j) + i] = v; // Lower, use our notation
else return m_[i*rows_ - T(i) + j] = v; // Upper, use opposite index
if (i>=j) return data_[j*rows_ - T(j) + i] = v; // Lower, use our notation
else return data_[i*rows_ - T(i) + j] = v; // Upper, use opposite index
else {
// In row major we use the upper triangle of the matrix
if (i<=j) return m_[i*cols_ - T(i) + j] = v; // Upper, use our notation
else return m_[j*cols_ - T(j) + i] = v; // Lower, use opposite index
if (i<=j) return data_[i*cols_ - T(i) + j] = v; // Upper, use our notation
else return data_[j*cols_ - T(j) + i] = v; // Lower, use opposite index
else {
if constexpr (Order == MatrixOrder::COLMAJOR)
return m_[i + j*rows_] = v;
return data_[i + j*rows_] = v;
return m_[i*cols_ + j] = v;
return data_[i*cols_ + j] = v;
// DataType operator()(IndexType i, IndexType j) { return get(i, j); }
@@ -198,11 +232,11 @@ struct Matrix {

// a basic serial iterator support
DataType* data() noexcept { return m_.data(); }
DataType* data() noexcept { return data_; }
// IndexType begin_idx() noexcept { return 0; }
// IndexType end_idx() noexcept { return capacity(rows_, cols_); }

const DataType* data() const noexcept { return m_.data(); }
const DataType* data() const noexcept { return data_; }
const IndexType begin_idx() const noexcept { return 0; }
const IndexType end_idx() const noexcept { return capacity(rows_, cols_); }
//! @}
@@ -224,19 +258,29 @@ struct Matrix {

void swap(Matrix& src) noexcept {
std::swap(m_, src.m_);
std::swap(vector_storage_, src.vector_storage_);
std::swap(raw_storage_, src.raw_storage_);
std::swap(data_, src.data_);
std::swap(use_vector_, src.use_vector_);
std::swap(rows_, src.rows_);
std::swap(cols_, src.cols_);
//! Move helper: assigns this object's members from src using std::move.
// NOTE(review): this span comes from a rendered diff; `m_ = ...` below is presumably
// the removed pre-change line displayed next to its replacements — confirm in the repo.
void moves(Matrix&& src) noexcept {
m_ = std::move(src.m_);
// NOTE(review): the next four statements all assign to data_. As written,
// vector_storage_, raw_storage_ and use_vector_ never receive a value from src and
// only the last assignment to data_ survives. swap() in this struct exchanges each
// of these members with its namesake, which suggests the intended targets here are
// vector_storage_, raw_storage_, data_ and use_vector_ respectively — confirm and fix.
data_ = std::move(src.vector_storage_);
data_ = std::move(src.raw_storage_);
data_ = std::move(src.data_);
data_ = std::move(src.use_vector_);
// Dimensions are copied over last.
rows_ = std::move(src.rows_);
cols_ = std::move(src.cols_);

std::vector<DataType> m_ {}; //!< Pointer to actual data.
vector_storage_; //!< Internal storage (if used).
DataType* raw_storage_; //!< External storage (if used).
DataType* data_; //!< Pointer to active storage.
bool use_vector_; //!< True if using vector storage, false for raw pointer.
IndexType rows_{}; //!< the virtual size of rows.
IndexType cols_{}; //!< the virtual size of columns.
@@ -484,6 +528,125 @@ private:

template<typename ...> struct Matrix_view { };

* @struct Matrix_view
* @tparam MatrixType
template<template <typename, typename, MatrixType, MatrixOrder, bool> class Matrix,
typename DataType,
typename IndexType,
MatrixType Type,
MatrixOrder Order>
struct Matrix_view<Matrix<DataType, IndexType, Type, Order, false>> {
using owner_t = Matrix<DataType, IndexType, Type, Order, false>;

using dataType = DataType; //!< meta:export of underling data type
using indexType = IndexType; //!< meta:export of underling index type
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order
static constexpr MatrixType matrixType = Type; //!< meta:export of array type

* \name Obj lifetime
//! @{

//! Construct a matrix view to entire matrix
Matrix_view(const owner_t* owner) noexcept :
owner_(owner), m_(owner->data()), rows_(owner->rows()), cols_(owner->columns()) { }

//! Construct a view over the half-open major-order range [begin, end) of owner.
//! For ROWMAJOR the view spans rows begin..end-1 with all of the owner's columns;
//! for COLMAJOR it spans columns begin..end-1 with all of the owner's rows.
// NOTE(review): owner is a pointer-to-const, so owner->data() should select the const
// accessor (which returns const DataType* in the Matrix shown in this diff), while m_
// is declared as plain DataType*. That assignment would drop const — confirm how this
// is meant to compile (const_cast, non-const owner, or const m_).
Matrix_view(const owner_t* owner, IndexType begin, IndexType end) noexcept :
owner_(owner) {
if constexpr (Order == MatrixOrder::ROWMAJOR) {
// Row-major: one row is owner->columns() elements long, so skip `begin` rows.
m_ = owner->data() + begin * owner->columns();
rows_ = end - begin;
cols_ = owner->columns();
// NOTE(review): this branch uses a plain `if`, not `if constexpr`, unlike the one above.
} else if (Order == MatrixOrder::COLMAJOR) {
// Column-major: one column is owner->rows() elements long, so skip `begin` columns.
m_ = owner->data() + begin * owner->rows();
rows_ = owner->rows();
cols_ = end - begin;

Matrix_view(Matrix_view&& m) = delete; //! No move
Matrix_view& operator=(Matrix_view&& m) = delete;
Matrix_view(const Matrix_view& m) = delete; //!< No copy
Matrix_view& operator=(const Matrix_view& m) = delete;
//! @}

//! Get/Set the size of each dimension
const IndexType rows() const noexcept { return rows_; }
const IndexType columns() const noexcept { return cols_; }

//! Get the interface size of the Matrix (what appears to be the size)
IndexType size() const {
return rows_ * cols_;

//! Actual memory capacity of the symmetric matrix
static constexpr IndexType capacity(IndexType M, IndexType N) {
return M*N;
* virtual 2D accessors
const DataType get (IndexType i, IndexType j) const {
if constexpr (Order == MatrixOrder::COLMAJOR)
return m_[i + j*rows_];
return m_[i*cols_ + j];

DataType set (DataType v, IndexType i, IndexType j) {
if constexpr (Order == MatrixOrder::COLMAJOR)
return m_[i + j*rows_] = v;
return m_[i*cols_ + j] = v;
// DataType operator()(IndexType i, IndexType j) { return get(i, j); }
* Return a proxy MatVal object with read and write capabilities.
* @param i The row number
* @param j The column number
* @return tHE MatVal object
MatVal<Matrix_view> operator()(IndexType i, IndexType j) noexcept {
return MatVal<Matrix_view>(this, get(i, j), i, j);

// a basic serial iterator support
//! Mutable pointer to the first element of the view. m_ is already a raw DataType*
//! (see its declaration in this struct), so it is returned directly, matching the
//! const overload below.
DataType* data() noexcept { return m_; }
// IndexType begin_idx() noexcept { return 0; }
// IndexType end_idx() noexcept { return capacity(rows_, cols_); }

const DataType* data() const noexcept { return m_; }
const IndexType begin_idx() const noexcept { return 0; }
const IndexType end_idx() const noexcept { return capacity(rows_, cols_); }
//! @}

* \name Safe iteration API
* This api automates the iteration over the array based on
* MatrixType
//! @{
template<typename F, typename... Args>
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
for (IndexType it=begin ; it<end ; ++it) {
std::forward<F>(lambda)(std::forward<Args>(args)..., it);
//! @}
const owner_t* owner_ {nullptr}; //!< Pointer to Matrix
DataType* m_ {nullptr}; //!< Starting address of the slice/view
IndexType rows_{}; //!< the virtual size of rows.
IndexType cols_{}; //!< the virtual size of columns.

* A view/iterator hybrid object for Matrix columns.

+ 2
- 2
homework_1/inc/utils.hpp Целия файл

@@ -14,8 +14,8 @@
#include <unistd.h>
#include <hdf5.h>

#include <matrix.hpp>
#include <config.h>
#include "matrix.hpp"
#include "config.h"

* A Logger for entire program.

+ 21
- 21
homework_1/inc/v0.hpp Целия файл

@@ -14,8 +14,8 @@
#include <vector>
#include <algorithm>

#include <matrix.hpp>
#include <config.h>
#include "matrix.hpp"
#include "config.h"

namespace v0 {

@@ -30,8 +30,10 @@ namespace v0 {
* \param n number of rows in Y
* \param d number of columns in both X and Y
template<typename DataType>
void pdist2(const mtx::Matrix<DataType>& X, const mtx::Matrix<DataType>& Y, mtx::Matrix<DataType>& D2) {
template<typename Matrix>
void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) {
using DataType = typename Matrix::dataType;

int M = X.rows();
int N = Y.rows();
int d = X.columns();
@@ -79,26 +81,24 @@ void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
* \param dst Is the Nxk matrix with the k distances to the C points of the nth
* point of Q
template<typename DataType, typename IndexType>
void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, int k,
mtx::Matrix<IndexType>& idx,
mtx::Matrix<DataType>& dst) {
template<typename MatrixD, typename MatrixI>
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {

int M = C.rows();
int N = Q.rows();
using DstType = typename MatrixD::dataType;
using IdxType = typename MatrixI::dataType;

mtx::Matrix<DataType> D(M, N);
size_t M = C.rows();
size_t N = Q.rows();

pdist2(C, Q, D);
mtx::Matrix<DstType> D(M, N);

idx.resize(N, k);
dst.resize(N, k);
pdist2(C, Q, D);

for (int j = 0; j < N; ++j) {
for (size_t j = 0; j < N; ++j) {
// Create a vector of pairs (distance, index) for the j-th query
std::vector<std::pair<DataType, IndexType>> dst_idx(M);
for (int i = 0; i < M; ++i) {
dst_idx[i] = {D.data()[i * N + j], i};
std::vector<std::pair<DstType, IdxType>> dst_idx(M);
for (size_t i = 0; i < M; ++i) {
dst_idx[i] = {D.data()[i * N + j], i};
// Find the k smallest distances using quickSelectKSmallest
quickselect(dst_idx, k);
@@ -107,9 +107,9 @@ void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, i
std::sort(dst_idx.begin(), dst_idx.end());

// Store the indices and distances
for (int i = 0; i < k; ++i) {
idx(j, i) = dst_idx[i].second;
dst(j, i) = dst_idx[i].first;
for (size_t i = 0; i < k; ++i) {
dst.set(dst_idx[i].first, j, i);
idx.set(dst_idx[i].second + idx_offset, j, i);

+ 92
- 0
homework_1/inc/v1.hpp Целия файл

@@ -9,8 +9,100 @@
#ifndef V1_HPP_
#define V1_HPP_

#include <vector>
#include <algorithm>

#include "matrix.hpp"
#include "v0.hpp"
#include "config.h"

namespace v1 {

// Merge two per-query candidate lists into a single k-column result.
//
// For every query row q, the (distance, index) pairs of row q of (D1, N1) and of
// (D2, N2) are concatenated, reduced with v0::quickselect, the leading
// maxCandidates entries are sorted, and the first k slots of row q in (D, N) are written.
//
// \param N1,D1  neighbor indices / distances of the first source (one row per query)
// \param N2,D2  neighbor indices / distances of the second source
// \param k      number of output columns written per query
// \param m      upper bound on the number of candidates kept before the final selection
// \param N,D    output index / distance matrices; rows 0..N1.rows()-1 and columns 0..k-1 are written
//
// NOTE(review): std::numeric_limits is used below, but <limits> is not among the
// includes visible for this header in the diff — confirm it is pulled in somewhere.
template <typename DataType, typename IndexType>
void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1,
mtx::Matrix<IndexType>& N2, mtx::Matrix<DataType>& D2,
size_t k, size_t m,
mtx::Matrix<IndexType>& N, mtx::Matrix<DataType>& D) {
size_t numQueries = N1.rows();
// Never keep more candidates than the two sources provide together.
size_t maxCandidates = std::min((IndexType)m, (IndexType)(N1.columns() + N2.columns()));

for (size_t q = 0; q < numQueries; ++q) {
// Combine distances and neighbors
std::vector<std::pair<DataType, IndexType>> candidates(N1.columns() + N2.columns());

// Concatenate N1 and N2 rows
for (size_t i = 0; i < N1.columns(); ++i) {
candidates[i] = {D1.get(q, i), N1.get(q, i)};
for (size_t i = 0; i < N2.columns(); ++i) {
candidates[i + N1.columns()] = {D2.get(q, i), N2.get(q, i)};

// Keep only the top-m candidates
v0::quickselect(candidates, maxCandidates);

// Sort the top-m candidates (pairs compare by distance first, then by index)
std::sort(candidates.begin(), candidates.begin() + maxCandidates);

// If m < k, pad the remaining slots with invalid values
for (size_t i = 0; i < k; ++i) {
if (i < maxCandidates) {
D.set(candidates[i].first, q, i);
N.set(candidates[i].second, q, i);
} else {
// Fewer candidates than requested: mark the slot as "no neighbor".
D.set(std::numeric_limits<DataType>::infinity(), q, i);
N.set(static_cast<IndexType>(-1), q, i); // Invalid index (end)

// Recursive k-nearest-neighbor search.
//
// Splits the corpus C and the query set Q in halves, searches every
// (corpus half, query half) combination recursively, and merges the two partial
// results of each query half with mergeResultsWithM. Small inputs are delegated
// to v0::knnsearch.
//
// \param C           corpus matrix
// \param Q           query matrix
// \param idx_offset  value added to corpus-local indices so results refer to the original corpus
// \param k           number of neighbors per query
// \param m           candidate limit forwarded to mergeResultsWithM
// \param idx         output neighbor indices; written through slices that alias its storage
// \param dst         output neighbor distances; written through slices that alias its storage
template<typename MatrixD, typename MatrixI>
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {

using DstType = typename MatrixD::dataType;
using IdxType = typename MatrixI::dataType;

if (C.rows() <= 8 || Q.rows() <= 4) {
// Base case: Call knnsearch directly
// NOTE(review): no `return` is visible after this call in the rendered diff (the
// page drops some short lines) — confirm the function actually returns here.
v0::knnsearch(C, Q, idx_offset, k, m, idx, dst);

// Divide Corpus and Query into subsets
IdxType midC = C.rows() / 2;
IdxType midQ = Q.rows() / 2;

// Slice corpus and query matrixes
// The 4-argument constructor takes (data, major_start, major_length, minor_length)
// and wraps the caller's memory instead of copying it.
// NOTE(review): the C-style casts strip const from C.data() / Q.data().
// NOTE(review): the second halves use length midC / midQ, so with an odd number of
// rows the last major-order line (index 2*mid) belongs to no slice — for Q this means
// the last query's output row is never written. Confirm inputs are always even-sized
// or use (rows - mid) as the second length.
MatrixD C1((DstType*)C.data(), 0, midC, C.columns());
MatrixD C2((DstType*)C.data(), midC, midC, C.columns());
MatrixD Q1((DstType*)Q.data(), 0, midQ, Q.columns());
MatrixD Q2((DstType*)Q.data(), midQ, midQ, Q.columns());

// Allocate temporary matrixes for all permutations
// Naming: X<query half>_<corpus half>, e.g. N1_2 = indices for Q1 searched against C2.
MatrixI N1_1(midQ, k), N1_2(midQ, k), N2_1(midQ, k), N2_2(midQ, k);
MatrixD D1_1(midQ, k), D1_2(midQ, k), D2_1(midQ, k), D2_2(midQ, k);

// Recursive calls
// Searches against C2 shift the offset by midC so indices stay corpus-global.
knnsearch(C1, Q1, idx_offset, k, m, N1_1, D1_1);
knnsearch(C2, Q1, idx_offset + midC, k, m, N1_2, D1_2);
knnsearch(C1, Q2, idx_offset, k, m, N2_1, D2_1);
knnsearch(C2, Q2, idx_offset + midC, k, m, N2_2, D2_2);

// slice output matrixes
// N1/D1 and N2/D2 alias the first and second half of idx/dst respectively.
MatrixI N1((IdxType*)idx.data(), 0, midQ, k);
MatrixI N2((IdxType*)idx.data(), midQ, midQ, k);
MatrixD D1((DstType*)dst.data(), 0, midQ, k);
MatrixD D2((DstType*)dst.data(), midQ, midQ, k);

// Merge results in place
mergeResultsWithM(N1_1, D1_1, N1_2, D1_2, k, m, N1, D1);
mergeResultsWithM(N2_1, D2_1, N2_2, D2_2, k, m, N2, D2);


} // namespace v1

#endif /* V1_HPP_ */

+ 11
- 0
homework_1/matlab/mergeReducedResultsWithMink.m Целия файл

@@ -0,0 +1,11 @@
function [N, D] = mergeReducedResultsWithMink(N_sub, D_sub, C_sub, k, ~)
% Merge reduced results for one subset of queries using mink.
%
% Inputs:
%   N_sub - candidate neighbor indices, one row per query
%   D_sub - candidate distances, same layout as N_sub
%   C_sub - not used in the body shown here
%   k     - number of neighbors to keep per query
%   ~     - fifth argument is ignored
% Outputs:
%   N - neighbor indices picked from N_sub for the k smallest distances of each row
%   D - the k smallest distances of each row of D_sub, as returned by mink
%
% NOTE(review): repmat below always builds k columns; if mink returns fewer
% columns than k (presumably when D_sub has fewer than k columns), the sizes
% passed to sub2ind would not match — confirm callers guarantee enough candidates.
numQueries = size(N_sub, 1); % Number of queries

% Find the k smallest distances and their indices using mink
[D, idx] = mink(D_sub, k, 2);

% Select the corresponding neighbors based on the indices
% (row subscripts 1..numQueries repeated k times, column subscripts from mink)
N = N_sub(sub2ind(size(N_sub), ...
repmat((1:numQueries)', 1, k), idx));

+ 19
- 0
homework_1/matlab/mergeResults.m Целия файл

@@ -0,0 +1,19 @@
function [N, D] = mergeResults(N1, D1, N2, D2, C1, C2, k, ~)
% Merge neighbors from two sources for a specific subset of queries.
%
% Inputs:
%   N1, D1 - neighbor indices / distances from the first source (one row per query)
%   N2, D2 - neighbor indices / distances from the second source
%   C1     - first corpus part; only its row count is used, to shift the indices in N2
%   C2     - not used in the body shown here
%   k      - number of neighbors to keep per query
%   ~      - eighth argument is ignored
% Outputs:
%   N - numQueries-by-k neighbor indices, ordered by ascending distance
%   D - numQueries-by-k distances matching N
numQueries = size(N1, 1); % Queries corresponding to N1 (or N2)
N_combined = [N1, N2 + size(C1, 1)]; % Adjust indices for C2
D_combined = [D1, D2];
% Sort distances and select top-k for each query
[D_sorted, idx] = sort(D_combined, 2); % Sort by distance for each query
idx = idx(:, 1:k); % Top-k indices

% Select corresponding neighbors
N = zeros(numQueries, k);
for i = 1:numQueries
% idx(i, :) holds column positions into row i of the combined matrices
N(i, :) = N_combined(i, idx(i, :));

% Select corresponding distances
D = D_sorted(:, 1:k);

+ 46
- 0
homework_1/matlab/mergeResultsWithM.m Целия файл

@@ -0,0 +1,46 @@
function [N, D] = mergeResultsWithM(N1, D1, N2, D2, C1, C2, k, m)
% Merge neighbors from two sources with a limit on candidate neighbors (m).
%
% Inputs:
%   N1, D1 - neighbor indices / distances from the first source (one row per query)
%   N2, D2 - neighbor indices / distances from the second source
%   C1     - first corpus part; only its row count is used, to shift the indices in N2
%   C2     - not used in the body shown here
%   k      - number of neighbors returned per query
%   m      - maximum number of candidates kept before the final top-k selection
% Outputs:
%   N - numQueries-by-k neighbor indices (0 marks a padded, invalid slot)
%   D - numQueries-by-k distances (Inf marks a padded slot)
numQueries = size(N1, 1); % Number of queries in this subset
maxCandidates = min(m, size(N1, 2) + size(N2, 2)); % Maximum candidates to consider

% Combine distances and neighbors
N_combined = [N1, N2 + size(C1, 1)]; % Adjust indices for C2
D_combined = [D1, D2];

% Sort distances and keep only top-m candidates for each query
[D_sorted, idx] = sort(D_combined, 2, 'ascend');
D_sorted = D_sorted(:, 1:maxCandidates); % Keep only top-m distances
idx = idx(:, 1:maxCandidates); % Keep indices corresponding to top-m distances

% Select the corresponding neighbors
% (the vectorized sub2ind form is kept below, commented out; the loop is used instead)
%N_sorted = N_combined(sub2ind(size(N_combined), ...
% repmat((1:numQueries)', 1, maxCandidates), idx));
N_sorted = zeros(numQueries, maxCandidates); % Initialize output
for i = 1:numQueries
for j = 1:maxCandidates
N_sorted(i, j) = N_combined(i, idx(i, j));

% Handle cases where m < k
if maxCandidates < k
% Pad with Inf distances and invalid indices
D_sorted = [D_sorted, Inf(numQueries, k - maxCandidates)];
N_sorted = [N_sorted, zeros(numQueries, k - maxCandidates)];

% Extract top-k from the reduced set of candidates
[D, idx_final] = sort(D_sorted, 2, 'ascend');
D = D(:, 1:k); % Final top-k distances
%N = N_sorted(sub2ind(size(N_sorted), ...
% repmat((1:numQueries)', 1, k), idx_final(:, 1:k)));
% Extract top-k neighbors using a loop
N = zeros(numQueries, k); % Initialize output
for i = 1:numQueries
for j = 1:k
N(i, j) = N_sorted(i, idx_final(i, j));


+ 32
- 0
homework_1/matlab/recursiveKNN.m Целия файл

@@ -0,0 +1,32 @@
function [neighbors, distances] = recursiveKNN(C, Q, k, m)
% Recursive k-nearest-neighbor search.
%
% Splits corpus C and queries Q in halves, searches every (corpus half,
% query half) pair recursively, and merges the partial results per query half
% with mergeResultsWithM.
%
% Inputs:
%   C - corpus, one point per row
%   Q - queries, one point per row
%   k - number of neighbors per query
%   m - candidate limit passed to mergeResultsWithM
% Outputs:
%   neighbors - neighbor indices, rows ordered as in Q
%   distances - distances matching neighbors
%
% Recursive break, call knnsearch
% NOTE(review): no return/else is visible after the base-case call in this
% rendered diff (the page drops some short lines) — confirm against the repo.
if size(C, 1) <= 1000 || size(Q, 1) <= 500 % Adjustable limit
[neighbors, distances] = knnsearch(C, Q, 'K', k);

% Divide into subsets
midC = floor(size(C, 1) / 2);
midQ = floor(size(Q, 1) / 2);
C1 = C(1:midC, :);
C2 = C(midC+1:end, :);
Q1 = Q(1:midQ, :);
Q2 = Q(midQ+1:end, :);

% Recursive calls (naming: X<query half>_<corpus half>)
[N1_1, D1_1] = recursiveKNN(C1, Q1, k, m);
[N1_2, D1_2] = recursiveKNN(C2, Q1, k, m);
[N2_1, D2_1] = recursiveKNN(C1, Q2, k, m);
[N2_2, D2_2] = recursiveKNN(C2, Q2, k, m);

% Merge the two corpus halves for each query half
[N1, D1] = mergeResultsWithM(N1_1, D1_1, N1_2, D1_2, C1, C2, k, m);
[N2, D2] = mergeResultsWithM(N2_1, D2_1, N2_2, D2_2, C1, C2, k, m);

% Combine results for Q1 and Q2
neighbors = [N1; N2];
distances = [D1; D2];


+ 23
- 0
homework_1/matlab/run_test.m Целия файл

@@ -0,0 +1,23 @@

% Comparison script: runs recursiveKNN and the built-in knnsearch on the same
% random data, times both, and prints how many results differ.
%C = rand(10000, 2); % Corpus
%Q = rand(10000, 2); % Queries
C = rand(20000, 2); % Two clusters
Q = C;
%Q = rand(10000, 2); % Queries near the middle
k = 100; % Number of neighbors
m = 100; % Max candidates per query
% NOTE(review): `count` is declared global and reset here, but none of the
% functions visible in this diff reference it — confirm whether it is still needed.
global count;

count =0;
% NOTE(review): the output name `pi` shadows MATLAB's built-in pi for the rest of the script.
tic; [pi, pd] = recursiveKNN(C, Q, k, m); toc
tic; [si, sd] = knnsearch(C, Q, 'k', k); toc

[a,b] = size(si);
% No trailing semicolons below, so the three values are printed.
all_neighbors = a*b
% Fraction of index entries that are equal in both results
accuracy = 1 - nnz(si-pi)/all_neighbors
% Number of distance entries that differ between the two results
false_neighbors = nnz(sd-pd)

+ 0
- 24
homework_1/matlab/test.m Целия файл

@@ -1,24 +0,0 @@

C = rand(40000,4);
Q = rand(4000,4);

disp ('C-Q');
disp ('build-in')
tic; [i1, d1] = knnsearch(C, Q, 'k', 4); toc
disp ('mine')
tic; [i2, d2] = knnsearch2(C, Q, 4); toc

disp (' ');
disp (' ');
disp ('C-C');
disp ('build-in')
tic; [i1, d1] = knnsearch(C, C, 'k', 4); toc
disp ('mine')
tic; [i2, d2] = knnsearch2(C, C, 4); toc

+ 33
- 10
homework_1/src/main.cpp Целия файл

@@ -13,11 +13,12 @@
#include <unistd.h>
#include <cstdio>

#include <v0.hpp>
#include <v1.hpp>
#include <matrix.hpp>
#include <utils.hpp>
#include <config.h>
#include "matrix.hpp"

#include "v0.hpp"
#include "v1.hpp"
#include "utils.hpp"
#include "config.h"

// Global session data
session_t session;
@@ -108,8 +109,7 @@ bool get_options(int argc, char* argv[]){
return status;


#ifndef TESTING
int main(int argc, char* argv[]) try {
// Instantiate matrixes
MatrixDst Corpus;
@@ -117,6 +117,12 @@ int main(int argc, char* argv[]) try {
MatrixIdx Idx;
MatrixDst Dst;

using namespace v0;
using namespace v1;

// try to read command line
if (!get_options(argc, argv))
@@ -132,17 +138,21 @@ int main(int argc, char* argv[]) try {
timer.print_dt("Load hdf5 files");

// Prepare output memory
Idx.resize(Query.rows(), session.k);
Dst.resize(Query.rows(), session.k);

// Do the search
logger << "Start knnsearch ...";
if (session.queryMtx)
knnsearch(Corpus, Query, session.k, Idx, Dst);
knnsearch(Corpus, Query, 0, session.k, session.k, Idx, Dst);
knnsearch(Corpus, Corpus, session.k, Idx, Dst);
knnsearch(Corpus, Corpus, 0, session.k, session.k, Idx, Dst);
logger << " Done" << logger.endl;

// Store data
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
@@ -158,4 +168,17 @@ catch (std::exception& e) {

#elif defined TESTING

#include <gtest/gtest.h>
#include <exception>

GTEST_API_ int main(int argc, char **argv) try {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
catch (std::exception& e) {
std::cout << "Exception: " << e.what() << '\n';


+ 317
- 0
homework_1/src/tests.cpp Целия файл

@@ -0,0 +1,317 @@
* \file tests.cpp
* \brief PDS homework_1 tests
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>

#include <gtest/gtest.h>

#include "matrix.hpp"

#include "v0.hpp"
#include "v1.hpp"
#include "utils.hpp"
#include "config.h"

using matrix_t = mtx::Matrix<int>;

// =====================================
// C1, Q1
mtx::Matrix<double> C1(10,2, {
0.8147, 0.1576,
0.9058, 0.9706,
0.1270, 0.9572,
0.9134, 0.4854,
0.6324, 0.8003,
0.0975, 0.1419,
0.2785, 0.4218,
0.5469, 0.9157,
0.9575, 0.7922,
0.9649, 0.9595

mtx::Matrix<double> Q1(5,2, {
0.6557, 0.7577,
0.0357, 0.7431,
0.8491, 0.3922,
0.9340, 0.6555,
0.6787, 0.1712

// =====================================
// C2, Q2
mtx::Matrix<double> C2(16,4, {
0.7060, 0.4456, 0.5060, 0.6160,
0.0318, 0.6463, 0.6991, 0.4733,
0.2769, 0.7094, 0.8909, 0.3517,
0.0462, 0.7547, 0.9593, 0.8308,
0.0971, 0.2760, 0.5472, 0.5853,
0.8235, 0.6797, 0.1386, 0.5497,
0.6948, 0.6551, 0.1493, 0.9172,
0.3171, 0.1626, 0.2575, 0.2858,
0.9502, 0.1190, 0.8407, 0.7572,
0.0344, 0.4984, 0.2543, 0.7537,
0.4387, 0.9597, 0.8143, 0.3804,
0.3816, 0.3404, 0.2435, 0.5678,
0.7655, 0.5853, 0.9293, 0.0759,
0.7952, 0.2238, 0.3500, 0.0540,
0.1869, 0.7513, 0.1966, 0.5308,
0.4898, 0.2551, 0.2511, 0.7792

mtx::Matrix<double> Q2(8,4, {
0.9340, 0.3112, 0.4505, 0.0782,
0.1299, 0.5285, 0.0838, 0.4427,
0.5688, 0.1656, 0.2290, 0.1067,
0.4694, 0.6020, 0.9133, 0.9619,
0.0119, 0.2630, 0.1524, 0.0046,
0.3371, 0.6541, 0.8258, 0.7749,
0.1622, 0.6892, 0.5383, 0.8173,
0.7943, 0.7482, 0.9961, 0.8687

* ==========================================
* pdist2
TEST(Tv0_UT, pdist2_test1) {

mtx::Matrix<double> D1_exp(10, 5, {
0.6208, 0.9745, 0.2371, 0.5120, 0.1367,
0.3284, 0.8993, 0.5811, 0.3164, 0.8310,
0.5651, 0.2327, 0.9169, 0.8616, 0.9603,
0.3749, 0.9147, 0.1132, 0.1713, 0.3921,
0.0485, 0.5994, 0.4621, 0.3346, 0.6308,
0.8312, 0.6044, 0.7922, 0.9815, 0.5819,
0.5052, 0.4028, 0.5714, 0.6959, 0.4722,
0.1919, 0.5395, 0.6045, 0.4665, 0.7561,
0.3037, 0.9231, 0.4144, 0.1387, 0.6807,
0.3692, 0.9540, 0.5790, 0.3056, 0.8386

mtx::Matrix<double> D (10,5);

v0::pdist2(C1, Q1, D);

for (size_t i = 0 ; i< D.rows() ; ++i)
for (size_t j = 0 ; j<D.columns() ; ++j) {
EXPECT_EQ (D1_exp.get(i ,j) + 0.01 > D(i, j), true);
EXPECT_EQ (D1_exp.get(i ,j) - 0.01 < D(i, j), true);

TEST(Tv0_UT, pdist2_test2) {

mtx::Matrix<double> D2_exp(16, 8, {
0.6020, 0.7396, 0.6583, 0.6050, 1.0070, 0.5542, 0.6298, 0.6352,
1.0696, 0.6348, 0.9353, 0.6914, 0.8160, 0.4475, 0.4037, 0.9145,
0.9268, 0.8450, 0.9376, 0.6492, 0.9671, 0.4360, 0.5956, 0.7400,
1.3455, 0.9876, 1.2953, 0.4709, 1.2557, 0.3402, 0.4417, 0.7500,
0.9839, 0.5476, 0.7517, 0.7216, 0.7074, 0.5605, 0.4784, 0.9954,
0.6839, 0.7200, 0.7305, 0.9495, 1.0628, 0.8718, 0.8178, 0.9179,
0.9850, 0.7514, 0.9585, 0.7996, 1.2054, 0.7784, 0.6680, 0.8591,
0.6950, 0.4730, 0.3103, 1.0504, 0.4397, 0.8967, 0.8140, 1.2066,
0.8065, 1.2298, 0.9722, 0.7153, 1.3933, 0.8141, 1.0204, 0.6758,
1.1572, 0.3686, 0.9031, 0.8232, 0.7921, 0.6656, 0.3708, 1.0970,
0.9432, 0.9049, 1.0320, 0.6905, 1.1167, 0.5094, 0.6455, 0.6653,
0.7672, 0.3740, 0.5277, 0.8247, 0.6842, 0.6945, 0.5648, 0.9968,
0.5768, 1.1210, 0.8403, 0.9345, 1.1316, 0.8292, 1.0380, 0.8127,
0.1939, 0.8703, 0.2684, 1.1794, 0.8103, 1.0683, 1.1115, 1.1646,
1.0106, 0.2708, 0.8184, 0.8954, 0.7402, 0.6982, 0.4509, 1.0594,
0.8554, 0.5878, 0.6834, 0.7699, 0.9155, 0.7161, 0.6162, 0.9481

mtx::Matrix<double> D (16,8);

v0::pdist2(C2, Q2, D);

for (size_t i = 0 ; i< D.rows() ; ++i)
for (size_t j = 0 ; j<D.columns() ; ++j) {
EXPECT_EQ (D2_exp.get(i ,j) + 0.01 > D(i, j), true);
EXPECT_EQ (D2_exp.get(i ,j) - 0.01 < D(i, j), true);

* ==========================================
* v0::knn
TEST(Tv0_UT, knn_test1) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(5, k, {
5, 8, 9,
3, 7, 8,
4, 1, 9,
9, 4, 10,
1, 4, 7

mtx::Matrix<double> Dst_exp(5, k, {
0.0485, 0.1919, 0.3037,
0.2327, 0.4028, 0.5395,
0.1132, 0.2371, 0.4144,
0.1387, 0.1713, 0.3056,
0.1367, 0.3921, 0.4722

mtx::Matrix<uint32_t> Idx(5, k);
mtx::Matrix<double> Dst(5, k);

v0::knnsearch(C1, Q1, 0, k, 0, Idx, Dst);

for (size_t i = 0 ; i< Idx.rows() ; ++i)
for (size_t j = 0 ; j<Idx.columns() ; ++j) {
EXPECT_EQ (Idx_exp(i ,j) == Idx(i, j) + 1, true); // matlab starts from 1
EXPECT_EQ (Dst_exp.get(i ,j) + 0.01 > Dst(i, j), true);
EXPECT_EQ (Dst_exp.get(i ,j) - 0.01 < Dst(i, j), true);


TEST(Tv0_UT, knn_test2) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(8, k, {
14, 13, 1,
15, 10, 12,
14, 8, 12,
4, 1, 3,
8, 12, 5,
4, 3, 2,
10, 2, 4,
1, 11, 9

mtx::Matrix<double> Dst_exp(8, k, {
0.1939, 0.5768, 0.6020,
0.2708, 0.3686, 0.3740,
0.2684, 0.3103, 0.5277,
0.4709, 0.6050, 0.6492,
0.4397, 0.6842, 0.7074,
0.3402, 0.4360, 0.4475,
0.3708, 0.4037, 0.4417,
0.6352, 0.6653, 0.6758

mtx::Matrix<uint32_t> Idx(8, k);
mtx::Matrix<double> Dst(8, k);

v0::knnsearch(C2, Q2, 0, k, 0, Idx, Dst);

for (size_t i = 0 ; i< Idx.rows() ; ++i)
for (size_t j = 0 ; j<Idx.columns() ; ++j) {
EXPECT_EQ (Idx_exp(i ,j) == Idx(i, j) + 1, true); // matlab starts from 1
EXPECT_EQ (Dst_exp.get(i ,j) + 0.01 > Dst(i, j), true);
EXPECT_EQ (Dst_exp.get(i ,j) - 0.01 < Dst(i, j), true);


* ==========================================
* v1::knn
TEST(Tv1_UT, knn_test1) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(8, k, {
14, 13, 1,
15, 10, 12,
14, 8, 12,
4, 1, 3,
8, 12, 5,
4, 3, 2,
10, 2, 4,
1, 11, 9

mtx::Matrix<double> Dst_exp(8, k, {
0.1939, 0.5768, 0.6020,
0.2708, 0.3686, 0.3740,
0.2684, 0.3103, 0.5277,
0.4709, 0.6050, 0.6492,
0.4397, 0.6842, 0.7074,
0.3402, 0.4360, 0.4475,
0.3708, 0.4037, 0.4417,
0.6352, 0.6653, 0.6758

mtx::Matrix<uint32_t> Idx(8, k);
mtx::Matrix<double> Dst(8, k);

v1::knnsearch(C2, Q2, 0, k, k, Idx, Dst);

for (size_t i = 0 ; i< Idx.rows() ; ++i)
for (size_t j = 0 ; j<Idx.columns() ; ++j) {
EXPECT_EQ (Idx_exp(i ,j) == Idx(i, j) + 1, true); // matlab starts from 1
EXPECT_EQ (Dst_exp.get(i ,j) + 0.01 > Dst(i, j), true);
EXPECT_EQ (Dst_exp.get(i ,j) - 0.01 < Dst(i, j), true);


// all-to-all
TEST(Tv1_UT, knn_test2) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(16, k, {
1, 16, 12,
2, 3, 5,
3, 11, 2,
4, 2, 3,
5, 10, 2,
6, 7, 1,
7, 6, 16,
8, 12, 5,
9, 1, 16,
10, 15, 5,
11, 3, 2,
12, 16, 8,
13, 3, 11,
14, 8, 1,
15, 10, 12,
16, 12, 1

mtx::Matrix<double> Dst_exp(16, k, {
0, 0.4179, 0.4331,
0, 0.3401, 0.4207,
0, 0.3092, 0.3401,
0, 0.4555, 0.5381,
0, 0.4093, 0.4207,
0, 0.3903, 0.4560,
0, 0.3903, 0.4811,
0, 0.3398, 0.4846,
0, 0.5461, 0.7607,
0, 0.3745, 0.4093,
0, 0.3092, 0.5345,
0, 0.2524, 0.3398,
0, 0.5759, 0.5941,
0, 0.5428, 0.6304,
0, 0.3745, 0.4586,
0, 0.2524, 0.4179

mtx::Matrix<uint32_t> Idx(16, k);
mtx::Matrix<double> Dst(16, k);

v1::knnsearch(C2, C2, 0, k, k, Idx, Dst);

for (size_t i = 0 ; i< Idx.rows() ; ++i)
for (size_t j = 0 ; j<Idx.columns() ; ++j) {
EXPECT_EQ (Idx_exp(i ,j) == Idx(i, j) + 1, true); // matlab starts from 1
EXPECT_EQ (Dst_exp.get(i ,j) + 0.01 > Dst(i, j), true);
EXPECT_EQ (Dst_exp.get(i ,j) - 0.01 < Dst(i, j), true);

