214 lines
6.6 KiB
C++
214 lines
6.6 KiB
C++
/**
|
|
* \file v1.hpp
|
|
* \brief
|
|
*
|
|
* \author
|
|
* Christos Choutouridis AEM:8997
|
|
* <cchoutou@ece.auth.gr>
|
|
*/
|
|
#ifndef V1_HPP_
|
|
#define V1_HPP_
|
|
|
|
#include <vector>
|
|
#include <algorithm>
|
|
|
|
#include "matrix.hpp"
|
|
#include "v0.hpp"
|
|
#include "config.h"
|
|
|
|
#if defined CILK
|
|
#include <cilk/cilk.h>
|
|
#include <cilk/cilk_api.h>
|
|
//#include <cilk/reducer_opadd.h>
|
|
|
|
#elif defined OMP
|
|
#include <omp.h>
|
|
|
|
#elif defined PTHREADS
|
|
#include <thread>
|
|
#include <numeric>
|
|
#include <functional>
|
|
//#include <random>
|
|
|
|
#else
|
|
#endif
|
|
|
|
|
|
void init_workers();
|
|
|
|
namespace v1 {
|
|
|
|
|
|
/*!
|
|
*
|
|
* Merge knnsearch results and select the closest neighbors
|
|
*
|
|
* \tparam DataType
|
|
* \tparam IndexType
|
|
* \param N1 Neighbors results from one knnsearch
|
|
* \param D1 Distances results from one knnsearcs
|
|
* \param N2 Neighbors results from second knnsearch
|
|
* \param D2 Distances results from second knnsearch
|
|
* \param k How many
|
|
* \param m How accurate
|
|
* \param N Output for Neighbors
|
|
* \param D Output for Distances
|
|
*/
|
|
template <typename DataType, typename IndexType>
|
|
void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1,
|
|
mtx::Matrix<IndexType>& N2, mtx::Matrix<DataType>& D2,
|
|
size_t k, size_t m,
|
|
mtx::Matrix<IndexType>& N, mtx::Matrix<DataType>& D) {
|
|
size_t numQueries = N1.rows();
|
|
size_t maxCandidates = std::min((IndexType)m, (IndexType)(N1.columns() + N2.columns()));
|
|
|
|
for (size_t q = 0; q < numQueries; ++q) {
|
|
// Combine distances and neighbors
|
|
std::vector<std::pair<DataType, IndexType>> candidates(N1.columns() + N2.columns());
|
|
|
|
// Concatenate N1 and N2 rows
|
|
for (size_t i = 0; i < N1.columns(); ++i) {
|
|
candidates[i] = {D1.get(q, i), N1.get(q, i)};
|
|
}
|
|
for (size_t i = 0; i < N2.columns(); ++i) {
|
|
candidates[i + N1.columns()] = {D2.get(q, i), N2.get(q, i)};
|
|
}
|
|
|
|
// Keep only the top-m candidates
|
|
v0::quickselect(candidates, maxCandidates);
|
|
|
|
// Sort the top-m candidates
|
|
std::sort(candidates.begin(), candidates.begin() + maxCandidates);
|
|
|
|
// If m < k, pad the remaining slots with invalid values
|
|
for (size_t i = 0; i < k; ++i) {
|
|
if (i < maxCandidates) {
|
|
D.set(candidates[i].first, q, i);
|
|
N.set(candidates[i].second, q, i);
|
|
} else {
|
|
D.set(std::numeric_limits<DataType>::infinity(), q, i);
|
|
N.set(static_cast<IndexType>(-1), q, i); // Invalid index (end)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* The main parallelizable body
|
|
*/
|
|
template<typename MatrixD, typename MatrixI>
|
|
void worker_body (std::vector<MatrixD>& corpus_slices,
|
|
std::vector<MatrixD>& query_slices,
|
|
MatrixI& idx,
|
|
MatrixD& dst,
|
|
size_t slice,
|
|
size_t num_slices, size_t corpus_slice_size, size_t query_slice_size,
|
|
size_t k,
|
|
size_t m) {
|
|
// "load" types
|
|
using DstType = typename MatrixD::dataType;
|
|
using IdxType = typename MatrixI::dataType;
|
|
|
|
|
|
for (size_t ci = 0; ci < num_slices; ++ci) {
|
|
size_t idx_offset = ci * corpus_slice_size;
|
|
|
|
// Intermediate matrixes for intermediate results
|
|
MatrixI temp_idx(query_slices[slice].rows(), k);
|
|
MatrixD temp_dst(query_slices[slice].rows(), k);
|
|
|
|
// kNN for each combination
|
|
v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst);
|
|
|
|
// Merge temporary results to final results
|
|
MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k);
|
|
MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k);
|
|
|
|
mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice);
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* \param C Is a MxD matrix (Corpus)
|
|
* \param Q Is a NxD matrix (Query)
|
|
* \param num_slices How many slices to Corpus-Query
|
|
* \param k The number of nearest neighbors needed
|
|
* \param m accuracy
|
|
* \param idx Is the Nxk matrix with the k indexes of the C points, that are
|
|
* neighbors of the nth point of Q
|
|
* \param dst Is the Nxk matrix with the k distances to the C points of the nth
|
|
* point of Q
|
|
*/
|
|
template<typename MatrixD, typename MatrixI>
|
|
void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
|
|
using DstType = typename MatrixD::dataType;
|
|
using IdxType = typename MatrixI::dataType;
|
|
|
|
//Slice calculations
|
|
size_t corpus_slice_size = C.rows() / ((num_slices == 0)? 1:num_slices);
|
|
size_t query_slice_size = Q.rows() / ((num_slices == 0)? 1:num_slices);
|
|
|
|
// Make slices
|
|
std::vector<MatrixD> corpus_slices;
|
|
std::vector<MatrixD> query_slices;
|
|
|
|
for (size_t i = 0; i < num_slices; ++i) {
|
|
corpus_slices.emplace_back(
|
|
(DstType*)C.data(),
|
|
i * corpus_slice_size,
|
|
(i == num_slices - 1 ? C.rows() - i * corpus_slice_size : corpus_slice_size),
|
|
C.columns());
|
|
query_slices.emplace_back(
|
|
(DstType*)Q.data(),
|
|
i * query_slice_size,
|
|
(i == num_slices - 1 ? Q.rows() - i * query_slice_size : query_slice_size),
|
|
Q.columns());
|
|
}
|
|
|
|
// Initialize results
|
|
for (size_t i = 0; i < dst.rows(); ++i) {
|
|
for (size_t j = 0; j < dst.columns(); ++j) {
|
|
dst.set(std::numeric_limits<DstType>::infinity(), i, j);
|
|
idx.set(static_cast<IdxType>(-1), i, j);
|
|
}
|
|
}
|
|
|
|
// Main loop
|
|
#if defined OMP
|
|
#pragma omp parallel for
|
|
for (size_t qi = 0; qi < num_slices; ++qi) {
|
|
worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
|
|
}
|
|
#elif defined CILK
|
|
cilk_for (size_t qi = 0; qi < num_slices; ++qi) {
|
|
worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
|
|
}
|
|
#elif defined PTHREADS
|
|
std::vector<std::thread> workers;
|
|
for (size_t qi = 0; qi < num_slices; ++qi) {
|
|
workers.push_back(
|
|
std::thread (worker_body<MatrixD, MatrixI>,
|
|
std::ref(corpus_slices), std::ref(query_slices),
|
|
std::ref(idx), std::ref(dst),
|
|
qi,
|
|
num_slices, corpus_slice_size, query_slice_size,
|
|
k, m)
|
|
);
|
|
}
|
|
// Join threads
|
|
std::for_each(workers.begin(), workers.end(), [](std::thread& t){
|
|
t.join();
|
|
});
|
|
|
|
#else
|
|
for (size_t qi = 0; qi < num_slices; ++qi) {
|
|
worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
} // namespace v1
|
|
|
|
#endif /* V1_HPP_ */
|