|
|
@@ -60,17 +60,17 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1, |
|
|
|
size_t k, size_t m, |
|
|
|
mtx::Matrix<IndexType>& N, mtx::Matrix<DataType>& D) { |
|
|
|
size_t numQueries = N1.rows(); |
|
|
|
size_t maxCandidates = std::min((IndexType)m, (IndexType)(N1.columns() + N2.columns())); |
|
|
|
size_t maxCandidates = std::min(static_cast<IndexType>(m), static_cast<IndexType>(N1.columns() + N2.columns())); |
|
|
|
|
|
|
|
for (size_t q = 0; q < numQueries; ++q) { |
|
|
|
for (size_t q = 0UL; q < numQueries; ++q) { |
|
|
|
// Combine distances and neighbors |
|
|
|
std::vector<std::pair<DataType, IndexType>> candidates(N1.columns() + N2.columns()); |
|
|
|
|
|
|
|
// Concatenate N1 and N2 rows |
|
|
|
for (size_t i = 0; i < N1.columns(); ++i) { |
|
|
|
for (size_t i = 0UL; i < N1.columns(); ++i) { |
|
|
|
candidates[i] = {D1.get(q, i), N1.get(q, i)}; |
|
|
|
} |
|
|
|
for (size_t i = 0; i < N2.columns(); ++i) { |
|
|
|
for (size_t i = 0UL; i < N2.columns(); ++i) { |
|
|
|
candidates[i + N1.columns()] = {D2.get(q, i), N2.get(q, i)}; |
|
|
|
} |
|
|
|
|
|
|
@@ -81,7 +81,7 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1, |
|
|
|
std::sort(candidates.begin(), candidates.begin() + maxCandidates); |
|
|
|
|
|
|
|
// If m < k, pad the remaining slots with invalid values |
|
|
|
for (size_t i = 0; i < k; ++i) { |
|
|
|
for (size_t i = 0UL; i < k; ++i) { |
|
|
|
if (i < maxCandidates) { |
|
|
|
D.set(candidates[i].first, q, i); |
|
|
|
N.set(candidates[i].second, q, i); |
|
|
@@ -110,7 +110,7 @@ void worker_body (std::vector<MatrixD>& corpus_slices, |
|
|
|
using IdxType = typename MatrixI::dataType; |
|
|
|
|
|
|
|
|
|
|
|
for (size_t ci = 0; ci < num_slices; ++ci) { |
|
|
|
for (size_t ci = 0UL; ci < num_slices; ++ci) { |
|
|
|
size_t idx_offset = ci * corpus_slice_size; |
|
|
|
|
|
|
|
// Intermediate matrixes for intermediate results |
|
|
@@ -121,8 +121,8 @@ void worker_body (std::vector<MatrixD>& corpus_slices, |
|
|
|
v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst); |
|
|
|
|
|
|
|
// Merge temporary results to final results |
|
|
|
MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k); |
|
|
|
MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k); |
|
|
|
MatrixI idx_slice(static_cast<IdxType*>(idx.data()), slice * query_slice_size, query_slices[slice].rows(), k); |
|
|
|
MatrixD dst_slice(static_cast<DstType*>(dst.data()), slice * query_slice_size, query_slices[slice].rows(), k); |
|
|
|
|
|
|
|
mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice); |
|
|
|
} |
|
|
@@ -145,29 +145,29 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, Ma |
|
|
|
using IdxType = typename MatrixI::dataType; |
|
|
|
|
|
|
|
//Slice calculations |
|
|
|
size_t corpus_slice_size = C.rows() / ((num_slices == 0)? 1:num_slices); |
|
|
|
size_t query_slice_size = Q.rows() / ((num_slices == 0)? 1:num_slices); |
|
|
|
size_t corpus_slice_size = C.rows() / ((num_slices == 0UL)? 1UL:num_slices); |
|
|
|
size_t query_slice_size = Q.rows() / ((num_slices == 0UL)? 1UL:num_slices); |
|
|
|
|
|
|
|
// Make slices |
|
|
|
std::vector<MatrixD> corpus_slices; |
|
|
|
std::vector<MatrixD> query_slices; |
|
|
|
std::vector<MatrixD> corpus_slices{}; |
|
|
|
std::vector<MatrixD> query_slices{}; |
|
|
|
|
|
|
|
for (size_t i = 0; i < num_slices; ++i) { |
|
|
|
for (size_t i = 0UL; i < num_slices; ++i) { |
|
|
|
corpus_slices.emplace_back( |
|
|
|
(DstType*)C.data(), |
|
|
|
static_cast<DstType*>(C.data()), |
|
|
|
i * corpus_slice_size, |
|
|
|
(i == num_slices - 1 ? C.rows() - i * corpus_slice_size : corpus_slice_size), |
|
|
|
(i == num_slices - 1UL ? C.rows() - i * corpus_slice_size : corpus_slice_size), |
|
|
|
C.columns()); |
|
|
|
query_slices.emplace_back( |
|
|
|
(DstType*)Q.data(), |
|
|
|
static_cast<DstType*>(Q.data()), |
|
|
|
i * query_slice_size, |
|
|
|
(i == num_slices - 1 ? Q.rows() - i * query_slice_size : query_slice_size), |
|
|
|
(i == num_slices - 1UL ? Q.rows() - i * query_slice_size : query_slice_size), |
|
|
|
Q.columns()); |
|
|
|
} |
|
|
|
|
|
|
|
// Initialize results |
|
|
|
for (size_t i = 0; i < dst.rows(); ++i) { |
|
|
|
for (size_t j = 0; j < dst.columns(); ++j) { |
|
|
|
for (size_t i = 0UL; i < dst.rows(); ++i) { |
|
|
|
for (size_t j = 0UL; j < dst.columns(); ++j) { |
|
|
|
dst.set(std::numeric_limits<DstType>::infinity(), i, j); |
|
|
|
idx.set(static_cast<IdxType>(-1), i, j); |
|
|
|
} |
|
|
@@ -176,7 +176,7 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, Ma |
|
|
|
// Main loop |
|
|
|
#if defined OMP |
|
|
|
#pragma omp parallel for |
|
|
|
for (size_t qi = 0; qi < num_slices; ++qi) { |
|
|
|
for (size_t qi = 0UL; qi < num_slices; ++qi) { |
|
|
|
worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m); |
|
|
|
} |
|
|
|
#elif defined CILK |
|
|
@@ -185,7 +185,8 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, Ma |
|
|
|
} |
|
|
|
#elif defined PTHREADS |
|
|
|
std::vector<std::thread> workers; |
|
|
|
for (size_t qi = 0; qi < num_slices; ++qi) { |
|
|
|
workers.reserve(num_slices); |
|
|
|
for (size_t qi = 0; qi < num_slices; ++qi) { |
|
|
|
workers.push_back( |
|
|
|
std::thread (worker_body<MatrixD, MatrixI>, |
|
|
|
std::ref(corpus_slices), std::ref(query_slices), |
|
|
|