/** * \file v1.hpp * \brief * * \author * Christos Choutouridis AEM:8997 * */ #ifndef V1_HPP_ #define V1_HPP_ #include #include #include "matrix.hpp" #include "v0.hpp" #include "config.h" #if defined CILK #include #include //#include #elif defined OMP #include #elif defined PTHREADS #include #include #include //#include #else #endif void init_workers(); namespace v1 { template void mergeResultsWithM(mtx::Matrix& N1, mtx::Matrix& D1, mtx::Matrix& N2, mtx::Matrix& D2, size_t k, size_t m, mtx::Matrix& N, mtx::Matrix& D) { size_t numQueries = N1.rows(); size_t maxCandidates = std::min((IndexType)m, (IndexType)(N1.columns() + N2.columns())); for (size_t q = 0; q < numQueries; ++q) { // Combine distances and neighbors std::vector> candidates(N1.columns() + N2.columns()); // Concatenate N1 and N2 rows for (size_t i = 0; i < N1.columns(); ++i) { candidates[i] = {D1.get(q, i), N1.get(q, i)}; } for (size_t i = 0; i < N2.columns(); ++i) { candidates[i + N1.columns()] = {D2.get(q, i), N2.get(q, i)}; } // Keep only the top-m candidates v0::quickselect(candidates, maxCandidates); // Sort the top-m candidates std::sort(candidates.begin(), candidates.begin() + maxCandidates); // If m < k, pad the remaining slots with invalid values for (size_t i = 0; i < k; ++i) { if (i < maxCandidates) { D.set(candidates[i].first, q, i); N.set(candidates[i].second, q, i); } else { D.set(std::numeric_limits::infinity(), q, i); N.set(static_cast(-1), q, i); // Invalid index (end) } } } } template void worker_body (std::vector& corpus_slices, std::vector& query_slices, MatrixI& idx, MatrixD& dst, size_t slice, size_t num_slices, size_t corpus_slice_size, size_t query_slice_size, size_t k, size_t m) { // "load" types using DstType = typename MatrixD::dataType; using IdxType = typename MatrixI::dataType; for (size_t ci = 0; ci < num_slices; ++ci) { size_t idx_offset = ci * corpus_slice_size; // Intermediate matrixes for intermediate results MatrixI temp_idx(query_slices[slice].rows(), k); MatrixD temp_dst(query_slices[slice].rows(), k); // kNN for each combination v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst); // Merge temporary results to final results MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k); MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k); mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice); } } template void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { using DstType = typename MatrixD::dataType; using IdxType = typename MatrixI::dataType; //Slice calculations size_t corpus_slice_size = C.rows() / ((num_slices == 0)? 1:num_slices); size_t query_slice_size = Q.rows() / ((num_slices == 0)? 1:num_slices); // Make slices std::vector corpus_slices; std::vector query_slices; for (size_t i = 0; i < num_slices; ++i) { corpus_slices.emplace_back( (DstType*)C.data(), i * corpus_slice_size, (i == num_slices - 1 ? C.rows() - i * corpus_slice_size : corpus_slice_size), C.columns()); query_slices.emplace_back( (DstType*)Q.data(), i * query_slice_size, (i == num_slices - 1 ? Q.rows() - i * query_slice_size : query_slice_size), Q.columns()); } // Intermediate results for (size_t i = 0; i < dst.rows(); ++i) { for (size_t j = 0; j < dst.columns(); ++j) { dst.set(std::numeric_limits::infinity(), i, j); idx.set(static_cast(-1), i, j); } } // Main loop #if defined OMP #pragma omp parallel for for (size_t qi = 0; qi < num_slices; ++qi) { for (size_t qi = 0; qi < num_slices; ++qi) { worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m); } } #elif defined CILK cilk_for (size_t qi = 0; qi < num_slices; ++qi) { for (size_t qi = 0; qi < num_slices; ++qi) { worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m); } } #elif defined PTHREADS std::vector workers; for (size_t qi = 0; qi < num_slices; ++qi) { workers.push_back( std::thread (worker_body, std::ref(corpus_slices), std::ref(query_slices), std::ref(idx), std::ref(dst), qi, num_slices, corpus_slice_size, query_slice_size, k, m) ); } // Join threads std::for_each(workers.begin(), workers.end(), [](std::thread& t){ t.join(); }); #else for (size_t qi = 0; qi < num_slices; ++qi) { for (size_t qi = 0; qi < num_slices; ++qi) { worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m); } } #endif } } // namespace v1 #endif /* V1_HPP_ */