|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- /**
- * \file v0.hpp
- * \brief
- *
- * \author
- * Christos Choutouridis AEM:8997
- * <cchoutou@ece.auth.gr>
- */
- #ifndef V0_HPP_
- #define V0_HPP_
-
- #include <cblas.h>
- #include <cmath>
- #include <vector>
- #include <algorithm>
-
- #include <matrix.hpp>
- #include <config.h>
-
- namespace v0 {
-
- /*!
- * Function to compute squared Euclidean distances
- *
- * \fn void pdist2(const double*, const double*, double*, int, int, int)
- * \param X m x d matrix (Column major)
- * \param Y n x d matrix (Column major)
- * \param D2 m x n matrix to store distances (Column major)
- * \param m number of rows in X
- * \param n number of rows in Y
- * \param d number of columns in both X and Y
- */
- template<typename DataType>
- void pdist2(const mtx::Matrix<DataType>& X, const mtx::Matrix<DataType>& Y, mtx::Matrix<DataType>& D2) {
- int M = X.rows();
- int N = Y.rows();
- int d = X.columns();
-
- // Compute the squared norms of each row in X and Y
- std::vector<DataType> X_norms(M), Y_norms(N);
- for (int i = 0; i < M ; ++i) {
- X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
- }
- for (int j = 0; j < N ; ++j) {
- Y_norms[j] = cblas_ddot(d, Y.data() + j * d, 1, Y.data() + j * d, 1);
- }
-
- // Compute -2 * X * Y'
- cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, d, -2.0, X.data(), d, Y.data(), d, 0.0, D2.data(), N);
-
- // Step 3: Add the squared norms to each entry in D2
- for (int i = 0; i < M ; ++i) {
- for (int j = 0; j < N; ++j) {
- D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
- //D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative
- D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each
- }
- }
- }
-
- template<typename DataType, typename IndexType>
- void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
- std::nth_element(
- vec.begin(),
- vec.begin() + k,
- vec.end(),
- [](const std::pair<DataType, IndexType>& a, const std::pair<DataType, IndexType>& b) {
- return a.first < b.first;
- });
- vec.resize(k); // Keep only the k smallest elements
- }
-
- /*!
- * \param C Is a MxD matrix (Corpus)
- * \param Q Is a NxD matrix (Query)
- * \param k The number of nearest neighbors needed
- * \param idx Is the Nxk matrix with the k indexes of the C points, that are
- * neighbors of the nth point of Q
- * \param dst Is the Nxk matrix with the k distances to the C points of the nth
- * point of Q
- */
- template<typename DataType, typename IndexType>
- void knnsearch(const mtx::Matrix<DataType>& C, const mtx::Matrix<DataType>& Q, int k,
- mtx::Matrix<IndexType>& idx,
- mtx::Matrix<DataType>& dst) {
-
- int M = C.rows();
- int N = Q.rows();
-
- mtx::Matrix<DataType> D(M, N);
-
- pdist2(C, Q, D);
-
- idx.resize(N, k);
- dst.resize(N, k);
-
- for (int j = 0; j < N; ++j) {
- // Create a vector of pairs (distance, index) for the j-th query
- std::vector<std::pair<DataType, IndexType>> dst_idx(M);
- for (int i = 0; i < M; ++i) {
- dst_idx[i] = {D.data()[i * N + j], i};
- }
- // Find the k smallest distances using quickSelectKSmallest
- quickselect(dst_idx, k);
-
- // Sort the k smallest results by distance for consistency
- std::sort(dst_idx.begin(), dst_idx.end());
-
- // Store the indices and distances
- for (int i = 0; i < k; ++i) {
- idx(j, i) = dst_idx[i].second;
- dst(j, i) = dst_idx[i].first;
- }
- }
- }
-
- }
-
- #endif /* V0_HPP_ */
|