AUTH's THMMY "Parallel and distributed systems" course assignments.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

v0.hpp 3.5 KiB

4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
4 päivää sitten
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. /**
  2. * \file v0.hpp
  3. * \brief
  4. *
  5. * \author
  6. * Christos Choutouridis AEM:8997
  7. * <cchoutou@ece.auth.gr>
  8. */
  9. #ifndef V0_HPP_
  10. #define V0_HPP_
  11. #include <cblas.h>
  12. #include <cmath>
  13. #include <vector>
  14. #include <algorithm>
  15. #include "matrix.hpp"
  16. #include "config.h"
  17. namespace v0 {
  18. /*!
  19. * Function to compute squared Euclidean distances
  20. *
  21. * \fn void pdist2(const double*, const double*, double*, int, int, int)
  22. * \param X m x d matrix (Column major)
  23. * \param Y n x d matrix (Column major)
  24. * \param D2 m x n matrix to store distances (Column major)
  25. * \param m number of rows in X
  26. * \param n number of rows in Y
  27. * \param d number of columns in both X and Y
  28. */
  29. template<typename Matrix>
  30. void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) {
  31. using DataType = typename Matrix::dataType;
  32. int M = X.rows();
  33. int N = Y.rows();
  34. int d = X.columns();
  35. // Compute the squared norms of each row in X and Y
  36. std::vector<DataType> X_norms(M), Y_norms(N);
  37. for (int i = 0; i < M ; ++i) {
  38. X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
  39. }
  40. for (int j = 0; j < N ; ++j) {
  41. Y_norms[j] = cblas_ddot(d, Y.data() + j * d, 1, Y.data() + j * d, 1);
  42. }
  43. // Compute -2 * X * Y'
  44. cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, d, -2.0, X.data(), d, Y.data(), d, 0.0, D2.data(), N);
  45. // Step 3: Add the squared norms to each entry in D2
  46. for (int i = 0; i < M ; ++i) {
  47. for (int j = 0; j < N; ++j) {
  48. D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
  49. D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative
  50. D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each
  51. }
  52. }
  53. M++;
  54. }
  /*!
   * Keeps only the k entries of \p vec with the smallest distance (pair.first).
   *
   * The vector is partitioned with std::nth_element and then truncated, so the
   * surviving entries are the k smallest but in no particular order.
   *
   * \param vec  Vector of (distance, index) pairs; modified in place
   * \param k    Number of entries to keep. Values outside [0, vec.size()] are
   *             clamped: a negative k empties the vector and a k larger than the
   *             vector leaves all entries in place.
   */
  template<typename DataType, typename IndexType>
  void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
     using Entry    = std::pair<DataType, IndexType>;
     using SizeType = typename std::vector<Entry>::size_type;
     using DiffType = typename std::vector<Entry>::difference_type;

     // Clamp k so that the nth iterator never leaves [begin, end] and the
     // final resize can only shrink the vector.
     SizeType keep = 0;
     if (k > 0) {
        keep = std::min(static_cast<SizeType>(k), vec.size());
     }

     std::nth_element(
        vec.begin(),
        vec.begin() + static_cast<DiffType>(keep),
        vec.end(),
        [](const Entry& lhs, const Entry& rhs) {
           return lhs.first < rhs.first;
        });
     vec.resize(keep);   // Drop everything but the `keep` smallest elements
  }
  66. /*!
  67. * \param C Is a MxD matrix (Corpus)
  68. * \param Q Is a NxD matrix (Query)
  69. * \param k The number of nearest neighbors needed
  70. * \param idx Is the Nxk matrix with the k indexes of the C points, that are
  71. * neighbors of the nth point of Q
  72. * \param dst Is the Nxk matrix with the k distances to the C points of the nth
  73. * point of Q
  74. */
  75. template<typename MatrixD, typename MatrixI>
  76. void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
  77. using DstType = typename MatrixD::dataType;
  78. using IdxType = typename MatrixI::dataType;
  79. size_t M = C.rows();
  80. size_t N = Q.rows();
  81. mtx::Matrix<DstType> D(M, N);
  82. pdist2(C, Q, D);
  83. for (size_t j = 0; j < N; ++j) {
  84. // Create a vector of pairs (distance, index) for the j-th query
  85. std::vector<std::pair<DstType, IdxType>> dst_idx(M);
  86. for (size_t i = 0; i < M; ++i) {
  87. dst_idx[i] = {D.data()[i * N + j], i};
  88. }
  89. // Find the k smallest distances using quickSelectKSmallest
  90. quickselect(dst_idx, k);
  91. // Sort the k smallest results by distance for consistency
  92. std::sort(dst_idx.begin(), dst_idx.end());
  93. // Store the indices and distances
  94. for (size_t i = 0; i < k; ++i) {
  95. dst.set(dst_idx[i].first, j, i);
  96. idx.set(dst_idx[i].second + idx_offset, j, i);
  97. }
  98. }
  99. }
  100. }
  101. #endif /* V0_HPP_ */