Browse Source

Small changes to HW1

overdue
parent
commit
888fcba64c
5 changed files with 42 additions and 31 deletions
  1. +2
    -0
      .gitignore
  2. +8
    -0
      homework_1/.gitignore
  3. +4
    -4
      homework_1/inc/config.h
  4. +6
    -6
      homework_1/inc/v0.hpp
  5. +22
    -21
      homework_1/inc/v1.hpp

+ 2
- 0
.gitignore View File

@@ -0,0 +1,2 @@
# Assessments
assessments/

+ 8
- 0
homework_1/.gitignore View File

@@ -9,8 +9,16 @@ mtx/
exclude
hpc_auth_sync.sh

# IDEs
.idea/
.clangd

# eclipse
.project
.cproject
.settings/

.vs/
.vscode/



+ 4
- 4
homework_1/inc/config.h View File

@@ -51,13 +51,13 @@ struct session_t {
std::string queryMtxFile {}; //!< optional query matrix file name in HDF5 format
std::string queryDataSet {}; //!< optional query dataset name in HDF5 matrix file
bool queryMtx {false}; //!< Flag to indicate that there is a separate query matrix
size_t k {1}; //!< The number of nearest neighbors to find
size_t k {1UL}; //!< The number of nearest neighbors to find
std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format
std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file
std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file
std::size_t max_threads {0}; //!< Maximum threads to use
std::size_t slices {0}; //!< Slices/threads to use
std::size_t accuracy {100}; //!< The neighbor finding accuracy
std::size_t max_threads {0UL}; //!< Maximum threads to use
std::size_t slices {0UL}; //!< Slices/threads to use
std::size_t accuracy {100UL}; //!< The neighbor finding accuracy
bool timing {false}; //!< Enable timing prints of the program
bool verbose {false}; //!< Flag to enable verbose output to stdout
};


+ 6
- 6
homework_1/inc/v0.hpp View File

@@ -39,7 +39,8 @@ void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) {
int d = X.columns();

// Compute the squared norms of each row in X and Y
std::vector<DataType> X_norms(M), Y_norms(N);
std::vector<DataType> X_norms(M);
std::vector<DataType> Y_norms(N);
for (int i = 0; i < M ; ++i) {
X_norms[i] = cblas_ddot(d, X.data() + i * d, 1, X.data() + i * d, 1);
}
@@ -58,7 +59,6 @@ void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) {
D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each element
}
}
M++;
}

/*!
@@ -92,7 +92,7 @@ void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
* point of Q
*/
template<typename MatrixD, typename MatrixI>
void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, [[maybe_unused]] size_t m, MatrixI& idx, MatrixD& dst) {

using DstType = typename MatrixD::dataType;
using IdxType = typename MatrixI::dataType;
@@ -104,10 +104,10 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, Ma

pdist2(C, Q, D);

for (size_t j = 0; j < N; ++j) {
for (size_t j = 0UL; j < N; ++j) {
// Create a vector of pairs (distance, index) for the j-th query
std::vector<std::pair<DstType, IdxType>> dst_idx(M);
for (size_t i = 0; i < M; ++i) {
for (size_t i = 0UL; i < M; ++i) {
dst_idx[i] = {D.data()[i * N + j], i};
}
// Find the k smallest distances using quickSelectKSmallest
@@ -117,7 +117,7 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, Ma
std::sort(dst_idx.begin(), dst_idx.end());

// Store the indices and distances
for (size_t i = 0; i < k; ++i) {
for (size_t i = 0UL; i < k; ++i) {
dst.set(dst_idx[i].first, j, i);
idx.set(dst_idx[i].second + idx_offset, j, i);
}


+ 22
- 21
homework_1/inc/v1.hpp View File

@@ -60,17 +60,17 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1,
size_t k, size_t m,
mtx::Matrix<IndexType>& N, mtx::Matrix<DataType>& D) {
size_t numQueries = N1.rows();
size_t maxCandidates = std::min((IndexType)m, (IndexType)(N1.columns() + N2.columns()));
size_t maxCandidates = std::min(static_cast<IndexType>(m), static_cast<IndexType>(N1.columns() + N2.columns()));

for (size_t q = 0; q < numQueries; ++q) {
for (size_t q = 0UL; q < numQueries; ++q) {
// Combine distances and neighbors
std::vector<std::pair<DataType, IndexType>> candidates(N1.columns() + N2.columns());

// Concatenate N1 and N2 rows
for (size_t i = 0; i < N1.columns(); ++i) {
for (size_t i = 0UL; i < N1.columns(); ++i) {
candidates[i] = {D1.get(q, i), N1.get(q, i)};
}
for (size_t i = 0; i < N2.columns(); ++i) {
for (size_t i = 0UL; i < N2.columns(); ++i) {
candidates[i + N1.columns()] = {D2.get(q, i), N2.get(q, i)};
}

@@ -81,7 +81,7 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1,
std::sort(candidates.begin(), candidates.begin() + maxCandidates);

// If m < k, pad the remaining slots with invalid values
for (size_t i = 0; i < k; ++i) {
for (size_t i = 0UL; i < k; ++i) {
if (i < maxCandidates) {
D.set(candidates[i].first, q, i);
N.set(candidates[i].second, q, i);
@@ -110,7 +110,7 @@ void worker_body (std::vector<MatrixD>& corpus_slices,
using IdxType = typename MatrixI::dataType;


for (size_t ci = 0; ci < num_slices; ++ci) {
for (size_t ci = 0UL; ci < num_slices; ++ci) {
size_t idx_offset = ci * corpus_slice_size;

// Temporary matrices that hold the intermediate results for this corpus slice
@@ -121,8 +121,8 @@ void worker_body (std::vector<MatrixD>& corpus_slices,
v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst);

// Merge temporary results to final results
MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k);
MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k);
MatrixI idx_slice(static_cast<IdxType*>(idx.data()), slice * query_slice_size, query_slices[slice].rows(), k);
MatrixD dst_slice(static_cast<DstType*>(dst.data()), slice * query_slice_size, query_slices[slice].rows(), k);

mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice);
}
@@ -145,29 +145,29 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, Ma
using IdxType = typename MatrixI::dataType;

//Slice calculations
size_t corpus_slice_size = C.rows() / ((num_slices == 0)? 1:num_slices);
size_t query_slice_size = Q.rows() / ((num_slices == 0)? 1:num_slices);
size_t corpus_slice_size = C.rows() / ((num_slices == 0UL)? 1UL:num_slices);
size_t query_slice_size = Q.rows() / ((num_slices == 0UL)? 1UL:num_slices);

// Make slices
std::vector<MatrixD> corpus_slices;
std::vector<MatrixD> query_slices;
std::vector<MatrixD> corpus_slices{};
std::vector<MatrixD> query_slices{};

for (size_t i = 0; i < num_slices; ++i) {
for (size_t i = 0UL; i < num_slices; ++i) {
corpus_slices.emplace_back(
(DstType*)C.data(),
static_cast<DstType*>(C.data()),
i * corpus_slice_size,
(i == num_slices - 1 ? C.rows() - i * corpus_slice_size : corpus_slice_size),
(i == num_slices - 1UL ? C.rows() - i * corpus_slice_size : corpus_slice_size),
C.columns());
query_slices.emplace_back(
(DstType*)Q.data(),
static_cast<DstType*>(Q.data()),
i * query_slice_size,
(i == num_slices - 1 ? Q.rows() - i * query_slice_size : query_slice_size),
(i == num_slices - 1UL ? Q.rows() - i * query_slice_size : query_slice_size),
Q.columns());
}

// Initialize results
for (size_t i = 0; i < dst.rows(); ++i) {
for (size_t j = 0; j < dst.columns(); ++j) {
for (size_t i = 0UL; i < dst.rows(); ++i) {
for (size_t j = 0UL; j < dst.columns(); ++j) {
dst.set(std::numeric_limits<DstType>::infinity(), i, j);
idx.set(static_cast<IdxType>(-1), i, j);
}
@@ -176,7 +176,7 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, Ma
// Main loop
#if defined OMP
#pragma omp parallel for
for (size_t qi = 0; qi < num_slices; ++qi) {
for (size_t qi = 0UL; qi < num_slices; ++qi) {
worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
}
#elif defined CILK
@@ -185,7 +185,8 @@ void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, Ma
}
#elif defined PTHREADS
std::vector<std::thread> workers;
for (size_t qi = 0; qi < num_slices; ++qi) {
workers.reserve(num_slices);
for (size_t qi = 0; qi < num_slices; ++qi) {
workers.push_back(
std::thread (worker_body<MatrixD, MatrixI>,
std::ref(corpus_slices), std::ref(query_slices),


Loading…
Cancel
Save