Pārlūkot izejas kodu

v1 version and measurements

tags/v2.0
Christos Choutouridis pirms 2 mēnešiem
vecāks
revīzija
494fdddd04
22 mainītis faili ar 979 papildinājumiem un 206 dzēšanām
  1. +32
    -11
      homework_1/Makefile
  2. +207
    -6
      homework_1/gtest/tests.cpp
  3. +3
    -1
      homework_1/inc/config.h
  4. +16
    -125
      homework_1/inc/matrix.hpp
  5. +3
    -2
      homework_1/inc/v0.hpp
  6. +120
    -39
      homework_1/inc/v1.hpp
  7. +4
    -1
      homework_1/matlab/dist2.m
  8. +78
    -0
      homework_1/matlab/measurements_over_acc.m
  9. +75
    -0
      homework_1/matlab/measurements_over_th.m
  10. +100
    -0
      homework_1/matlab/run_test.m
  11. Binārs
      homework_1/measurements/CILK_over_accuracy.png
  12. Binārs
      homework_1/measurements/CILK_over_threads.png
  13. Binārs
      homework_1/measurements/OMP_over_accuracy.png
  14. Binārs
      homework_1/measurements/OMP_over_threads.png
  15. Binārs
      homework_1/measurements/Pthreads_over_accuracy.png
  16. Binārs
      homework_1/measurements/Pthreads_over_threads.png
  17. +122
    -0
      homework_1/measurements/over_acc.txt
  18. +101
    -0
      homework_1/measurements/over_threads.txt
  19. Binārs
      homework_1/out.hdf5
  20. +27
    -0
      homework_1/run_measurements.sh
  21. +41
    -21
      homework_1/src/main.cpp
  22. +50
    -0
      homework_1/src/v1.cpp

+ 32
- 11
homework_1/Makefile Parādīt failu

@@ -27,8 +27,8 @@ SRC_DIR_LIST := src gtest
# Include directories list (space separated). Makefile-relative path.
INC_DIR_LIST := inc \
src \
/usr/include/hdf5/serial/ \
gtest \
/usr/include/hdf5/serial/
# Libs/MATLAB/R2019b/include/ \

# Exclude files list (space separated). Filenames only.
@@ -179,7 +179,7 @@ hpc-clean:
rm hpc-results/post

#
# ================ Local via docker build rules =================
# ================ Local (and/or) via docker build rules =================
#
# examples:
# make IMAGE=hpcimage v0
@@ -190,12 +190,6 @@ local_v0: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=0
local_v0: TARGET := local_v0
local_v0: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0
local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0
local_v0_opt: TARGET := local_v0_opt
local_v0_opt: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=1
local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=1
@@ -203,6 +197,21 @@ local_v1: TARGET := local_v1
local_v1: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

local_v1_omp: CFLAGS := $(DEB_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP
local_v1_omp: CXXFLAGS := $(DEB_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP
local_v1_omp: LDFLAGS += -fopenmp
local_v1_omp: TARGET := local_v1_omp
local_v1_omp: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)


local_v1_pth: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=1 -DPTHREADS
local_v1_pth: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=1 -DPTHREADS
local_v1_pth: TARGET := local_v1_pth
local_v1_pth: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

v0: DOCKER := $(DOCKER_CMD)
v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0
v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0
@@ -215,7 +224,7 @@ v1_cilk: CXX := /usr/local/OpenCilk-9.0.1-Linux/bin/clang++
v1_cilk: CFLAGS := $(REL_CFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK
v1_cilk: CXXFLAGS := $(REL_CXXFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK
v1_cilk: LDFLAGS += -fcilkplus
v1_cilk: TARGET := knnsearch_cilkv1
v1_cilk: TARGET := knnsearch_v1_cilk
v1_cilk: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

@@ -223,10 +232,17 @@ v1_omp: DOCKER := $(DOCKER_CMD)
v1_omp: CFLAGS := $(REL_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP
v1_omp: CXXFLAGS := $(REL_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP
v1_omp: LDFLAGS += -fopenmp
v1_omp: TARGET := knnsearch_ompv1
v1_omp: TARGET := knnsearch_v1_omp
v1_omp: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

v1_pth: DOCKER := $(DOCKER_CMD)
v1_pth: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=1 -DPTHREADS
v1_pth: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=1 -DPTHREADS
v1_pth: TARGET := knnsearch_v1_pth
v1_pth: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
v1: DOCKER := $(DOCKER_CMD)
v1: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=1
v1: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=1
@@ -240,7 +256,12 @@ tests: TARGET := tests
tests: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
tests_rel: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 -DTESTING
tests_rel: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 -DTESTING
tests_rel: TARGET := tests
tests_rel: $(BUILD_DIR)/$(TARGET)
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)

#
# ========= Inside CSAL Image build rules ===========
#


homework_1/src/tests.cpp → homework_1/gtest/tests.cpp Parādīt failu

@@ -19,6 +19,8 @@

using matrix_t = mtx::Matrix<int>;

extern void loadMtx(MatrixDst& Corpus, MatrixDst& Query);
extern void storeMtx(MatrixIdx& Idx, MatrixDst& Dst);

// =====================================
// C1, Q1
@@ -140,11 +142,44 @@ TEST(Tv0_UT, pdist2_test2) {
}


/*
 * v0::pdist2 all-to-all check: distances of the C2 data set against itself.
 * Every computed entry must lie strictly within +/-0.01 of the reference table
 * (presumably produced with MATLAB's pdist2(C2, C2) -- TODO confirm).
 */
TEST(Tv0_UT, pdist2_test3) {

    // Reference 16x16 distance table.
    mtx::Matrix<double> D2_exp(16, 16, {
        0, 0.7433, 0.6868, 0.8846, 0.6342, 0.4561, 0.5118, 0.6341, 0.5461, 0.7322, 0.6974, 0.4330, 0.7028, 0.6303, 0.6826, 0.4179,
        0.7433, 0, 0.3400, 0.4555, 0.4207, 0.9736, 0.9690, 0.7386, 1.1055, 0.5462, 0.5345, 0.6576, 0.8677, 1.0291, 0.5393, 0.8106,
        0.6868, 0.3400, 0, 0.5380, 0.6268, 0.9512, 1.0234, 0.8403, 0.9843, 0.8187, 0.3091, 0.7829, 0.5759, 0.9411, 0.7239, 0.9186,
        0.8846, 0.4555, 0.5380, 0, 0.6796, 1.1672, 1.0460, 1.1016, 1.1139, 0.7542, 0.6480, 0.9304, 1.0568, 1.3482, 0.8316, 0.9750,
        0.6342, 0.4207, 0.6268, 0.6796, 0, 0.9267, 0.8772, 0.4847, 0.9317, 0.4093, 0.8351, 0.4215, 0.9736, 0.9007, 0.5999, 0.5291,
        0.4561, 0.9736, 0.9512, 1.1672, 0.9267, 0, 0.3903, 0.7795, 0.9308, 0.8429, 0.8436, 0.5672, 0.9284, 0.7064, 0.6435, 0.5975,
        0.5118, 0.9690, 1.0234, 1.0460, 0.8772, 0.3903, 0, 0.8920, 0.9253, 0.7060, 0.9427, 0.5728, 1.1515, 0.9907, 0.6471, 0.4811,
        0.6341, 0.7386, 0.8403, 1.1016, 0.4847, 0.7795, 0.8920, 0, 0.9824, 0.6416, 0.9844, 0.3398, 0.9355, 0.5428, 0.6536, 0.5309,
        0.5461, 1.1055, 0.9843, 1.1139, 0.9317, 0.9308, 0.9253, 0.9824, 0, 1.1517, 1.0541, 0.8746, 0.8506, 0.8777, 1.2036, 0.7607,
        0.7322, 0.5462, 0.8187, 0.7542, 0.4093, 0.8429, 0.7060, 0.6416, 1.1517, 0, 0.9106, 0.4245, 1.2071, 1.0738, 0.3745, 0.5170,
        0.6974, 0.5345, 0.3091, 0.6480, 0.8351, 0.8436, 0.9427, 0.9844, 1.0541, 0.9106, 0, 0.8647, 0.5941, 0.9954, 0.7148, 0.9876,
        0.4330, 0.6576, 0.7829, 0.9304, 0.4215, 0.5672, 0.5728, 0.3398, 0.8746, 0.4245, 0.8647, 0, 0.9590, 0.6782, 0.4586, 0.2525,
        0.7028, 0.8677, 0.5759, 1.0568, 0.9736, 0.9284, 1.1515, 0.9355, 0.8506, 1.2071, 0.5941, 0.9590, 0, 0.6838, 1.0517, 1.0675,
        0.6303, 1.0291, 0.9411, 1.3482, 0.9007, 0.7064, 0.9907, 0.5428, 0.8777, 1.0738, 0.9954, 0.6782, 0.6838, 0, 0.9482, 0.7937,
        0.6826, 0.5393, 0.7239, 0.8316, 0.5999, 0.6435, 0.6471, 0.6536, 1.2036, 0.3745, 0.7148, 0.4586, 1.0517, 0.9482, 0, 0.6345,
        0.4179, 0.8106, 0.9186, 0.9750, 0.5291, 0.5975, 0.4811, 0.5309, 0.7607, 0.5170, 0.9876, 0.2525, 1.0675, 0.7937, 0.6345, 0
    });

    // Output matrix filled by the function under test.
    mtx::Matrix<double> D (16,16);

    v0::pdist2(C2, C2, D);

    // Element-wise tolerance check (upper and lower bound asserted separately).
    for (size_t row = 0 ; row < D.rows() ; ++row) {
        for (size_t col = 0 ; col < D.columns() ; ++col) {
            EXPECT_TRUE (D2_exp.get(row, col) + 0.01 > D(row, col));
            EXPECT_TRUE (D2_exp.get(row, col) - 0.01 < D(row, col));
        }
    }
}


/*
* ==========================================
* v0::knn
*/
TEST(Tv0_UT, knn_test1) {
TEST(Tv0_UT, knn_v0_test1) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(5, k, {
5, 8, 9,
@@ -177,7 +212,7 @@ TEST(Tv0_UT, knn_test1) {

}

TEST(Tv0_UT, knn_test2) {
TEST(Tv0_UT, knn_v0_test2) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(8, k, {
14, 13, 1,
@@ -220,7 +255,46 @@ TEST(Tv0_UT, knn_test2) {
* ==========================================
* v1::knn
*/
TEST(Tv1_UT, knn_test1) {
/*
 * v1::knnsearch with a single slice: the 3 nearest neighbors in C2 of every row of Q2.
 */
TEST(Tv1_UT, knn_v1_1slice) {
    size_t k = 3;

    // Expected neighbor indices, 1-based (see the "+ 1" in the comparison below).
    mtx::Matrix<uint32_t> Idx_exp(8, k, {
        14, 13, 1,
        15, 10, 12,
        14, 8, 12,
        4, 1, 3,
        8, 12, 5,
        4, 3, 2,
        10, 2, 4,
        1, 11, 9
    });

    // Expected neighbor distances, one row per query point.
    mtx::Matrix<double> Dst_exp(8, k, {
        0.1939, 0.5768, 0.6020,
        0.2708, 0.3686, 0.3740,
        0.2684, 0.3103, 0.5277,
        0.4709, 0.6050, 0.6492,
        0.4397, 0.6842, 0.7074,
        0.3402, 0.4360, 0.4475,
        0.3708, 0.4037, 0.4417,
        0.6352, 0.6653, 0.6758
    });

    // Result matrices filled by the function under test.
    mtx::Matrix<uint32_t> Idx(8, k);
    mtx::Matrix<double> Dst(8, k);

    v1::knnsearch(C2, Q2, 1, k, k, Idx, Dst);

    for (size_t row = 0 ; row < Idx.rows() ; ++row) {
        for (size_t col = 0 ; col < Idx.columns() ; ++col) {
            EXPECT_TRUE (Idx_exp(row, col) == Idx(row, col) + 1);   // matlab starts from 1
            EXPECT_TRUE (Dst_exp.get(row, col) + 0.01 > Dst(row, col));
            EXPECT_TRUE (Dst_exp.get(row, col) - 0.01 < Dst(row, col));
        }
    }

}

TEST(Tv1_UT, knn_v1_2slice) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(8, k, {
14, 13, 1,
@@ -247,7 +321,7 @@ TEST(Tv1_UT, knn_test1) {
mtx::Matrix<uint32_t> Idx(8, k);
mtx::Matrix<double> Dst(8, k);

v1::knnsearch(C2, Q2, 0, k, k, Idx, Dst);
v1::knnsearch(C2, Q2, 2, k, k, Idx, Dst);


for (size_t i = 0 ; i< Idx.rows() ; ++i)
@@ -260,7 +334,7 @@ TEST(Tv1_UT, knn_test1) {
}

// all-to-all
TEST(Tv1_UT, knn_test2) {
TEST(Tv1_UT, knn_v1_4slice) {
size_t k = 3;
mtx::Matrix<uint32_t> Idx_exp(16, k, {
1, 16, 12,
@@ -303,7 +377,7 @@ TEST(Tv1_UT, knn_test2) {
mtx::Matrix<uint32_t> Idx(16, k);
mtx::Matrix<double> Dst(16, k);

v1::knnsearch(C2, C2, 0, k, k, Idx, Dst);
v1::knnsearch(C2, C2, 4, k, k, Idx, Dst);


for (size_t i = 0 ; i< Idx.rows() ; ++i)
@@ -315,3 +389,130 @@ TEST(Tv1_UT, knn_test2) {

}



/*
* ============== Live hdf5 tests ===============
*
* In order to run these tests we need the following hdf5 files in the ./mtx directory:
*
* - fashion-mnist-784-euclidean.hdf5
* - mnist-784-euclidean.hdf5
* - sift-128-euclidean.hdf5
* - gist-960-euclidean.hdf5
*
*/

/*
 * Live run of v0::knnsearch on the "/test" dataset of the SIFT hdf5 file.
 * The result is only stored to disk; no numerical expectation is checked here.
 */
TEST(Tlive_UT, knn_v0_sift_test) {
    // Working matrices
    MatrixDst Corpus;
    MatrixDst Query;
    MatrixIdx Idx;
    MatrixDst Dst;

    // Configure the global session: corpus only (no query matrix), k = 100
    session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
    session.corpusDataSet = "/test";
    session.queryMtx = false;
    session.k = 100;
    size_t m_param = session.k;     // the `m` argument is passed equal to k
    session.timing = true;
    session.outMtxFile = "test/knn_v0.hdf5";

    loadMtx(Corpus, Query);

    // Size the outputs from the Corpus, which also plays the role of the Query
    Idx.resize(Corpus.rows(), session.k);
    Dst.resize(Corpus.rows(), session.k);

    v0::knnsearch(Corpus, Corpus, 0, session.k, m_param, Idx, Dst);
    storeMtx(Idx, Dst);

    // Reaching this point without a crash is the whole test
    EXPECT_TRUE(true);
}


/*
 * Live run of v1::knnsearch on the "/test" dataset of the SIFT hdf5 file using ONE slice.
 * The result is only stored to disk; no numerical expectation is checked here.
 */
TEST(Tlive_UT, knn_v1_sift_test_1slice) {
    // Instantiate matrixes
    MatrixDst Corpus;
    MatrixDst Query;
    MatrixIdx Idx;
    MatrixDst Dst;

    // setup environment
    session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
    session.corpusDataSet = "/test";
    session.queryMtx = false;
    session.k = 100;
    size_t m = session.k;
    session.timing = true;
    session.outMtxFile = "test/knn_v1ser.hdf5";


    loadMtx(Corpus, Query);

    // Prepare output memory (there is no Query, so size it from the Corpus)
    Idx.resize(Corpus.rows(), session.k);
    Dst.resize(Corpus.rows(), session.k);

    // The third argument of v1::knnsearch is the number of slices. It must be 1 here:
    // with 0 none of the slice loops run and only the initial -1/inf values are stored.
    v1::knnsearch(Corpus, Corpus, 1, session.k, m, Idx, Dst);
    storeMtx(Idx, Dst);
    EXPECT_EQ(true, true);
}

/*
 * Live run of v1::knnsearch on the "/test" dataset of the SIFT hdf5 file using TWO slices.
 * The result is only stored to disk; no numerical expectation is checked here.
 *
 * NOTE(review): the output file name is the same one the 1-slice live test uses,
 * so whichever test runs last overwrites it -- confirm this is intended.
 */
TEST(Tlive_UT, knn_v1_sift_test_2slice) {
    // Working matrices
    MatrixDst Corpus;
    MatrixDst Query;
    MatrixIdx Idx;
    MatrixDst Dst;

    // Configure the global session: corpus only (no query matrix), k = 100
    session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
    session.corpusDataSet = "/test";
    session.queryMtx = false;
    session.k = 100;
    size_t m_param = session.k;     // the `m` argument is passed equal to k
    session.timing = true;
    session.outMtxFile = "test/knn_v1ser.hdf5";

    loadMtx(Corpus, Query);

    // Size the outputs from the Corpus, which also plays the role of the Query
    Idx.resize(Corpus.rows(), session.k);
    Dst.resize(Corpus.rows(), session.k);

    v1::knnsearch(Corpus, Corpus, 2, session.k, m_param, Idx, Dst);
    storeMtx(Idx, Dst);

    // Reaching this point without a crash is the whole test
    EXPECT_TRUE(true);
}

/*
 * Live run of v1::knnsearch on the "/test" dataset of the SIFT hdf5 file using FOUR slices.
 * The result is only stored to disk; no numerical expectation is checked here.
 *
 * NOTE(review): the output file name is the same one the 1-slice live test uses,
 * so whichever test runs last overwrites it -- confirm this is intended.
 */
TEST(Tlive_UT, knn_v1_sift_test_4slice) {
    // Working matrices
    MatrixDst Corpus;
    MatrixDst Query;
    MatrixIdx Idx;
    MatrixDst Dst;

    // Configure the global session: corpus only (no query matrix), k = 100
    session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
    session.corpusDataSet = "/test";
    session.queryMtx = false;
    session.k = 100;
    size_t m_param = session.k;     // the `m` argument is passed equal to k
    session.timing = true;
    session.outMtxFile = "test/knn_v1ser.hdf5";

    loadMtx(Corpus, Query);

    // Size the outputs from the Corpus, which also plays the role of the Query
    Idx.resize(Corpus.rows(), session.k);
    Dst.resize(Corpus.rows(), session.k);

    v1::knnsearch(Corpus, Corpus, 4, session.k, m_param, Idx, Dst);
    storeMtx(Idx, Dst);

    // Reaching this point without a crash is the whole test
    EXPECT_TRUE(true);
}


+ 3
- 1
homework_1/inc/config.h Parādīt failu

@@ -55,7 +55,9 @@ struct session_t {
std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format
std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file
std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file
std::size_t max_threads {}; //!< Maximum threads to use
std::size_t max_threads {0}; //!< Maximum threads to use
std::size_t slices {0}; //!< Slices/threads to use
std::size_t accuracy {100}; //!< The neighbor finding accuracy
bool timing {false}; //!< Enable timing prints of the program
bool verbose {false}; //!< Flag to enable verbose output to stdout
};


+ 16
- 125
homework_1/inc/matrix.hpp Parādīt failu

@@ -134,6 +134,9 @@ struct Matrix {
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; }
Matrix(const Matrix& m) = delete; //!< No copy ctor
Matrix& operator=(const Matrix& m) = delete; //!< No copy
//Matrix(const Matrix& m);
//Matrix& operator=(const Matrix& m) { copy(m); }

//! @}

//! \name Data exposure
@@ -233,6 +236,11 @@ struct Matrix {

// a basic serial iterator support
//! Raw pointer to the first element addressed by data_.
DataType* data() noexcept { return data_; }
//! Begin of the element range: same address as data().
DataType* begin() noexcept { return data_; }
//! Read-only overload of begin().
const DataType* begin() const noexcept { return data_; }
//! One-past-the-end pointer: data_ advanced by capacity(rows_, cols_) elements.
DataType* end() noexcept { return data_ + capacity(rows_, cols_); }
//! Read-only overload of end().
const DataType* end() const noexcept { return data_ + capacity(rows_, cols_); }

// IndexType begin_idx() noexcept { return 0; }
// IndexType end_idx() noexcept { return capacity(rows_, cols_); }

@@ -265,17 +273,19 @@ struct Matrix {
std::swap(rows_, src.rows_);
std::swap(cols_, src.cols_);
}

private:
//! move helper
//!
//! Transfers every member of \p src into this object, member by member.
//! Each member is moved into its own counterpart exactly once. The previous body
//! also assigned vector_storage_, raw_storage_ and use_vector_ to data_, and moved
//! rows_/cols_ twice.
//!
//! \param src  The matrix to move from. It is left in whatever state the individual
//!             member moves leave it; this helper does not reset it.
void moves(Matrix&& src) noexcept {
    vector_storage_ = std::move(src.vector_storage_);
    raw_storage_    = std::move(src.raw_storage_);
    data_           = std::move(src.data_);
    use_vector_     = std::move(src.use_vector_);
    rows_           = std::move(src.rows_);
    cols_           = std::move(src.cols_);
}

// Storage
std::vector<DataType>
vector_storage_; //!< Internal storage (if used).
DataType* raw_storage_; //!< External storage (if used).
@@ -528,125 +538,6 @@ private:
};


template<typename ...> struct Matrix_view { };

/*!
 * @struct Matrix_view
 *
 * A non-owning window over the storage of a Matrix. It either spans the entire
 * matrix or a [begin, end) range of rows (row-major) / columns (column-major).
 * The view is neither copyable nor movable.
 *
 * @tparam Matrix     The matrix template the view refers to
 * @tparam DataType   The underlying data type
 * @tparam IndexType  The underlying index type
 * @tparam Type       The matrix type
 * @tparam Order      The storage order of the matrix
 */
template<template <typename, typename, MatrixType, MatrixOrder, bool> class Matrix,
         typename DataType,
         typename IndexType,
         MatrixType Type,
         MatrixOrder Order>
struct Matrix_view<Matrix<DataType, IndexType, Type, Order, false>> {
    using owner_t = Matrix<DataType, IndexType, Type, Order, false>;

    using dataType = DataType;     //!< meta:export of underlying data type
    using indexType = IndexType;   //!< meta:export of underlying index type
    static constexpr MatrixOrder matrixOrder = Order;   //!< meta:export of array order
    static constexpr MatrixType matrixType = Type;      //!< meta:export of array type

    /*!
     * \name Obj lifetime
     */
    //! @{

    //! Construct a matrix view to the entire matrix
    // NOTE(review): owner is a pointer-to-const while m_ is a pointer to mutable data;
    //               this relies on what owner->data() returns for a const owner -- confirm.
    Matrix_view(const owner_t* owner) noexcept :
        owner_(owner), m_(owner->data()), rows_(owner->rows()), cols_(owner->columns()) { }

    //! Construct a view to the rows (row-major) or columns (column-major) in [begin, end)
    Matrix_view(const owner_t* owner, IndexType begin, IndexType end) noexcept :
        owner_(owner) {
        if constexpr (Order == MatrixOrder::ROWMAJOR) {
            m_ = owner->data() + begin * owner->columns();
            rows_ = end - begin;
            cols_ = owner->columns();
        } else if constexpr (Order == MatrixOrder::COLMAJOR) {
            // Order is a template parameter, so this branch is also selected at compile time
            m_ = owner->data() + begin * owner->rows();
            rows_ = owner->rows();
            cols_ = end - begin;
        }
    }

    Matrix_view(Matrix_view&& m) = delete;               //!< No move
    Matrix_view& operator=(Matrix_view&& m) = delete;
    Matrix_view(const Matrix_view& m) = delete;          //!< No copy
    Matrix_view& operator=(const Matrix_view& m) = delete;
    //! @}

    //! Get the size of each dimension
    const IndexType rows() const noexcept { return rows_; }
    const IndexType columns() const noexcept { return cols_; }

    //! Get the interface size of the view (what appears to be the size)
    IndexType size() const {
        return rows_ * cols_;
    }

    //! Number of elements of an M x N view
    static constexpr IndexType capacity(IndexType M, IndexType N) {
        return M*N;
    }
    /*
     * virtual 2D accessors
     */
    //! Read the element at row \p i, column \p j according to the storage order
    const DataType get (IndexType i, IndexType j) const {
        if constexpr (Order == MatrixOrder::COLMAJOR)
            return m_[i + j*rows_];
        else
            return m_[i*cols_ + j];
    }

    //! Write \p v to the element at row \p i, column \p j and return the stored value
    DataType set (DataType v, IndexType i, IndexType j) {
        if constexpr (Order == MatrixOrder::COLMAJOR)
            return m_[i + j*rows_] = v;
        else
            return m_[i*cols_ + j] = v;
    }
    // DataType operator()(IndexType i, IndexType j) { return get(i, j); }
    /*!
     * Return a proxy MatVal object with read and write capabilities.
     * @param i The row number
     * @param j The column number
     * @return  The MatVal object
     */
    MatVal<Matrix_view> operator()(IndexType i, IndexType j) noexcept {
        return MatVal<Matrix_view>(this, get(i, j), i, j);
    }

    // a basic serial iterator support
    //! Starting address of the view. m_ is a raw pointer, so it is returned as is
    //! (the previous `m_.data()` cannot compile for a pointer type).
    DataType* data() noexcept { return m_; }
    // IndexType begin_idx() noexcept { return 0; }
    // IndexType end_idx() noexcept { return capacity(rows_, cols_); }

    const DataType* data() const noexcept { return m_; }
    const IndexType begin_idx() const noexcept { return 0; }
    const IndexType end_idx() const noexcept { return capacity(rows_, cols_); }
    //! @}

    /*!
     * \name Safe iteration API
     *
     * Calls \p lambda once for every index in [begin, end), passing \p args
     * followed by the current index.
     */
    //! @{
    template<typename F, typename... Args>
    void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) {
        for (IndexType it=begin ; it<end ; ++it) {
            std::forward<F>(lambda)(std::forward<Args>(args)..., it);
        }
    }
    //! @}
    //!
private:
    const owner_t* owner_ {nullptr};   //!< Pointer to Matrix
    DataType* m_ {nullptr};            //!< Starting address of the slice/view
    IndexType rows_{};                 //!< the virtual size of rows.
    IndexType cols_{};                 //!< the virtual size of columns.
};

/*!
* A view/iterator hybrid object for Matrix columns.
*


+ 3
- 2
homework_1/inc/v0.hpp Parādīt failu

@@ -54,10 +54,11 @@ void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) {
for (int i = 0; i < M ; ++i) {
for (int j = 0; j < N; ++j) {
D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j);
//D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative
D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative
D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each
}
}
M++;
}

template<typename DataType, typename IndexType>
@@ -82,7 +83,7 @@ void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) {
* point of Q
*/
template<typename MatrixD, typename MatrixI>
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {

using DstType = typename MatrixD::dataType;
using IdxType = typename MatrixI::dataType;


+ 120
- 39
homework_1/inc/v1.hpp Parādīt failu

@@ -1,5 +1,5 @@
/**
* \file v0.hpp
* \file v1.hpp
* \brief
*
* \author
@@ -16,6 +16,26 @@
#include "v0.hpp"
#include "config.h"

#if defined CILK
#include <cilk/cilk.h>
#include <cilk/cilk_api.h>
//#include <cilk/reducer_opadd.h>

#elif defined OMP
#include <omp.h>

#elif defined PTHREADS
#include <thread>
#include <numeric>
#include <functional>
//#include <random>

#else
#endif


void init_workers();

namespace v1 {

template <typename DataType, typename IndexType>
@@ -57,49 +77,110 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1,
}
}



template<typename MatrixD, typename MatrixI>
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {

void worker_body (std::vector<MatrixD>& corpus_slices,
std::vector<MatrixD>& query_slices,
MatrixI& idx,
MatrixD& dst,
size_t slice,
size_t num_slices, size_t corpus_slice_size, size_t query_slice_size,
size_t k,
size_t m) {
// "load" types
using DstType = typename MatrixD::dataType;
using IdxType = typename MatrixI::dataType;

if (C.rows() <= 8 || Q.rows() <= 4) {
// Base case: Call knnsearch directly
v0::knnsearch(C, Q, idx_offset, k, m, idx, dst);
return;
}

// Divide Corpus and Query into subsets
IdxType midC = C.rows() / 2;
IdxType midQ = Q.rows() / 2;

// Slice corpus and query matrixes
MatrixD C1((DstType*)C.data(), 0, midC, C.columns());
MatrixD C2((DstType*)C.data(), midC, midC, C.columns());
MatrixD Q1((DstType*)Q.data(), 0, midQ, Q.columns());
MatrixD Q2((DstType*)Q.data(), midQ, midQ, Q.columns());

// Allocate temporary matrixes for all permutations
MatrixI N1_1(midQ, k), N1_2(midQ, k), N2_1(midQ, k), N2_2(midQ, k);
MatrixD D1_1(midQ, k), D1_2(midQ, k), D2_1(midQ, k), D2_2(midQ, k);

// Recursive calls
knnsearch(C1, Q1, idx_offset, k, m, N1_1, D1_1);
knnsearch(C2, Q1, idx_offset + midC, k, m, N1_2, D1_2);
knnsearch(C1, Q2, idx_offset, k, m, N2_1, D2_1);
knnsearch(C2, Q2, idx_offset + midC, k, m, N2_2, D2_2);

// slice output matrixes
MatrixI N1((IdxType*)idx.data(), 0, midQ, k);
MatrixI N2((IdxType*)idx.data(), midQ, midQ, k);
MatrixD D1((DstType*)dst.data(), 0, midQ, k);
MatrixD D2((DstType*)dst.data(), midQ, midQ, k);

// Merge results in place
mergeResultsWithM(N1_1, D1_1, N1_2, D1_2, k, m, N1, D1);
mergeResultsWithM(N2_1, D2_1, N2_2, D2_2, k, m, N2, D2);
for (size_t ci = 0; ci < num_slices; ++ci) {
size_t idx_offset = ci * corpus_slice_size;

// Intermediate matrixes for intermediate results
MatrixI temp_idx(query_slices[slice].rows(), k);
MatrixD temp_dst(query_slices[slice].rows(), k);

// kNN for each combination
v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst);

// Merge temporary results to final results
MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k);
MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k);

mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice);
}
}

/*!
 * k-nearest-neighbor search of \p Q against \p C, computed slice by slice.
 *
 * Corpus and query are each split in \p num_slices row slices (the last slice takes
 * the remainder rows). Every query slice is handled by one call to worker_body(),
 * which searches it against all corpus slices. The calls are distributed with
 * OpenMP, Cilk or std::thread depending on the OMP / CILK / PTHREADS macro, and run
 * one after the other when none is defined. Each call writes only the rows of
 * \p idx / \p dst that belong to its own query slice.
 *
 * \param C           The corpus matrix
 * \param Q           The query matrix
 * \param num_slices  Number of slices (and parallel work items). 0 is served as 1.
 * \param k           Number of neighbors per query point
 * \param m           Forwarded unchanged to worker_body()
 * \param idx         Output: neighbor indices, pre-filled with IdxType(-1)
 * \param dst         Output: neighbor distances, pre-filled with +infinity
 */
template<typename MatrixD, typename MatrixI>
void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
    using DstType = typename MatrixD::dataType;
    using IdxType = typename MatrixI::dataType;

    // The slice sizes were already computed as if 0 meant "one slice", but no slice
    // was then created or processed. Normalize once so both agree.
    if (num_slices == 0)
        num_slices = 1;

    //Slice calculations
    size_t corpus_slice_size = C.rows() / num_slices;
    size_t query_slice_size  = Q.rows() / num_slices;

    // Make slices
    std::vector<MatrixD> corpus_slices;
    std::vector<MatrixD> query_slices;
    corpus_slices.reserve(num_slices);
    query_slices.reserve(num_slices);

    for (size_t i = 0; i < num_slices; ++i) {
        const bool last = (i == num_slices - 1);    // the last slice absorbs the remainder rows
        corpus_slices.emplace_back(
            C.data(),
            i * corpus_slice_size,
            (last ? C.rows() - i * corpus_slice_size : corpus_slice_size),
            C.columns());
        query_slices.emplace_back(
            Q.data(),
            i * query_slice_size,
            (last ? Q.rows() - i * query_slice_size : query_slice_size),
            Q.columns());
    }

    // Initialize the results with "no neighbor yet" values
    for (size_t i = 0; i < dst.rows(); ++i) {
        for (size_t j = 0; j < dst.columns(); ++j) {
            dst.set(std::numeric_limits<DstType>::infinity(), i, j);
            idx.set(static_cast<IdxType>(-1), i, j);
        }
    }

    // Main loop: exactly ONE worker_body() call per query slice.
    // The former nested duplicate loop made every iteration process all the slices again.
#if defined OMP
    #pragma omp parallel for
    for (size_t qi = 0; qi < num_slices; ++qi) {
        worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
    }
#elif defined CILK
    cilk_for (size_t qi = 0; qi < num_slices; ++qi) {
        worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
    }
#elif defined PTHREADS
    std::vector<std::thread> workers;
    workers.reserve(num_slices);
    for (size_t qi = 0; qi < num_slices; ++qi) {
        workers.push_back(
            std::thread (worker_body<MatrixD, MatrixI>,
                         std::ref(corpus_slices), std::ref(query_slices),
                         std::ref(idx), std::ref(dst),
                         qi,
                         num_slices, corpus_slice_size, query_slice_size,
                         k, m)
        );
    }
    // Join threads
    for (std::thread& t : workers) {
        t.join();
    }

#else
    for (size_t qi = 0; qi < num_slices; ++qi) {
        worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
    }
#endif

}



+ 4
- 1
homework_1/matlab/dist2.m Parādīt failu

@@ -12,7 +12,10 @@ function [D2] = dist2(X, Y)
if d1 ~= d2
error('X,Y column dimensions must match');
end
%D2 = sqrt((X.^2)*ones(d,m) -2*X*Y' + ones(n,d)*(Y.^2)');
% debug
%X_norm = sum(X.^2, 2);
%Y_norm = sum(Y.^2, 2)';
%XY = 2 * X*Y';
D2 = max(sum(X.^2, 2) - 2 * X*Y' + sum(Y.^2, 2)', 0);
D2 = sqrt(D2);


+ 78
- 0
homework_1/matlab/measurements_over_acc.m Parādīt failu

@@ -0,0 +1,78 @@
% Plot measurements

accuracy = [100 80 60 40 20 10];
ser_sift_acc = [ 4395 4365 4384 4315 4295 4246 ];
ser_mnist_ser_acc = [ 7936 7924 7886 7903 7844 7801 ];

omp_sift_acc = [
4093 4098 4040 4001 3980 3937
];
omp_mnist_acc = [
7575 7463 7389 7416 7321 7303
];
cilk_sift_acc = [
3718 3739 3673 3668 3608 3557
];
cilk_mnist_acc = [
7064 7071 7035 6948 6962 6913
];

pth_sift_acc = [
1157 1159 1121 1100 1084 1075
];
pth_mnist_acc = [
2050 2086 2040 2020 2004 1979
];

% 1st plot: OMP
figure;
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD
plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT');
hold on;
plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST');
plot(accuracy, omp_sift_acc, '-^', 'DisplayName', 'OMP SIFT');
plot(accuracy, omp_mnist_acc, '-d', 'DisplayName', 'OMP MNIST');
hold off;
title('OMP');
xlabel('Accuracy (%)');
ylabel('Execution Time [msec]');
set(gca, 'XDir', 'reverse'); % reverse x
legend('Location', 'northwest');
grid on;
print(gcf, 'OMP_over_accuracy.png', '-dpng', '-r300');

% 2nd plot: CILK
figure;
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD
plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT');
hold on;
plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST');
plot(accuracy, cilk_sift_acc, '-^', 'DisplayName', 'CILK SIFT');
plot(accuracy, cilk_mnist_acc, '-d', 'DisplayName', 'CILK MNIST');
hold off;
title('CILK');
xlabel('Accuracy (%)');
ylabel('Execution Time [msec]');
set(gca, 'XDir', 'reverse'); % reverse x
legend('Location', 'northwest');
grid on;
print(gcf, 'CILK_over_accuracy.png', '-dpng', '-r300');

% 3rd plot: Pthreads
figure;
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD
plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT');
hold on;
plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST');
plot(accuracy, pth_sift_acc, '-^', 'DisplayName', 'Pthreads SIFT');
plot(accuracy, pth_mnist_acc, '-d', 'DisplayName', 'Pthreads MNIST');
hold off;
title('Pthreads');
xlabel('Accuracy (%)');
ylabel('Execution Time [msec]');
set(gca, 'XDir', 'reverse'); % reverse x
legend('Location', 'northwest');
grid on;
print(gcf, 'Pthreads_over_accuracy.png', '-dpng', '-r300');


+ 75
- 0
homework_1/matlab/measurements_over_th.m Parādīt failu

@@ -0,0 +1,75 @@
% Plot measurements

threads = [1 2 4 6 8 10 12];
ser_sift_threads = [ 4418 4418 4418 4418 4418 4418 4418 ];
ser_mnist_ser_threads = [ 7924 7924 7924 7924 7924 7924 7924 ];

omp_sift_th = [
4469 4283 4096 3822 4060 4241 5193
];
omp_mnist_th = [
8053 7806 7465 6828 7662 8013 8123
];
cilk_sift_th = [
4225 4090 3696 3122 3860 4141 5103
];
cilk_mnist_th = [
7744 7206 6965 6628 7362 7813 8123
];

pth_sift_th = [
4254 2155 1133 877 724 640 682
];
pth_mnist_th = [
7889 3963 2058 1445 1496 1379 1352
];

% 1st plot: OMP
figure;
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD
plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT');
hold on;
plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST');
plot(threads, omp_sift_th, '-^', 'DisplayName', 'OMP SIFT');
plot(threads, omp_mnist_th, '-d', 'DisplayName', 'OMP MNIST');
hold off;
title('OMP');
xlabel('Threads');
ylabel('Execution Time [msec]');
legend('Location', 'northeast');
grid on;
print(gcf, 'OMP_over_threads.png', '-dpng', '-r300');

% 2nd plot: CILK
figure;
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD
plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT');
hold on;
plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST');
plot(threads, cilk_sift_th, '-^', 'DisplayName', 'CILK SIFT');
plot(threads, cilk_mnist_th, '-d', 'DisplayName', 'CILK MNIST');
hold off;
title('CILK');
xlabel('Threads');
ylabel('Execution Time [msec]');
legend('Location', 'northeast');
grid on;
print(gcf, 'CILK_over_threads.png', '-dpng', '-r300');

% 3rd plot: Pthreads
figure;
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD
plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT');
hold on;
plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST');
plot(threads, pth_sift_th, '-^', 'DisplayName', 'Pthreads SIFT');
plot(threads, pth_mnist_th, '-d', 'DisplayName', 'Pthreads MNIST');
hold off;
title('Pthreads');
xlabel('Threads');
ylabel('Execution Time [msec]');
legend('Location', 'northeast');
grid on;
print(gcf, 'Pthreads_over_threads.png', '-dpng', '-r300');


+ 100
- 0
homework_1/matlab/run_test.m Parādīt failu

@@ -2,6 +2,106 @@
%
%


%
%
%
C1 = [
0.8147 0.1576;
0.9058 0.9706;
0.1270 0.9572;
0.9134 0.4854;
0.6324 0.8003;
0.0975 0.1419;
0.2785 0.4218;
0.5469 0.9157;
0.9575 0.7922;
0.9649 0.9595 ];

Q1 = [
0.6557 0.7577;
0.0357 0.7431;
0.8491 0.3922;
0.9340 0.6555;
0.6787 0.1712 ];

C2 = [
0.7060 0.4456 0.5060 0.6160;
0.0318 0.6463 0.6991 0.4733;
0.2769 0.7094 0.8909 0.3517;
0.0462 0.7547 0.9593 0.8308;
0.0971 0.2760 0.5472 0.5853;
0.8235 0.6797 0.1386 0.5497;
0.6948 0.6551 0.1493 0.9172;
0.3171 0.1626 0.2575 0.2858;
0.9502 0.1190 0.8407 0.7572;
0.0344 0.4984 0.2543 0.7537;
0.4387 0.9597 0.8143 0.3804;
0.3816 0.3404 0.2435 0.5678;
0.7655 0.5853 0.9293 0.0759;
0.7952 0.2238 0.3500 0.0540;
0.1869 0.7513 0.1966 0.5308;
0.4898 0.2551 0.2511 0.7792 ];

Q2 = [
0.9340 0.3112 0.4505 0.0782;
0.1299 0.5285 0.0838 0.4427;
0.5688 0.1656 0.2290 0.1067;
0.4694 0.6020 0.9133 0.9619;
0.0119 0.2630 0.1524 0.0046;
0.3371 0.6541 0.8258 0.7749;
0.1622 0.6892 0.5383 0.8173;
0.7943 0.7482 0.9961 0.8687 ];

D1_exp = [
0.6208 0.9745 0.2371 0.5120 0.1367;
0.3284 0.8993 0.5811 0.3164 0.8310;
0.5651 0.2327 0.9169 0.8616 0.9603;
0.3749 0.9147 0.1132 0.1713 0.3921;
0.0485 0.5994 0.4621 0.3346 0.6308;
0.8312 0.6044 0.7922 0.9815 0.5819;
0.5052 0.4028 0.5714 0.6959 0.4722;
0.1919 0.5395 0.6045 0.4665 0.7561;
0.3037 0.9231 0.4144 0.1387 0.6807;
0.3692 0.9540 0.5790 0.3056 0.8386 ];
D2_exp = [
0.6020 0.7396 0.6583 0.6050 1.0070 0.5542 0.6298 0.6352;
1.0696 0.6348 0.9353 0.6914 0.8160 0.4475 0.4037 0.9145;
0.9268 0.8450 0.9376 0.6492 0.9671 0.4360 0.5956 0.7400;
1.3455 0.9876 1.2953 0.4709 1.2557 0.3402 0.4417 0.7500;
0.9839 0.5476 0.7517 0.7216 0.7074 0.5605 0.4784 0.9954;
0.6839 0.7200 0.7305 0.9495 1.0628 0.8718 0.8178 0.9179;
0.9850 0.7514 0.9585 0.7996 1.2054 0.7784 0.6680 0.8591;
0.6950 0.4730 0.3103 1.0504 0.4397 0.8967 0.8140 1.2066;
0.8065 1.2298 0.9722 0.7153 1.3933 0.8141 1.0204 0.6758;
1.1572 0.3686 0.9031 0.8232 0.7921 0.6656 0.3708 1.0970;
0.9432 0.9049 1.0320 0.6905 1.1167 0.5094 0.6455 0.6653;
0.7672 0.3740 0.5277 0.8247 0.6842 0.6945 0.5648 0.9968;
0.5768 1.1210 0.8403 0.9345 1.1316 0.8292 1.0380 0.8127;
0.1939 0.8703 0.2684 1.1794 0.8103 1.0683 1.1115 1.1646;
1.0106 0.2708 0.8184 0.8954 0.7402 0.6982 0.4509 1.0594;
0.8554 0.5878 0.6834 0.7699 0.9155 0.7161 0.6162 0.9481 ];

% tests
D1 = dist2(C1, Q1);
if norm (D1-pdist2(C1, Q1), 'fro') > 0.01
disp('Error in dist2(C1, Q1)');
end

D2 = dist2(C2, Q2);
if norm (D2-pdist2(C2, Q2), 'fro') > 0.01
disp('Error in dist2(C2, Q2)');
end

D2 = dist2(C2, C2);
if norm (D2-pdist2(C2, C2), 'fro') > 0.01
disp('Error in dist2(C2, C2)');
end


%C = rand(10000, 2); % Corpus
%Q = rand(10000, 2); % Queries
C = rand(20000, 2); % Two clusters


Binārs
homework_1/measurements/CILK_over_accuracy.png Parādīt failu

Pirms Pēc
Platums: 4000  |  Augstums: 2250  |  Izmērs: 100 KiB

Binārs
homework_1/measurements/CILK_over_threads.png Parādīt failu

Pirms Pēc
Platums: 4000  |  Augstums: 2250  |  Izmērs: 92 KiB

Binārs
homework_1/measurements/OMP_over_accuracy.png Parādīt failu

Pirms Pēc
Platums: 4000  |  Augstums: 2250  |  Izmērs: 102 KiB

Binārs
homework_1/measurements/OMP_over_threads.png Parādīt failu

Pirms Pēc
Platums: 4000  |  Augstums: 2250  |  Izmērs: 104 KiB

Binārs
homework_1/measurements/Pthreads_over_accuracy.png Parādīt failu

Pirms Pēc
Platums: 4000  |  Augstums: 2250  |  Izmērs: 100 KiB

Binārs
homework_1/measurements/Pthreads_over_threads.png Parādīt failu

Pirms Pēc
Platums: 4000  |  Augstums: 2250  |  Izmērs: 105 KiB

+ 122
- 0
homework_1/measurements/over_acc.txt Parādīt failu

@@ -0,0 +1,122 @@
# Serial-sift over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 100 -k 100 -t
[Timing]: knnsearch: 4395 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 80 -k 100 -t
[Timing]: knnsearch: 4365 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 60 -k 100 -t
[Timing]: knnsearch: 4384 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 40 -k 100 -t
[Timing]: knnsearch: 4315 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 20 -k 100 -t
[Timing]: knnsearch: 4295 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 10 -k 100 -t
[Timing]: knnsearch: 4246 [msec]


# Serial-mnist over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 100 -k 100 -t
[Timing]: knnsearch: 7936 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 80 -k 100 -t
[Timing]: knnsearch: 7924 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 60 -k 100 -t
[Timing]: knnsearch: 7886 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 40 -k 100 -t
[Timing]: knnsearch: 7903 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 20 -k 100 -t
[Timing]: knnsearch: 7844 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 10 -k 100 -t
[Timing]: knnsearch: 7801 [msec]


# OMP-sift over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t
[Timing]: knnsearch: 4093 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t
[Timing]: knnsearch: 4098 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t
[Timing]: knnsearch: 4040 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t
[Timing]: knnsearch: 4001 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t
[Timing]: knnsearch: 3980 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t
[Timing]: knnsearch: 3937 [msec]

# OMP-mnist over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t
[Timing]: knnsearch: 7575 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t
[Timing]: knnsearch: 7463 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t
[Timing]: knnsearch: 7389 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t
[Timing]: knnsearch: 7416 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t
[Timing]: knnsearch: 7321 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t
[Timing]: knnsearch: 7303 [msec]



# CILK-sift over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t
[Timing]: knnsearch: 4218 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t
[Timing]: knnsearch: 4239 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t
[Timing]: knnsearch: 4173 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t
[Timing]: knnsearch: 4168 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t
[Timing]: knnsearch: 4108 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t
[Timing]: knnsearch: 4057 [msec]


# CILK-mnist over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t
[Timing]: knnsearch: 7864 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t
[Timing]: knnsearch: 7871 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t
[Timing]: knnsearch: 7835 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t
[Timing]: knnsearch: 7748 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t
[Timing]: knnsearch: 7762 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t
[Timing]: knnsearch: 7713 [msec]


# Pthreads-sift over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t
[Timing]: knnsearch: 1157 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t
[Timing]: knnsearch: 1159 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t
[Timing]: knnsearch: 1121 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t
[Timing]: knnsearch: 1100 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t
[Timing]: knnsearch: 1084 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t
[Timing]: knnsearch: 1075 [msec]


# Pthreads-mnist over acc
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t
[Timing]: knnsearch: 2050 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t
[Timing]: knnsearch: 2086 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t
[Timing]: knnsearch: 2040 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t
[Timing]: knnsearch: 2020 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t
[Timing]: knnsearch: 2004 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t
[Timing]: knnsearch: 1979 [msec]





+ 101
- 0
homework_1/measurements/over_threads.txt Parādīt failu

@@ -0,0 +1,101 @@
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 4418 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 7924 [msec]

# OMP-sift over threads
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 4469 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t
[Timing]: knnsearch: 4283 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t
[Timing]: knnsearch: 4096 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t
[Timing]: knnsearch: 3822 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t
[Timing]: knnsearch: 4060 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
[Timing]: knnsearch: 4241 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t
[Timing]: knnsearch: 5193 [msec]

# OMP-mnist over threads
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 8053 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t
[Timing]: knnsearch: 7806 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t
[Timing]: knnsearch: 7465 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t
[Timing]: knnsearch: 6828 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t
[Timing]: knnsearch: 7662[msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t
[Timing]: knnsearch: 8013 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t
[Timing]: knnsearch: 8123 [msec]

# CILK-sift over threads
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 4225 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t
[Timing]: knnsearch: 4090 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t
[Timing]: knnsearch: 3696 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t
[Timing]: knnsearch: 3122 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t
[Timing]: knnsearch: 3860 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
[Timing]: knnsearch: 4141 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t
[Timing]: knnsearch: 5103 [msec]

# CILK-mnist over threads
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 7744 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t
[Timing]: knnsearch: 7206 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t
[Timing]: knnsearch: 6965 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t
[Timing]: knnsearch: 6628 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t
[Timing]: knnsearch: 7362 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t
[Timing]: knnsearch: 7813 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t
[Timing]: knnsearch: 158123[msec]

# Pthreads-sift over threads
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 4254 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t
[Timing]: knnsearch: 2155 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t
[Timing]: knnsearch: 1133 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t
[Timing]: knnsearch: 877 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t
[Timing]: knnsearch: 724 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
[Timing]: knnsearch: 640 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t
[Timing]: knnsearch: 682 [msec]

# Pthreads-mnist over threads
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t
[Timing]: knnsearch: 7889 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t
[Timing]: knnsearch: 3963 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t
[Timing]: knnsearch: 2058 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t
[Timing]: knnsearch: 1445 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t
[Timing]: knnsearch: 1496 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t
[Timing]: knnsearch: 1379 [msec]
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t
[Timing]: knnsearch: 1352 [msec]


Binārs
homework_1/out.hdf5 Parādīt failu


+ 27
- 0
homework_1/run_measurements.sh Parādīt failu

@@ -0,0 +1,27 @@
#!/usr/bin/env bash

#
# Take measurements
#
# Runs the knnsearch binaries inside the hoo2/hpcimage docker container.
# Every command line is printed right before it is executed, so the log
# always shows the arguments that were actually measured.
#

# Host directory that is mounted into the container. Defaults to the original
# hard-coded location; override with PROJECT_DIR=<path> to run elsewhere.
PROJECT_DIR="${PROJECT_DIR:-/home/hoo2/Work/AUTH/PDS/homework_1}"

# Docker invocation kept as an array so each argument is passed intact.
DOCK=(docker run --rm -v "${PROJECT_DIR}:/usr/src/PDS_homework_1" -w /usr/src/PDS_homework_1/ hoo2/hpcimage)

# Print a command line and then execute exactly that command in the container.
# Echoing and running from the same argument list keeps the log truthful
# (previously the echoed text said "-s 1" while the command ran with "-s 10").
run_case() {
   echo "$*"
   "${DOCK[@]}" "$@"
}

echo " "
echo "Serial"

run_case ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
run_case ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t

echo " "
echo "OMP"

# OMP runs are currently disabled; uncomment to include them.
#run_case ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
#run_case ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t
#run_case ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
#run_case ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t

+ 41
- 21
homework_1/src/main.cpp Parādīt failu

@@ -65,9 +65,15 @@ bool get_options(int argc, char* argv[]){
else if (arg == "-k") {
session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
}
else if (arg == "-n" || arg == "--max_trheads") {
else if (arg == "-n" || arg == "--max_threads") {
session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
}
else if (arg == "-s" || arg == "--slices") {
session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices;
}
else if (arg == "-a" || arg == "--accuracy") {
session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy;
}
else if (arg == "-t" || arg == "--timing")
session.timing = true;
else if (arg == "-v" || arg == "--verbose")
@@ -87,7 +93,12 @@ bool get_options(int argc, char* argv[]){
std::cout << " -k <number>\n";
std::cout << " Set the number of closest neighbors to find. \n\n";
std::cout << " -n | --max_trheads <threads>\n";
std::cout << " Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n";
std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n";
std::cout << " -s | --slices <slices/threads>\n";
std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n";
std::cout << " <threads> should be less or equal to available CPUs\n\n";
std::cout << " -a | --accuracy <accuracy>\n";
std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n";
std::cout << " -t | --timing\n";
std::cout << " Request timing measurements output to stdout.\n\n";
std::cout << " -v | --verbose\n";
@@ -109,6 +120,27 @@ bool get_options(int argc, char* argv[]){
return status;
}

/**
 * Load the input matrices of the current session.
 *
 * An already existing file at session.outMtxFile is removed first. The corpus
 * data set is then read into \p Corpus and, when session.queryMtx is set,
 * a second load fills \p Query.
 *
 * \note NOTE(review): the Query load reads from session.corpusMtxFile and
 *       session.corpusDataSet, i.e. the same source as the Corpus load.
 *       Confirm whether dedicated query file/data-set settings were intended.
 *
 * \param Corpus  Matrix receiving the corpus data.
 * \param Query   Matrix receiving the query data; not touched unless
 *                session.queryMtx is set.
 */
void loadMtx(MatrixDst& Corpus, MatrixDst& Query) {
   // Remove an existing output file, if any.
   const char* out_path = session.outMtxFile.c_str();
   if (access(out_path, F_OK) == 0) {
      std::remove(out_path);
   }

   // Load timing is currently disabled (no timer calls around the loads).
   Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);

   if (!session.queryMtx) {
      return;
   }
   Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
}

/**
 * Store the search results to the session's output file.
 *
 * Both matrices are written to session.outMtxFile: \p Idx under
 * session.outMtxIdxDataSet and \p Dst under session.outMtxDstDataSet,
 * in that order.
 *
 * \param Idx  Index matrix to store.
 * \param Dst  Distance matrix to store.
 */
void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) {
   auto& out_file = session.outMtxFile;   // both data sets go to the same file

   // Store timing is currently disabled (no timer calls around the stores).
   Mtx::store<MatrixIdx, IdxHDF5Type>(out_file, session.outMtxIdxDataSet, Idx);
   Mtx::store<MatrixDst, DstHDF5Type>(out_file, session.outMtxDstDataSet, Dst);
}


#ifndef TESTING
int main(int argc, char* argv[]) try {
// Instantiate matrixes
@@ -127,38 +159,26 @@ int main(int argc, char* argv[]) try {
if (!get_options(argc, argv))
exit(1);

if (access(session.outMtxFile.c_str(), F_OK) == 0)
std::remove(session.outMtxFile.c_str());
init_workers();

// Load data
timer.start();
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
if (session.queryMtx)
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
timer.stop();
timer.print_dt("Load hdf5 files");
loadMtx(Corpus, Query);

// Prepare output memory
Idx.resize(Query.rows(), session.k);
Dst.resize(Query.rows(), session.k);
Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);

// Do the search
logger << "Start knnsearch ...";
timer.start();
if (session.queryMtx)
knnsearch(Corpus, Query, 0, session.k, session.k, Idx, Dst);
else
knnsearch(Corpus, Corpus, 0, session.k, session.k, Idx, Dst);
size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0));
knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst);
timer.stop();
logger << " Done" << logger.endl;
timer.print_dt("knnsearch");

// Store data
timer.start();
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
timer.stop();
timer.print_dt("Store hdf5 files");
storeMtx(Idx, Dst);

return 0;
}


+ 50
- 0
homework_1/src/v1.cpp Parādīt failu

@@ -0,0 +1,50 @@
/**
* \file v1.cpp
* \brief Thread/worker initialization (init_workers) for the v1 implementation.
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/

#include "v1.hpp"


/**
 * Settle session.max_threads and session.slices for the selected backend
 * and restrict OpenBLAS to a single thread.
 *
 * Backend selection is done at compile time with precedence
 * CILK, then OMP, then PTHREADS; with none of them defined no worker count
 * is queried and session.max_threads is left as it is.
 *
 * - If session.max_threads is unset (zero) it becomes session.slices when
 *   that is set, otherwise the worker count reported by the backend.
 * - In the OMP build only, an explicitly requested session.max_threads that
 *   is below omp_get_max_threads() is passed to omp_set_num_threads().
 *   In every other case an explicit value is not forwarded to a runtime here.
 * - If session.slices is unset (zero) it takes the value of session.max_threads.
 */
void init_workers() {
   // Worker count as reported by the compiled-in backend.
#if defined CILK
   const size_t workers = __cilkrts_get_nworkers();
#elif defined OMP
   const size_t workers = static_cast<size_t>(omp_get_max_threads());
#elif defined PTHREADS
   const size_t workers = std::thread::hardware_concurrency();
#endif

#if defined CILK || defined OMP || defined PTHREADS
   // No explicit thread request: follow the slices, else the backend default.
   const bool threads_unset = !session.max_threads;
   if (threads_unset) {
      session.max_threads = (session.slices) ? (session.slices) : workers;
   }
#endif

#if defined OMP && !defined CILK
   // Explicit request below the OpenMP maximum: hand it to the runtime.
   if (!threads_unset && session.max_threads < workers) {
      omp_set_num_threads(session.max_threads);
   }
#endif

   // No explicit slicing: one slice per thread.
   if (!session.slices) {
      session.slices = session.max_threads;
   }

   openblas_set_num_threads(1);  // Limit OpenBLAS to 1 thread
}


Notiek ielāde…
Atcelt
Saglabāt