@@ -27,8 +27,8 @@ SRC_DIR_LIST := src gtest | |||
# Include directories list (space separated). Makefile-relative path.
INC_DIR_LIST := inc \ | |||
src \ | |||
/usr/include/hdf5/serial/ \ | |||
gtest \ | |||
/usr/include/hdf5/serial/ | |||
# Libs/MATLAB/R2019b/include/ \ | |||
# Exclude files list (space separated). Filenames only.
@@ -179,7 +179,7 @@ hpc-clean: | |||
rm hpc-results/post | |||
# | |||
# ================ Local via docker build rules ================= | |||
# ================ Local (and/or) via docker build rules ================= | |||
# | |||
# examples: | |||
# make IMAGE=hpcimage v0 | |||
@@ -190,12 +190,6 @@ local_v0: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=0 | |||
local_v0: TARGET := local_v0 | |||
local_v0: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 | |||
local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 | |||
local_v0_opt: TARGET := local_v0_opt | |||
local_v0_opt: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=1 | |||
local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=1 | |||
@@ -203,6 +197,21 @@ local_v1: TARGET := local_v1 | |||
local_v1: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
local_v1_omp: CFLAGS := $(DEB_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | |||
local_v1_omp: CXXFLAGS := $(DEB_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | |||
local_v1_omp: LDFLAGS += -fopenmp | |||
local_v1_omp: TARGET := local_v1_omp | |||
local_v1_omp: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
local_v1_pth: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=1 -DPTHREADS | |||
local_v1_pth: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=1 -DPTHREADS | |||
local_v1_pth: TARGET := local_v1_pth | |||
local_v1_pth: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
v0: DOCKER := $(DOCKER_CMD) | |||
v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 | |||
v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 | |||
@@ -215,7 +224,7 @@ v1_cilk: CXX := /usr/local/OpenCilk-9.0.1-Linux/bin/clang++ | |||
v1_cilk: CFLAGS := $(REL_CFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK | |||
v1_cilk: CXXFLAGS := $(REL_CXXFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK | |||
v1_cilk: LDFLAGS += -fcilkplus | |||
v1_cilk: TARGET := knnsearch_cilkv1 | |||
v1_cilk: TARGET := knnsearch_v1_cilk | |||
v1_cilk: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
@@ -223,10 +232,17 @@ v1_omp: DOCKER := $(DOCKER_CMD) | |||
v1_omp: CFLAGS := $(REL_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | |||
v1_omp: CXXFLAGS := $(REL_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | |||
v1_omp: LDFLAGS += -fopenmp | |||
v1_omp: TARGET := knnsearch_ompv1 | |||
v1_omp: TARGET := knnsearch_v1_omp | |||
v1_omp: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
v1_pth: DOCKER := $(DOCKER_CMD) | |||
v1_pth: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=1 -DPTHREADS | |||
v1_pth: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=1 -DPTHREADS | |||
v1_pth: TARGET := knnsearch_v1_pth | |||
v1_pth: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
v1: DOCKER := $(DOCKER_CMD) | |||
v1: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=1 | |||
v1: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=1 | |||
@@ -240,7 +256,12 @@ tests: TARGET := tests | |||
tests: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
tests_rel: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 -DTESTING | |||
tests_rel: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 -DTESTING | |||
tests_rel: TARGET := tests | |||
tests_rel: $(BUILD_DIR)/$(TARGET) | |||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||
# | |||
# ========= Inside CSAL Image build rules =========== | |||
# | |||
@@ -19,6 +19,8 @@ | |||
using matrix_t = mtx::Matrix<int>; | |||
extern void loadMtx(MatrixDst& Corpus, MatrixDst& Query); | |||
extern void storeMtx(MatrixIdx& Idx, MatrixDst& Dst); | |||
// ===================================== | |||
// C1, Q1 | |||
@@ -140,11 +142,44 @@ TEST(Tv0_UT, pdist2_test2) { | |||
} | |||
/*
 * v0::pdist2 of the C2 fixture against itself.
 * The reference matrix is 16x16, symmetric with a zero diagonal, and its
 * entries are given to four decimals, so results are compared with an
 * absolute tolerance instead of exact equality.
 */
TEST(Tv0_UT, pdist2_test3) {
   mtx::Matrix<double> D2_exp(16, 16, {
      0,    0.7433,    0.6868,    0.8846,    0.6342,    0.4561,    0.5118,    0.6341,    0.5461,    0.7322,    0.6974,    0.4330,    0.7028,    0.6303,    0.6826,    0.4179,
      0.7433,         0,    0.3400,    0.4555,    0.4207,    0.9736,    0.9690,    0.7386,    1.1055,    0.5462,    0.5345,    0.6576,    0.8677,    1.0291,    0.5393,    0.8106,
      0.6868,    0.3400,         0,    0.5380,    0.6268,    0.9512,    1.0234,    0.8403,    0.9843,    0.8187,    0.3091,    0.7829,    0.5759,    0.9411,    0.7239,    0.9186,
      0.8846,    0.4555,    0.5380,         0,    0.6796,    1.1672,    1.0460,    1.1016,    1.1139,    0.7542,    0.6480,    0.9304,    1.0568,    1.3482,    0.8316,    0.9750,
      0.6342,    0.4207,    0.6268,    0.6796,         0,    0.9267,    0.8772,    0.4847,    0.9317,    0.4093,    0.8351,    0.4215,    0.9736,    0.9007,    0.5999,    0.5291,
      0.4561,    0.9736,    0.9512,    1.1672,    0.9267,         0,    0.3903,    0.7795,    0.9308,    0.8429,    0.8436,    0.5672,    0.9284,    0.7064,    0.6435,    0.5975,
      0.5118,    0.9690,    1.0234,    1.0460,    0.8772,    0.3903,         0,    0.8920,    0.9253,    0.7060,    0.9427,    0.5728,    1.1515,    0.9907,    0.6471,    0.4811,
      0.6341,    0.7386,    0.8403,    1.1016,    0.4847,    0.7795,    0.8920,         0,    0.9824,    0.6416,    0.9844,    0.3398,    0.9355,    0.5428,    0.6536,    0.5309,
      0.5461,    1.1055,    0.9843,    1.1139,    0.9317,    0.9308,    0.9253,    0.9824,         0,    1.1517,    1.0541,    0.8746,    0.8506,    0.8777,    1.2036,    0.7607,
      0.7322,    0.5462,    0.8187,    0.7542,    0.4093,    0.8429,    0.7060,    0.6416,    1.1517,         0,    0.9106,    0.4245,    1.2071,    1.0738,    0.3745,    0.5170,
      0.6974,    0.5345,    0.3091,    0.6480,    0.8351,    0.8436,    0.9427,    0.9844,    1.0541,    0.9106,         0,    0.8647,    0.5941,    0.9954,    0.7148,    0.9876,
      0.4330,    0.6576,    0.7829,    0.9304,    0.4215,    0.5672,    0.5728,    0.3398,    0.8746,    0.4245,    0.8647,         0,    0.9590,    0.6782,    0.4586,    0.2525,
      0.7028,    0.8677,    0.5759,    1.0568,    0.9736,    0.9284,    1.1515,    0.9355,    0.8506,    1.2071,    0.5941,    0.9590,         0,    0.6838,    1.0517,    1.0675,
      0.6303,    1.0291,    0.9411,    1.3482,    0.9007,    0.7064,    0.9907,    0.5428,    0.8777,    1.0738,    0.9954,    0.6782,    0.6838,         0,    0.9482,    0.7937,
      0.6826,    0.5393,    0.7239,    0.8316,    0.5999,    0.6435,    0.6471,    0.6536,    1.2036,    0.3745,    0.7148,    0.4586,    1.0517,    0.9482,         0,    0.6345,
      0.4179,    0.8106,    0.9186,    0.9750,    0.5291,    0.5975,    0.4811,    0.5309,    0.7607,    0.5170,    0.9876,    0.2525,    1.0675,    0.7937,    0.6345,         0
   });

   mtx::Matrix<double> D (16,16);

   v0::pdist2(C2, C2, D);

   // EXPECT_NEAR reports both values and their difference on failure,
   // which a boolean comparison wrapped in EXPECT_EQ(..., true) cannot do.
   constexpr double tolerance = 0.01;
   for (size_t i = 0 ; i< D.rows() ; ++i)
      for (size_t j = 0 ; j<D.columns() ; ++j)
         EXPECT_NEAR (D2_exp.get(i ,j), D.get(i, j), tolerance);
}
/* | |||
* ========================================== | |||
* v0::knn | |||
*/ | |||
TEST(Tv0_UT, knn_test1) { | |||
TEST(Tv0_UT, knn_v0_test1) { | |||
size_t k = 3; | |||
mtx::Matrix<uint32_t> Idx_exp(5, k, { | |||
5, 8, 9, | |||
@@ -177,7 +212,7 @@ TEST(Tv0_UT, knn_test1) { | |||
} | |||
TEST(Tv0_UT, knn_test2) { | |||
TEST(Tv0_UT, knn_v0_test2) { | |||
size_t k = 3; | |||
mtx::Matrix<uint32_t> Idx_exp(8, k, { | |||
14, 13, 1, | |||
@@ -220,7 +255,46 @@ TEST(Tv0_UT, knn_test2) { | |||
* ========================================== | |||
* v1::knn | |||
*/ | |||
/*
 * v1::knnsearch with a single slice: the 3 nearest neighbours in the C2
 * fixture for each of the 8 rows of the Q2 fixture.
 */
TEST(Tv1_UT, knn_v1_1slice) {
   size_t k = 3;
   // Expected neighbour indexes, 1-based.
   mtx::Matrix<uint32_t> Idx_exp(8, k, {
      14,   13,    1,
      15,   10,   12,
      14,    8,   12,
       4,    1,    3,
       8,   12,    5,
       4,    3,    2,
      10,    2,    4,
       1,   11,    9
   });
   // Expected neighbour distances, given to four decimals.
   mtx::Matrix<double> Dst_exp(8, k, {
      0.1939,    0.5768,    0.6020,
      0.2708,    0.3686,    0.3740,
      0.2684,    0.3103,    0.5277,
      0.4709,    0.6050,    0.6492,
      0.4397,    0.6842,    0.7074,
      0.3402,    0.4360,    0.4475,
      0.3708,    0.4037,    0.4417,
      0.6352,    0.6653,    0.6758
   });

   mtx::Matrix<uint32_t> Idx(8, k);
   mtx::Matrix<double>   Dst(8, k);

   v1::knnsearch(C2, Q2, 1, k, k, Idx, Dst);

   for (size_t i = 0 ; i< Idx.rows() ; ++i)
      for (size_t j = 0 ; j<Idx.columns() ; ++j) {
         // Comparing the values directly lets gtest print both on a mismatch.
         EXPECT_EQ (Idx_exp.get(i ,j), Idx.get(i, j) + 1);  // matlab starts from 1
         EXPECT_NEAR (Dst_exp.get(i ,j), Dst.get(i, j), 0.01);
      }
}
TEST(Tv1_UT, knn_v1_2slice) { | |||
size_t k = 3; | |||
mtx::Matrix<uint32_t> Idx_exp(8, k, { | |||
14, 13, 1, | |||
@@ -247,7 +321,7 @@ TEST(Tv1_UT, knn_test1) { | |||
mtx::Matrix<uint32_t> Idx(8, k); | |||
mtx::Matrix<double> Dst(8, k); | |||
v1::knnsearch(C2, Q2, 0, k, k, Idx, Dst); | |||
v1::knnsearch(C2, Q2, 2, k, k, Idx, Dst); | |||
for (size_t i = 0 ; i< Idx.rows() ; ++i) | |||
@@ -260,7 +334,7 @@ TEST(Tv1_UT, knn_test1) { | |||
} | |||
// all-to-all | |||
TEST(Tv1_UT, knn_test2) { | |||
TEST(Tv1_UT, knn_v1_4slice) { | |||
size_t k = 3; | |||
mtx::Matrix<uint32_t> Idx_exp(16, k, { | |||
1, 16, 12, | |||
@@ -303,7 +377,7 @@ TEST(Tv1_UT, knn_test2) { | |||
mtx::Matrix<uint32_t> Idx(16, k); | |||
mtx::Matrix<double> Dst(16, k); | |||
v1::knnsearch(C2, C2, 0, k, k, Idx, Dst); | |||
v1::knnsearch(C2, C2, 4, k, k, Idx, Dst); | |||
for (size_t i = 0 ; i< Idx.rows() ; ++i) | |||
@@ -315,3 +389,130 @@ TEST(Tv1_UT, knn_test2) { | |||
} | |||
/* | |||
* ============== Live hdf5 tests =============== | |||
* | |||
 * In order to run these tests we need the following hdf5 files in the ./mtx directory:
* | |||
 *  - fashion-mnist-784-euclidean.hdf5
* - mnist-784-euclidean.hdf5 | |||
* - sift-128-euclidean.hdf5 | |||
* - gist-960-euclidean.hdf5 | |||
* | |||
*/ | |||
/*
 * Live run of v0::knnsearch on the "/test" dataset of the SIFT hdf5 file,
 * searching the corpus against itself with k = 100.
 * The test makes no numeric check; it only has to run to completion.
 */
TEST(Tlive_UT, knn_v0_sift_test) {
   // Working matrixes
   MatrixDst Corpus, Query;
   MatrixIdx Idx;
   MatrixDst Dst;

   // Session configuration: corpus-only input, timing enabled
   session.outMtxFile    = "test/knn_v0.hdf5";
   session.timing        = true;
   session.queryMtx      = false;
   session.corpusDataSet = "/test";
   session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
   session.k             = 100;
   const size_t m = session.k;

   loadMtx(Corpus, Query);

   // No Query matrix is loaded, so the outputs get one row per Corpus row
   const auto out_rows = Corpus.rows();
   Idx.resize(out_rows, session.k);
   Dst.resize(out_rows, session.k);

   v0::knnsearch(Corpus, Corpus, 0, session.k, m, Idx, Dst);

   // presumably written to session.outMtxFile - confirm in storeMtx
   storeMtx(Idx, Dst);

   EXPECT_TRUE(true);
}
/*
 * Live run of v1::knnsearch with ONE slice on the "/test" dataset of the
 * SIFT hdf5 file, searching the corpus against itself with k = 100.
 * The test makes no numeric check; it only has to run to completion.
 */
TEST(Tlive_UT, knn_v1_sift_test_1slice) {
   // Instantiate matrixes
   MatrixDst Corpus;
   MatrixDst Query;
   MatrixIdx Idx;
   MatrixDst Dst;

   // setup environment
   session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
   session.corpusDataSet = "/test";
   session.queryMtx      = false;
   session.k             = 100;
   size_t m              = session.k;
   session.timing        = true;
   session.outMtxFile    = "test/knn_v1ser.hdf5";

   loadMtx(Corpus, Query);

   // Prepare output memory (there is no Query, so size it from Corpus)
   Idx.resize(Corpus.rows(), session.k);
   Dst.resize(Corpus.rows(), session.k);

   // The third argument of v1::knnsearch is the number of slices. Passing 0
   // builds no slices and runs no search, so a "1slice" test must pass 1
   // (as the knn_v1_1slice unit test does).
   v1::knnsearch(Corpus, Corpus, 1, session.k, m, Idx, Dst);

   storeMtx(Idx, Dst);

   EXPECT_EQ(true, true);
}
/*
 * Live run of v1::knnsearch with TWO slices on the "/test" dataset of the
 * SIFT hdf5 file, searching the corpus against itself with k = 100.
 * The test makes no numeric check; it only has to run to completion.
 */
TEST(Tlive_UT, knn_v1_sift_test_2slice) {
   // Working matrixes
   MatrixDst Corpus, Query;
   MatrixIdx Idx;
   MatrixDst Dst;

   // Session configuration: corpus-only input, timing enabled
   session.outMtxFile    = "test/knn_v1ser.hdf5";
   session.timing        = true;
   session.queryMtx      = false;
   session.corpusDataSet = "/test";
   session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
   session.k             = 100;
   const size_t m = session.k;

   loadMtx(Corpus, Query);

   // No Query matrix is loaded, so the outputs get one row per Corpus row
   const auto out_rows = Corpus.rows();
   Idx.resize(out_rows, session.k);
   Dst.resize(out_rows, session.k);

   v1::knnsearch(Corpus, Corpus, 2, session.k, m, Idx, Dst);

   // presumably written to session.outMtxFile - confirm in storeMtx
   storeMtx(Idx, Dst);

   EXPECT_TRUE(true);
}
/*
 * Live run of v1::knnsearch with FOUR slices on the "/test" dataset of the
 * SIFT hdf5 file, searching the corpus against itself with k = 100.
 * The test makes no numeric check; it only has to run to completion.
 */
TEST(Tlive_UT, knn_v1_sift_test_4slice) {
   // Working matrixes
   MatrixDst Corpus, Query;
   MatrixIdx Idx;
   MatrixDst Dst;

   // Session configuration: corpus-only input, timing enabled
   session.outMtxFile    = "test/knn_v1ser.hdf5";
   session.timing        = true;
   session.queryMtx      = false;
   session.corpusDataSet = "/test";
   session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5";
   session.k             = 100;
   const size_t m = session.k;

   loadMtx(Corpus, Query);

   // No Query matrix is loaded, so the outputs get one row per Corpus row
   const auto out_rows = Corpus.rows();
   Idx.resize(out_rows, session.k);
   Dst.resize(out_rows, session.k);

   v1::knnsearch(Corpus, Corpus, 4, session.k, m, Idx, Dst);

   // presumably written to session.outMtxFile - confirm in storeMtx
   storeMtx(Idx, Dst);

   EXPECT_TRUE(true);
}
@@ -55,7 +55,9 @@ struct session_t { | |||
std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format | |||
std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file | |||
std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file | |||
std::size_t max_threads {}; //!< Maximum threads to use | |||
std::size_t max_threads {0}; //!< Maximum threads to use | |||
std::size_t slices {0}; //!< Slices/threads to use | |||
std::size_t accuracy {100}; //!< The neighbor finding accuracy | |||
bool timing {false}; //!< Enable timing prints of the program | |||
bool verbose {false}; //!< Flag to enable verbose output to stdout | |||
}; | |||
@@ -134,6 +134,9 @@ struct Matrix { | |||
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; } | |||
Matrix(const Matrix& m) = delete; //!< No copy ctor | |||
Matrix& operator=(const Matrix& m) = delete; //!< No copy | |||
//Matrix(const Matrix& m); | |||
//Matrix& operator=(const Matrix& m) { copy(m); } | |||
//! @} | |||
//! \name Data exposure | |||
@@ -233,6 +236,11 @@ struct Matrix { | |||
// Basic serial iteration support: raw pointers over the element storage.
//! Mutable pointer to the first element (data_).
DataType* data() noexcept { return data_; } | |||
//! Begin pointer for mutable iteration; same address as data().
DataType* begin() noexcept { return data_; } | |||
//! Begin pointer for read-only iteration.
const DataType* begin() const noexcept { return data_; } | |||
//! End pointer for mutable iteration: data_ advanced by capacity(rows_, cols_) elements.
DataType* end() noexcept { return data_ + capacity(rows_, cols_); } | |||
//! End pointer for read-only iteration: data_ advanced by capacity(rows_, cols_) elements.
const DataType* end() const noexcept { return data_ + capacity(rows_, cols_); } | |||
// IndexType begin_idx() noexcept { return 0; } | |||
// IndexType end_idx() noexcept { return capacity(rows_, cols_); } | |||
@@ -265,17 +273,19 @@ struct Matrix { | |||
std::swap(rows_, src.rows_); | |||
std::swap(cols_, src.cols_); | |||
} | |||
private: | |||
//! move helper
//!
//! Transfers every member of \p src into this object: the owned vector
//! storage, the external storage pointer, the active data pointer, the
//! storage-selection flag and both dimensions.
//! \p src is not reset here; its pointer members keep their old values
//! (moving a raw pointer is a plain copy).
void moves(Matrix&& src) noexcept {
   // Self-move must be a no-op: move-assigning a std::vector to itself may
   // release its buffer, which would leave data_ dangling.
   if (this == &src)
      return;

   vector_storage_ = std::move(src.vector_storage_);
   raw_storage_    = std::move(src.raw_storage_);
   data_           = std::move(src.data_);
   use_vector_     = std::move(src.use_vector_);
   rows_           = std::move(src.rows_);
   cols_           = std::move(src.cols_);
}
// Storage | |||
std::vector<DataType> | |||
vector_storage_; //!< Internal storage (if used). | |||
DataType* raw_storage_; //!< External storage (if used). | |||
@@ -528,125 +538,6 @@ private: | |||
}; | |||
template<typename ...> struct Matrix_view { }; | |||
/*! | |||
* @struct Matrix_view | |||
* @tparam MatrixType | |||
*/ | |||
template<template <typename, typename, MatrixType, MatrixOrder, bool> class Matrix, | |||
typename DataType, | |||
typename IndexType, | |||
MatrixType Type, | |||
MatrixOrder Order> | |||
struct Matrix_view<Matrix<DataType, IndexType, Type, Order, false>> { | |||
using owner_t = Matrix<DataType, IndexType, Type, Order, false>; | |||
using dataType = DataType; //!< meta:export of underling data type | |||
using indexType = IndexType; //!< meta:export of underling index type | |||
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order | |||
static constexpr MatrixType matrixType = Type; //!< meta:export of array type | |||
/*! | |||
* \name Obj lifetime | |||
*/ | |||
//! @{ | |||
//! Construct a matrix view to entire matrix | |||
Matrix_view(const owner_t* owner) noexcept : | |||
owner_(owner), m_(owner->data()), rows_(owner->rows()), cols_(owner->columns()) { } | |||
Matrix_view(const owner_t* owner, IndexType begin, IndexType end) noexcept : | |||
owner_(owner) { | |||
if constexpr (Order == MatrixOrder::ROWMAJOR) { | |||
m_ = owner->data() + begin * owner->columns(); | |||
rows_ = end - begin; | |||
cols_ = owner->columns(); | |||
} else if (Order == MatrixOrder::COLMAJOR) { | |||
m_ = owner->data() + begin * owner->rows(); | |||
rows_ = owner->rows(); | |||
cols_ = end - begin; | |||
} | |||
} | |||
Matrix_view(Matrix_view&& m) = delete; //! No move | |||
Matrix_view& operator=(Matrix_view&& m) = delete; | |||
Matrix_view(const Matrix_view& m) = delete; //!< No copy | |||
Matrix_view& operator=(const Matrix_view& m) = delete; | |||
//! @} | |||
//! Get/Set the size of each dimension | |||
const IndexType rows() const noexcept { return rows_; } | |||
const IndexType columns() const noexcept { return cols_; } | |||
//! Get the interface size of the Matrix (what appears to be the size) | |||
IndexType size() const { | |||
return rows_ * cols_; | |||
} | |||
//! Actual memory capacity of the symmetric matrix | |||
static constexpr IndexType capacity(IndexType M, IndexType N) { | |||
return M*N; | |||
} | |||
/* | |||
* virtual 2D accessors | |||
*/ | |||
const DataType get (IndexType i, IndexType j) const { | |||
if constexpr (Order == MatrixOrder::COLMAJOR) | |||
return m_[i + j*rows_]; | |||
else | |||
return m_[i*cols_ + j]; | |||
} | |||
DataType set (DataType v, IndexType i, IndexType j) { | |||
if constexpr (Order == MatrixOrder::COLMAJOR) | |||
return m_[i + j*rows_] = v; | |||
else | |||
return m_[i*cols_ + j] = v; | |||
} | |||
// DataType operator()(IndexType i, IndexType j) { return get(i, j); } | |||
/*! | |||
* Return a proxy MatVal object with read and write capabilities. | |||
* @param i The row number | |||
* @param j The column number | |||
* @return tHE MatVal object | |||
*/ | |||
MatVal<Matrix_view> operator()(IndexType i, IndexType j) noexcept { | |||
return MatVal<Matrix_view>(this, get(i, j), i, j); | |||
} | |||
// a basic serial iterator support | |||
DataType* data() noexcept { return m_.data(); } | |||
// IndexType begin_idx() noexcept { return 0; } | |||
// IndexType end_idx() noexcept { return capacity(rows_, cols_); } | |||
const DataType* data() const noexcept { return m_; } | |||
const IndexType begin_idx() const noexcept { return 0; } | |||
const IndexType end_idx() const noexcept { return capacity(rows_, cols_); } | |||
//! @} | |||
/*! | |||
* \name Safe iteration API | |||
* | |||
* This api automates the iteration over the array based on | |||
* MatrixType | |||
*/ | |||
//! @{ | |||
template<typename F, typename... Args> | |||
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) { | |||
for (IndexType it=begin ; it<end ; ++it) { | |||
std::forward<F>(lambda)(std::forward<Args>(args)..., it); | |||
} | |||
} | |||
//! @} | |||
//! | |||
private: | |||
const owner_t* owner_ {nullptr}; //!< Pointer to Matrix | |||
DataType* m_ {nullptr}; //!< Starting address of the slice/view | |||
IndexType rows_{}; //!< the virtual size of rows. | |||
IndexType cols_{}; //!< the virtual size of columns. | |||
}; | |||
/*! | |||
* A view/iterator hybrid object for Matrix columns. | |||
* | |||
@@ -54,10 +54,11 @@ void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) { | |||
for (int i = 0; i < M ; ++i) { | |||
for (int j = 0; j < N; ++j) { | |||
D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j); | |||
//D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative | |||
D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative | |||
D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each | |||
} | |||
} | |||
M++; | |||
} | |||
template<typename DataType, typename IndexType> | |||
@@ -82,7 +83,7 @@ void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) { | |||
* point of Q | |||
*/ | |||
template<typename MatrixD, typename MatrixI> | |||
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { | |||
void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { | |||
using DstType = typename MatrixD::dataType; | |||
using IdxType = typename MatrixI::dataType; | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* \file v0.hpp | |||
* \file v1.hpp | |||
* \brief | |||
* | |||
* \author | |||
@@ -16,6 +16,26 @@ | |||
#include "v0.hpp" | |||
#include "config.h" | |||
#if defined CILK | |||
#include <cilk/cilk.h> | |||
#include <cilk/cilk_api.h> | |||
//#include <cilk/reducer_opadd.h> | |||
#elif defined OMP | |||
#include <omp.h> | |||
#elif defined PTHREADS | |||
#include <thread> | |||
#include <numeric> | |||
#include <functional> | |||
//#include <random> | |||
#else | |||
#endif | |||
void init_workers(); | |||
namespace v1 { | |||
template <typename DataType, typename IndexType> | |||
@@ -57,49 +77,110 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1, | |||
} | |||
} | |||
template<typename MatrixD, typename MatrixI> | |||
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { | |||
void worker_body (std::vector<MatrixD>& corpus_slices, | |||
std::vector<MatrixD>& query_slices, | |||
MatrixI& idx, | |||
MatrixD& dst, | |||
size_t slice, | |||
size_t num_slices, size_t corpus_slice_size, size_t query_slice_size, | |||
size_t k, | |||
size_t m) { | |||
// "load" types | |||
using DstType = typename MatrixD::dataType; | |||
using IdxType = typename MatrixI::dataType; | |||
if (C.rows() <= 8 || Q.rows() <= 4) { | |||
// Base case: Call knnsearch directly | |||
v0::knnsearch(C, Q, idx_offset, k, m, idx, dst); | |||
return; | |||
} | |||
// Divide Corpus and Query into subsets | |||
IdxType midC = C.rows() / 2; | |||
IdxType midQ = Q.rows() / 2; | |||
// Slice corpus and query matrixes | |||
MatrixD C1((DstType*)C.data(), 0, midC, C.columns()); | |||
MatrixD C2((DstType*)C.data(), midC, midC, C.columns()); | |||
MatrixD Q1((DstType*)Q.data(), 0, midQ, Q.columns()); | |||
MatrixD Q2((DstType*)Q.data(), midQ, midQ, Q.columns()); | |||
// Allocate temporary matrixes for all permutations | |||
MatrixI N1_1(midQ, k), N1_2(midQ, k), N2_1(midQ, k), N2_2(midQ, k); | |||
MatrixD D1_1(midQ, k), D1_2(midQ, k), D2_1(midQ, k), D2_2(midQ, k); | |||
// Recursive calls | |||
knnsearch(C1, Q1, idx_offset, k, m, N1_1, D1_1); | |||
knnsearch(C2, Q1, idx_offset + midC, k, m, N1_2, D1_2); | |||
knnsearch(C1, Q2, idx_offset, k, m, N2_1, D2_1); | |||
knnsearch(C2, Q2, idx_offset + midC, k, m, N2_2, D2_2); | |||
// slice output matrixes | |||
MatrixI N1((IdxType*)idx.data(), 0, midQ, k); | |||
MatrixI N2((IdxType*)idx.data(), midQ, midQ, k); | |||
MatrixD D1((DstType*)dst.data(), 0, midQ, k); | |||
MatrixD D2((DstType*)dst.data(), midQ, midQ, k); | |||
// Merge results in place | |||
mergeResultsWithM(N1_1, D1_1, N1_2, D1_2, k, m, N1, D1); | |||
mergeResultsWithM(N2_1, D2_1, N2_2, D2_2, k, m, N2, D2); | |||
for (size_t ci = 0; ci < num_slices; ++ci) { | |||
size_t idx_offset = ci * corpus_slice_size; | |||
// Intermediate matrixes for intermediate results | |||
MatrixI temp_idx(query_slices[slice].rows(), k); | |||
MatrixD temp_dst(query_slices[slice].rows(), k); | |||
// kNN for each combination | |||
v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst); | |||
// Merge temporary results to final results | |||
MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k); | |||
MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k); | |||
mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice); | |||
} | |||
} | |||
/*!
 * Sliced k-nearest-neighbour search.
 *
 * Splits \p C and \p Q into \p num_slices row slices and runs worker_body
 * once per query slice. Depending on the build flags (OMP, CILK, PTHREADS)
 * the query slices are processed by an OpenMP loop, a cilk_for, one
 * std::thread per slice, or a plain serial loop.
 *
 * \param C            The corpus matrix
 * \param Q            The query matrix
 * \param num_slices   Number of slices; 0 is treated as 1
 * \param k            Number of neighbours
 * \param m            Forwarded unchanged to worker_body
 * \param idx          Output neighbour indexes; initialised to static_cast<IdxType>(-1)
 * \param dst          Output neighbour distances; initialised to infinity
 *
 * \note The last slice takes the rows left over by the integer division.
 */
template<typename MatrixD, typename MatrixI>
void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
   using DstType = typename MatrixD::dataType;
   using IdxType = typename MatrixI::dataType;

   // Zero slices would build no slices and run no worker, leaving the
   // outputs at their initial values. Treat it as a single slice.
   if (num_slices == 0)
      num_slices = 1;

   //Slice calculations
   size_t corpus_slice_size = C.rows() / num_slices;
   size_t query_slice_size  = Q.rows() / num_slices;

   // Make slices
   std::vector<MatrixD> corpus_slices;
   std::vector<MatrixD> query_slices;
   corpus_slices.reserve(num_slices);
   query_slices.reserve(num_slices);

   for (size_t i = 0; i < num_slices; ++i) {
      const bool last = (i == num_slices - 1);
      corpus_slices.emplace_back(
            (DstType*)C.data(),
            i * corpus_slice_size,
            (last ? C.rows() - i * corpus_slice_size : corpus_slice_size),
            C.columns());
      query_slices.emplace_back(
            (DstType*)Q.data(),
            i * query_slice_size,
            (last ? Q.rows() - i * query_slice_size : query_slice_size),
            Q.columns());
   }

   // Initial "nothing found yet" state of the outputs
   for (size_t i = 0; i < dst.rows(); ++i) {
      for (size_t j = 0; j < dst.columns(); ++j) {
         dst.set(std::numeric_limits<DstType>::infinity(), i, j);
         idx.set(static_cast<IdxType>(-1), i, j);
      }
   }

   // Main loop
#if defined OMP
   #pragma omp parallel for
   for (size_t qi = 0; qi < num_slices; ++qi) {
      worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
   }
#elif defined CILK
   cilk_for (size_t qi = 0; qi < num_slices; ++qi) {
      worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
   }
#elif defined PTHREADS
   std::vector<std::thread> workers;
   workers.reserve(num_slices);
   for (size_t qi = 0; qi < num_slices; ++qi) {
      workers.emplace_back(
            worker_body<MatrixD, MatrixI>,
            std::ref(corpus_slices), std::ref(query_slices),
            std::ref(idx), std::ref(dst),
            qi,
            num_slices, corpus_slice_size, query_slice_size,
            k, m);
   }
   // Join threads
   for (std::thread& t : workers) {
      t.join();
   }
#else
   for (size_t qi = 0; qi < num_slices; ++qi) {
      worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
   }
#endif
}
@@ -12,7 +12,10 @@ function [D2] = dist2(X, Y) | |||
if d1 ~= d2 | |||
error('X,Y column dimensions must match'); | |||
end | |||
%D2 = sqrt((X.^2)*ones(d,m) -2*X*Y' + ones(n,d)*(Y.^2)'); | |||
% debug | |||
%X_norm = sum(X.^2, 2); | |||
%Y_norm = sum(Y.^2, 2)'; | |||
%XY = 2 * X*Y'; | |||
D2 = max(sum(X.^2, 2) - 2 * X*Y' + sum(Y.^2, 2)', 0); | |||
D2 = sqrt(D2); | |||
@@ -0,0 +1,78 @@ | |||
% Plot measurements
%
% Execution time against neighbor-search accuracy. One figure per parallel
% backend (OMP, CILK, Pthreads), each drawn together with the serial SIFT
% and MNIST curves, and saved as <backend>_over_accuracy.png.

accuracy = [100 80 60 40 20 10];

% Serial measurements
ser_sift_acc      = [ 4395 4365 4384 4315 4295 4246 ];
ser_mnist_ser_acc = [ 7936 7924 7886 7903 7844 7801 ];

% Parallel measurements
omp_sift_acc   = [ 4093 4098 4040 4001 3980 3937 ];
omp_mnist_acc  = [ 7575 7463 7389 7416 7321 7303 ];

cilk_sift_acc  = [ 3718 3739 3673 3668 3608 3557 ];
cilk_mnist_acc = [ 7064 7071 7035 6948 6962 6913 ];

pth_sift_acc   = [ 1157 1159 1121 1100 1084 1075 ];
pth_mnist_acc  = [ 2050 2086 2040 2020 2004 1979 ];

% Figure 1: OMP, figure 2: CILK, figure 3: Pthreads
backend_names = {'OMP', 'CILK', 'Pthreads'};
sift_series   = {omp_sift_acc,  cilk_sift_acc,  pth_sift_acc};
mnist_series  = {omp_mnist_acc, cilk_mnist_acc, pth_mnist_acc};

for b = 1:numel(backend_names)
    tag = backend_names{b};

    figure;
    set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD

    plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT');
    hold on;
    plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST');
    plot(accuracy, sift_series{b},  '-^', 'DisplayName', [tag ' SIFT']);
    plot(accuracy, mnist_series{b}, '-d', 'DisplayName', [tag ' MNIST']);
    hold off;

    title(tag);
    xlabel('Accuracy (%)');
    ylabel('Execution Time [msec]');
    set(gca, 'XDir', 'reverse'); % reverse x
    legend('Location', 'northwest');
    grid on;

    print(gcf, [tag '_over_accuracy.png'], '-dpng', '-r300');
end
@@ -0,0 +1,75 @@ | |||
% Plot measurements
%
% Execution time against the number of threads. One figure per parallel
% backend (OMP, CILK, Pthreads), each drawn together with the serial SIFT
% and MNIST curves, and saved as <backend>_over_threads.png.

threads = [1 2 4 6 8 10 12];

% Serial measurements (the same value at every thread count)
ser_sift_threads      = [ 4418 4418 4418 4418 4418 4418 4418 ];
ser_mnist_ser_threads = [ 7924 7924 7924 7924 7924 7924 7924 ];

% Parallel measurements
omp_sift_th   = [ 4469 4283 4096 3822 4060 4241 5193 ];
omp_mnist_th  = [ 8053 7806 7465 6828 7662 8013 8123 ];

cilk_sift_th  = [ 4225 4090 3696 3122 3860 4141 5103 ];
cilk_mnist_th = [ 7744 7206 6965 6628 7362 7813 8123 ];

pth_sift_th   = [ 4254 2155 1133 877 724 640 682 ];
pth_mnist_th  = [ 7889 3963 2058 1445 1496 1379 1352 ];

% Figure 1: OMP, figure 2: CILK, figure 3: Pthreads
backend_names = {'OMP', 'CILK', 'Pthreads'};
sift_series   = {omp_sift_th,  cilk_sift_th,  pth_sift_th};
mnist_series  = {omp_mnist_th, cilk_mnist_th, pth_mnist_th};

for b = 1:numel(backend_names)
    tag = backend_names{b};

    figure;
    set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD

    plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT');
    hold on;
    plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST');
    plot(threads, sift_series{b},  '-^', 'DisplayName', [tag ' SIFT']);
    plot(threads, mnist_series{b}, '-d', 'DisplayName', [tag ' MNIST']);
    hold off;

    title(tag);
    xlabel('Threads');
    ylabel('Execution Time [msec]');
    legend('Location', 'northeast');
    grid on;

    print(gcf, [tag '_over_threads.png'], '-dpng', '-r300');
end
@@ -2,6 +2,106 @@ | |||
% | |||
% | |||
% | |||
% | |||
% | |||
C1 = [ | |||
0.8147 0.1576; | |||
0.9058 0.9706; | |||
0.1270 0.9572; | |||
0.9134 0.4854; | |||
0.6324 0.8003; | |||
0.0975 0.1419; | |||
0.2785 0.4218; | |||
0.5469 0.9157; | |||
0.9575 0.7922; | |||
0.9649 0.9595 ]; | |||
Q1 = [ | |||
0.6557 0.7577; | |||
0.0357 0.7431; | |||
0.8491 0.3922; | |||
0.9340 0.6555; | |||
0.6787 0.1712 ]; | |||
C2 = [ | |||
0.7060 0.4456 0.5060 0.6160; | |||
0.0318 0.6463 0.6991 0.4733; | |||
0.2769 0.7094 0.8909 0.3517; | |||
0.0462 0.7547 0.9593 0.8308; | |||
0.0971 0.2760 0.5472 0.5853; | |||
0.8235 0.6797 0.1386 0.5497; | |||
0.6948 0.6551 0.1493 0.9172; | |||
0.3171 0.1626 0.2575 0.2858; | |||
0.9502 0.1190 0.8407 0.7572; | |||
0.0344 0.4984 0.2543 0.7537; | |||
0.4387 0.9597 0.8143 0.3804; | |||
0.3816 0.3404 0.2435 0.5678; | |||
0.7655 0.5853 0.9293 0.0759; | |||
0.7952 0.2238 0.3500 0.0540; | |||
0.1869 0.7513 0.1966 0.5308; | |||
0.4898 0.2551 0.2511 0.7792 ]; | |||
Q2 = [ | |||
0.9340 0.3112 0.4505 0.0782; | |||
0.1299 0.5285 0.0838 0.4427; | |||
0.5688 0.1656 0.2290 0.1067; | |||
0.4694 0.6020 0.9133 0.9619; | |||
0.0119 0.2630 0.1524 0.0046; | |||
0.3371 0.6541 0.8258 0.7749; | |||
0.1622 0.6892 0.5383 0.8173; | |||
0.7943 0.7482 0.9961 0.8687 ]; | |||
D1_exp = [ | |||
0.6208 0.9745 0.2371 0.5120 0.1367; | |||
0.3284 0.8993 0.5811 0.3164 0.8310; | |||
0.5651 0.2327 0.9169 0.8616 0.9603; | |||
0.3749 0.9147 0.1132 0.1713 0.3921; | |||
0.0485 0.5994 0.4621 0.3346 0.6308; | |||
0.8312 0.6044 0.7922 0.9815 0.5819; | |||
0.5052 0.4028 0.5714 0.6959 0.4722; | |||
0.1919 0.5395 0.6045 0.4665 0.7561; | |||
0.3037 0.9231 0.4144 0.1387 0.6807; | |||
0.3692 0.9540 0.5790 0.3056 0.8386 ]; | |||
D2_exp = [ | |||
0.6020 0.7396 0.6583 0.6050 1.0070 0.5542 0.6298 0.6352; | |||
1.0696 0.6348 0.9353 0.6914 0.8160 0.4475 0.4037 0.9145; | |||
0.9268 0.8450 0.9376 0.6492 0.9671 0.4360 0.5956 0.7400; | |||
1.3455 0.9876 1.2953 0.4709 1.2557 0.3402 0.4417 0.7500; | |||
0.9839 0.5476 0.7517 0.7216 0.7074 0.5605 0.4784 0.9954; | |||
0.6839 0.7200 0.7305 0.9495 1.0628 0.8718 0.8178 0.9179; | |||
0.9850 0.7514 0.9585 0.7996 1.2054 0.7784 0.6680 0.8591; | |||
0.6950 0.4730 0.3103 1.0504 0.4397 0.8967 0.8140 1.2066; | |||
0.8065 1.2298 0.9722 0.7153 1.3933 0.8141 1.0204 0.6758; | |||
1.1572 0.3686 0.9031 0.8232 0.7921 0.6656 0.3708 1.0970; | |||
0.9432 0.9049 1.0320 0.6905 1.1167 0.5094 0.6455 0.6653; | |||
0.7672 0.3740 0.5277 0.8247 0.6842 0.6945 0.5648 0.9968; | |||
0.5768 1.1210 0.8403 0.9345 1.1316 0.8292 1.0380 0.8127; | |||
0.1939 0.8703 0.2684 1.1794 0.8103 1.0683 1.1115 1.1646; | |||
1.0106 0.2708 0.8184 0.8954 0.7402 0.6982 0.4509 1.0594; | |||
0.8554 0.5878 0.6834 0.7699 0.9155 0.7161 0.6162 0.9481 ]; | |||
% tests | |||
D1 = dist2(C1, Q1); | |||
if norm (D1-pdist2(C1, Q1), 'fro') > 0.01 | |||
disp('Error in dist2(C1, Q1)'); | |||
end | |||
D2 = dist2(C2, Q2); | |||
if norm (D2-pdist2(C2, Q2), 'fro') > 0.01 | |||
disp('Error in dist2(C2, Q2)'); | |||
end | |||
D2 = dist2(C2, C2); | |||
if norm (D2-pdist2(C2, C2), 'fro') > 0.01 | |||
disp('Error in dist2(C2, C2)'); | |||
end | |||
%C = rand(10000, 2); % Corpus | |||
%Q = rand(10000, 2); % Queries | |||
C = rand(20000, 2); % Two clusters
@@ -0,0 +1,122 @@ | |||
# Serial-sift over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 4395 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 4365 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 4384 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 4315 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 4295 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 4246 [msec] | |||
# Serial-mnist over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 7936 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 7924 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 7886 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 7903 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 7844 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 7801 [msec] | |||
# OMP-sift over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 4093 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 4098 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 4040 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 4001 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 3980 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 3937 [msec] | |||
# OMP-mnist over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 7575 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 7463 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 7389 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 7416 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 7321 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 7303 [msec] | |||
# CILK-sift over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 4218 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 4239 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 4173 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 4168 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 4108 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 4057 [msec] | |||
# CILK-mnist over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 7864 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 7871 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 7835 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 7748 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 7762 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 7713 [msec] | |||
# Pthreads-sift over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 1157 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 1159 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 1121 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 1100 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 1084 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 1075 [msec] | |||
# Pthreads-mnist over acc | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||
[Timing]: knnsearch: 2050 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||
[Timing]: knnsearch: 2086 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||
[Timing]: knnsearch: 2040 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||
[Timing]: knnsearch: 2020 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||
[Timing]: knnsearch: 2004 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||
[Timing]: knnsearch: 1979 [msec] | |||
@@ -0,0 +1,101 @@ | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 4418 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 7924 [msec] | |||
# OMP-sift over threads | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 4469 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t | |||
[Timing]: knnsearch: 4283 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t | |||
[Timing]: knnsearch: 4096 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t | |||
[Timing]: knnsearch: 3822 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t | |||
[Timing]: knnsearch: 4060 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||
[Timing]: knnsearch: 4241 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t | |||
[Timing]: knnsearch: 5193 [msec] | |||
# OMP-mnist over threads | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 8053 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t | |||
[Timing]: knnsearch: 7806 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t | |||
[Timing]: knnsearch: 7465 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t | |||
[Timing]: knnsearch: 6828 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t | |||
[Timing]: knnsearch: 7662[msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||
[Timing]: knnsearch: 8013 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t | |||
[Timing]: knnsearch: 8123 [msec] | |||
# CILK-sift over threads | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 4225 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t | |||
[Timing]: knnsearch: 4090 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t | |||
[Timing]: knnsearch: 3696 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t | |||
[Timing]: knnsearch: 3122 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t | |||
[Timing]: knnsearch: 3860 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||
[Timing]: knnsearch: 4141 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t | |||
[Timing]: knnsearch: 5103 [msec] | |||
# CILK-mnist over threads | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 7744 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t | |||
[Timing]: knnsearch: 7206 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t | |||
[Timing]: knnsearch: 6965 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t | |||
[Timing]: knnsearch: 6628 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t | |||
[Timing]: knnsearch: 7362 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||
[Timing]: knnsearch: 7813 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t | |||
[Timing]: knnsearch: 158123[msec] | |||
# Pthreads-sift over threads | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 4254 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t | |||
[Timing]: knnsearch: 2155 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t | |||
[Timing]: knnsearch: 1133 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t | |||
[Timing]: knnsearch: 877 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t | |||
[Timing]: knnsearch: 724 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||
[Timing]: knnsearch: 640 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t | |||
[Timing]: knnsearch: 682 [msec] | |||
# Pthreads-mnist over threads | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||
[Timing]: knnsearch: 7889 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t | |||
[Timing]: knnsearch: 3963 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t | |||
[Timing]: knnsearch: 2058 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t | |||
[Timing]: knnsearch: 1445 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t | |||
[Timing]: knnsearch: 1496 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||
[Timing]: knnsearch: 1379 [msec] | |||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t | |||
[Timing]: knnsearch: 1352 [msec] | |||
@@ -0,0 +1,27 @@ | |||
#!/usr/bin/env bash
#
# Take measurements
#
# Runs the knnsearch binaries inside the hoo2/hpcimage docker container and
# prints every command line right before it is executed.
#
# The host project directory mounted into the container defaults to the
# original hard-coded location and can be overridden through PDS_HOME.
#

PDS_HOME="${PDS_HOME:-/home/hoo2/Work/AUTH/PDS/homework_1}"

# Docker invocation kept as an array so no `eval` / word-splitting of a flat
# string is needed.
DOCK=(docker run --rm
      -v "${PDS_HOME}:/usr/src/PDS_homework_1"
      -w /usr/src/PDS_homework_1/
      hoo2/hpcimage)

# Print the command line and then execute exactly that command in the
# container, so the logged text cannot drift from what was really measured.
run() {
    echo "$*"
    "${DOCK[@]}" "$@"
}

echo " "
echo "Serial"
# NOTE(review): the previous echo lines announced `-s 1` while the executed
# commands used `-s 10`. The executed value is kept here; switch to `-s 1`
# if that is what the serial measurement is meant to use.
run ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
run ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t

echo " "
echo "OMP"
# Disabled OMP measurements (kept for reference):
# run ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
# run ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t
# run ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t
# run ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t
@@ -65,9 +65,15 @@ bool get_options(int argc, char* argv[]){ | |||
else if (arg == "-k") { | |||
session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k; | |||
} | |||
else if (arg == "-n" || arg == "--max_trheads") { | |||
else if (arg == "-n" || arg == "--max_threads") { | |||
session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads; | |||
} | |||
else if (arg == "-s" || arg == "--slices") { | |||
session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices; | |||
} | |||
else if (arg == "-a" || arg == "--accuracy") { | |||
session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy; | |||
} | |||
else if (arg == "-t" || arg == "--timing") | |||
session.timing = true; | |||
else if (arg == "-v" || arg == "--verbose") | |||
@@ -87,7 +93,12 @@ bool get_options(int argc, char* argv[]){ | |||
std::cout << " -k <number>\n"; | |||
std::cout << " Set the number of closest neighbors to find. \n\n"; | |||
std::cout << " -n | --max_trheads <threads>\n"; | |||
std::cout << " Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n"; | |||
std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n"; | |||
std::cout << " -s | --slices <slices/threads>\n"; | |||
std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n"; | |||
std::cout << " <threads> should be less or equal to available CPUs\n\n"; | |||
std::cout << " -a | --accuracy <accuracy>\n"; | |||
std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n"; | |||
std::cout << " -t | --timing\n"; | |||
std::cout << " Request timing measurements output to stdout.\n\n"; | |||
std::cout << " -v | --verbose\n"; | |||
@@ -109,6 +120,27 @@ bool get_options(int argc, char* argv[]){ | |||
return status; | |||
} | |||
/*!
 * Prepare the session's input matrices.
 *
 * If the output file named by session.outMtxFile already exists it is removed
 * first. The corpus matrix is always loaded; the query matrix is loaded only
 * when session.queryMtx is set.
 *
 * \param Corpus  Matrix object that receives the corpus data
 * \param Query   Matrix object that receives the query data (left untouched
 *                when session.queryMtx is false)
 */
void loadMtx(MatrixDst& Corpus, MatrixDst& Query) {
   const char* outPath = session.outMtxFile.c_str();
   if (access(outPath, F_OK) == 0) {
      std::remove(outPath);
   }

   // Timing of the load phase is currently disabled:
   // timer.start();
   Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
   if (session.queryMtx) {
      // NOTE(review): Query is read from session.corpusMtxFile / session.corpusDataSet,
      // i.e. the very same source as Corpus. Confirm this is intended and not a
      // copy-paste of the corpus load above.
      Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
   }
   // timer.stop();
   // timer.print_dt("Load hdf5 files");
}
/*!
 * Write the search results to the session's output file.
 *
 * Both matrices go to the file named by session.outMtxFile: Idx under
 * session.outMtxIdxDataSet and Dst under session.outMtxDstDataSet, in that order.
 *
 * \param Idx  Index matrix to store
 * \param Dst  Distance matrix to store
 */
void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) {
   const auto& outFile = session.outMtxFile;

   // Timing of the store phase is currently disabled:
   // timer.start();
   Mtx::store<MatrixIdx, IdxHDF5Type>(outFile, session.outMtxIdxDataSet, Idx);
   Mtx::store<MatrixDst, DstHDF5Type>(outFile, session.outMtxDstDataSet, Dst);
   // timer.stop();
   // timer.print_dt("Store hdf5 files");
}
#ifndef TESTING | |||
int main(int argc, char* argv[]) try { | |||
// Instantiate matrixes | |||
@@ -127,38 +159,26 @@ int main(int argc, char* argv[]) try { | |||
if (!get_options(argc, argv)) | |||
exit(1); | |||
if (access(session.outMtxFile.c_str(), F_OK) == 0) | |||
std::remove(session.outMtxFile.c_str()); | |||
init_workers(); | |||
// Load data | |||
timer.start(); | |||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus); | |||
if (session.queryMtx) | |||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query); | |||
timer.stop(); | |||
timer.print_dt("Load hdf5 files"); | |||
loadMtx(Corpus, Query); | |||
// Prepare output memory | |||
Idx.resize(Query.rows(), session.k); | |||
Dst.resize(Query.rows(), session.k); | |||
Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k); | |||
Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k); | |||
// Do the search | |||
logger << "Start knnsearch ..."; | |||
timer.start(); | |||
if (session.queryMtx) | |||
knnsearch(Corpus, Query, 0, session.k, session.k, Idx, Dst); | |||
else | |||
knnsearch(Corpus, Corpus, 0, session.k, session.k, Idx, Dst); | |||
size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0)); | |||
knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst); | |||
timer.stop(); | |||
logger << " Done" << logger.endl; | |||
timer.print_dt("knnsearch"); | |||
// Store data | |||
timer.start(); | |||
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx); | |||
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst); | |||
timer.stop(); | |||
timer.print_dt("Store hdf5 files"); | |||
storeMtx(Idx, Dst); | |||
return 0; | |||
} | |||
@@ -0,0 +1,50 @@ | |||
/** | |||
* \file v1.hpp | |||
* \brief | |||
* | |||
* \author | |||
* Christos Choutouridis AEM:8997 | |||
* <cchoutou@ece.auth.gr> | |||
*/ | |||
#include "v1.hpp" | |||
void init_workers() { | |||
#if defined CILK | |||
size_t cilk_w = __cilkrts_get_nworkers(); | |||
if (!session.max_threads) | |||
session.max_threads = (session.slices) ? (session.slices) : cilk_w; | |||
// else if (session.max_threads < cilk_w) | |||
// __cilkrts_set_param("nworkers", "4"); | |||
// else ignored by cilk | |||
#elif defined OMP | |||
// omp_set_dynamic(1); | |||
size_t omp_w = (size_t)omp_get_max_threads(); | |||
if (!session.max_threads) { | |||
session.max_threads = (session.slices) ? (session.slices) : omp_w; | |||
// omp_set_dynamic(1); | |||
} | |||
else if (session.max_threads < omp_w) { | |||
// omp_set_dynamic(0); | |||
omp_set_num_threads(session.max_threads); | |||
} | |||
// else ignored by omp | |||
#elif defined PTHREADS | |||
size_t pth_w = std::thread::hardware_concurrency(); | |||
if (!session.max_threads) | |||
session.max_threads = (session.slices) ? (session.slices) : pth_w; | |||
#else | |||
#endif | |||
if (!session.slices) | |||
session.slices = session.max_threads; | |||
openblas_set_num_threads(1); // Limit OpenBLAS to 1 thread | |||
} | |||