@@ -27,8 +27,8 @@ SRC_DIR_LIST := src gtest | |||||
# Include directories list (space separated). Makefile-relative path. | # Include directories list (space separated). Makefile-relative path. | ||||
# Each include directory is listed exactly once. The system HDF5 (serial)
# headers come last, after the project and gtest directories.
INC_DIR_LIST := inc \
                src \
                gtest \
                /usr/include/hdf5/serial/
# Libs/MATLAB/R2019b/include/ \ | # Libs/MATLAB/R2019b/include/ \ | ||||
# Exclude files list (space separated). Filenames only. | # Exclude files list (space separated). Filenames only. | ||||
@@ -179,7 +179,7 @@ hpc-clean: | |||||
rm hpc-results/post | rm hpc-results/post | ||||
# | # | ||||
# ================ Local via docker build rules ================= | |||||
# ================ Local (and/or) via docker build rules ================= | |||||
# | # | ||||
# examples: | # examples: | ||||
# make IMAGE=hpcimage v0 | # make IMAGE=hpcimage v0 | ||||
@@ -190,12 +190,6 @@ local_v0: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=0 | |||||
local_v0: TARGET := local_v0 | local_v0: TARGET := local_v0 | ||||
local_v0: $(BUILD_DIR)/$(TARGET) | local_v0: $(BUILD_DIR)/$(TARGET) | ||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | ||||
# local_v0_opt: local build of code version 0 (-DCODE_VERSION=0) using the
# REL_* flag sets instead of the DEB_* ones used by local_v0.
# The binary is built as $(BUILD_DIR)/$(TARGET) and then copied to out/.
local_v0_opt: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 | |||||
local_v0_opt: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 | |||||
local_v0_opt: TARGET := local_v0_opt | |||||
local_v0_opt: $(BUILD_DIR)/$(TARGET) | |||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||||
local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=1 | local_v1: CFLAGS := $(DEB_CFLAGS) -DCODE_VERSION=1 | ||||
local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=1 | local_v1: CXXFLAGS := $(DEB_CXXFLAGS) -DCODE_VERSION=1 | ||||
@@ -203,6 +197,21 @@ local_v1: TARGET := local_v1 | |||||
local_v1: $(BUILD_DIR)/$(TARGET) | local_v1: $(BUILD_DIR)/$(TARGET) | ||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | ||||
# local_v1_omp: local build of code version 1 with the OpenMP code path.
# -DOMP selects the OpenMP branch of the sources; -fopenmp is passed both to
# the compile flags and (via LDFLAGS) to the link step.
# The binary is built as $(BUILD_DIR)/$(TARGET) and then copied to out/.
local_v1_omp: CFLAGS := $(DEB_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | |||||
local_v1_omp: CXXFLAGS := $(DEB_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | |||||
local_v1_omp: LDFLAGS += -fopenmp | |||||
local_v1_omp: TARGET := local_v1_omp | |||||
local_v1_omp: $(BUILD_DIR)/$(TARGET) | |||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||||
# local_v1_pth: local build of code version 1 with the std::thread code path
# (-DPTHREADS). -pthread is passed at both compile and link time, mirroring
# how the OpenMP rule passes -fopenmp, so the threading runtime is always
# linked in regardless of what the base LDFLAGS contain.
# The binary is built as $(BUILD_DIR)/$(TARGET) and then copied to out/.
local_v1_pth: CFLAGS := $(DEB_CFLAGS) -pthread -DCODE_VERSION=1 -DPTHREADS
local_v1_pth: CXXFLAGS := $(DEB_CXXFLAGS) -pthread -DCODE_VERSION=1 -DPTHREADS
local_v1_pth: LDFLAGS += -pthread
local_v1_pth: TARGET := local_v1_pth
local_v1_pth: $(BUILD_DIR)/$(TARGET)
	cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
v0: DOCKER := $(DOCKER_CMD) | v0: DOCKER := $(DOCKER_CMD) | ||||
v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 | v0: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 | ||||
v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 | v0: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 | ||||
@@ -215,7 +224,7 @@ v1_cilk: CXX := /usr/local/OpenCilk-9.0.1-Linux/bin/clang++ | |||||
v1_cilk: CFLAGS := $(REL_CFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK | v1_cilk: CFLAGS := $(REL_CFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK | ||||
v1_cilk: CXXFLAGS := $(REL_CXXFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK | v1_cilk: CXXFLAGS := $(REL_CXXFLAGS) -fcilkplus -DCODE_VERSION=1 -DCILK | ||||
v1_cilk: LDFLAGS += -fcilkplus | v1_cilk: LDFLAGS += -fcilkplus | ||||
v1_cilk: TARGET := knnsearch_cilkv1 | |||||
v1_cilk: TARGET := knnsearch_v1_cilk | |||||
v1_cilk: $(BUILD_DIR)/$(TARGET) | v1_cilk: $(BUILD_DIR)/$(TARGET) | ||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | ||||
@@ -223,10 +232,17 @@ v1_omp: DOCKER := $(DOCKER_CMD) | |||||
v1_omp: CFLAGS := $(REL_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | v1_omp: CFLAGS := $(REL_CFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | ||||
v1_omp: CXXFLAGS := $(REL_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | v1_omp: CXXFLAGS := $(REL_CXXFLAGS) -fopenmp -DCODE_VERSION=1 -DOMP | ||||
v1_omp: LDFLAGS += -fopenmp | v1_omp: LDFLAGS += -fopenmp | ||||
v1_omp: TARGET := knnsearch_ompv1 | |||||
v1_omp: TARGET := knnsearch_v1_omp | |||||
v1_omp: $(BUILD_DIR)/$(TARGET) | v1_omp: $(BUILD_DIR)/$(TARGET) | ||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | ||||
# v1_pth: docker build (DOCKER := $(DOCKER_CMD)) of code version 1 with the
# std::thread code path (-DPTHREADS). -pthread is passed at both compile and
# link time, mirroring how the v1_omp rule passes -fopenmp, so the threading
# runtime is always linked in regardless of what the base LDFLAGS contain.
# The binary is built as $(BUILD_DIR)/$(TARGET) and then copied to out/.
v1_pth: DOCKER := $(DOCKER_CMD)
v1_pth: CFLAGS := $(REL_CFLAGS) -pthread -DCODE_VERSION=1 -DPTHREADS
v1_pth: CXXFLAGS := $(REL_CXXFLAGS) -pthread -DCODE_VERSION=1 -DPTHREADS
v1_pth: LDFLAGS += -pthread
v1_pth: TARGET := knnsearch_v1_pth
v1_pth: $(BUILD_DIR)/$(TARGET)
	cp $(BUILD_DIR)/$(TARGET) out/$(TARGET)
v1: DOCKER := $(DOCKER_CMD) | v1: DOCKER := $(DOCKER_CMD) | ||||
v1: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=1 | v1: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=1 | ||||
v1: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=1 | v1: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=1 | ||||
@@ -240,7 +256,12 @@ tests: TARGET := tests | |||||
tests: $(BUILD_DIR)/$(TARGET) | tests: $(BUILD_DIR)/$(TARGET) | ||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | ||||
# tests_rel: builds the unit-test binary (-DTESTING, code version 0) with the
# REL_* flag sets. TARGET is "tests", the same name the `tests` rule uses, so
# both rules produce and copy the same out/tests file.
tests_rel: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=0 -DTESTING | |||||
tests_rel: CXXFLAGS := $(REL_CXXFLAGS) -DCODE_VERSION=0 -DTESTING | |||||
tests_rel: TARGET := tests | |||||
tests_rel: $(BUILD_DIR)/$(TARGET) | |||||
cp $(BUILD_DIR)/$(TARGET) out/$(TARGET) | |||||
# | # | ||||
# ========= Inside CSAL Image build rules =========== | # ========= Inside CSAL Image build rules =========== | ||||
# | # | ||||
@@ -19,6 +19,8 @@ | |||||
using matrix_t = mtx::Matrix<int>; | using matrix_t = mtx::Matrix<int>; | ||||
extern void loadMtx(MatrixDst& Corpus, MatrixDst& Query); | |||||
extern void storeMtx(MatrixIdx& Idx, MatrixDst& Dst); | |||||
// ===================================== | // ===================================== | ||||
// C1, Q1 | // C1, Q1 | ||||
@@ -140,11 +142,44 @@ TEST(Tv0_UT, pdist2_test2) { | |||||
} | } | ||||
/*
 * All-to-all distance test: v0::pdist2(C2, C2) must reproduce the expected
 * 16x16 distance matrix within an absolute tolerance of 0.01.
 */
TEST(Tv0_UT, pdist2_test3) {

   mtx::Matrix<double> D2_exp(16, 16, {
      0,      0.7433, 0.6868, 0.8846, 0.6342, 0.4561, 0.5118, 0.6341, 0.5461, 0.7322, 0.6974, 0.4330, 0.7028, 0.6303, 0.6826, 0.4179,
      0.7433, 0,      0.3400, 0.4555, 0.4207, 0.9736, 0.9690, 0.7386, 1.1055, 0.5462, 0.5345, 0.6576, 0.8677, 1.0291, 0.5393, 0.8106,
      0.6868, 0.3400, 0,      0.5380, 0.6268, 0.9512, 1.0234, 0.8403, 0.9843, 0.8187, 0.3091, 0.7829, 0.5759, 0.9411, 0.7239, 0.9186,
      0.8846, 0.4555, 0.5380, 0,      0.6796, 1.1672, 1.0460, 1.1016, 1.1139, 0.7542, 0.6480, 0.9304, 1.0568, 1.3482, 0.8316, 0.9750,
      0.6342, 0.4207, 0.6268, 0.6796, 0,      0.9267, 0.8772, 0.4847, 0.9317, 0.4093, 0.8351, 0.4215, 0.9736, 0.9007, 0.5999, 0.5291,
      0.4561, 0.9736, 0.9512, 1.1672, 0.9267, 0,      0.3903, 0.7795, 0.9308, 0.8429, 0.8436, 0.5672, 0.9284, 0.7064, 0.6435, 0.5975,
      0.5118, 0.9690, 1.0234, 1.0460, 0.8772, 0.3903, 0,      0.8920, 0.9253, 0.7060, 0.9427, 0.5728, 1.1515, 0.9907, 0.6471, 0.4811,
      0.6341, 0.7386, 0.8403, 1.1016, 0.4847, 0.7795, 0.8920, 0,      0.9824, 0.6416, 0.9844, 0.3398, 0.9355, 0.5428, 0.6536, 0.5309,
      0.5461, 1.1055, 0.9843, 1.1139, 0.9317, 0.9308, 0.9253, 0.9824, 0,      1.1517, 1.0541, 0.8746, 0.8506, 0.8777, 1.2036, 0.7607,
      0.7322, 0.5462, 0.8187, 0.7542, 0.4093, 0.8429, 0.7060, 0.6416, 1.1517, 0,      0.9106, 0.4245, 1.2071, 1.0738, 0.3745, 0.5170,
      0.6974, 0.5345, 0.3091, 0.6480, 0.8351, 0.8436, 0.9427, 0.9844, 1.0541, 0.9106, 0,      0.8647, 0.5941, 0.9954, 0.7148, 0.9876,
      0.4330, 0.6576, 0.7829, 0.9304, 0.4215, 0.5672, 0.5728, 0.3398, 0.8746, 0.4245, 0.8647, 0,      0.9590, 0.6782, 0.4586, 0.2525,
      0.7028, 0.8677, 0.5759, 1.0568, 0.9736, 0.9284, 1.1515, 0.9355, 0.8506, 1.2071, 0.5941, 0.9590, 0,      0.6838, 1.0517, 1.0675,
      0.6303, 1.0291, 0.9411, 1.3482, 0.9007, 0.7064, 0.9907, 0.5428, 0.8777, 1.0738, 0.9954, 0.6782, 0.6838, 0,      0.9482, 0.7937,
      0.6826, 0.5393, 0.7239, 0.8316, 0.5999, 0.6435, 0.6471, 0.6536, 1.2036, 0.3745, 0.7148, 0.4586, 1.0517, 0.9482, 0,      0.6345,
      0.4179, 0.8106, 0.9186, 0.9750, 0.5291, 0.5975, 0.4811, 0.5309, 0.7607, 0.5170, 0.9876, 0.2525, 1.0675, 0.7937, 0.6345, 0
   });

   mtx::Matrix<double> D (16,16);

   v0::pdist2(C2, C2, D);

   // EXPECT_NEAR reports expected value, actual value and their difference on
   // failure, unlike comparing a boolean expression against `true`.
   for (size_t i = 0 ; i < D.rows() ; ++i)
      for (size_t j = 0 ; j < D.columns() ; ++j)
         EXPECT_NEAR (D2_exp.get(i, j), D.get(i, j), 0.01) << "at (" << i << ", " << j << ")";
}
/* | /* | ||||
* ========================================== | * ========================================== | ||||
* v0::knn | * v0::knn | ||||
*/ | */ | ||||
TEST(Tv0_UT, knn_test1) { | |||||
TEST(Tv0_UT, knn_v0_test1) { | |||||
size_t k = 3; | size_t k = 3; | ||||
mtx::Matrix<uint32_t> Idx_exp(5, k, { | mtx::Matrix<uint32_t> Idx_exp(5, k, { | ||||
5, 8, 9, | 5, 8, 9, | ||||
@@ -177,7 +212,7 @@ TEST(Tv0_UT, knn_test1) { | |||||
} | } | ||||
TEST(Tv0_UT, knn_test2) { | |||||
TEST(Tv0_UT, knn_v0_test2) { | |||||
size_t k = 3; | size_t k = 3; | ||||
mtx::Matrix<uint32_t> Idx_exp(8, k, { | mtx::Matrix<uint32_t> Idx_exp(8, k, { | ||||
14, 13, 1, | 14, 13, 1, | ||||
@@ -220,7 +255,46 @@ TEST(Tv0_UT, knn_test2) { | |||||
* ========================================== | * ========================================== | ||||
* v1::knn | * v1::knn | ||||
*/ | */ | ||||
TEST(Tv1_UT, knn_test1) { | |||||
/*
 * v1::knnsearch with a single slice: k = 3 neighbours of every Q2 point in C2.
 * Expected indexes are 1-based (MATLAB convention), while the implementation
 * reports 0-based indexes. Distances are compared within 0.01.
 */
TEST(Tv1_UT, knn_v1_1slice) {
   size_t k = 3;
   mtx::Matrix<uint32_t> Idx_exp(8, k, {
      14, 13,  1,
      15, 10, 12,
      14,  8, 12,
       4,  1,  3,
       8, 12,  5,
       4,  3,  2,
      10,  2,  4,
       1, 11,  9
   });

   mtx::Matrix<double> Dst_exp(8, k, {
      0.1939, 0.5768, 0.6020,
      0.2708, 0.3686, 0.3740,
      0.2684, 0.3103, 0.5277,
      0.4709, 0.6050, 0.6492,
      0.4397, 0.6842, 0.7074,
      0.3402, 0.4360, 0.4475,
      0.3708, 0.4037, 0.4417,
      0.6352, 0.6653, 0.6758
   });

   mtx::Matrix<uint32_t> Idx(8, k);
   mtx::Matrix<double>   Dst(8, k);

   v1::knnsearch(C2, Q2, 1, k, k, Idx, Dst);

   // Value-based assertions print both operands on failure.
   for (size_t i = 0 ; i < Idx.rows() ; ++i)
      for (size_t j = 0 ; j < Idx.columns() ; ++j) {
         EXPECT_EQ   (Idx_exp.get(i, j), Idx.get(i, j) + 1) << "at (" << i << ", " << j << ")"; // matlab starts from 1
         EXPECT_NEAR (Dst_exp.get(i, j), Dst.get(i, j), 0.01) << "at (" << i << ", " << j << ")";
      }
}
TEST(Tv1_UT, knn_v1_2slice) { | |||||
size_t k = 3; | size_t k = 3; | ||||
mtx::Matrix<uint32_t> Idx_exp(8, k, { | mtx::Matrix<uint32_t> Idx_exp(8, k, { | ||||
14, 13, 1, | 14, 13, 1, | ||||
@@ -247,7 +321,7 @@ TEST(Tv1_UT, knn_test1) { | |||||
mtx::Matrix<uint32_t> Idx(8, k); | mtx::Matrix<uint32_t> Idx(8, k); | ||||
mtx::Matrix<double> Dst(8, k); | mtx::Matrix<double> Dst(8, k); | ||||
v1::knnsearch(C2, Q2, 0, k, k, Idx, Dst); | |||||
v1::knnsearch(C2, Q2, 2, k, k, Idx, Dst); | |||||
for (size_t i = 0 ; i< Idx.rows() ; ++i) | for (size_t i = 0 ; i< Idx.rows() ; ++i) | ||||
@@ -260,7 +334,7 @@ TEST(Tv1_UT, knn_test1) { | |||||
} | } | ||||
// all-to-all | // all-to-all | ||||
TEST(Tv1_UT, knn_test2) { | |||||
TEST(Tv1_UT, knn_v1_4slice) { | |||||
size_t k = 3; | size_t k = 3; | ||||
mtx::Matrix<uint32_t> Idx_exp(16, k, { | mtx::Matrix<uint32_t> Idx_exp(16, k, { | ||||
1, 16, 12, | 1, 16, 12, | ||||
@@ -303,7 +377,7 @@ TEST(Tv1_UT, knn_test2) { | |||||
mtx::Matrix<uint32_t> Idx(16, k); | mtx::Matrix<uint32_t> Idx(16, k); | ||||
mtx::Matrix<double> Dst(16, k); | mtx::Matrix<double> Dst(16, k); | ||||
v1::knnsearch(C2, C2, 0, k, k, Idx, Dst); | |||||
v1::knnsearch(C2, C2, 4, k, k, Idx, Dst); | |||||
for (size_t i = 0 ; i< Idx.rows() ; ++i) | for (size_t i = 0 ; i< Idx.rows() ; ++i) | ||||
@@ -315,3 +389,130 @@ TEST(Tv1_UT, knn_test2) { | |||||
} | } | ||||
/* | |||||
* ============== Live hdf5 tests =============== | |||||
* | |||||
* In order to run these tests we need the following hdf5 files in the ./mtx directory: | |||||
* | |||||
* - fashion-mnist-784-euclidean.hdf5 | |||||
* - mnist-784-euclidean.hdf5 | |||||
* - sift-128-euclidean.hdf5 | |||||
* - gist-960-euclidean.hdf5 | |||||
* | |||||
*/ | |||||
TEST(Tlive_UT, knn_v0_sift_test) { | |||||
// Instantiate matrixes | |||||
MatrixDst Corpus; | |||||
MatrixDst Query; | |||||
MatrixIdx Idx; | |||||
MatrixDst Dst; | |||||
// setup environment | |||||
session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5"; | |||||
session.corpusDataSet = "/test"; | |||||
session.queryMtx = false; | |||||
session.k = 100; | |||||
size_t m = session.k; | |||||
session.timing = true; | |||||
session.outMtxFile = "test/knn_v0.hdf5"; | |||||
loadMtx(Corpus, Query); | |||||
// Prepare output memory (There is no Query, so from Corpus | |||||
Idx.resize(Corpus.rows(), session.k); | |||||
Dst.resize(Corpus.rows(), session.k); | |||||
v0::knnsearch(Corpus, Corpus, 0, session.k, m, Idx, Dst); | |||||
storeMtx(Idx, Dst); | |||||
EXPECT_EQ(true, true); | |||||
} | |||||
TEST(Tlive_UT, knn_v1_sift_test_1slice) { | |||||
// Instantiate matrixes | |||||
MatrixDst Corpus; | |||||
MatrixDst Query; | |||||
MatrixIdx Idx; | |||||
MatrixDst Dst; | |||||
// setup environment | |||||
session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5"; | |||||
session.corpusDataSet = "/test"; | |||||
session.queryMtx = false; | |||||
session.k = 100; | |||||
size_t m = session.k; | |||||
session.timing = true; | |||||
session.outMtxFile = "test/knn_v1ser.hdf5"; | |||||
loadMtx(Corpus, Query); | |||||
// Prepare output memory (There is no Query, so from Corpus | |||||
Idx.resize(Corpus.rows(), session.k); | |||||
Dst.resize(Corpus.rows(), session.k); | |||||
v1::knnsearch(Corpus, Corpus, 0, session.k, m, Idx, Dst); | |||||
storeMtx(Idx, Dst); | |||||
EXPECT_EQ(true, true); | |||||
} | |||||
TEST(Tlive_UT, knn_v1_sift_test_2slice) { | |||||
// Instantiate matrixes | |||||
MatrixDst Corpus; | |||||
MatrixDst Query; | |||||
MatrixIdx Idx; | |||||
MatrixDst Dst; | |||||
// setup environment | |||||
session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5"; | |||||
session.corpusDataSet = "/test"; | |||||
session.queryMtx = false; | |||||
session.k = 100; | |||||
size_t m = session.k; | |||||
session.timing = true; | |||||
session.outMtxFile = "test/knn_v1ser.hdf5"; | |||||
loadMtx(Corpus, Query); | |||||
// Prepare output memory (There is no Query, so from Corpus | |||||
Idx.resize(Corpus.rows(), session.k); | |||||
Dst.resize(Corpus.rows(), session.k); | |||||
v1::knnsearch(Corpus, Corpus, 2, session.k, m, Idx, Dst); | |||||
storeMtx(Idx, Dst); | |||||
EXPECT_EQ(true, true); | |||||
} | |||||
TEST(Tlive_UT, knn_v1_sift_test_4slice) { | |||||
// Instantiate matrixes | |||||
MatrixDst Corpus; | |||||
MatrixDst Query; | |||||
MatrixIdx Idx; | |||||
MatrixDst Dst; | |||||
// setup environment | |||||
session.corpusMtxFile = "mtx/sift-128-euclidean.hdf5"; | |||||
session.corpusDataSet = "/test"; | |||||
session.queryMtx = false; | |||||
session.k = 100; | |||||
size_t m = session.k; | |||||
session.timing = true; | |||||
session.outMtxFile = "test/knn_v1ser.hdf5"; | |||||
loadMtx(Corpus, Query); | |||||
// Prepare output memory (There is no Query, so from Corpus | |||||
Idx.resize(Corpus.rows(), session.k); | |||||
Dst.resize(Corpus.rows(), session.k); | |||||
v1::knnsearch(Corpus, Corpus, 4, session.k, m, Idx, Dst); | |||||
storeMtx(Idx, Dst); | |||||
EXPECT_EQ(true, true); | |||||
} | |||||
@@ -55,7 +55,9 @@ struct session_t { | |||||
std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format | std::string outMtxFile {"out.hdf5"}; //!< output matrix file name in HDF5 format | ||||
std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file | std::string outMtxIdxDataSet {"/Idx"}; //!< Index output dataset name in HDF5 matrix file | ||||
std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file | std::string outMtxDstDataSet {"/Dst"}; //!< Distance output dataset name in HDF5 matrix file | ||||
std::size_t max_threads {}; //!< Maximum threads to use | |||||
std::size_t max_threads {0}; //!< Maximum threads to use | |||||
std::size_t slices {0}; //!< Slices/threads to use | |||||
std::size_t accuracy {100}; //!< The neighbor finding accuracy | |||||
bool timing {false}; //!< Enable timing prints of the program | bool timing {false}; //!< Enable timing prints of the program | ||||
bool verbose {false}; //!< Flag to enable verbose output to stdout | bool verbose {false}; //!< Flag to enable verbose output to stdout | ||||
}; | }; | ||||
@@ -134,6 +134,9 @@ struct Matrix { | |||||
Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; } | Matrix& operator=(Matrix&& m) noexcept { moves(std::move(m)); return *this; } | ||||
Matrix(const Matrix& m) = delete; //!< No copy ctor | Matrix(const Matrix& m) = delete; //!< No copy ctor | ||||
Matrix& operator=(const Matrix& m) = delete; //!< No copy | Matrix& operator=(const Matrix& m) = delete; //!< No copy | ||||
//Matrix(const Matrix& m); | |||||
//Matrix& operator=(const Matrix& m) { copy(m); } | |||||
//! @} | //! @} | ||||
//! \name Data exposure | //! \name Data exposure | ||||
@@ -233,6 +236,11 @@ struct Matrix { | |||||
// a basic serial iterator support | // a basic serial iterator support | ||||
DataType* data() noexcept { return data_; } | DataType* data() noexcept { return data_; } | ||||
//! \return Pointer to the first stored element (same address as data()).
DataType* begin() noexcept {
   return data_;
}
//! \return Read-only pointer to the first stored element.
const DataType* begin() const noexcept {
   return data_;
}
//! \return Pointer one past the last stored element, capacity(rows_, cols_) items after begin().
DataType* end() noexcept {
   return data_ + capacity(rows_, cols_);
}
//! \return Read-only pointer one past the last stored element.
const DataType* end() const noexcept {
   return data_ + capacity(rows_, cols_);
}
// IndexType begin_idx() noexcept { return 0; } | // IndexType begin_idx() noexcept { return 0; } | ||||
// IndexType end_idx() noexcept { return capacity(rows_, cols_); } | // IndexType end_idx() noexcept { return capacity(rows_, cols_); } | ||||
@@ -265,17 +273,19 @@ struct Matrix { | |||||
std::swap(rows_, src.rows_); | std::swap(rows_, src.rows_); | ||||
std::swap(cols_, src.cols_); | std::swap(cols_, src.cols_); | ||||
} | } | ||||
private: | private: | ||||
/*!
 * Move helper shared by the move constructor and move assignment.
 *
 * Transfers every member from \p src to the member of the same name. Each
 * member is assigned exactly once: writing the storage members or the
 * use_vector_ flag into data_ would be ill-formed (vector -> pointer) or
 * leave the other members untouched.
 *
 * \param src  The matrix to move from.
 * \note  src.data_ is not reset here. Presumably a moved-from matrix is only
 *        destroyed or reassigned afterwards -- TODO confirm.
 */
void moves(Matrix&& src) noexcept {
   vector_storage_ = std::move(src.vector_storage_);
   raw_storage_    = std::move(src.raw_storage_);
   data_           = std::move(src.data_);
   use_vector_     = std::move(src.use_vector_);
   rows_           = std::move(src.rows_);
   cols_           = std::move(src.cols_);
}
// Storage | |||||
std::vector<DataType> | std::vector<DataType> | ||||
vector_storage_; //!< Internal storage (if used). | vector_storage_; //!< Internal storage (if used). | ||||
DataType* raw_storage_; //!< External storage (if used). | DataType* raw_storage_; //!< External storage (if used). | ||||
@@ -528,125 +538,6 @@ private: | |||||
}; | }; | ||||
template<typename ...> struct Matrix_view { }; | |||||
/*! | |||||
* @struct Matrix_view | |||||
* @tparam MatrixType | |||||
*/ | |||||
template<template <typename, typename, MatrixType, MatrixOrder, bool> class Matrix, | |||||
typename DataType, | |||||
typename IndexType, | |||||
MatrixType Type, | |||||
MatrixOrder Order> | |||||
struct Matrix_view<Matrix<DataType, IndexType, Type, Order, false>> { | |||||
using owner_t = Matrix<DataType, IndexType, Type, Order, false>; | |||||
using dataType = DataType; //!< meta:export of underling data type | |||||
using indexType = IndexType; //!< meta:export of underling index type | |||||
static constexpr MatrixOrder matrixOrder = Order; //!< meta:export of array order | |||||
static constexpr MatrixType matrixType = Type; //!< meta:export of array type | |||||
/*! | |||||
* \name Obj lifetime | |||||
*/ | |||||
//! @{ | |||||
//! Construct a matrix view to entire matrix | |||||
Matrix_view(const owner_t* owner) noexcept : | |||||
owner_(owner), m_(owner->data()), rows_(owner->rows()), cols_(owner->columns()) { } | |||||
Matrix_view(const owner_t* owner, IndexType begin, IndexType end) noexcept : | |||||
owner_(owner) { | |||||
if constexpr (Order == MatrixOrder::ROWMAJOR) { | |||||
m_ = owner->data() + begin * owner->columns(); | |||||
rows_ = end - begin; | |||||
cols_ = owner->columns(); | |||||
} else if (Order == MatrixOrder::COLMAJOR) { | |||||
m_ = owner->data() + begin * owner->rows(); | |||||
rows_ = owner->rows(); | |||||
cols_ = end - begin; | |||||
} | |||||
} | |||||
Matrix_view(Matrix_view&& m) = delete; //! No move | |||||
Matrix_view& operator=(Matrix_view&& m) = delete; | |||||
Matrix_view(const Matrix_view& m) = delete; //!< No copy | |||||
Matrix_view& operator=(const Matrix_view& m) = delete; | |||||
//! @} | |||||
//! Get/Set the size of each dimension | |||||
const IndexType rows() const noexcept { return rows_; } | |||||
const IndexType columns() const noexcept { return cols_; } | |||||
//! Get the interface size of the Matrix (what appears to be the size) | |||||
IndexType size() const { | |||||
return rows_ * cols_; | |||||
} | |||||
//! Actual memory capacity of the symmetric matrix | |||||
static constexpr IndexType capacity(IndexType M, IndexType N) { | |||||
return M*N; | |||||
} | |||||
/* | |||||
* virtual 2D accessors | |||||
*/ | |||||
const DataType get (IndexType i, IndexType j) const { | |||||
if constexpr (Order == MatrixOrder::COLMAJOR) | |||||
return m_[i + j*rows_]; | |||||
else | |||||
return m_[i*cols_ + j]; | |||||
} | |||||
DataType set (DataType v, IndexType i, IndexType j) { | |||||
if constexpr (Order == MatrixOrder::COLMAJOR) | |||||
return m_[i + j*rows_] = v; | |||||
else | |||||
return m_[i*cols_ + j] = v; | |||||
} | |||||
// DataType operator()(IndexType i, IndexType j) { return get(i, j); } | |||||
/*! | |||||
* Return a proxy MatVal object with read and write capabilities. | |||||
* @param i The row number | |||||
* @param j The column number | |||||
* @return tHE MatVal object | |||||
*/ | |||||
MatVal<Matrix_view> operator()(IndexType i, IndexType j) noexcept { | |||||
return MatVal<Matrix_view>(this, get(i, j), i, j); | |||||
} | |||||
// a basic serial iterator support | |||||
DataType* data() noexcept { return m_.data(); } | |||||
// IndexType begin_idx() noexcept { return 0; } | |||||
// IndexType end_idx() noexcept { return capacity(rows_, cols_); } | |||||
const DataType* data() const noexcept { return m_; } | |||||
const IndexType begin_idx() const noexcept { return 0; } | |||||
const IndexType end_idx() const noexcept { return capacity(rows_, cols_); } | |||||
//! @} | |||||
/*! | |||||
* \name Safe iteration API | |||||
* | |||||
* This api automates the iteration over the array based on | |||||
* MatrixType | |||||
*/ | |||||
//! @{ | |||||
template<typename F, typename... Args> | |||||
void for_each_in (IndexType begin, IndexType end, F&& lambda, Args&&... args) { | |||||
for (IndexType it=begin ; it<end ; ++it) { | |||||
std::forward<F>(lambda)(std::forward<Args>(args)..., it); | |||||
} | |||||
} | |||||
//! @} | |||||
//! | |||||
private: | |||||
const owner_t* owner_ {nullptr}; //!< Pointer to Matrix | |||||
DataType* m_ {nullptr}; //!< Starting address of the slice/view | |||||
IndexType rows_{}; //!< the virtual size of rows. | |||||
IndexType cols_{}; //!< the virtual size of columns. | |||||
}; | |||||
/*! | /*! | ||||
* A view/iterator hybrid object for Matrix columns. | * A view/iterator hybrid object for Matrix columns. | ||||
* | * | ||||
@@ -54,10 +54,11 @@ void pdist2(const Matrix& X, const Matrix& Y, Matrix& D2) { | |||||
for (int i = 0; i < M ; ++i) { | for (int i = 0; i < M ; ++i) { | ||||
for (int j = 0; j < N; ++j) { | for (int j = 0; j < N; ++j) { | ||||
D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j); | D2.set(D2.get(i, j) + X_norms[i] + Y_norms[j], i, j); | ||||
//D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative | |||||
D2.set(std::max(D2.get(i, j), 0.0), i, j); // Ensure non-negative | |||||
D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each | D2.set(std::sqrt(D2.get(i, j)), i, j); // Take the square root of each | ||||
} | } | ||||
} | } | ||||
M++; | |||||
} | } | ||||
template<typename DataType, typename IndexType> | template<typename DataType, typename IndexType> | ||||
@@ -82,7 +83,7 @@ void quickselect(std::vector<std::pair<DataType, IndexType>>& vec, int k) { | |||||
* point of Q | * point of Q | ||||
*/ | */ | ||||
template<typename MatrixD, typename MatrixI> | template<typename MatrixD, typename MatrixI> | ||||
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { | |||||
void knnsearch(MatrixD& C, MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { | |||||
using DstType = typename MatrixD::dataType; | using DstType = typename MatrixD::dataType; | ||||
using IdxType = typename MatrixI::dataType; | using IdxType = typename MatrixI::dataType; | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* \file v0.hpp | |||||
* \file v1.hpp | |||||
* \brief | * \brief | ||||
* | * | ||||
* \author | * \author | ||||
@@ -16,6 +16,26 @@ | |||||
#include "v0.hpp" | #include "v0.hpp" | ||||
#include "config.h" | #include "config.h" | ||||
#if defined CILK | |||||
#include <cilk/cilk.h> | |||||
#include <cilk/cilk_api.h> | |||||
//#include <cilk/reducer_opadd.h> | |||||
#elif defined OMP | |||||
#include <omp.h> | |||||
#elif defined PTHREADS | |||||
#include <thread> | |||||
#include <numeric> | |||||
#include <functional> | |||||
//#include <random> | |||||
#else | |||||
#endif | |||||
void init_workers(); | |||||
namespace v1 { | namespace v1 { | ||||
template <typename DataType, typename IndexType> | template <typename DataType, typename IndexType> | ||||
@@ -57,49 +77,110 @@ void mergeResultsWithM(mtx::Matrix<IndexType>& N1, mtx::Matrix<DataType>& D1, | |||||
} | } | ||||
} | } | ||||
template<typename MatrixD, typename MatrixI> | template<typename MatrixD, typename MatrixI> | ||||
void knnsearch(const MatrixD& C, const MatrixD& Q, size_t idx_offset, size_t k, size_t m, MatrixI& idx, MatrixD& dst) { | |||||
void worker_body (std::vector<MatrixD>& corpus_slices, | |||||
std::vector<MatrixD>& query_slices, | |||||
MatrixI& idx, | |||||
MatrixD& dst, | |||||
size_t slice, | |||||
size_t num_slices, size_t corpus_slice_size, size_t query_slice_size, | |||||
size_t k, | |||||
size_t m) { | |||||
// "load" types | |||||
using DstType = typename MatrixD::dataType; | using DstType = typename MatrixD::dataType; | ||||
using IdxType = typename MatrixI::dataType; | using IdxType = typename MatrixI::dataType; | ||||
if (C.rows() <= 8 || Q.rows() <= 4) { | |||||
// Base case: Call knnsearch directly | |||||
v0::knnsearch(C, Q, idx_offset, k, m, idx, dst); | |||||
return; | |||||
} | |||||
// Divide Corpus and Query into subsets | |||||
IdxType midC = C.rows() / 2; | |||||
IdxType midQ = Q.rows() / 2; | |||||
// Slice corpus and query matrixes | |||||
MatrixD C1((DstType*)C.data(), 0, midC, C.columns()); | |||||
MatrixD C2((DstType*)C.data(), midC, midC, C.columns()); | |||||
MatrixD Q1((DstType*)Q.data(), 0, midQ, Q.columns()); | |||||
MatrixD Q2((DstType*)Q.data(), midQ, midQ, Q.columns()); | |||||
// Allocate temporary matrixes for all permutations | |||||
MatrixI N1_1(midQ, k), N1_2(midQ, k), N2_1(midQ, k), N2_2(midQ, k); | |||||
MatrixD D1_1(midQ, k), D1_2(midQ, k), D2_1(midQ, k), D2_2(midQ, k); | |||||
// Recursive calls | |||||
knnsearch(C1, Q1, idx_offset, k, m, N1_1, D1_1); | |||||
knnsearch(C2, Q1, idx_offset + midC, k, m, N1_2, D1_2); | |||||
knnsearch(C1, Q2, idx_offset, k, m, N2_1, D2_1); | |||||
knnsearch(C2, Q2, idx_offset + midC, k, m, N2_2, D2_2); | |||||
// slice output matrixes | |||||
MatrixI N1((IdxType*)idx.data(), 0, midQ, k); | |||||
MatrixI N2((IdxType*)idx.data(), midQ, midQ, k); | |||||
MatrixD D1((DstType*)dst.data(), 0, midQ, k); | |||||
MatrixD D2((DstType*)dst.data(), midQ, midQ, k); | |||||
// Merge results in place | |||||
mergeResultsWithM(N1_1, D1_1, N1_2, D1_2, k, m, N1, D1); | |||||
mergeResultsWithM(N2_1, D2_1, N2_2, D2_2, k, m, N2, D2); | |||||
for (size_t ci = 0; ci < num_slices; ++ci) { | |||||
size_t idx_offset = ci * corpus_slice_size; | |||||
// Intermediate matrixes for intermediate results | |||||
MatrixI temp_idx(query_slices[slice].rows(), k); | |||||
MatrixD temp_dst(query_slices[slice].rows(), k); | |||||
// kNN for each combination | |||||
v0::knnsearch(corpus_slices[ci], query_slices[slice], idx_offset, k, m, temp_idx, temp_dst); | |||||
// Merge temporary results to final results | |||||
MatrixI idx_slice((IdxType*)idx.data(), slice * query_slice_size, query_slices[slice].rows(), k); | |||||
MatrixD dst_slice((DstType*)dst.data(), slice * query_slice_size, query_slices[slice].rows(), k); | |||||
mergeResultsWithM(idx_slice, dst_slice, temp_idx, temp_dst, k, m, idx_slice, dst_slice); | |||||
} | |||||
} | |||||
/*!
 * k nearest neighbour search, computed slice by slice.
 *
 * The corpus and the query are each cut in num_slices row slices. The last
 * slice takes any remainder rows. One worker_body() call per query slice
 * searches all the corpus slices and merges the results in place into
 * idx and dst. Depending on the build (OMP, CILK, PTHREADS or none) the query
 * slices are processed by OpenMP, Cilk, std::thread workers or serially.
 *
 * \param C           The corpus matrix
 * \param Q           The query matrix
 * \param num_slices  Number of slices (and workers). 0 is treated as 1.
 * \param k           Number of neighbours to find
 * \param m           Passed through to worker_body()
 * \param idx         Output: neighbour indexes, one row per query point
 * \param dst         Output: neighbour distances, one row per query point
 */
template<typename MatrixD, typename MatrixI>
void knnsearch(MatrixD& C, MatrixD& Q, size_t num_slices, size_t k, size_t m, MatrixI& idx, MatrixD& dst) {
   using DstType = typename MatrixD::dataType;
   using IdxType = typename MatrixI::dataType;

   // A zero slice count would create no slices and search nothing.
   if (num_slices == 0)
      num_slices = 1;

   //Slice calculations
   const size_t corpus_slice_size = C.rows() / num_slices;
   const size_t query_slice_size  = Q.rows() / num_slices;

   // Make slices
   std::vector<MatrixD> corpus_slices;
   std::vector<MatrixD> query_slices;
   corpus_slices.reserve(num_slices);
   query_slices.reserve(num_slices);

   for (size_t i = 0; i < num_slices; ++i) {
      // The last slice also takes the remainder rows
      corpus_slices.emplace_back(
            (DstType*)C.data(),
            i * corpus_slice_size,
            (i == num_slices - 1 ? C.rows() - i * corpus_slice_size : corpus_slice_size),
            C.columns());
      query_slices.emplace_back(
            (DstType*)Q.data(),
            i * query_slice_size,
            (i == num_slices - 1 ? Q.rows() - i * query_slice_size : query_slice_size),
            Q.columns());
   }

   // Initialize the results with "no neighbour found yet" values
   for (size_t i = 0; i < dst.rows(); ++i) {
      for (size_t j = 0; j < dst.columns(); ++j) {
         dst.set(std::numeric_limits<DstType>::infinity(), i, j);
         idx.set(static_cast<IdxType>(-1), i, j);
      }
   }

   // Main loop: exactly one worker_body() call per query slice. Each call
   // writes only its own rows of idx/dst.
#if defined OMP
   #pragma omp parallel for
   for (size_t qi = 0; qi < num_slices; ++qi) {
      worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
   }
#elif defined CILK
   cilk_for (size_t qi = 0; qi < num_slices; ++qi) {
      worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
   }
#elif defined PTHREADS
   std::vector<std::thread> workers;
   workers.reserve(num_slices);
   for (size_t qi = 0; qi < num_slices; ++qi) {
      workers.emplace_back(
            worker_body<MatrixD, MatrixI>,
            std::ref(corpus_slices), std::ref(query_slices),
            std::ref(idx), std::ref(dst),
            qi,
            num_slices, corpus_slice_size, query_slice_size,
            k, m);
   }
   // Join threads
   for (std::thread& t : workers) {
      t.join();
   }
#else
   for (size_t qi = 0; qi < num_slices; ++qi) {
      worker_body (corpus_slices, query_slices, idx, dst, qi, num_slices, corpus_slice_size, query_slice_size, k, m);
   }
#endif
}
@@ -12,7 +12,10 @@ function [D2] = dist2(X, Y) | |||||
if d1 ~= d2 | if d1 ~= d2 | ||||
error('X,Y column dimensions must match'); | error('X,Y column dimensions must match'); | ||||
end | end | ||||
%D2 = sqrt((X.^2)*ones(d,m) -2*X*Y' + ones(n,d)*(Y.^2)'); | |||||
% debug | |||||
%X_norm = sum(X.^2, 2); | |||||
%Y_norm = sum(Y.^2, 2)'; | |||||
%XY = 2 * X*Y'; | |||||
D2 = max(sum(X.^2, 2) - 2 * X*Y' + sum(Y.^2, 2)', 0); | D2 = max(sum(X.^2, 2) - 2 * X*Y' + sum(Y.^2, 2)', 0); | ||||
D2 = sqrt(D2); | D2 = sqrt(D2); | ||||
@@ -0,0 +1,78 @@ | |||||
% Plot measurements | |||||
% Draws three figures (OMP, CILK, Pthreads) of execution time [msec] against
% the accuracy setting (%), each one also showing the two serial curves.
% Every figure is written to a PNG file with print() at -r300.
accuracy = [100 80 60 40 20 10]; | |||||
ser_sift_acc = [ 4395 4365 4384 4315 4295 4246 ]; | |||||
ser_mnist_ser_acc = [ 7936 7924 7886 7903 7844 7801 ]; | |||||
omp_sift_acc = [ | |||||
4093 4098 4040 4001 3980 3937 | |||||
]; | |||||
omp_mnist_acc = [ | |||||
7575 7463 7389 7416 7321 7303 | |||||
]; | |||||
% NOTE(review): the two CILK vectors below do not match the recorded CILK
% timings (log shows 4218 4239 4173 4168 4108 4057 for SIFT and
% 7864 7871 7835 7748 7762 7713 for MNIST) - confirm which set is correct.
cilk_sift_acc = [ | |||||
3718 3739 3673 3668 3608 3557 | |||||
]; | |||||
cilk_mnist_acc = [ | |||||
7064 7071 7035 6948 6962 6913 | |||||
]; | |||||
pth_sift_acc = [ | |||||
1157 1159 1121 1100 1084 1075 | |||||
]; | |||||
pth_mnist_acc = [ | |||||
2050 2086 2040 2020 2004 1979 | |||||
]; | |||||
% Figure 1: OMP | |||||
figure; | |||||
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD | |||||
plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT'); | |||||
hold on; | |||||
plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST'); | |||||
plot(accuracy, omp_sift_acc, '-^', 'DisplayName', 'OMP SIFT'); | |||||
plot(accuracy, omp_mnist_acc, '-d', 'DisplayName', 'OMP MNIST'); | |||||
hold off; | |||||
title('OMP'); | |||||
xlabel('Accuracy (%)'); | |||||
ylabel('Execution Time [msec]'); | |||||
set(gca, 'XDir', 'reverse'); % reverse x | |||||
legend('Location', 'northwest'); | |||||
grid on; | |||||
print(gcf, 'OMP_over_accuracy.png', '-dpng', '-r300'); | |||||
% Figure 2: CILK | |||||
figure; | |||||
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD | |||||
plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT'); | |||||
hold on; | |||||
plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST'); | |||||
plot(accuracy, cilk_sift_acc, '-^', 'DisplayName', 'CILK SIFT'); | |||||
plot(accuracy, cilk_mnist_acc, '-d', 'DisplayName', 'CILK MNIST'); | |||||
hold off; | |||||
title('CILK'); | |||||
xlabel('Accuracy (%)'); | |||||
ylabel('Execution Time [msec]'); | |||||
set(gca, 'XDir', 'reverse'); % reverse x | |||||
legend('Location', 'northwest'); | |||||
grid on; | |||||
print(gcf, 'CILK_over_accuracy.png', '-dpng', '-r300'); | |||||
% Figure 3: Pthreads | |||||
figure; | |||||
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD | |||||
plot(accuracy, ser_sift_acc, '-o', 'DisplayName', 'Serial SIFT'); | |||||
hold on; | |||||
plot(accuracy, ser_mnist_ser_acc, '-s', 'DisplayName', 'Serial MNIST'); | |||||
plot(accuracy, pth_sift_acc, '-^', 'DisplayName', 'Pthreads SIFT'); | |||||
plot(accuracy, pth_mnist_acc, '-d', 'DisplayName', 'Pthreads MNIST'); | |||||
hold off; | |||||
title('Pthreads'); | |||||
xlabel('Accuracy (%)'); | |||||
ylabel('Execution Time [msec]'); | |||||
set(gca, 'XDir', 'reverse'); % reverse x | |||||
legend('Location', 'northwest'); | |||||
grid on; | |||||
print(gcf, 'Pthreads_over_accuracy.png', '-dpng', '-r300'); | |||||
@@ -0,0 +1,75 @@ | |||||
% Plot measurements | |||||
% Draws three figures (OMP, CILK, Pthreads) of execution time [msec] against
% the values in `threads`, each one also showing the two serial curves.
% The serial vectors repeat a single value, so they plot as flat reference
% lines. Every figure is written to a PNG file with print() at -r300.
threads = [1 2 4 6 8 10 12]; | |||||
ser_sift_threads = [ 4418 4418 4418 4418 4418 4418 4418 ]; | |||||
ser_mnist_ser_threads = [ 7924 7924 7924 7924 7924 7924 7924 ]; | |||||
omp_sift_th = [ | |||||
4469 4283 4096 3822 4060 4241 5193 | |||||
]; | |||||
omp_mnist_th = [ | |||||
8053 7806 7465 6828 7662 8013 8123 | |||||
]; | |||||
cilk_sift_th = [ | |||||
4225 4090 3696 3122 3860 4141 5103 | |||||
]; | |||||
% NOTE(review): the last CILK MNIST value below is 8123, while the recorded
% measurement log reads 158123 for that run - confirm which is correct.
cilk_mnist_th = [ | |||||
7744 7206 6965 6628 7362 7813 8123 | |||||
]; | |||||
pth_sift_th = [ | |||||
4254 2155 1133 877 724 640 682 | |||||
]; | |||||
pth_mnist_th = [ | |||||
7889 3963 2058 1445 1496 1379 1352 | |||||
]; | |||||
% Figure 1: OMP | |||||
figure; | |||||
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD | |||||
plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT'); | |||||
hold on; | |||||
plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST'); | |||||
plot(threads, omp_sift_th, '-^', 'DisplayName', 'OMP SIFT'); | |||||
plot(threads, omp_mnist_th, '-d', 'DisplayName', 'OMP MNIST'); | |||||
hold off; | |||||
title('OMP'); | |||||
xlabel('Threads'); | |||||
ylabel('Execution Time [msec]'); | |||||
legend('Location', 'northeast'); | |||||
grid on; | |||||
print(gcf, 'OMP_over_threads.png', '-dpng', '-r300'); | |||||
% Figure 2: CILK | |||||
figure; | |||||
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD | |||||
plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT'); | |||||
hold on; | |||||
plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST'); | |||||
plot(threads, cilk_sift_th, '-^', 'DisplayName', 'CILK SIFT'); | |||||
plot(threads, cilk_mnist_th, '-d', 'DisplayName', 'CILK MNIST'); | |||||
hold off; | |||||
title('CILK'); | |||||
xlabel('Threads'); | |||||
ylabel('Execution Time [msec]'); | |||||
legend('Location', 'northeast'); | |||||
grid on; | |||||
print(gcf, 'CILK_over_threads.png', '-dpng', '-r300'); | |||||
% Figure 3: Pthreads | |||||
figure; | |||||
set(gcf, 'Position', [100, 100, 1280, 720]); % Set the figure size to HD | |||||
plot(threads, ser_sift_threads, '-o', 'DisplayName', 'Serial SIFT'); | |||||
hold on; | |||||
plot(threads, ser_mnist_ser_threads, '-s', 'DisplayName', 'Serial MNIST'); | |||||
plot(threads, pth_sift_th, '-^', 'DisplayName', 'Pthreads SIFT'); | |||||
plot(threads, pth_mnist_th, '-d', 'DisplayName', 'Pthreads MNIST'); | |||||
hold off; | |||||
title('Pthreads'); | |||||
xlabel('Threads'); | |||||
ylabel('Execution Time [msec]'); | |||||
legend('Location', 'northeast'); | |||||
grid on; | |||||
print(gcf, 'Pthreads_over_threads.png', '-dpng', '-r300'); | |||||
@@ -2,6 +2,106 @@ | |||||
% | % | ||||
% | % | ||||
% | |||||
% | |||||
% | |||||
C1 = [ | |||||
0.8147 0.1576; | |||||
0.9058 0.9706; | |||||
0.1270 0.9572; | |||||
0.9134 0.4854; | |||||
0.6324 0.8003; | |||||
0.0975 0.1419; | |||||
0.2785 0.4218; | |||||
0.5469 0.9157; | |||||
0.9575 0.7922; | |||||
0.9649 0.9595 ]; | |||||
Q1 = [ | |||||
0.6557 0.7577; | |||||
0.0357 0.7431; | |||||
0.8491 0.3922; | |||||
0.9340 0.6555; | |||||
0.6787 0.1712 ]; | |||||
C2 = [ | |||||
0.7060 0.4456 0.5060 0.6160; | |||||
0.0318 0.6463 0.6991 0.4733; | |||||
0.2769 0.7094 0.8909 0.3517; | |||||
0.0462 0.7547 0.9593 0.8308; | |||||
0.0971 0.2760 0.5472 0.5853; | |||||
0.8235 0.6797 0.1386 0.5497; | |||||
0.6948 0.6551 0.1493 0.9172; | |||||
0.3171 0.1626 0.2575 0.2858; | |||||
0.9502 0.1190 0.8407 0.7572; | |||||
0.0344 0.4984 0.2543 0.7537; | |||||
0.4387 0.9597 0.8143 0.3804; | |||||
0.3816 0.3404 0.2435 0.5678; | |||||
0.7655 0.5853 0.9293 0.0759; | |||||
0.7952 0.2238 0.3500 0.0540; | |||||
0.1869 0.7513 0.1966 0.5308; | |||||
0.4898 0.2551 0.2511 0.7792 ]; | |||||
Q2 = [ | |||||
0.9340 0.3112 0.4505 0.0782; | |||||
0.1299 0.5285 0.0838 0.4427; | |||||
0.5688 0.1656 0.2290 0.1067; | |||||
0.4694 0.6020 0.9133 0.9619; | |||||
0.0119 0.2630 0.1524 0.0046; | |||||
0.3371 0.6541 0.8258 0.7749; | |||||
0.1622 0.6892 0.5383 0.8173; | |||||
0.7943 0.7482 0.9961 0.8687 ]; | |||||
D1_exp = [ | |||||
0.6208 0.9745 0.2371 0.5120 0.1367; | |||||
0.3284 0.8993 0.5811 0.3164 0.8310; | |||||
0.5651 0.2327 0.9169 0.8616 0.9603; | |||||
0.3749 0.9147 0.1132 0.1713 0.3921; | |||||
0.0485 0.5994 0.4621 0.3346 0.6308; | |||||
0.8312 0.6044 0.7922 0.9815 0.5819; | |||||
0.5052 0.4028 0.5714 0.6959 0.4722; | |||||
0.1919 0.5395 0.6045 0.4665 0.7561; | |||||
0.3037 0.9231 0.4144 0.1387 0.6807; | |||||
0.3692 0.9540 0.5790 0.3056 0.8386 ]; | |||||
D2_exp = [ | |||||
0.6020 0.7396 0.6583 0.6050 1.0070 0.5542 0.6298 0.6352; | |||||
1.0696 0.6348 0.9353 0.6914 0.8160 0.4475 0.4037 0.9145; | |||||
0.9268 0.8450 0.9376 0.6492 0.9671 0.4360 0.5956 0.7400; | |||||
1.3455 0.9876 1.2953 0.4709 1.2557 0.3402 0.4417 0.7500; | |||||
0.9839 0.5476 0.7517 0.7216 0.7074 0.5605 0.4784 0.9954; | |||||
0.6839 0.7200 0.7305 0.9495 1.0628 0.8718 0.8178 0.9179; | |||||
0.9850 0.7514 0.9585 0.7996 1.2054 0.7784 0.6680 0.8591; | |||||
0.6950 0.4730 0.3103 1.0504 0.4397 0.8967 0.8140 1.2066; | |||||
0.8065 1.2298 0.9722 0.7153 1.3933 0.8141 1.0204 0.6758; | |||||
1.1572 0.3686 0.9031 0.8232 0.7921 0.6656 0.3708 1.0970; | |||||
0.9432 0.9049 1.0320 0.6905 1.1167 0.5094 0.6455 0.6653; | |||||
0.7672 0.3740 0.5277 0.8247 0.6842 0.6945 0.5648 0.9968; | |||||
0.5768 1.1210 0.8403 0.9345 1.1316 0.8292 1.0380 0.8127; | |||||
0.1939 0.8703 0.2684 1.1794 0.8103 1.0683 1.1115 1.1646; | |||||
1.0106 0.2708 0.8184 0.8954 0.7402 0.6982 0.4509 1.0594; | |||||
0.8554 0.5878 0.6834 0.7699 0.9155 0.7161 0.6162 0.9481 ]; | |||||
% tests | |||||
D1 = dist2(C1, Q1); | |||||
if norm (D1-pdist2(C1, Q1), 'fro') > 0.01 | |||||
disp('Error in dist2(C1, Q1)'); | |||||
end | |||||
D2 = dist2(C2, Q2); | |||||
if norm (D2-pdist2(C2, Q2), 'fro') > 0.01 | |||||
disp('Error in dist2(C2, Q2)'); | |||||
end | |||||
D2 = dist2(C2, C2); | |||||
if norm (D2-pdist2(C2, C2), 'fro') > 0.01 | |||||
disp('Error in dist2(C2, C2)'); | |||||
end | |||||
%C = rand(10000, 2); % Corpus | %C = rand(10000, 2); % Corpus | ||||
%Q = rand(10000, 2); % Queries | %Q = rand(10000, 2); % Queries | ||||
C = rand(20000, 2); % Δύο clusters | C = rand(20000, 2); % Δύο clusters | ||||
@@ -0,0 +1,122 @@ | |||||
# Serial-sift over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 4395 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 4365 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 4384 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 4315 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 4295 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 4246 [msec] | |||||
# Serial-mnist over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 7936 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 7924 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 7886 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 7903 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 7844 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 7801 [msec] | |||||
# OMP-sift over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 4093 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 4098 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 4040 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 4001 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 3980 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 3937 [msec] | |||||
# OMP-mnist over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 7575 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 7463 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 7389 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 7416 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 7321 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 7303 [msec] | |||||
# CILK-sift over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 4218 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 4239 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 4173 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 4168 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 4108 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 4057 [msec] | |||||
# CILK-mnist over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 7864 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 7871 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 7835 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 7748 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 7762 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 7713 [msec] | |||||
# Pthreads-sift over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 1157 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 1159 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 1121 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 1100 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 1084 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 1075 [msec] | |||||
# Pthreads-mnist over acc | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 100 -k 100 -t | |||||
[Timing]: knnsearch: 2050 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 80 -k 100 -t | |||||
[Timing]: knnsearch: 2086 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 60 -k 100 -t | |||||
[Timing]: knnsearch: 2040 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 40 -k 100 -t | |||||
[Timing]: knnsearch: 2020 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 20 -k 100 -t | |||||
[Timing]: knnsearch: 2004 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -a 10 -k 100 -t | |||||
[Timing]: knnsearch: 1979 [msec] | |||||
@@ -0,0 +1,101 @@ | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 4418 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 7924 [msec] | |||||
# OMP-sift over threads | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 4469 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t | |||||
[Timing]: knnsearch: 4283 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t | |||||
[Timing]: knnsearch: 4096 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t | |||||
[Timing]: knnsearch: 3822 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t | |||||
[Timing]: knnsearch: 4060 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
[Timing]: knnsearch: 4241 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t | |||||
[Timing]: knnsearch: 5193 [msec] | |||||
# OMP-mnist over threads | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 8053 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t | |||||
[Timing]: knnsearch: 7806 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t | |||||
[Timing]: knnsearch: 7465 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t | |||||
[Timing]: knnsearch: 6828 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t | |||||
[Timing]: knnsearch: 7662[msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
[Timing]: knnsearch: 8013 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t | |||||
[Timing]: knnsearch: 8123 [msec] | |||||
# CILK-sift over threads | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 4225 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t | |||||
[Timing]: knnsearch: 4090 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t | |||||
[Timing]: knnsearch: 3696 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t | |||||
[Timing]: knnsearch: 3122 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t | |||||
[Timing]: knnsearch: 3860 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
[Timing]: knnsearch: 4141 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t | |||||
[Timing]: knnsearch: 5103 [msec] | |||||
# CILK-mnist over threads | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 7744 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t | |||||
[Timing]: knnsearch: 7206 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t | |||||
[Timing]: knnsearch: 6965 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t | |||||
[Timing]: knnsearch: 6628 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t | |||||
[Timing]: knnsearch: 7362 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
[Timing]: knnsearch: 7813 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_cilk -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t | |||||
[Timing]: knnsearch: 158123[msec] | |||||
# Pthreads-sift over threads | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 4254 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 2 -k 100 -t | |||||
[Timing]: knnsearch: 2155 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 4 -k 100 -t | |||||
[Timing]: knnsearch: 1133 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 6 -k 100 -t | |||||
[Timing]: knnsearch: 877 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 8 -k 100 -t | |||||
[Timing]: knnsearch: 724 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
[Timing]: knnsearch: 640 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/sift-128-euclidean.hdf5 /test -s 12 -k 100 -t | |||||
[Timing]: knnsearch: 682 [msec] | |||||
# Pthreads-mnist over threads | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
[Timing]: knnsearch: 7889 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 2 -k 100 -t | |||||
[Timing]: knnsearch: 3963 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 4 -k 100 -t | |||||
[Timing]: knnsearch: 2058 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 6 -k 100 -t | |||||
[Timing]: knnsearch: 1445 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 8 -k 100 -t | |||||
[Timing]: knnsearch: 1496 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
[Timing]: knnsearch: 1379 [msec] | |||||
hoo2@shirka:~/Work/AUTH/PDS/homework_1$ eval $DOCK ./out/knnsearch_v1_pth -c mtx/mnist-784-euclidean.hdf5 /test -s 12 -k 100 -t | |||||
[Timing]: knnsearch: 1352 [msec] | |||||
@@ -0,0 +1,27 @@ | |||||
#!/usr/bin/env bash | |||||
# | |||||
# Take measurements | |||||
# | |||||
# Runs the knnsearch_v1 binary on the SIFT and MNIST hdf5 files through the
# docker command stored in DOCK, echoing a command line before each run.
# The OMP runs at the bottom are currently commented out.
#
# DOCK: `docker run` prefix that mounts the project directory into the
# container and uses it as the working directory.
DOCK="docker run --rm -v /home/hoo2/Work/AUTH/PDS/homework_1:/usr/src/PDS_homework_1 -w /usr/src/PDS_homework_1/ hoo2/hpcimage" | |||||
echo " " | |||||
echo "Serial" | |||||
# NOTE(review): the echoed command lines say `-s 1`, but the commands that
# actually run pass `-s 10` - confirm which value is intended.
echo "./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t" | |||||
eval $DOCK ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
echo "./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t" | |||||
eval $DOCK ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
echo " " | |||||
echo "OMP" | |||||
# NOTE(review): in the disabled lines below, the echo text names knnsearch_v1
# with `-s 1`, while the matching eval lines name knnsearch_v1_omp with
# `-s 10` - align them before re-enabling.
# echo ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
#eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
# echo ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
#eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
# echo ./out/knnsearch_v1 -c mtx/sift-128-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
#eval $DOCK ./out/knnsearch_v1_omp -c mtx/sift-128-euclidean.hdf5 /test -s 10 -k 100 -t | |||||
# echo ./out/knnsearch_v1 -c mtx/mnist-784-euclidean.hdf5 /test -s 1 -k 100 -t | |||||
#eval $DOCK ./out/knnsearch_v1_omp -c mtx/mnist-784-euclidean.hdf5 /test -s 10 -k 100 -t |
@@ -65,9 +65,15 @@ bool get_options(int argc, char* argv[]){ | |||||
else if (arg == "-k") { | else if (arg == "-k") { | ||||
session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k; | session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k; | ||||
} | } | ||||
else if (arg == "-n" || arg == "--max_trheads") { | |||||
else if (arg == "-n" || arg == "--max_threads") { | |||||
session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads; | session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads; | ||||
} | } | ||||
else if (arg == "-s" || arg == "--slices") { | |||||
session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices; | |||||
} | |||||
else if (arg == "-a" || arg == "--accuracy") { | |||||
session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy; | |||||
} | |||||
else if (arg == "-t" || arg == "--timing") | else if (arg == "-t" || arg == "--timing") | ||||
session.timing = true; | session.timing = true; | ||||
else if (arg == "-v" || arg == "--verbose") | else if (arg == "-v" || arg == "--verbose") | ||||
@@ -87,7 +93,12 @@ bool get_options(int argc, char* argv[]){ | |||||
std::cout << " -k <number>\n"; | std::cout << " -k <number>\n"; | ||||
std::cout << " Set the number of closest neighbors to find. \n\n"; | std::cout << " Set the number of closest neighbors to find. \n\n"; | ||||
std::cout << " -n | --max_trheads <threads>\n"; | std::cout << " -n | --max_trheads <threads>\n"; | ||||
std::cout << " Reduce the thread number for the execution to <threads>. <threads> must be less or equal to available CPUs.\n\n"; | |||||
std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n"; | |||||
std::cout << " -s | --slices <slices/threads>\n"; | |||||
std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n"; | |||||
std::cout << " <threads> should be less or equal to available CPUs\n\n"; | |||||
std::cout << " -a | --accuracy <accuracy>\n"; | |||||
std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n"; | |||||
std::cout << " -t | --timing\n"; | std::cout << " -t | --timing\n"; | ||||
std::cout << " Request timing measurements output to stdout.\n\n"; | std::cout << " Request timing measurements output to stdout.\n\n"; | ||||
std::cout << " -v | --verbose\n"; | std::cout << " -v | --verbose\n"; | ||||
@@ -109,6 +120,27 @@ bool get_options(int argc, char* argv[]){ | |||||
return status; | return status; | ||||
} | } | ||||
/*!
 * Removes a pre-existing output file and loads the input matrices.
 *
 * If session.outMtxFile exists (access() with F_OK returns 0) it is removed
 * first. Corpus is then loaded with Mtx::load, and Query is loaded only when
 * session.queryMtx is set.
 *
 * \param Corpus  Matrix to fill with the corpus data set.
 * \param Query   Matrix to fill when session.queryMtx is set; otherwise left untouched.
 *
 * \note NOTE(review): the Query load below passes the same
 *       session.corpusMtxFile / session.corpusDataSet arguments as the Corpus
 *       load. This looks like it should use the query file and data set -
 *       confirm against the session definition.
 */
void loadMtx(MatrixDst& Corpus, MatrixDst& Query) { | |||||
if (access(session.outMtxFile.c_str(), F_OK) == 0) | |||||
std::remove(session.outMtxFile.c_str()); | |||||
// Timing of the load step is currently disabled.
// timer.start(); | |||||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus); | |||||
if (session.queryMtx) | |||||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query); | |||||
// timer.stop(); | |||||
// timer.print_dt("Load hdf5 files"); | |||||
} | |||||
/*!
 * Stores the result matrices to the output file.
 *
 * Idx is written to session.outMtxIdxDataSet and Dst to
 * session.outMtxDstDataSet, both in session.outMtxFile, using Mtx::store.
 *
 * \param Idx  Matrix stored under session.outMtxIdxDataSet.
 * \param Dst  Matrix stored under session.outMtxDstDataSet.
 */
void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) { | |||||
// Timing of the store step is currently disabled.
// timer.start(); | |||||
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx); | |||||
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst); | |||||
// timer.stop(); | |||||
// timer.print_dt("Store hdf5 files"); | |||||
} | |||||
#ifndef TESTING | #ifndef TESTING | ||||
int main(int argc, char* argv[]) try { | int main(int argc, char* argv[]) try { | ||||
// Instantiate matrixes | // Instantiate matrixes | ||||
@@ -127,38 +159,26 @@ int main(int argc, char* argv[]) try { | |||||
if (!get_options(argc, argv)) | if (!get_options(argc, argv)) | ||||
exit(1); | exit(1); | ||||
if (access(session.outMtxFile.c_str(), F_OK) == 0) | |||||
std::remove(session.outMtxFile.c_str()); | |||||
init_workers(); | |||||
// Load data | // Load data | ||||
timer.start(); | |||||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus); | |||||
if (session.queryMtx) | |||||
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query); | |||||
timer.stop(); | |||||
timer.print_dt("Load hdf5 files"); | |||||
loadMtx(Corpus, Query); | |||||
// Prepare output memory | // Prepare output memory | ||||
Idx.resize(Query.rows(), session.k); | |||||
Dst.resize(Query.rows(), session.k); | |||||
Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k); | |||||
Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k); | |||||
// Do the search | // Do the search | ||||
logger << "Start knnsearch ..."; | logger << "Start knnsearch ..."; | ||||
timer.start(); | timer.start(); | ||||
if (session.queryMtx) | |||||
knnsearch(Corpus, Query, 0, session.k, session.k, Idx, Dst); | |||||
else | |||||
knnsearch(Corpus, Corpus, 0, session.k, session.k, Idx, Dst); | |||||
size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0)); | |||||
knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst); | |||||
timer.stop(); | timer.stop(); | ||||
logger << " Done" << logger.endl; | logger << " Done" << logger.endl; | ||||
timer.print_dt("knnsearch"); | timer.print_dt("knnsearch"); | ||||
// Store data | // Store data | ||||
timer.start(); | |||||
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx); | |||||
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst); | |||||
timer.stop(); | |||||
timer.print_dt("Store hdf5 files"); | |||||
storeMtx(Idx, Dst); | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -0,0 +1,50 @@ | |||||
/** | |||||
* \file v1.hpp | |||||
* \brief | |||||
* | |||||
* \author | |||||
* Christos Choutouridis AEM:8997 | |||||
* <cchoutou@ece.auth.gr> | |||||
*/ | |||||
#include "v1.hpp" | |||||
void init_workers() { | |||||
#if defined CILK | |||||
size_t cilk_w = __cilkrts_get_nworkers(); | |||||
if (!session.max_threads) | |||||
session.max_threads = (session.slices) ? (session.slices) : cilk_w; | |||||
// else if (session.max_threads < cilk_w) | |||||
// __cilkrts_set_param("nworkers", "4"); | |||||
// else ignored by cilk | |||||
#elif defined OMP | |||||
// omp_set_dynamic(1); | |||||
size_t omp_w = (size_t)omp_get_max_threads(); | |||||
if (!session.max_threads) { | |||||
session.max_threads = (session.slices) ? (session.slices) : omp_w; | |||||
// omp_set_dynamic(1); | |||||
} | |||||
else if (session.max_threads < omp_w) { | |||||
// omp_set_dynamic(0); | |||||
omp_set_num_threads(session.max_threads); | |||||
} | |||||
// else ignored by omp | |||||
#elif defined PTHREADS | |||||
size_t pth_w = std::thread::hardware_concurrency(); | |||||
if (!session.max_threads) | |||||
session.max_threads = (session.slices) ? (session.slices) : pth_w; | |||||
#else | |||||
#endif | |||||
if (!session.slices) | |||||
session.slices = session.max_threads; | |||||
openblas_set_num_threads(1); // Limit OpenBLAS to 1 thread | |||||
} | |||||
@@ -0,0 +1,9 @@ | |||||
## NOTE ## | |||||
This folder contains the output files from tests. | |||||
In order to populate it, run the tests | |||||
$> make tests | |||||
$> ./tests | |||||