diff --git a/.gitignore b/.gitignore index e309908..07a920e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # project bin/ out/ +resources/ # hpc related diff --git a/Makefile b/Makefile index cf760eb..ae27996 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ REL_CFLAGS := -Wall -Wextra -O2 # ============== Linker settings ============== # Linker flags (example: -pthread -lm) -LDFLAGS := +LDFLAGS := -lm -lopenblas # Map output file MAP_FILE := output.map MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE) @@ -53,11 +53,11 @@ MAP_FLAG := -Xlinker -Map=$(BUILD_DIR)/$(MAP_FILE) # ============== Docker settings ============== # We need: # - Bind the entire project directory(the dir that icludes all the code) as volume. -# - In docker instance, change to working directory(where the makefile is). +# - In docker instance, change to working directory(where the makefile is). DOCKER_VOL_DIR := $${PWD} -DOCKER_WRK_DIR := -DOCKER_RUN := docker run -DOCKER_FLAGS := --rm -v $(DOCKER_VOL_DIR):/usr/src/$(PROJECT) -w /usr/src/$(PROJECT)/$(DOCKER_WRK_DIR) +DOCKER_WRK_DIR := +DOCKER_RUN := docker run --rm +DOCKER_FLAGS := -v $(DOCKER_VOL_DIR):/usr/src/$(PROJECT) -w /usr/src/$(PROJECT)/$(DOCKER_WRK_DIR) # docker invoke mechanism (edit with care) # note: diff --git a/matlab/distXY.m b/matlab/distXY.m new file mode 100644 index 0000000..0fc6d94 --- /dev/null +++ b/matlab/distXY.m @@ -0,0 +1,21 @@ +function D = distXY(X, Y) +%distXY Calculate an m x n Euclidean distance matrix D of X and Y +% +% Calculate an m x n Euclidean distance matrix D between two set +% points X and Y of m and n points respectively +% +% X : [m x d] Corpus data points (d dimensions) +% Y : [n x d] Query data points (d dimensions) +% D : [m x n] Distance matrix where D(i,j) the distance of X(i,:) and Y(j,:) + + [m, d1] = size(X); + [n, d2] = size(Y); + if d1 == d2 + d = d1; + else + error('Corpus(X) and Query(Y) data points must have the same dimensions (d)'); + end + D = (X.*X) * ones(d,1)*ones(1,n) -2 * X*Y.' + ones(m,1)*ones(1,d) * (Y.*Y).'; + %D = sum(X.^2, 2) - 2 * X*Y.' + sum(Y.^2, 2).' + D = sqrt(D); +end diff --git a/matlab/distXY.m~ b/matlab/distXY.m~ new file mode 100644 index 0000000..1295e58 --- /dev/null +++ b/matlab/distXY.m~ @@ -0,0 +1,18 @@ +function D = distXY(X, Y) +%distXY Calculate an m x n Euclidean distance matrix 𝐷 of X and Y +% +% Calculate an m x n Euclidean distance matrix 𝐷 between two sets points 𝑋 and 𝑌 of 𝑚 and 𝑛 points respectively +% X : [m x d] Corpus data points (d dimensions) +% Y : [n x d] Query data poinsts (d dimensions) +% D : [m x n] Distance matrix where D(i,j) the distance of X(i) and Y(j) + + [m d1] = size(X); + [n d2] = size(Y); + if d1 == d2 + d = d1; + else + error('Corpus(X) and Query(Y) data points have to have the same dimensions'); + end + %D = (X.*X) * ones(d,1)*ones(1,n) -2 * X*Y.' + ones(m,1)*ones(1,d) * (Y.*Y).'; + D = sum(X.^2, 2) - 2 * X*Y.' + sum(Y.^2, 2).' +end diff --git a/matlab/kNN.m b/matlab/kNN.m new file mode 100644 index 0000000..6367e5b --- /dev/null +++ b/matlab/kNN.m @@ -0,0 +1,30 @@ +function [I, D] = kNN(X, Y, k) +%kNN return the k-nearest neighbors Of Y into dataset X +% +% Outputs: +% I : [n x k] The indexes of X where the nearest neighbors of Y lies +% D : [n x k] The distances of each neighbor +% +% Inputs: +% X : [m x d] Corpus data points (d dimensions) +% Y : [n x d] Query data points (d dimensions) +% k : [scalar] The number of neighbors + + disMat = distXY(X, Y); + [m, n] = size(disMat); + + II = repmat([1:k].', 1, n); % init the min algorithm + DD = disMat(1:k,:); + for j = 1:n + for i = k+1:m + % calculate candidate and canditate index + [tail, taili] = maxIdx(DD(:, j)); + if disMat(i,j) < tail + DD(taili, j) = disMat(i,j); + II(taili, j) = i; + end + end + end + I = II.'; + D = DD.'; +end diff --git a/matlab/kNN.m~ b/matlab/kNN.m~ new file mode 100644 index 0000000..356ac5e --- /dev/null +++ b/matlab/kNN.m~ @@ -0,0 +1,29 @@ +function [I, D] = kNN(X, Y, k) +%kNN return the k-nearest neighbors Of Y into dataset X +% +% Outputs: +% I : [n x k] The indexes of X where the nearest neighbors of Y lies +% D : [n x k] The distances of each neighbor +% +% Inputs: +% X : [m x d] Corpus data points (d dimensions) +% Y : [n x d] Query data points (d dimensions) +% k : [scalar] The number of neighbors + + disMat = distXY(X, Y); + [m, n] = size(disMat); + + II = repmat([1:k].', 1, n); % init the min algorithm + DD = disMat(1:k,:); + for i = k+1:m + for j = 1:n + [c, ci] = tail(DD); % calculate candidate and canditate index + if disMat(i,j) < c(j) + DD() + end + end + end + I = II.'; + D = DD.'; +end + diff --git a/matlab/maxIdx.m b/matlab/maxIdx.m new file mode 100644 index 0000000..822f864 --- /dev/null +++ b/matlab/maxIdx.m @@ -0,0 +1,14 @@ +function [M, I] = maxIdx(Vec) +%maxIdx Calculate the max,index pair of each element of a vector +% + n = length(Vec); + I = 0; + M = -Inf; + for j = 1:n + if M < Vec(j) + M = Vec(j); + I = j; + end + end +end + diff --git a/matlab/tail.m~ b/matlab/tail.m~ new file mode 100644 index 0000000..4f65aa3 --- /dev/null +++ b/matlab/tail.m~ @@ -0,0 +1,14 @@ +function [M, I] = maxIdx(Vec) +%tail Calculate the max,index pair of each Vec(:) +% + n = length(Vec); + I = 0; + M = -1; + for j = 1:n + if M < Vec(j) + M(j) = Mat(i,j); + I(j) = i; + end + end +end + diff --git a/src/main.cpp b/src/main.cpp index aa367ba..4608330 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,11 +1,39 @@ -/* - * main.cpp +/*! + * \file main.cpp + * \brief Main application file * - * Created on: Jan 2, 2021 - * Author: hoo2 + * \author + * Christos Choutouridis AEM:8997 + * */ #include +// Definition of the kNN result struct +typedef struct knnresult{ + int * nidx; //!< Indices (0-based) of nearest neighbors [m-by-k] + double * ndist; //!< Distance of nearest neighbors [m-by-k] + int m; //!< Number of query points [scalar] + int k; //!< Number of nearest neighbors [scalar] +} knnresult; + +//! Compute k nearest neighbors of each point in X [n-by-d] +/*! + + \param X Corpus data points [n-by-d] + \param Y Query data points [m-by-d] + \param n Number of corpus points [scalar] + \param m Number of query points [scalar] + \param d Number of dimensions [scalar] + \param k Number of neighbors [scalar] + + \return The kNN result +*/ +knnresult kNN(double * X, double * Y, int n, int m, int d, int k) { + + + +} + int main () { std::cout << "Lets start!\n";