|
@@ -7,48 +7,139 @@ |
|
|
\author Christos Choutouridis 8997 <cchoutou@ece.auth.gr> |
|
|
\author Christos Choutouridis 8997 <cchoutou@ece.auth.gr> |
|
|
\date 2020-05-05 |
|
|
\date 2020-05-05 |
|
|
*/ |
|
|
*/ |
|
|
|
|
|
|
|
|
#include <stdio.h> |
|
|
|
|
|
#include <stdlib.h> |
|
|
|
|
|
#include <math.h> |
|
|
|
|
|
#include <sys/time.h> |
|
|
|
|
|
#include <assert.h> |
|
|
|
|
|
|
|
|
|
|
|
#define MAX_ITER 10 |
|
|
|
|
|
#define sub2ind(i,j,n) (j) + (i)*(n) |
|
|
|
|
|
|
|
|
#include "matmul.h" |
|
|
|
|
|
|
|
|
/*! |
|
|
/*! |
|
|
* Square Matrix multiplication |
|
|
|
|
|
|
|
|
* Square Matrix multiplication - ijk |
|
|
* \param C pointer to output matrix |
|
|
* \param C pointer to output matrix |
|
|
* \param A pointer to input matrix A |
|
|
* \param A pointer to input matrix A |
|
|
* \param B pointer to input matrix B |
|
|
* \param B pointer to input matrix B |
|
|
* \param n Size of matrices (both sizes) |
|
|
* \param n Size of matrices (both sizes) |
|
|
* \return none |
|
|
* \return none |
|
|
* |
|
|
|
|
|
* \note |
|
|
|
|
|
* This version executes row major order ijk |
|
|
|
|
|
*/ |
|
|
*/ |
|
|
void matrixMult(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
|
|
|
void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
for (int j = 0; j < n; ++j) { |
|
|
|
|
|
int k =0; |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
for (k = 1; k < n; ++k) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
for (int i = 0; i < n; i++) { /* rows */ |
|
|
|
|
|
for (int j = 0; j < n; j++) { /* cols */ |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = 0; /* initialize output value */ |
|
|
|
|
|
for (int k = 0; k < n; k++) { /* accumulate products */ |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += |
|
|
|
|
|
A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
|
|
|
/*! |
|
|
|
|
|
* Square Matrix multiplication - ikj |
|
|
|
|
|
* \param C pointer to output matrix |
|
|
|
|
|
* \param A pointer to input matrix A |
|
|
|
|
|
* \param B pointer to input matrix B |
|
|
|
|
|
* \param n Size of matrices (both sizes) |
|
|
|
|
|
* \return none |
|
|
|
|
|
*/ |
|
|
|
|
|
void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
for (int k = 0; k < n; ++k) { |
|
|
|
|
|
if (!k) { |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} else { |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*! |
|
|
|
|
|
* Square Matrix multiplication - jik |
|
|
|
|
|
* \param C pointer to output matrix |
|
|
|
|
|
* \param A pointer to input matrix A |
|
|
|
|
|
* \param B pointer to input matrix B |
|
|
|
|
|
* \param n Size of matrices (both sizes) |
|
|
|
|
|
* \return none |
|
|
|
|
|
*/ |
|
|
|
|
|
void matrixMult_jik(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
for (int j = 0; j < n; j++) |
|
|
|
|
|
for (int i = 0; i < n; i++) { |
|
|
|
|
|
int k =0; |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
for (k = 1; k < n; k++) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*! |
|
|
|
|
|
* Square Matrix multiplication - jki |
|
|
|
|
|
* \param C pointer to output matrix |
|
|
|
|
|
* \param A pointer to input matrix A |
|
|
|
|
|
* \param B pointer to input matrix B |
|
|
|
|
|
* \param n Size of matrices (both sizes) |
|
|
|
|
|
* \return none |
|
|
|
|
|
*/ |
|
|
|
|
|
void matrixMult_jki(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
for (int k = 0; k < n; ++k) { |
|
|
|
|
|
if (!k) { |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} else { |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*! |
|
|
|
|
|
* Square Matrix multiplication - kij |
|
|
|
|
|
* \param C pointer to output matrix |
|
|
|
|
|
* \param A pointer to input matrix A |
|
|
|
|
|
* \param B pointer to input matrix B |
|
|
|
|
|
* \param n Size of matrices (both sizes) |
|
|
|
|
|
* \return none |
|
|
|
|
|
*/ |
|
|
|
|
|
void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
for (int k = 0; k < n; ++k) { |
|
|
|
|
|
if (!k) { |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} else { |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*! |
|
|
|
|
|
* Square Matrix multiplication - kji |
|
|
|
|
|
* \param C pointer to output matrix |
|
|
|
|
|
* \param A pointer to input matrix A |
|
|
|
|
|
* \param B pointer to input matrix B |
|
|
|
|
|
* \param n Size of matrices (both sizes) |
|
|
|
|
|
* \return none |
|
|
|
|
|
* xxx |
|
|
|
|
|
*/ |
|
|
|
|
|
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
|
|
for (int k = 0; k < n; ++k) { |
|
|
|
|
|
if (!k) { |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} else { |
|
|
|
|
|
for (int j = 0; j < n; ++j) |
|
|
|
|
|
for (int i = 0; i < n; ++i) |
|
|
|
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*! |
|
|
/*! |
|
|
* Initialize matrix with random indices and return the matrix pointer. |
|
|
* Initialize matrix with random indices and return the matrix pointer. |
|
|
* |
|
|
* |
|
|
* \param n The size of the matrix (both of them) |
|
|
* \param n The size of the matrix (both of them) |
|
|
* \return Pointer to allocated and initialized matrix |
|
|
* \return Pointer to allocated and initialized matrix |
|
|
*/ |
|
|
*/ |
|
|
float * matrixInit(int const n) { |
|
|
|
|
|
|
|
|
float* matrixInit(int const n) { |
|
|
|
|
|
|
|
|
float *M = (float *) malloc( n*n*sizeof(float) ); |
|
|
float *M = (float *) malloc( n*n*sizeof(float) ); |
|
|
|
|
|
|
|
@@ -59,67 +150,5 @@ float * matrixInit(int const n) { |
|
|
return M; |
|
|
return M; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
int cmpfunc (const void * a, const void * b) { |
|
|
|
|
|
double v =*(double*)a - *(double*)b; |
|
|
|
|
|
return (v < 0) ? -1 : (v > 0) ? 1 : 0; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*! |
|
|
|
|
|
* A unit testing like main function to profile our code |
|
|
|
|
|
*/ |
|
|
|
|
|
int main(int argc, char **argv) |
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
struct timeval start, end; /* time structs */ |
|
|
|
|
|
double time[MAX_ITER] = {0.0}; /* execution time array in ms */ |
|
|
|
|
|
float *A, *B, *C; /* matrix declarations */ |
|
|
|
|
|
int n; /* matrix size */ |
|
|
|
|
|
|
|
|
|
|
|
/* read matrix size (or use default) */ |
|
|
|
|
|
if (argc != 2){ |
|
|
|
|
|
fprintf( stderr, "Usage:\n %s n\n where n is the matrix size.\n", |
|
|
|
|
|
argv[0]); |
|
|
|
|
|
exit(1); |
|
|
|
|
|
} |
|
|
|
|
|
n = atoi( argv[1] ); |
|
|
|
|
|
|
|
|
|
|
|
/* initialize matrices */ |
|
|
|
|
|
A = matrixInit( n ); |
|
|
|
|
|
B = matrixInit( n ); |
|
|
|
|
|
C = (float *) malloc( n*n*sizeof(float) ); |
|
|
|
|
|
|
|
|
|
|
|
/* compute matrix multiplication */ |
|
|
|
|
|
for (int it = 0; it < MAX_ITER; it++) { |
|
|
|
|
|
gettimeofday(&start, NULL); |
|
|
|
|
|
matrixMult( C, A, B, n ); |
|
|
|
|
|
gettimeofday(&end, NULL); |
|
|
|
|
|
|
|
|
|
|
|
time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */ |
|
|
|
|
|
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */ |
|
|
|
|
|
|
|
|
|
|
|
printf("Iter: %d Time: %f ms\n", it, time[it]); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* we need to use the result -- verify it */ |
|
|
|
|
|
for (int i = 0; i < n; i++) { /* rows */ |
|
|
|
|
|
for (int j = 0; j < n; j++) { /* cols */ |
|
|
|
|
|
|
|
|
|
|
|
float gold = 0; |
|
|
|
|
|
|
|
|
|
|
|
for (int k = 0; k < n; k++) { /* accumulate products */ |
|
|
|
|
|
gold += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ]; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
assert( (gold - C[sub2ind(i,j,n)]) < 1e-3 ); |
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// median calculation |
|
|
|
|
|
qsort ((void*)time, MAX_ITER, sizeof(time[0]), cmpfunc); |
|
|
|
|
|
printf("Median: %f [msec]\n", (MAX_ITER % 2) ? |
|
|
|
|
|
time[MAX_ITER/2] : |
|
|
|
|
|
(time[MAX_ITER/2] + time[MAX_ITER/2 -1]) /2 |
|
|
|
|
|
); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|