@@ -12,6 +12,6 @@ | |||||
Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309 | Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309 | ||||
Βήμα 2: Λόγος διάμεσων χρόνων: xxxx | |||||
Βήμα 2: Λόγος διάμεσων χρόνων: 24.61113257 (s=1024) | |||||
Βήμα 3: Λόγος διάμεσων χρόνων: xxxx | Βήμα 3: Λόγος διάμεσων χρόνων: xxxx |
@@ -52,7 +52,16 @@ mMult_ft multSelect (char* order) { | |||||
else return matrixMult_ijk; | else return matrixMult_ijk; | ||||
} | } | ||||
mMultBlock_ft multBlockSelect (char* order) { | |||||
if (! strcmp ((const char*)order, "ijk")) return matrixMult_ijk_block; | |||||
else if (! strcmp ((const char*)order, "ikj")) return matrixMult_ikj_block; | |||||
else if (! strcmp ((const char*)order, "jik")) return matrixMult_jik_block; | |||||
else if (! strcmp ((const char*)order, "jki")) return matrixMult_jki_block; | |||||
else if (! strcmp ((const char*)order, "kij")) return matrixMult_kij_block; | |||||
else if (! strcmp ((const char*)order, "kji")) return matrixMult_kji_block; | |||||
else return matrixMult_ijk_block; | |||||
} | |||||
/*! | /*! | ||||
* A unit testing like main function to profile our code | * A unit testing like main function to profile our code | ||||
@@ -61,36 +70,48 @@ int main(int argc, char **argv) { | |||||
struct timeval start, end; /* time structs */ | struct timeval start, end; /* time structs */ | ||||
double time[MAX_ITER] = {0.0}; /* execution time array in ms */ | double time[MAX_ITER] = {0.0}; /* execution time array in ms */ | ||||
float *A, *B, *C; /* matrix declarations */ | float *A, *B, *C; /* matrix declarations */ | ||||
mMult_ft mMult =NULL; | |||||
mMultBlock_ft mMultBlock =NULL; | |||||
/* read matrix size (or use default) */ | /* read matrix size (or use default) */ | ||||
if (argc != 3){ | |||||
if (argc < 3){ | |||||
fprintf( stderr, | fprintf( stderr, | ||||
"Usage:\n" | "Usage:\n" | ||||
"%s n order, where \n" | |||||
"%s n order [block], where \n" | |||||
" n: is the matrix size.\n" | " n: is the matrix size.\n" | ||||
" order: the loop order ex: ijk , jik, ...\n", | |||||
" order: the loop order ex: ijk , jik, ...\n" | |||||
" block: Optional block size\n", | |||||
argv[0]); | argv[0]); | ||||
exit(1); | exit(1); | ||||
} | } | ||||
int n = atoi( argv[1] ); | int n = atoi( argv[1] ); | ||||
mMult_ft mMult = multSelect(argv[2]); | |||||
if (argc == 4) | |||||
mMultBlock = multBlockSelect(argv[2]); | |||||
else | |||||
mMult = multSelect(argv[2]); | |||||
/* initialize matrices */ | /* initialize matrices */ | ||||
A = matrixInit( n ); | A = matrixInit( n ); | ||||
B = matrixInit( n ); | B = matrixInit( n ); | ||||
C = (float *) malloc( n*n*sizeof(float) ); | C = (float *) malloc( n*n*sizeof(float) ); | ||||
/* compute matrix multiplication */ | |||||
for (int it = 0; it < MAX_ITER; it++) { | |||||
gettimeofday(&start, NULL); | |||||
mMult( C, A, B, n ); | |||||
gettimeofday(&end, NULL); | |||||
time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */ | |||||
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */ | |||||
/* compute matrix multiplication */ | |||||
for (int it = 0; it < MAX_ITER; it++) { | |||||
if (argc == 4) { | |||||
gettimeofday(&start, NULL); | |||||
mMultBlock( C, A, B, n, atoi(argv[3]) ); | |||||
gettimeofday(&end, NULL); | |||||
} | |||||
else { | |||||
gettimeofday(&start, NULL); | |||||
mMult( C, A, B, n ); | |||||
gettimeofday(&end, NULL); | |||||
} | |||||
time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */ | |||||
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */ | |||||
printf("Iter: %d Time: %f ms\n", it, time[it]); | |||||
} | |||||
printf("Iter: %d Time: %f ms\n", it, time[it]); | |||||
} | |||||
/* we need to use the result -- verify it */ | /* we need to use the result -- verify it */ | ||||
for (int i = 0; i < n; i++) { /* rows */ | for (int i = 0; i < n; i++) { /* rows */ | ||||
@@ -116,7 +116,6 @@ void matrixMult_kij(float * const C, float const * const A, float const * const | |||||
* \param B pointer to input matrix B | * \param B pointer to input matrix B | ||||
* \param n Size of matrices (both sizes) | * \param n Size of matrices (both sizes) | ||||
* \return none | * \return none | ||||
* xxx | |||||
*/ | */ | ||||
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) { | void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) { | ||||
for (int k = 0; k < n; ++k) { | for (int k = 0; k < n; ++k) { | ||||
@@ -132,6 +131,147 @@ void matrixMult_kji(float * const C, float const * const A, float const * const | |||||
} | } | ||||
} | } | ||||
/*!
 * Square Matrix multiplication in blocks - ijk
 *
 * Computes C = A*B for row-major n-by-n matrices, visiting the operands in
 * s-by-s tiles. C is completely overwritten; its previous contents are never read.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: tiles on the right and
 *          bottom edges are clipped to the matrix. A value outside [1, n] is
 *          treated as n (one single tile).
 * \return none
 */
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   /* s <= 0 would never advance the tile loops and s > n would over-run the matrices. */
   int const bs = (s < 1 || s > n) ? n : s;

   for (int I = 0; I < n; I += bs) {
      int const i_end = (n - I > bs) ? I + bs : n;       /* clip the last tile row */
      for (int J = 0; J < n; J += bs) {
         int const j_end = (n - J > bs) ? J + bs : n;    /* clip the last tile column */
         for (int K = 0; K < n; K += bs) {
            int const k_end = (n - K > bs) ? K + bs : n; /* clip the last inner tile */
            for (int i = I; i < i_end; ++i) {
               int const row = i * n;                    /* offset of row i in A and C */
               for (int j = J; j < j_end; ++j) {
                  /* The first K tile starts the dot product, so reset the accumulator. */
                  if (K == 0)
                     C[row + j] = 0;
                  for (int k = K; k < k_end; ++k)
                     C[row + j] += A[row + k] * B[k * n + j];
               }
            }
         }
      }
   }
}
/*!
 * Square Matrix multiplication in blocks - ikj
 *
 * Computes C = A*B for row-major n-by-n matrices, visiting the operands in
 * s-by-s tiles. C is completely overwritten; its previous contents are never read.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: tiles on the right and
 *          bottom edges are clipped to the matrix. A value outside [1, n] is
 *          treated as n (one single tile).
 * \return none
 */
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   /* s <= 0 would never advance the tile loops and s > n would over-run the matrices. */
   int const bs = (s < 1 || s > n) ? n : s;

   for (int I = 0; I < n; I += bs) {
      int const i_end = (n - I > bs) ? I + bs : n;       /* clip the last tile row */
      for (int K = 0; K < n; K += bs) {
         int const k_end = (n - K > bs) ? K + bs : n;    /* clip the last inner tile */
         for (int J = 0; J < n; J += bs) {
            int const j_end = (n - J > bs) ? J + bs : n; /* clip the last tile column */
            for (int i = I; i < i_end; ++i) {
               int const c_row = i * n;                  /* offset of row i in A and C */
               for (int k = K; k < k_end; ++k) {
                  int const   b_row = k * n;             /* offset of row k in B */
                  float const a_ik  = A[c_row + k];
                  if (k == 0) {
                     /* k == 0 is the first term of every dot product: store instead of accumulate. */
                     for (int j = J; j < j_end; ++j)
                        C[c_row + j] = a_ik * B[b_row + j];
                  }
                  else {
                     for (int j = J; j < j_end; ++j)
                        C[c_row + j] += a_ik * B[b_row + j];
                  }
               }
            }
         }
      }
   }
}
/*!
 * Square Matrix multiplication in blocks - jik
 *
 * Computes C = A*B for row-major n-by-n matrices, visiting the operands in
 * s-by-s tiles. C is completely overwritten; its previous contents are never read.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: tiles on the right and
 *          bottom edges are clipped to the matrix. A value outside [1, n] is
 *          treated as n (one single tile).
 * \return none
 */
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   /* s <= 0 would never advance the tile loops and s > n would over-run the matrices. */
   int const bs = (s < 1 || s > n) ? n : s;

   for (int J = 0; J < n; J += bs) {
      int const j_end = (n - J > bs) ? J + bs : n;       /* clip the last tile column */
      for (int I = 0; I < n; I += bs) {
         int const i_end = (n - I > bs) ? I + bs : n;    /* clip the last tile row */
         for (int K = 0; K < n; K += bs) {
            int const k_end = (n - K > bs) ? K + bs : n; /* clip the last inner tile */
            for (int j = J; j < j_end; ++j) {
               for (int i = I; i < i_end; ++i) {
                  int const row = i * n;                 /* offset of row i in A and C */
                  /* The first K tile starts the dot product, so reset the accumulator. */
                  if (K == 0)
                     C[row + j] = 0;
                  for (int k = K; k < k_end; ++k)
                     C[row + j] += A[row + k] * B[k * n + j];
               }
            }
         }
      }
   }
}
/*!
 * Square Matrix multiplication in blocks - jki
 *
 * Computes C = A*B for row-major n-by-n matrices, visiting the operands in
 * s-by-s tiles. C is completely overwritten; its previous contents are never read.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: tiles on the right and
 *          bottom edges are clipped to the matrix. A value outside [1, n] is
 *          treated as n (one single tile).
 * \return none
 */
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   /* s <= 0 would never advance the tile loops and s > n would over-run the matrices. */
   int const bs = (s < 1 || s > n) ? n : s;

   for (int J = 0; J < n; J += bs) {
      int const j_end = (n - J > bs) ? J + bs : n;       /* clip the last tile column */
      for (int K = 0; K < n; K += bs) {
         int const k_end = (n - K > bs) ? K + bs : n;    /* clip the last inner tile */
         for (int I = 0; I < n; I += bs) {
            int const i_end = (n - I > bs) ? I + bs : n; /* clip the last tile row */
            for (int j = J; j < j_end; ++j) {
               for (int k = K; k < k_end; ++k) {
                  float const b_kj = B[k * n + j];
                  if (k == 0) {
                     /* k == 0 is the first term of every dot product: store instead of accumulate. */
                     for (int i = I; i < i_end; ++i)
                        C[i * n + j] = A[i * n + k] * b_kj;
                  }
                  else {
                     for (int i = I; i < i_end; ++i)
                        C[i * n + j] += A[i * n + k] * b_kj;
                  }
               }
            }
         }
      }
   }
}
/*!
 * Square Matrix multiplication in blocks - kij
 *
 * Computes C = A*B for row-major n-by-n matrices, visiting the operands in
 * s-by-s tiles. C is completely overwritten; its previous contents are never
 * read, so the caller does not have to zero it first.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: tiles on the right and
 *          bottom edges are clipped to the matrix. A value outside [1, n] is
 *          treated as n (one single tile).
 * \return none
 */
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   /* s <= 0 would never advance the tile loops and s > n would over-run the matrices. */
   int const bs = (s < 1 || s > n) ? n : s;

   for (int K = 0; K < n; K += bs) {
      int const k_end = (n - K > bs) ? K + bs : n;       /* clip the last inner tile */
      for (int I = 0; I < n; I += bs) {
         int const i_end = (n - I > bs) ? I + bs : n;    /* clip the last tile row */
         for (int J = 0; J < n; J += bs) {
            int const j_end = (n - J > bs) ? J + bs : n; /* clip the last tile column */
            for (int k = K; k < k_end; ++k) {
               int const b_row = k * n;                  /* offset of row k in B */
               for (int i = I; i < i_end; ++i) {
                  int const   c_row = i * n;             /* offset of row i in A and C */
                  float const a_ik  = A[c_row + k];
                  if (k == 0) {
                     /*
                      * K is the outermost loop, so k == 0 reaches every element of C
                      * before any other k does: store here to initialise C.
                      */
                     for (int j = J; j < j_end; ++j)
                        C[c_row + j] = a_ik * B[b_row + j];
                  }
                  else {
                     for (int j = J; j < j_end; ++j)
                        C[c_row + j] += a_ik * B[b_row + j];
                  }
               }
            }
         }
      }
   }
}
/*!
 * Square Matrix multiplication in blocks - kji
 *
 * Computes C = A*B for row-major n-by-n matrices, visiting the operands in
 * s-by-s tiles. C is completely overwritten; its previous contents are never
 * read, so the caller does not have to zero it first.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: tiles on the right and
 *          bottom edges are clipped to the matrix. A value outside [1, n] is
 *          treated as n (one single tile).
 * \return none
 */
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   /* s <= 0 would never advance the tile loops and s > n would over-run the matrices. */
   int const bs = (s < 1 || s > n) ? n : s;

   for (int K = 0; K < n; K += bs) {
      int const k_end = (n - K > bs) ? K + bs : n;       /* clip the last inner tile */
      for (int J = 0; J < n; J += bs) {
         int const j_end = (n - J > bs) ? J + bs : n;    /* clip the last tile column */
         for (int I = 0; I < n; I += bs) {
            int const i_end = (n - I > bs) ? I + bs : n; /* clip the last tile row */
            for (int k = K; k < k_end; ++k) {
               for (int j = J; j < j_end; ++j) {
                  float const b_kj = B[k * n + j];
                  if (k == 0) {
                     /*
                      * K is the outermost loop, so k == 0 reaches every element of C
                      * before any other k does: store here to initialise C.
                      */
                     for (int i = I; i < i_end; ++i)
                        C[i * n + j] = A[i * n + k] * b_kj;
                  }
                  else {
                     for (int i = I; i < i_end; ++i)
                        C[i * n + j] += A[i * n + k] * b_kj;
                  }
               }
            }
         }
      }
   }
}
/*! | /*! | ||||
* Initialize matrix with random indices and return the matrix pointer. | * Initialize matrix with random indices and return the matrix pointer. | ||||
@@ -16,7 +16,8 @@ | |||||
//! Linear index of element (i,j) in a row-major matrix with n columns.
//! The whole expansion is parenthesised so it stays one operand inside larger expressions.
#define sub2ind(i,j,n) ((j) + (i)*(n))
//! Function pointer type to matrix multiplication back-end. | //! Function pointer type to matrix multiplication back-end. | ||||
typedef void (*mMult_ft)(float * const C, float const * const A, float const * const B, int const n); | |||||
typedef void (*mMult_ft)(float * const, float const * const, float const * const, int const); | |||||
typedef void (*mMultBlock_ft)(float * const, float const * const, float const * const, int const, int const); | |||||
void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n); | void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n); | ||||
void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n); | void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n); | ||||
@@ -25,6 +26,14 @@ void matrixMult_jki(float * const C, float const * const A, float const * const | |||||
void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n); | void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n); | ||||
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n); | void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n); | ||||
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s); | |||||
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s); | |||||
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s); | |||||
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s); | |||||
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s); | |||||
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s); | |||||
float* matrixInit(int const n); | float* matrixInit(int const n); | ||||
#endif /* SRC_MATMUL_H_ */ | #endif /* SRC_MATMUL_H_ */ |