Q6: step 3

This commit is contained in:
Christos Choutouridis 2020-05-17 17:09:48 +03:00
parent 10be4df8be
commit 1c810a1d2b
4 changed files with 42 additions and 1 deletions

View File

@ -12,6 +12,7 @@
Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309 Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309
Βήμα 2: Λόγος διάμεσων χρόνων: 6.719256593 (s=8)
Βήμα 2: Λόγος διάμεσων χρόνων: 24.61113257 (s=1024) Βήμα 2: Λόγος διάμεσων χρόνων: 24.61113257 (s=1024)
Βήμα 3: Λόγος διάμεσων χρόνων: xxxx Βήμα 3: Λόγος διάμεσων χρόνων: 4.667896091 (s=8)

View File

@ -49,6 +49,7 @@ mMult_ft multSelect (char* order) {
else if (! strcmp ((const char*)order, "jki")) return matrixMult_jki; else if (! strcmp ((const char*)order, "jki")) return matrixMult_jki;
else if (! strcmp ((const char*)order, "kij")) return matrixMult_kij; else if (! strcmp ((const char*)order, "kij")) return matrixMult_kij;
else if (! strcmp ((const char*)order, "kji")) return matrixMult_kji; else if (! strcmp ((const char*)order, "kji")) return matrixMult_kji;
else if (! strcmp ((const char*)order, "ikj8")) return matrixMult_ikj8;
else return matrixMult_ijk; else return matrixMult_ijk;
} }

View File

@ -273,6 +273,44 @@ void matrixMult_kji_block(float * const C, float const * const A, float const *
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ]; C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
} }
/*!
 * Square Matrix multiplication, ikj order, tiled in blocks of 8 with the
 * innermost (j) loop of every full tile unrolled by hand.
 *
 * C is overwritten rather than accumulated into: the product belonging to the
 * first global k index (K+k == 0) is stored with '=', every later product is
 * added with '+='. The caller does not have to clear C beforehand.
 *
 * The tile size is fixed to 8. When n is not a multiple of 8 the last tile in
 * each dimension is clipped to the matrix edge, so indices never reach n.
 *
 * \param   C  pointer to output matrix; must not overlap A or B
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes); nothing is done for n <= 0
 * \return  none
 */
void matrixMult_ikj8(float * const C, float const * const A, float const * const B, int const n) {
    for (int I = 0; I < n; I += 8) {
        /* Number of valid rows in this tile (8, or less at the matrix edge). */
        int const rows = (n - I < 8) ? (n - I) : 8;
        for (int K = 0; K < n; K += 8) {
            int const depth = (n - K < 8) ? (n - K) : 8;
            for (int J = 0; J < n; J += 8) {
                int const cols = (n - J < 8) ? (n - J) : 8;
                for (int i = 0; i < rows; ++i) {
                    for (int k = 0; k < depth; ++k) {
                        /* A(I+i, K+k) does not depend on j: load it once. */
                        float const a = A[ sub2ind(I+i,K+k,n) ];

                        if (cols < 8) {
                            /* Clipped tile at the right edge: plain loop over the remaining columns. */
                            for (int j = 0; j < cols; ++j) {
                                if ((k+K) == 0)
                                    C[ sub2ind(I+i,J+j,n) ]  = a * B[ sub2ind(K+k,J+j,n) ];
                                else
                                    C[ sub2ind(I+i,J+j,n) ] += a * B[ sub2ind(K+k,J+j,n) ];
                            }
                        }
                        else if ((k+K) == 0) {
                            /* First k overall: initialise C instead of accumulating. */
                            C[ sub2ind(I+i,J+0,n) ] = a * B[ sub2ind(K+k,J+0,n) ];
                            C[ sub2ind(I+i,J+1,n) ] = a * B[ sub2ind(K+k,J+1,n) ];
                            C[ sub2ind(I+i,J+2,n) ] = a * B[ sub2ind(K+k,J+2,n) ];
                            C[ sub2ind(I+i,J+3,n) ] = a * B[ sub2ind(K+k,J+3,n) ];
                            C[ sub2ind(I+i,J+4,n) ] = a * B[ sub2ind(K+k,J+4,n) ];
                            C[ sub2ind(I+i,J+5,n) ] = a * B[ sub2ind(K+k,J+5,n) ];
                            C[ sub2ind(I+i,J+6,n) ] = a * B[ sub2ind(K+k,J+6,n) ];
                            C[ sub2ind(I+i,J+7,n) ] = a * B[ sub2ind(K+k,J+7,n) ];
                        }
                        else {
                            C[ sub2ind(I+i,J+0,n) ] += a * B[ sub2ind(K+k,J+0,n) ];
                            C[ sub2ind(I+i,J+1,n) ] += a * B[ sub2ind(K+k,J+1,n) ];
                            C[ sub2ind(I+i,J+2,n) ] += a * B[ sub2ind(K+k,J+2,n) ];
                            C[ sub2ind(I+i,J+3,n) ] += a * B[ sub2ind(K+k,J+3,n) ];
                            C[ sub2ind(I+i,J+4,n) ] += a * B[ sub2ind(K+k,J+4,n) ];
                            C[ sub2ind(I+i,J+5,n) ] += a * B[ sub2ind(K+k,J+5,n) ];
                            C[ sub2ind(I+i,J+6,n) ] += a * B[ sub2ind(K+k,J+6,n) ];
                            C[ sub2ind(I+i,J+7,n) ] += a * B[ sub2ind(K+k,J+7,n) ];
                        }
                    }
                }
            }
        }
    }
}
/*! /*!
* Initialize matrix with random indices and return the matrix pointer. * Initialize matrix with random indices and return the matrix pointer.
* *

View File

@ -33,6 +33,7 @@ void matrixMult_jki_block(float * const C, float const * const A, float const *
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s); void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s); void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s);
/* ikj-order multiply using fixed 8-wide tiles with a hand-unrolled j loop; takes no block-size argument, unlike the *_block variants above. */
void matrixMult_ikj8(float * const C, float const * const A, float const * const B, int const n);
float* matrixInit(int const n); float* matrixInit(int const n);