|
|
@@ -116,7 +116,6 @@ void matrixMult_kij(float * const C, float const * const A, float const * const |
|
|
|
* \param B pointer to input matrix B |
|
|
|
* \param n Size of matrices (both sizes) |
|
|
|
* \return none |
|
|
|
* xxx |
|
|
|
*/ |
|
|
|
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) { |
|
|
|
for (int k = 0; k < n; ++k) { |
|
|
@@ -132,6 +131,147 @@ void matrixMult_kji(float * const C, float const * const A, float const * const |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in blocks - ijk
 *
 * Tiles are visited in I, J, K order and, inside a tile, elements are
 * visited in i, j, k order. Each entry of C (addressed through
 * sub2ind(i, j, n)) is reset to zero while the first K tile is processed
 * and accumulated into afterwards, so C needs no prior initialization.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: the last tile in
 *          each direction is clipped to the matrix bounds. A non-positive
 *          value is treated as n (one block spanning the whole matrix).
 * \return none
 */
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* A step of zero or less would never advance the tile loops. */
    int const bs = (s > 0) ? s : n;

    for (int I = 0; I < n; I += bs) {
        /* Clip the tile so that indices never reach n when s does not divide n. */
        int const iEnd = (n - I < bs) ? n : I + bs;

        for (int J = 0; J < n; J += bs) {
            int const jEnd = (n - J < bs) ? n : J + bs;

            for (int K = 0; K < n; K += bs) {
                int const kEnd = (n - K < bs) ? n : K + bs;

                for (int i = I; i < iEnd; ++i) {
                    for (int j = J; j < jEnd; ++j) {
                        /* First K tile: start the dot product from zero. */
                        if (K == 0)
                            C[ sub2ind(i,j,n) ] = 0;

                        for (int k = K; k < kEnd; ++k)
                            C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                    }
                }
            }
        }
    }
}
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in blocks - ikj
 *
 * Tiles are visited in I, K, J order and, inside a tile, elements are
 * visited in i, k, j order. The contribution of k == 0 is stored into C
 * directly and every later k is accumulated, so C needs no prior
 * initialization.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: the last tile in
 *          each direction is clipped to the matrix bounds. A non-positive
 *          value is treated as n (one block spanning the whole matrix).
 * \return none
 */
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* A step of zero or less would never advance the tile loops. */
    int const bs = (s > 0) ? s : n;

    for (int I = 0; I < n; I += bs) {
        /* Clip the tile so that indices never reach n when s does not divide n. */
        int const iEnd = (n - I < bs) ? n : I + bs;

        for (int K = 0; K < n; K += bs) {
            int const kEnd = (n - K < bs) ? n : K + bs;

            for (int J = 0; J < n; J += bs) {
                int const jEnd = (n - J < bs) ? n : J + bs;

                for (int i = I; i < iEnd; ++i) {
                    for (int k = K; k < kEnd; ++k) {
                        if (k == 0) {
                            /* Very first term of each dot product: overwrite C. */
                            for (int j = J; j < jEnd; ++j)
                                C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                        }
                        else {
                            for (int j = J; j < jEnd; ++j)
                                C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                        }
                    }
                }
            }
        }
    }
}
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in blocks - jik
 *
 * Tiles are visited in J, I, K order and, inside a tile, elements are
 * visited in j, i, k order. Each entry of C (addressed through
 * sub2ind(i, j, n)) is reset to zero while the first K tile is processed
 * and accumulated into afterwards, so C needs no prior initialization.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: the last tile in
 *          each direction is clipped to the matrix bounds. A non-positive
 *          value is treated as n (one block spanning the whole matrix).
 * \return none
 */
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* A step of zero or less would never advance the tile loops. */
    int const bs = (s > 0) ? s : n;

    for (int J = 0; J < n; J += bs) {
        /* Clip the tile so that indices never reach n when s does not divide n. */
        int const jEnd = (n - J < bs) ? n : J + bs;

        for (int I = 0; I < n; I += bs) {
            int const iEnd = (n - I < bs) ? n : I + bs;

            for (int K = 0; K < n; K += bs) {
                int const kEnd = (n - K < bs) ? n : K + bs;

                for (int j = J; j < jEnd; ++j) {
                    for (int i = I; i < iEnd; ++i) {
                        /* First K tile: start the dot product from zero. */
                        if (K == 0)
                            C[ sub2ind(i,j,n) ] = 0;

                        for (int k = K; k < kEnd; ++k)
                            C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                    }
                }
            }
        }
    }
}
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in blocks - jki
 *
 * Tiles are visited in J, K, I order and, inside a tile, elements are
 * visited in j, k, i order. The contribution of k == 0 is stored into C
 * directly and every later k is accumulated, so C needs no prior
 * initialization.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: the last tile in
 *          each direction is clipped to the matrix bounds. A non-positive
 *          value is treated as n (one block spanning the whole matrix).
 * \return none
 */
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* A step of zero or less would never advance the tile loops. */
    int const bs = (s > 0) ? s : n;

    for (int J = 0; J < n; J += bs) {
        /* Clip the tile so that indices never reach n when s does not divide n. */
        int const jEnd = (n - J < bs) ? n : J + bs;

        for (int K = 0; K < n; K += bs) {
            int const kEnd = (n - K < bs) ? n : K + bs;

            for (int I = 0; I < n; I += bs) {
                int const iEnd = (n - I < bs) ? n : I + bs;

                for (int j = J; j < jEnd; ++j) {
                    for (int k = K; k < kEnd; ++k) {
                        if (k == 0) {
                            /* Very first term of each dot product: overwrite C. */
                            for (int i = I; i < iEnd; ++i)
                                C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                        }
                        else {
                            for (int i = I; i < iEnd; ++i)
                                C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                        }
                    }
                }
            }
        }
    }
}
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in blocks - kij
 *
 * Tiles are visited in K, I, J order and, inside a tile, elements are
 * visited in k, i, j order.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: the last tile in
 *          each direction is clipped to the matrix bounds. A non-positive
 *          value is treated as n (one block spanning the whole matrix).
 * \return none
 *
 * \note
 * C does not have to be initialized by the caller: every entry is set to
 * zero right before its k == 0 contribution is added. Previous contents of
 * C are therefore discarded, like in the other blocked variants.
 */
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* A step of zero or less would never advance the tile loops. */
    int const bs = (s > 0) ? s : n;

    for (int K = 0; K < n; K += bs) {
        /* Clip the tile so that indices never reach n when s does not divide n. */
        int const kEnd = (n - K < bs) ? n : K + bs;

        for (int I = 0; I < n; I += bs) {
            int const iEnd = (n - I < bs) ? n : I + bs;

            for (int J = 0; J < n; J += bs) {
                int const jEnd = (n - J < bs) ? n : J + bs;

                for (int k = K; k < kEnd; ++k) {
                    for (int i = I; i < iEnd; ++i) {
                        /* k == 0 is the first term of every dot product:
                         * clear the row segment once, outside the hot loop. */
                        if (k == 0) {
                            for (int j = J; j < jEnd; ++j)
                                C[ sub2ind(i,j,n) ] = 0;
                        }

                        for (int j = J; j < jEnd; ++j)
                            C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                    }
                }
            }
        }
    }
}
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in blocks - kji
 *
 * Tiles are visited in K, J, I order and, inside a tile, elements are
 * visited in k, j, i order.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: the last tile in
 *          each direction is clipped to the matrix bounds. A non-positive
 *          value is treated as n (one block spanning the whole matrix).
 * \return none
 *
 * \note
 * C does not have to be initialized by the caller: every entry is set to
 * zero right before its k == 0 contribution is added. Previous contents of
 * C are therefore discarded, like in the other blocked variants.
 */
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* A step of zero or less would never advance the tile loops. */
    int const bs = (s > 0) ? s : n;

    for (int K = 0; K < n; K += bs) {
        /* Clip the tile so that indices never reach n when s does not divide n. */
        int const kEnd = (n - K < bs) ? n : K + bs;

        for (int J = 0; J < n; J += bs) {
            int const jEnd = (n - J < bs) ? n : J + bs;

            for (int I = 0; I < n; I += bs) {
                int const iEnd = (n - I < bs) ? n : I + bs;

                for (int k = K; k < kEnd; ++k) {
                    for (int j = J; j < jEnd; ++j) {
                        /* k == 0 is the first term of every dot product:
                         * clear the column segment once, outside the hot loop. */
                        if (k == 0) {
                            for (int i = I; i < iEnd; ++i)
                                C[ sub2ind(i,j,n) ] = 0;
                        }

                        for (int i = I; i < iEnd; ++i)
                            C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
                    }
                }
            }
        }
    }
}
|
|
|
|
|
|
|
/*! |
|
|
|
* Initialize matrix with random indices and return the matrix pointer. |
|
|
|