|
@@ -273,6 +273,44 @@ void matrixMult_kji_block(float * const C, float const * const A, float const * |
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ]; |
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ]; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*!
 * Square Matrix multiplication in unrolling block of 8 - ikj
 *
 * Computes C = A * B by walking the matrices in 8x8x8 tiles (tile order
 * I, K, J; within a tile i, then k) and updating eight consecutive
 * column entries of one row of C per (i,k) step with hand-unrolled
 * statements. Element (r,c) of every matrix is addressed through
 * sub2ind(r,c,n).
 *
 * C does not have to be zeroed by the caller: the contribution for the
 * very first k index (K+k == 0) is stored with '=', every later one is
 * accumulated with '+='.
 *
 * Tiles on the lower/right edge are clipped to the matrix, so n does not
 * have to be a multiple of 8. C is written while A and B are still being
 * read, so C must not overlap A or B.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \return none
 */
void matrixMult_ikj8(float * const C, float const * const A, float const * const B, int const n) {
  int const step = 8;  /* tile edge; the unrolled path handles exactly this many columns */

  for (int I = 0; I < n; I += step) {
    /* Number of valid rows in this tile (less than 8 only on the last tile). */
    int const iCount = (n - I < step) ? (n - I) : step;

    for (int K = 0; K < n; K += step) {
      int const kCount = (n - K < step) ? (n - K) : step;

      for (int J = 0; J < n; J += step) {
        int const jCount = (n - J < step) ? (n - J) : step;

        for (int i = 0; i < iCount; ++i) {
          for (int k = 0; k < kCount; ++k) {
            /* The A element is the same for the whole row update: load it once. */
            float const a = A[ sub2ind(I+i,K+k,n) ];
            /* The first k index overall initialises C instead of accumulating. */
            int const firstTerm = ((k+K) == 0);

            if (jCount == step) {
              /* Full tile: hand-unrolled update of 8 columns. */
              if (firstTerm) {
                C[ sub2ind(I+i,J+0,n) ] = a * B[ sub2ind(K+k,J+0,n) ];
                C[ sub2ind(I+i,J+1,n) ] = a * B[ sub2ind(K+k,J+1,n) ];
                C[ sub2ind(I+i,J+2,n) ] = a * B[ sub2ind(K+k,J+2,n) ];
                C[ sub2ind(I+i,J+3,n) ] = a * B[ sub2ind(K+k,J+3,n) ];
                C[ sub2ind(I+i,J+4,n) ] = a * B[ sub2ind(K+k,J+4,n) ];
                C[ sub2ind(I+i,J+5,n) ] = a * B[ sub2ind(K+k,J+5,n) ];
                C[ sub2ind(I+i,J+6,n) ] = a * B[ sub2ind(K+k,J+6,n) ];
                C[ sub2ind(I+i,J+7,n) ] = a * B[ sub2ind(K+k,J+7,n) ];
              }
              else {
                C[ sub2ind(I+i,J+0,n) ] += a * B[ sub2ind(K+k,J+0,n) ];
                C[ sub2ind(I+i,J+1,n) ] += a * B[ sub2ind(K+k,J+1,n) ];
                C[ sub2ind(I+i,J+2,n) ] += a * B[ sub2ind(K+k,J+2,n) ];
                C[ sub2ind(I+i,J+3,n) ] += a * B[ sub2ind(K+k,J+3,n) ];
                C[ sub2ind(I+i,J+4,n) ] += a * B[ sub2ind(K+k,J+4,n) ];
                C[ sub2ind(I+i,J+5,n) ] += a * B[ sub2ind(K+k,J+5,n) ];
                C[ sub2ind(I+i,J+6,n) ] += a * B[ sub2ind(K+k,J+6,n) ];
                C[ sub2ind(I+i,J+7,n) ] += a * B[ sub2ind(K+k,J+7,n) ];
              }
            }
            else {
              /* Clipped edge tile: only the columns that exist in the matrix. */
              for (int j = 0; j < jCount; ++j) {
                if (firstTerm) {
                  C[ sub2ind(I+i,J+j,n) ] = a * B[ sub2ind(K+k,J+j,n) ];
                }
                else {
                  C[ sub2ind(I+i,J+j,n) ] += a * B[ sub2ind(K+k,J+j,n) ];
                }
              }
            }
          }
        }
      }
    }
  }
}
|
|
|
|
|
|
|
|
/*! |
|
|
/*! |
|
|
* Initialize matrix with random indices and return the matrix pointer. |
|
|
* Initialize matrix with random indices and return the matrix pointer. |
|
|
* |
|
|
* |
|
|