Browse Source

Q6: step 2

master
parent
commit
10be4df8be
4 changed files with 187 additions and 17 deletions
  1. +1
    -1
      Q6-cache/info.txt
  2. +35
    -14
      Q6-cache/src/main.c
  3. +141
    -1
      Q6-cache/src/matmul.c
  4. +10
    -1
      Q6-cache/src/matmul.h

+ 1
- 1
Q6-cache/info.txt View File

@@ -12,6 +12,6 @@


Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309 Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309


Βήμα 2: Λόγος διάμεσων χρόνων: xxxx
Βήμα 2: Λόγος διάμεσων χρόνων: 24.61113257 (s=1024)


Βήμα 3: Λόγος διάμεσων χρόνων: xxxx Βήμα 3: Λόγος διάμεσων χρόνων: xxxx

+ 35
- 14
Q6-cache/src/main.c View File

@@ -52,7 +52,16 @@ mMult_ft multSelect (char* order) {
else return matrixMult_ijk; else return matrixMult_ijk;
} }


/*!
 * Select the blocked matrix multiplication back-end for a loop order.
 * \param   order    The loop order as a string, ex: "ijk", "jik", ...
 * \return  The blocked multiplication routine for that order. Any string
 *          that is not one of the six known orders selects the ijk variant.
 */
mMultBlock_ft multBlockSelect (char* order) {
   /* loop-order tag -> blocked back-end */
   static const struct {
      const char     *tag;
      mMultBlock_ft   backend;
   } lookup[] = {
      { "ijk", matrixMult_ijk_block },
      { "ikj", matrixMult_ikj_block },
      { "jik", matrixMult_jik_block },
      { "jki", matrixMult_jki_block },
      { "kij", matrixMult_kij_block },
      { "kji", matrixMult_kji_block },
   };

   for (size_t slot = 0; slot < sizeof lookup / sizeof lookup[0]; ++slot) {
      if (strcmp (order, lookup[slot].tag) == 0) {
         return lookup[slot].backend;
      }
   }
   return matrixMult_ijk_block;   /* unknown order: default back-end */
}


/*! /*!
* A unit testing like main function to profile our code * A unit testing like main function to profile our code
@@ -61,36 +70,48 @@ int main(int argc, char **argv) {
struct timeval start, end; /* time structs */ struct timeval start, end; /* time structs */
double time[MAX_ITER] = {0.0}; /* execution time array in ms */ double time[MAX_ITER] = {0.0}; /* execution time array in ms */
float *A, *B, *C; /* matrix declarations */ float *A, *B, *C; /* matrix declarations */
mMult_ft mMult =NULL;
mMultBlock_ft mMultBlock =NULL;


/* read matrix size (or use default) */ /* read matrix size (or use default) */
if (argc != 3){
if (argc < 3){
fprintf( stderr, fprintf( stderr,
"Usage:\n" "Usage:\n"
"%s n order, where \n"
"%s n order [block], where \n"
" n: is the matrix size.\n" " n: is the matrix size.\n"
" order: the loop order ex: ijk , jik, ...\n",
" order: the loop order ex: ijk , jik, ...\n"
" block: Optional block size\n",
argv[0]); argv[0]);
exit(1); exit(1);
} }
int n = atoi( argv[1] ); int n = atoi( argv[1] );
mMult_ft mMult = multSelect(argv[2]);
if (argc == 4)
mMultBlock = multBlockSelect(argv[2]);
else
mMult = multSelect(argv[2]);


/* initialize matrices */ /* initialize matrices */
A = matrixInit( n ); A = matrixInit( n );
B = matrixInit( n ); B = matrixInit( n );
C = (float *) malloc( n*n*sizeof(float) ); C = (float *) malloc( n*n*sizeof(float) );


/* compute matrix multiplication */
for (int it = 0; it < MAX_ITER; it++) {
gettimeofday(&start, NULL);
mMult( C, A, B, n );
gettimeofday(&end, NULL);

time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */
/* compute matrix multiplication */
for (int it = 0; it < MAX_ITER; it++) {
if (argc == 4) {
gettimeofday(&start, NULL);
mMultBlock( C, A, B, n, atoi(argv[3]) );
gettimeofday(&end, NULL);
}
else {
gettimeofday(&start, NULL);
mMult( C, A, B, n );
gettimeofday(&end, NULL);
}
time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */


printf("Iter: %d Time: %f ms\n", it, time[it]);
}
printf("Iter: %d Time: %f ms\n", it, time[it]);
}


/* we need to use the result -- verify it */ /* we need to use the result -- verify it */
for (int i = 0; i < n; i++) { /* rows */ for (int i = 0; i < n; i++) { /* rows */


+ 141
- 1
Q6-cache/src/matmul.c View File

@@ -116,7 +116,6 @@ void matrixMult_kij(float * const C, float const * const A, float const * const
* \param B pointer to input matrix B * \param B pointer to input matrix B
* \param n Size of matrices (both sizes) * \param n Size of matrices (both sizes)
* \return none * \return none
* xxx
*/ */
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) { void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) {
for (int k = 0; k < n; ++k) { for (int k = 0; k < n; ++k) {
@@ -132,6 +131,147 @@ void matrixMult_kji(float * const C, float const * const A, float const * const
} }
} }


/*!
 * Square Matrix multiplication in blocks - ijk
 *
 * Computes C = A*B on row-major n x n matrices, walking the operands in
 * s x s blocks (block order I-J-K, element order i-j-k inside a block).
 * C is overwritten, so the caller does not have to initialize it.
 *
 * \param   C  pointer to output matrix
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes)
 * \param   s  The block size. It does not have to divide n; the trailing
 *             blocks are clipped to the matrix. A value outside [1, n]
 *             selects a single block that spans the whole matrix.
 * \return  none
 */
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   if (n <= 0) {
      return;                                   /* empty matrix, nothing to do */
   }
   /* s <= 0 would never advance the block loops, s > n is just one block */
   int const bs = (s > 0 && s < n) ? s : n;

   for (int I = 0; I < n; I += bs) {
      int const iEnd = (n - I > bs) ? I + bs : n;      /* clip last row block */
      for (int J = 0; J < n; J += bs) {
         int const jEnd = (n - J > bs) ? J + bs : n;   /* clip last column block */
         for (int K = 0; K < n; K += bs) {
            int const kEnd = (n - K > bs) ? K + bs : n;
            for (int i = I; i < iEnd; ++i) {
               for (int j = J; j < jEnd; ++j) {
                  /* the first K block starts the sum, later blocks continue it */
                  float acc = (K == 0) ? 0.0f : C[i*n + j];
                  for (int k = K; k < kEnd; ++k) {
                     acc += A[i*n + k] * B[k*n + j];
                  }
                  C[i*n + j] = acc;
               }
            }
         }
      }
   }
}

/*!
 * Square Matrix multiplication in blocks - ikj
 *
 * Computes C = A*B on row-major n x n matrices, walking the operands in
 * s x s blocks (block order I-K-J, element order i-k-j inside a block).
 * C is overwritten, so the caller does not have to initialize it.
 *
 * \param   C  pointer to output matrix
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes)
 * \param   s  The block size. It does not have to divide n; the trailing
 *             blocks are clipped to the matrix. A value outside [1, n]
 *             selects a single block that spans the whole matrix.
 * \return  none
 */
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   if (n <= 0) {
      return;                                   /* empty matrix, nothing to do */
   }
   /* s <= 0 would never advance the block loops, s > n is just one block */
   int const bs = (s > 0 && s < n) ? s : n;

   for (int I = 0; I < n; I += bs) {
      int const iEnd = (n - I > bs) ? I + bs : n;      /* clip last row block */
      for (int K = 0; K < n; K += bs) {
         int const kEnd = (n - K > bs) ? K + bs : n;
         for (int J = 0; J < n; J += bs) {
            int const jEnd = (n - J > bs) ? J + bs : n;   /* clip last column block */
            for (int i = I; i < iEnd; ++i) {
               for (int k = K; k < kEnd; ++k) {
                  float const a = A[i*n + k];   /* invariant over the j sweep */
                  if (k == 0) {
                     /* first term of every sum: store instead of accumulate */
                     for (int j = J; j < jEnd; ++j) {
                        C[i*n + j] = a * B[k*n + j];
                     }
                  }
                  else {
                     for (int j = J; j < jEnd; ++j) {
                        C[i*n + j] += a * B[k*n + j];
                     }
                  }
               }
            }
         }
      }
   }
}

/*!
 * Square Matrix multiplication in blocks - jik
 *
 * Computes C = A*B on row-major n x n matrices, walking the operands in
 * s x s blocks (block order J-I-K, element order j-i-k inside a block).
 * C is overwritten, so the caller does not have to initialize it.
 *
 * \param   C  pointer to output matrix
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes)
 * \param   s  The block size. It does not have to divide n; the trailing
 *             blocks are clipped to the matrix. A value outside [1, n]
 *             selects a single block that spans the whole matrix.
 * \return  none
 */
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   if (n <= 0) {
      return;                                   /* empty matrix, nothing to do */
   }
   /* s <= 0 would never advance the block loops, s > n is just one block */
   int const bs = (s > 0 && s < n) ? s : n;

   for (int J = 0; J < n; J += bs) {
      int const jEnd = (n - J > bs) ? J + bs : n;      /* clip last column block */
      for (int I = 0; I < n; I += bs) {
         int const iEnd = (n - I > bs) ? I + bs : n;   /* clip last row block */
         for (int K = 0; K < n; K += bs) {
            int const kEnd = (n - K > bs) ? K + bs : n;
            for (int j = J; j < jEnd; ++j) {
               for (int i = I; i < iEnd; ++i) {
                  /* the first K block starts the sum, later blocks continue it */
                  float acc = (K == 0) ? 0.0f : C[i*n + j];
                  for (int k = K; k < kEnd; ++k) {
                     acc += A[i*n + k] * B[k*n + j];
                  }
                  C[i*n + j] = acc;
               }
            }
         }
      }
   }
}

/*!
 * Square Matrix multiplication in blocks - jki
 *
 * Computes C = A*B on row-major n x n matrices, walking the operands in
 * s x s blocks (block order J-K-I, element order j-k-i inside a block).
 * C is overwritten, so the caller does not have to initialize it.
 *
 * \param   C  pointer to output matrix
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes)
 * \param   s  The block size. It does not have to divide n; the trailing
 *             blocks are clipped to the matrix. A value outside [1, n]
 *             selects a single block that spans the whole matrix.
 * \return  none
 */
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   if (n <= 0) {
      return;                                   /* empty matrix, nothing to do */
   }
   /* s <= 0 would never advance the block loops, s > n is just one block */
   int const bs = (s > 0 && s < n) ? s : n;

   for (int J = 0; J < n; J += bs) {
      int const jEnd = (n - J > bs) ? J + bs : n;      /* clip last column block */
      for (int K = 0; K < n; K += bs) {
         int const kEnd = (n - K > bs) ? K + bs : n;
         for (int I = 0; I < n; I += bs) {
            int const iEnd = (n - I > bs) ? I + bs : n;   /* clip last row block */
            for (int j = J; j < jEnd; ++j) {
               for (int k = K; k < kEnd; ++k) {
                  float const b = B[k*n + j];   /* invariant over the i sweep */
                  if (k == 0) {
                     /* first term of every sum: store instead of accumulate */
                     for (int i = I; i < iEnd; ++i) {
                        C[i*n + j] = A[i*n + k] * b;
                     }
                  }
                  else {
                     for (int i = I; i < iEnd; ++i) {
                        C[i*n + j] += A[i*n + k] * b;
                     }
                  }
               }
            }
         }
      }
   }
}

/*!
 * Square Matrix multiplication in blocks - kij
 *
 * Computes C = A*B on row-major n x n matrices, walking the operands in
 * s x s blocks (block order K-I-J, element order k-i-j inside a block).
 * C is overwritten: the k == 0 pass stores the first term of every sum, so
 * the caller does not have to zero C beforehand.
 *
 * \param   C  pointer to output matrix
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes)
 * \param   s  The block size. It does not have to divide n; the trailing
 *             blocks are clipped to the matrix. A value outside [1, n]
 *             selects a single block that spans the whole matrix.
 * \return  none
 */
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   if (n <= 0) {
      return;                                   /* empty matrix, nothing to do */
   }
   /* s <= 0 would never advance the block loops, s > n is just one block */
   int const bs = (s > 0 && s < n) ? s : n;

   for (int K = 0; K < n; K += bs) {
      int const kEnd = (n - K > bs) ? K + bs : n;
      for (int I = 0; I < n; I += bs) {
         int const iEnd = (n - I > bs) ? I + bs : n;   /* clip last row block */
         for (int J = 0; J < n; J += bs) {
            int const jEnd = (n - J > bs) ? J + bs : n;   /* clip last column block */
            for (int k = K; k < kEnd; ++k) {
               for (int i = I; i < iEnd; ++i) {
                  float const a = A[i*n + k];   /* invariant over the j sweep */
                  if (k == 0) {
                     /* K is the outermost loop, so k == 0 reaches every
                      * element of C before any accumulation happens */
                     for (int j = J; j < jEnd; ++j) {
                        C[i*n + j] = a * B[k*n + j];
                     }
                  }
                  else {
                     for (int j = J; j < jEnd; ++j) {
                        C[i*n + j] += a * B[k*n + j];
                     }
                  }
               }
            }
         }
      }
   }
}

/*!
 * Square Matrix multiplication in blocks - kji
 *
 * Computes C = A*B on row-major n x n matrices, walking the operands in
 * s x s blocks (block order K-J-I, element order k-j-i inside a block).
 * C is overwritten: the k == 0 pass stores the first term of every sum, so
 * the caller does not have to zero C beforehand.
 *
 * \param   C  pointer to output matrix
 * \param   A  pointer to input matrix A
 * \param   B  pointer to input matrix B
 * \param   n  Size of matrices (both sizes)
 * \param   s  The block size. It does not have to divide n; the trailing
 *             blocks are clipped to the matrix. A value outside [1, n]
 *             selects a single block that spans the whole matrix.
 * \return  none
 */
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
   if (n <= 0) {
      return;                                   /* empty matrix, nothing to do */
   }
   /* s <= 0 would never advance the block loops, s > n is just one block */
   int const bs = (s > 0 && s < n) ? s : n;

   for (int K = 0; K < n; K += bs) {
      int const kEnd = (n - K > bs) ? K + bs : n;
      for (int J = 0; J < n; J += bs) {
         int const jEnd = (n - J > bs) ? J + bs : n;   /* clip last column block */
         for (int I = 0; I < n; I += bs) {
            int const iEnd = (n - I > bs) ? I + bs : n;   /* clip last row block */
            for (int k = K; k < kEnd; ++k) {
               for (int j = J; j < jEnd; ++j) {
                  float const b = B[k*n + j];   /* invariant over the i sweep */
                  if (k == 0) {
                     /* K is the outermost loop, so k == 0 reaches every
                      * element of C before any accumulation happens */
                     for (int i = I; i < iEnd; ++i) {
                        C[i*n + j] = A[i*n + k] * b;
                     }
                  }
                  else {
                     for (int i = I; i < iEnd; ++i) {
                        C[i*n + j] += A[i*n + k] * b;
                     }
                  }
               }
            }
         }
      }
   }
}


/*! /*!
* Initialize matrix with random indices and return the matrix pointer. * Initialize matrix with random indices and return the matrix pointer.


+ 10
- 1
Q6-cache/src/matmul.h View File

@@ -16,7 +16,8 @@
#define sub2ind(i,j,n) (j) + (i)*(n) #define sub2ind(i,j,n) (j) + (i)*(n)


//! Function pointer type to matrix multiplication back-end. //! Function pointer type to matrix multiplication back-end.
typedef void (*mMult_ft)(float * const C, float const * const A, float const * const B, int const n);
typedef void (*mMult_ft)(float * const, float const * const, float const * const, int const);
//! Function pointer type to blocked matrix multiplication back-end (C, A, B, n, block size).
typedef void (*mMultBlock_ft)(float * const, float const * const, float const * const, int const, int const);


void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n); void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n);
void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n); void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n);
@@ -25,6 +26,14 @@ void matrixMult_jki(float * const C, float const * const A, float const * const
void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n); void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n);
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n); void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n);


//! Blocked square matrix multiplication back-ends, one per loop order.
//! The name suffix gives the loop order; \c n is the matrix size and \c s the block size.
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s);


float* matrixInit(int const n); float* matrixInit(int const n);


#endif /* SRC_MATMUL_H_ */ #endif /* SRC_MATMUL_H_ */

Loading…
Cancel
Save