소스 검색

Q6: step 2

master
부모
커밋
10be4df8be
4개의 변경된 파일, 187개의 추가 및 17개의 삭제
  1. +1
    -1
      Q6-cache/info.txt
  2. +35
    -14
      Q6-cache/src/main.c
  3. +141
    -1
      Q6-cache/src/matmul.c
  4. +10
    -1
      Q6-cache/src/matmul.h

+ 1
- 1
Q6-cache/info.txt 파일 보기

@@ -12,6 +12,6 @@

Βήμα 1: Λόγος διάμεσων χρόνων: 20.265193309

Βήμα 2: Λόγος διάμεσων χρόνων: xxxx
Βήμα 2: Λόγος διάμεσων χρόνων: 24.61113257 (s=1024)

Βήμα 3: Λόγος διάμεσων χρόνων: xxxx

+ 35
- 14
Q6-cache/src/main.c 파일 보기

@@ -52,7 +52,16 @@ mMult_ft multSelect (char* order) {
else return matrixMult_ijk;
}

/*!
 * Pick the blocked multiplication back-end that matches a loop-order name.
 * \param order  Loop order string: "ijk", "ikj", "jik", "jki", "kij" or "kji"
 * \return Pointer to the matching blocked back-end; matrixMult_ijk_block
 *         when the string matches none of the known orders
 */
mMultBlock_ft multBlockSelect (char* order) {
    /* Lookup table: loop-order tag -> blocked kernel. */
    static const struct {
        const char    *tag;
        mMultBlock_ft  kernel;
    } table[] = {
        { "ijk", matrixMult_ijk_block },
        { "ikj", matrixMult_ikj_block },
        { "jik", matrixMult_jik_block },
        { "jki", matrixMult_jki_block },
        { "kij", matrixMult_kij_block },
        { "kji", matrixMult_kji_block },
    };

    for (unsigned idx = 0; idx < sizeof table / sizeof table[0]; ++idx) {
        if (strcmp(order, table[idx].tag) == 0) {
            return table[idx].kernel;
        }
    }

    /* Unknown order: fall back to the ijk variant. */
    return matrixMult_ijk_block;
}

/*!
* A unit testing like main function to profile our code
@@ -61,36 +70,48 @@ int main(int argc, char **argv) {
struct timeval start, end; /* time structs */
double time[MAX_ITER] = {0.0}; /* execution time array in ms */
float *A, *B, *C; /* matrix declarations */
mMult_ft mMult =NULL;
mMultBlock_ft mMultBlock =NULL;

/* read matrix size (or use default) */
if (argc != 3){
if (argc < 3){
fprintf( stderr,
"Usage:\n"
"%s n order, where \n"
"%s n order [block], where \n"
" n: is the matrix size.\n"
" order: the loop order ex: ijk , jik, ...\n",
" order: the loop order ex: ijk , jik, ...\n"
" block: Optional block size\n",
argv[0]);
exit(1);
}
int n = atoi( argv[1] );
mMult_ft mMult = multSelect(argv[2]);
if (argc == 4)
mMultBlock = multBlockSelect(argv[2]);
else
mMult = multSelect(argv[2]);

/* initialize matrices */
A = matrixInit( n );
B = matrixInit( n );
C = (float *) malloc( n*n*sizeof(float) );

/* compute matrix multiplication */
for (int it = 0; it < MAX_ITER; it++) {
gettimeofday(&start, NULL);
mMult( C, A, B, n );
gettimeofday(&end, NULL);

time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */
/* compute matrix multiplication */
for (int it = 0; it < MAX_ITER; it++) {
if (argc == 4) {
gettimeofday(&start, NULL);
mMultBlock( C, A, B, n, atoi(argv[3]) );
gettimeofday(&end, NULL);
}
else {
gettimeofday(&start, NULL);
mMult( C, A, B, n );
gettimeofday(&end, NULL);
}
time[it] = (end.tv_sec - start.tv_sec) * 1000.0 + /* sec to ms */
(end.tv_usec - start.tv_usec) / 1000.0; /* us to ms */

printf("Iter: %d Time: %f ms\n", it, time[it]);
}
printf("Iter: %d Time: %f ms\n", it, time[it]);
}

/* we need to use the result -- verify it */
for (int i = 0; i < n; i++) { /* rows */


+ 141
- 1
Q6-cache/src/matmul.c 파일 보기

@@ -116,7 +116,6 @@ void matrixMult_kij(float * const C, float const * const A, float const * const
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
* xxx
*/
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) {
for (int k = 0; k < n; ++k) {
@@ -132,6 +131,147 @@ void matrixMult_kji(float * const C, float const * const A, float const * const
}
}

/*!
 * Square Matrix multiplication in blocks - ijk
 *
 * Computes C = A * B. C is overwritten; it does not need to be initialized.
 * Elements are addressed row-major (row * n + column), the same layout
 * sub2ind uses.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: edge blocks are
 *          clamped to the matrix. A value <= 0 or >= n means one n x n block.
 * \return none
 */
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* Guard against s == 0 (endless block loop) and negative/oversized s. */
    int const bs = (s > 0 && s < n) ? s : n;

    for (int I = 0; I < n; I += bs) {
        int const i_end = (I + bs < n) ? I + bs : n;
        for (int J = 0; J < n; J += bs) {
            int const j_end = (J + bs < n) ? J + bs : n;
            for (int K = 0; K < n; K += bs) {
                int const k_end = (K + bs < n) ? K + bs : n;
                for (int i = I; i < i_end; ++i) {
                    float       *c_row = C + i * n;
                    float const *a_row = A + i * n;
                    for (int j = J; j < j_end; ++j) {
                        /* First K block for this element: start the sum from zero. */
                        if (K == 0)
                            c_row[j] = 0;
                        for (int k = K; k < k_end; ++k)
                            c_row[j] += a_row[k] * B[k * n + j];
                    }
                }
            }
        }
    }
}

/*!
 * Square Matrix multiplication in blocks - ikj
 *
 * Computes C = A * B. C is overwritten; it does not need to be initialized.
 * Elements are addressed row-major (row * n + column), the same layout
 * sub2ind uses.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: edge blocks are
 *          clamped to the matrix. A value <= 0 or >= n means one n x n block.
 * \return none
 */
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* Guard against s == 0 (endless block loop) and negative/oversized s. */
    int const bs = (s > 0 && s < n) ? s : n;

    for (int I = 0; I < n; I += bs) {
        int const i_end = (I + bs < n) ? I + bs : n;
        for (int K = 0; K < n; K += bs) {
            int const k_end = (K + bs < n) ? K + bs : n;
            for (int J = 0; J < n; J += bs) {
                int const j_end = (J + bs < n) ? J + bs : n;
                for (int i = I; i < i_end; ++i) {
                    float *c_row = C + i * n;
                    for (int k = K; k < k_end; ++k) {
                        float const  a_ik  = A[i * n + k];
                        float const *b_row = B + k * n;
                        if (k == 0) {
                            /* Very first k step: assign, which initializes C. */
                            for (int j = J; j < j_end; ++j)
                                c_row[j] = a_ik * b_row[j];
                        }
                        else {
                            for (int j = J; j < j_end; ++j)
                                c_row[j] += a_ik * b_row[j];
                        }
                    }
                }
            }
        }
    }
}

/*!
 * Square Matrix multiplication in blocks - jik
 *
 * Computes C = A * B. C is overwritten; it does not need to be initialized.
 * Elements are addressed row-major (row * n + column), the same layout
 * sub2ind uses.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: edge blocks are
 *          clamped to the matrix. A value <= 0 or >= n means one n x n block.
 * \return none
 */
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* Guard against s == 0 (endless block loop) and negative/oversized s. */
    int const bs = (s > 0 && s < n) ? s : n;

    for (int J = 0; J < n; J += bs) {
        int const j_end = (J + bs < n) ? J + bs : n;
        for (int I = 0; I < n; I += bs) {
            int const i_end = (I + bs < n) ? I + bs : n;
            for (int K = 0; K < n; K += bs) {
                int const k_end = (K + bs < n) ? K + bs : n;
                for (int j = J; j < j_end; ++j) {
                    for (int i = I; i < i_end; ++i) {
                        float       *c_ij  = C + i * n + j;
                        float const *a_row = A + i * n;
                        /* First K block for this element: start the sum from zero. */
                        if (K == 0)
                            *c_ij = 0;
                        for (int k = K; k < k_end; ++k)
                            *c_ij += a_row[k] * B[k * n + j];
                    }
                }
            }
        }
    }
}

/*!
 * Square Matrix multiplication in blocks - jki
 *
 * Computes C = A * B. C is overwritten; it does not need to be initialized.
 * Elements are addressed row-major (row * n + column), the same layout
 * sub2ind uses.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: edge blocks are
 *          clamped to the matrix. A value <= 0 or >= n means one n x n block.
 * \return none
 */
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* Guard against s == 0 (endless block loop) and negative/oversized s. */
    int const bs = (s > 0 && s < n) ? s : n;

    for (int J = 0; J < n; J += bs) {
        int const j_end = (J + bs < n) ? J + bs : n;
        for (int K = 0; K < n; K += bs) {
            int const k_end = (K + bs < n) ? K + bs : n;
            for (int I = 0; I < n; I += bs) {
                int const i_end = (I + bs < n) ? I + bs : n;
                for (int j = J; j < j_end; ++j) {
                    for (int k = K; k < k_end; ++k) {
                        float const b_kj = B[k * n + j];
                        if (k == 0) {
                            /* Very first k step: assign, which initializes C. */
                            for (int i = I; i < i_end; ++i)
                                C[i * n + j] = A[i * n + k] * b_kj;
                        }
                        else {
                            for (int i = I; i < i_end; ++i)
                                C[i * n + j] += A[i * n + k] * b_kj;
                        }
                    }
                }
            }
        }
    }
}

/*!
 * Square Matrix multiplication in blocks - kij
 *
 * Computes C = A * B. C is overwritten: the first k step assigns instead of
 * accumulating, so the caller does not need to zero C beforehand.
 * Elements are addressed row-major (row * n + column), the same layout
 * sub2ind uses.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: edge blocks are
 *          clamped to the matrix. A value <= 0 or >= n means one n x n block.
 * \return none
 */
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* Guard against s == 0 (endless block loop) and negative/oversized s. */
    int const bs = (s > 0 && s < n) ? s : n;

    for (int K = 0; K < n; K += bs) {
        int const k_end = (K + bs < n) ? K + bs : n;
        for (int I = 0; I < n; I += bs) {
            int const i_end = (I + bs < n) ? I + bs : n;
            for (int J = 0; J < n; J += bs) {
                int const j_end = (J + bs < n) ? J + bs : n;
                for (int k = K; k < k_end; ++k) {
                    float const *b_row = B + k * n;
                    for (int i = I; i < i_end; ++i) {
                        float const  a_ik  = A[i * n + k];
                        float       *c_row = C + i * n;
                        if (k == 0) {
                            /* K is the outermost loop, so k == 0 reaches every
                             * element of C before any accumulation happens. */
                            for (int j = J; j < j_end; ++j)
                                c_row[j] = a_ik * b_row[j];
                        }
                        else {
                            for (int j = J; j < j_end; ++j)
                                c_row[j] += a_ik * b_row[j];
                        }
                    }
                }
            }
        }
    }
}

/*!
 * Square Matrix multiplication in blocks - kji
 *
 * Computes C = A * B. C is overwritten: the first k step assigns instead of
 * accumulating, so the caller does not need to zero C beforehand.
 * Elements are addressed row-major (row * n + column), the same layout
 * sub2ind uses.
 *
 * \param C pointer to output matrix
 * \param A pointer to input matrix A
 * \param B pointer to input matrix B
 * \param n Size of matrices (both sizes)
 * \param s The block size. It does not have to divide n: edge blocks are
 *          clamped to the matrix. A value <= 0 or >= n means one n x n block.
 * \return none
 */
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
    /* Guard against s == 0 (endless block loop) and negative/oversized s. */
    int const bs = (s > 0 && s < n) ? s : n;

    for (int K = 0; K < n; K += bs) {
        int const k_end = (K + bs < n) ? K + bs : n;
        for (int J = 0; J < n; J += bs) {
            int const j_end = (J + bs < n) ? J + bs : n;
            for (int I = 0; I < n; I += bs) {
                int const i_end = (I + bs < n) ? I + bs : n;
                for (int k = K; k < k_end; ++k) {
                    for (int j = J; j < j_end; ++j) {
                        float const b_kj = B[k * n + j];
                        if (k == 0) {
                            /* K is the outermost loop, so k == 0 reaches every
                             * element of C before any accumulation happens. */
                            for (int i = I; i < i_end; ++i)
                                C[i * n + j] = A[i * n + k] * b_kj;
                        }
                        else {
                            for (int i = I; i < i_end; ++i)
                                C[i * n + j] += A[i * n + k] * b_kj;
                        }
                    }
                }
            }
        }
    }
}

/*!
* Initialize matrix with random indices and return the matrix pointer.


+ 10
- 1
Q6-cache/src/matmul.h 파일 보기

@@ -16,7 +16,8 @@
//! Row-major linear index of element (i, j) in a matrix with n columns.
//! The whole expansion is parenthesized so the macro keeps its value when
//! used inside a larger expression (e.g. 2 * sub2ind(i,j,n)).
#define sub2ind(i,j,n) ((j) + (i)*(n))

//! Function pointer type to matrix multiplication back-end.
//! Argument order: output matrix C, input matrix A, input matrix B, size n.
//! NOTE(review): mMult_ft appears twice below with the same type; the first
//! form names the parameters, the second omits them -- confirm which to keep.
typedef void (*mMult_ft)(float * const C, float const * const A, float const * const B, int const n);
typedef void (*mMult_ft)(float * const, float const * const, float const * const, int const);
//! Function pointer type to blocked matrix multiplication back-end.
//! Same arguments as mMult_ft plus a trailing int, which the *_block
//! functions declare as the block size s.
typedef void (*mMultBlock_ft)(float * const, float const * const, float const * const, int const, int const);

//! Matrix multiplication back-ends without a block-size parameter, one per
//! loop order named in the suffix. All share the mMult_ft signature:
//! output matrix C, input matrices A and B, matrix size n.
void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n);
void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n);
@@ -25,6 +26,14 @@ void matrixMult_jki(float * const C, float const * const A, float const * const
void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n);
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n);

//! Blocked matrix multiplication back-ends, one per loop order named in the
//! suffix. All share the mMultBlock_ft signature: output matrix C, input
//! matrices A and B, matrix size n, and block size s.
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s);
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s);


//! Initialize a matrix for size n and return the matrix pointer.
//! NOTE(review): how the buffer is allocated and who must release it is not
//! visible from this header -- confirm against the definition in matmul.c.
float* matrixInit(int const n);

#endif /* SRC_MATMUL_H_ */

불러오는 중...
취소
저장