From 225837ed0e670262f3c5e4db0d608a0e37514c2a Mon Sep 17 00:00:00 2001 From: Christos Choutouridis Date: Fri, 11 Dec 2020 13:11:28 +0200 Subject: [PATCH] Comment changes --- Makefile | 4 +++- inc/config.h | 2 +- inc/impl.hpp | 46 +++++++++++++++++++++++++++++++--------------- src/elearn.cpp | 2 +- src/main.cpp | 15 +++++++++++---- src/v3.cpp | 14 +++++++++----- src/v4.cpp | 33 +++++++++++++++++++-------------- 7 files changed, 75 insertions(+), 41 deletions(-) diff --git a/Makefile b/Makefile index 28162b9..4c7c40f 100644 --- a/Makefile +++ b/Makefile @@ -236,7 +236,9 @@ v4_pthreads: $(BUILD_DIR)/$(TARGET) # > make clean # 3) for v4 cilk for example: # > make csal_v4_cilk -# 4) run executables from `bin/` +# 4) run executables from `bin/`. Examples: +# > ./bin/tcount_ompv3 -i mtx/NACA0015.mtx --timing -r 3 -o /dev/null +# > ./bin/tcount_pthv4 -i mtx/com_Youtube.mtx --timing --dynamic --print_count csal_v3: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=3 csal_v3: TARGET := tcount_v3 diff --git a/inc/config.h b/inc/config.h index 983b2c7..efa2c5f 100644 --- a/inc/config.h +++ b/inc/config.h @@ -22,7 +22,7 @@ #define V3 3 #define V4 4 -// Fail-safe verision selection +// Fail-safe version selection #if !defined CODE_VERSION #define CODE_VERSION V4 #endif diff --git a/inc/impl.hpp b/inc/impl.hpp index bcdfc02..33c8480 100644 --- a/inc/impl.hpp +++ b/inc/impl.hpp @@ -39,7 +39,7 @@ enum class MatrixType { }; /* - * Forward type declerations + * Forward type declarations */ template struct Matrix; template struct SpMat; @@ -298,7 +298,7 @@ struct SpMat { */ DataType get_lin(IndexType i, IndexType j) { IndexType idx; bool found; - std::tie(idx, found) =find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i); + std::tie(idx, found) =find_place_idx(rows, col_ptr[j], col_ptr[j+1], i); return (found) ? values[idx] : 0; } @@ -309,8 +309,9 @@ struct SpMat { * If so we just change it to a new value. If not we add the item on the matrix. 
* * @note - * We don't increase the NNZ value of the struct. We expect the user has already - * change the NNZ value to the right one using @see capacity() function. + * When changing a value, we don't increase the NNZ value of the struct. We expect the user has already + * changed the NNZ value to the right one using @see capacity() function. When adding a value we + * increase the NNZ. * * @param i The row number * @param j The column number @@ -318,7 +319,7 @@ struct SpMat { */ DataType set(DataType v, IndexType i, IndexType j) { IndexType idx; bool found; - std::tie(idx, found) = find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i); + std::tie(idx, found) = find_place_idx(rows, col_ptr[j], col_ptr[j+1], i); if (found) return values[idx] = v; // we don't change NNZ even if we write "0" else { @@ -392,11 +393,13 @@ private: * \param begin The vector's index to begin * \param end The vector's index to end * \param match What to search - * @return The index of the item or end on failure. + * \return A (index, status) pair. + * index is the index of the item or end if not found + * status is true if found, false otherwise */ std::pair find_idx(const std::vector& v, IndexType begin, IndexType end, IndexType match) { IndexType b = begin, e = end-1; - while (true) { + while (b <= e) { IndexType m = (b+e)/2; if (v[m] == match) return std::make_pair(m, true); else if (b >= e) return std::make_pair(end, false); @@ -417,8 +420,11 @@ private: * \param begin The vector's index to begin * \param end The vector's index to end * \param match What to search + * \return A (index, status) pair.
+ * index is the index of the item or, if not found, the place where it should be inserted + * status is true if found, false otherwise */ - std::pair find_lin_idx(const std::vector& v, IndexType begin, IndexType end, IndexType match) { + std::pair find_place_idx(const std::vector& v, IndexType begin, IndexType end, IndexType match) { for ( ; begin < end ; ++begin) { if (match == v[begin]) return std::make_pair(begin, true); else if (match < v[begin]) return std::make_pair(begin, false); @@ -437,7 +443,7 @@ private: //! @{ std::vector values {}; //!< vector to store the values of the matrix std::vector rows{}; //!< vector to store the row information - std::vector col_ptr{1,0}; //!< vector to stor the column pointers + std::vector col_ptr{1,0}; //!< vector to store the column pointers IndexType N{0}; //!< The dimension of the matrix (square) IndexType NNZ{0}; //!< The NNZ (capacity of the matrix) //! @} @@ -496,20 +502,25 @@ struct SpMatCol { /*! * Multiplication operator + * + * We follow only the non-zero values and multiply only the common indexes. + * * @tparam C Universal reference for the type right half site column * * @param c The right hand site matrix * @return The value of the inner product of two vectors + * @note The time complexity is \f$ O(nnz1+nnz2) \f$. + * Where nnz is the max number of non-zero elements in a column of the matrix */ template DataType operator* (C&& c) { static_assert(std::is_same, SpMatCol>(), ""); DataType v{}; while (index() != end() && c.index() != c.end()) { - if (index() < c.index()) advance(); - else if (index() > c.index()) ++c; + if (index() < c.index()) advance(); // advance me + else if (index() > c.index()) ++c; // advance other else { //index() == c.index() - v += get() * *c; + v += get() * *c; // multiply and advance both ++c; advance(); } @@ -597,20 +608,25 @@ struct SpMatRow { /*! * Multiplication operator + * + * We follow only the non-zero values and multiply only the common indexes.
+ * * @tparam C Universal reference for the type right half site column * * @param c The right hand site matrix * @return The value of the inner product of two vectors + * @note The time complexity is \f$ O(N+nnz2) \f$ and is way heavier than the ColxCol multiplication. + * Where nnz is the max number of non-zero elements in a column of the matrix */ template DataType operator* (C&& c) { static_assert(std::is_same, SpMatCol>(), ""); DataType v{}; while (index() != end() && c.index() != c.end()) { - if (index() < c.index()) advance(); - else if (index() > c.index()) ++c; + if (index() < c.index()) advance(); // advance me + else if (index() > c.index()) ++c; // advance other else { //index() == c.index() - v += get()* *c; + v += get() * *c; // multiply and advance both ++c; advance(); } diff --git a/src/elearn.cpp b/src/elearn.cpp index 6010e4c..66827cb 100644 --- a/src/elearn.cpp +++ b/src/elearn.cpp @@ -51,7 +51,7 @@ static void coo2csc_e( */ uint32_t find_idx(const uint32_t* v, uint32_t begin, uint32_t end, uint32_t match) { uint32_t b = begin, e = end-1; - while (1) { + while (b <= e) { uint32_t m = (b+e)/2; if (v[m] == match) return m; else if (b >= e) return end; diff --git a/src/main.cpp b/src/main.cpp index 06c7eb9..9358122 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -116,18 +116,25 @@ bool get_options(int argc, char* argv[]){ std::cout << " --print_graph \n"; std::cout << " Prints the first x part of the matrix to stdout.\n\n"; std::cout << " -h | --help \n"; - std::cout << " Prints this and exit.\n"; + std::cout << " Prints this and exit.\n\n"; + std::cout << "Examples:\n\n"; + std::cout << " Get the total count of matrix in and calculate the time for vector creation 5 times:\n"; + std::cout << " > ./tcount -i --timing --print_count -r 5\n\n"; + std::cout << " Get the vector to of matrix in and print the time to stdout using dynamic scheduling\n"; + std::cout << " > ./tcount -i -o --timing --dynamic\n\n"; + std::cout << " Get the total count of matrix in using workers
only\n"; + std::cout << " > ./tcount -i -n --print_count\n"; exit(0); } else { // parse error - std::cout << "Invokation error. Try -h for details.\n"; + std::cout << "Invocation error. Try -h for details.\n"; status = false; } } // Input checkers if (session.inputMatrix == InputMatrix::UNSPECIFIED) { - std::cout << "Invokation error. Try -h for details.\n"; + std::cout << "Invocation error. Try -h for details.\n"; status = false; } #if CODE_VERSION == V4 @@ -166,7 +173,7 @@ void prepare_matrix (matrix& A, Timing& timer) { timer.print_dt("load matrix"); } - if (session.verbose && session.mtx_print) { + if (session.mtx_print) { logger << "\nMatrix:" << logger.endl; print_graph (A); } diff --git a/src/v3.cpp b/src/v3.cpp index 6bf0b9b..abd2987 100644 --- a/src/v3.cpp +++ b/src/v3.cpp @@ -41,8 +41,8 @@ int nworkers() { * - A lower triangular matrix which update c[i], c[j], c[k]. This is wayyy faster. */ std::vector triang_v(matrix& A) { - std::vector> c(A.size()); - std::vector ret(A.size()); + std::vector> c(A.size()); // atomic for c[j], c[k] only + std::vector ret(A.size()); // unrestricted c[i] access cilk_for (int i=0 ; i triang_v(matrix& A) { c[i] = c[i]/2; } } + // merge c to ret and return it for (index_t i =0 ; i& v, index_t begin, index_t e */ value_t sum (std::vector& v) { int n = nworkers(); - std::vector sum_v(n, 0); // result of each do_sum invokation. + std::vector sum_v(n, 0); // result of each do_sum invocation. // We spawn workers in a more statically way. for (index_t i =0 ; i < n ; ++i) { @@ -141,8 +142,8 @@ int nworkers() { * - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. 
*/ std::vector triang_v(matrix& A) { - std::vector> c(A.size()); - std::vector ret(A.size()); + std::vector> c(A.size()); // atomic for c[j], c[k] only + std::vector ret(A.size()); // unrestricted c[i] access // OMP schedule selection if (session.dynamic) omp_set_schedule (omp_sched_dynamic, 0); @@ -165,6 +166,7 @@ std::vector triang_v(matrix& A) { c[i] = c[i]/2; } } + // merge c to ret and return it for (index_t i =0 ; i triang_v(matrix& A) { ++c[i]; c[j.index()] += (!session.makeSymmetric)? 1:0; c[k.index()] += (!session.makeSymmetric)? 1:0; + //^ We set other nodes in case of lower triangular } } } if (session.makeSymmetric) c[i] /= 2; + //^ We don't have to divide by 2 in case of lower triangular } return c; } diff --git a/src/v4.cpp b/src/v4.cpp index 69c662a..7648473 100644 --- a/src/v4.cpp +++ b/src/v4.cpp @@ -37,15 +37,16 @@ int nworkers() { * vector = --- * (A.* (A*B))*ones_N * 2 * We squeezed all that to one function for performance. The row*column multiplication - * uses the inner CSC structure of sparse matrix and follows only non-zero members. + * uses the inner CSC structure of sparse matrix and follows only non-zero members + * with a time complexity of \f$ O(nnz1 + nnz2) \f$ * * \param A The first matrix to use. * \param B The second matrix to use (they can be the same). * \return The count vector. RVO is used here. * \note * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only - * - A full matrix calculation which update only c[i] - * - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. + * - A full matrix calculation + * - A lower triangular matrix * \warning * The later(--triangular_only) produce correct results ONLY if we are after the total count. */ @@ -76,7 +77,7 @@ void do_sum (value_t& out_sum, std::vector& v, index_t begin, index_t e */ value_t sum (std::vector& v) { int n = nworkers(); - std::vector sum_v(n, 0); // result of each do_sum invokation.
+ std::vector sum_v(n, 0); // result of each do_sum invocation. // We spawn workers in a more statically way. for (index_t i =0 ; i < n ; ++i) { @@ -120,15 +121,16 @@ int nworkers() { * vector = --- * (A.* (A*B))*ones_N * 2 * We squeezed all that to one function for performance. The row*column multiplication - * uses the inner CSC structure of sparse matrix and follows only non-zero members. + * uses the inner CSC structure of sparse matrix and follows only non-zero members + * with a time complexity of \f$ O(nnz1 + nnz2) \f$ * * \param A The first matrix to use. * \param B The second matrix to use (they can be the same). * \return The count vector. RVO is used here. * \note * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only - * - A full matrix calculation which update only c[i] - * - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. + * - A full matrix calculation + * - A lower triangular matrix * \warning * The later(--triangular_only) produce correct results ONLY if we are after the total count. */ @@ -189,7 +191,8 @@ int nworkers() { * 2 * * We squeezed all that to one function for performance. The row*column multiplication - * uses the inner CSC structure of sparse matrix and follows only non-zero members. + * uses the inner CSC structure of sparse matrix and follows only non-zero members + * with a time complexity of \f$ O(nnz1 + nnz2) \f$ * * \param out Reference to output vector * \param A The first matrix to use. @@ -198,8 +201,8 @@ int nworkers() { * \return The count vector. RVO is used here. * \note * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only - * - A full matrix calculation which update only c[i] - * - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
+ * - A full matrix calculation + * - A lower triangular matrix * \warning * The later(--triangular_only) produce correct results ONLY if we are after the total count. */ @@ -260,7 +263,7 @@ void do_sum (value_t& out_sum, std::vector& v, index_t begin, index_t e */ value_t sum (std::vector& v) { int n = nworkers(); - std::vector sum_v(n, 0); // result of each do_sum invokation. + std::vector sum_v(n, 0); // result of each do_sum invocation. std::vector workers; // We spawn workers in a more statically way. @@ -288,16 +291,18 @@ int nworkers() { return 1; } * 1 * vector = --- * (A.* (A*B))*ones_N * 2 + * * We squeezed all that to one function for performance. The row*column multiplication - * uses the inner CSC structure of sparse matrix and follows only non-zero members. + * uses the inner CSC structure of sparse matrix and follows only non-zero members + * with a time complexity of \f$ O(nnz1 + nnz2) \f$ * * \param A The first matrix to use. * \param B The second matrix to use (they can be the same). * \return The count vector. RVO is used here. * \note * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only - * - A full matrix calculation which update only c[i] - * - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. + * - A full matrix calculation + * - A lower triangular matrix * \warning * The later(--triangular_only) produce correct results ONLY if we are after the total count. */