Comment changes

4 years ago · 225837ed0e
--- a/+ 3
+++ b/+ 3
@@ -236,7 +236,9 @@ v4_pthreads: $(BUILD_DIR)/$(TARGET)
 #    > make clean
 # 3) for v4 cilk for example:
 #    > make csal_v4_cilk
 # 4) run executables from `bin/`
 # 4) run executables from `bin/`. Examples:
 #    > ./bin/tcount_ompv3 -i mtx/NACA0015.mtx --timing -r 3 -o /dev/null
 #    > ./bin/tcount_pthv4 -i mtx/com_Youtube.mtx --timing --dynamic --print_count
 csal_v3: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=3
 csal_v3: TARGET := tcount_v3
--- a/inc/config.h
+++ b/inc/config.h
@@ -22,7 +22,7 @@
 #define  V3    3
 #define  V4    4
 // Fail-safe verision selection
 // Fail-safe version selection
 #if !defined CODE_VERSION
 #define CODE_VERSION   V4
 #endif
--- a/inc/impl.hpp
+++ b/inc/impl.hpp
@@ -39,7 +39,7 @@ enum class MatrixType {
 };
 /*
 * Forward type declerations
 * Forward type declarations
 */
 template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct Matrix;
 template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct SpMat;
@@ -298,7 +298,7 @@ struct SpMat {
    */
   DataType get_lin(IndexType i, IndexType j) {
      IndexType idx; bool found;
      std::tie(idx, found) =find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i);
      std::tie(idx, found) =find_place_idx(rows, col_ptr[j], col_ptr[j+1], i);
      return (found) ? values[idx] : 0;
   }
@@ -309,8 +309,9 @@ struct SpMat {
    * If so we just change it to a new value. If not we add the item to the matrix.
    *
    * @note
    *    We don't increase the NNZ value of the struct. We expect the user has already
    *    change the NNZ value to the right one using @see capacity() function.
    *    When changing a value, we don't increase the NNZ value of the struct. We expect the user has already
    *    changed the NNZ value to the right one using @see capacity() function. When adding a value we
    *    increase the NNZ.
    *
    * @param i    The row number
    * @param j    The column number
@@ -318,7 +319,7 @@ struct SpMat {
    */
   DataType set(DataType v, IndexType i, IndexType j) {
      IndexType idx; bool found;
      std::tie(idx, found) = find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i);
      std::tie(idx, found) = find_place_idx(rows, col_ptr[j], col_ptr[j+1], i);
      if (found)
         return values[idx] = v;    // we don't change NNZ even if we write "0"
      else {
@@ -392,11 +393,13 @@ private:
    * \param   begin The vector's index to begin
    * \param   end   The vector's index to end
    * \param   match What to search
    * @return        The index of the item or end on failure.
    * \return  An <index, status> pair.
    *                index    is the index of the item or end if not found
    *                status   is true if found, false otherwise
    */
   std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
      IndexType b = begin, e = end-1;
      while (true) {
      while (b <= e) {
         IndexType m = (b+e)/2;
         if       (v[m] == match)   return  std::make_pair(m, true);
         else if  (b >= e)          return  std::make_pair(end, false);
@@ -417,8 +420,11 @@ private:
    * \param   begin The vector's index to begin
    * \param   end   The vector's index to end
    * \param   match What to search
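    * \return  An <index, status> pair.
    *                index    is the index of the item if found; otherwise the index of the first
    *                         element greater than match, i.e. the place to insert it
    *                         (presumably end when no such element exists)
    *                status   is true if found, false otherwise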
    */
   std::pair<IndexType, bool> find_lin_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
   std::pair<IndexType, bool> find_place_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
      for ( ; begin < end ; ++begin) {
         if       (match == v[begin])   return std::make_pair(begin, true);
         else if  (match <  v[begin])   return std::make_pair(begin, false);
@@ -437,7 +443,7 @@ private:
   //! @{
   std::vector<DataType>   values {};     //!< vector to store the values of the matrix
   std::vector<IndexType>  rows{};        //!< vector to store the row information
   std::vector<IndexType>  col_ptr{1,0};  //!< vector to stor the column pointers
   std::vector<IndexType>  col_ptr{1,0};  //!< vector to store the column pointers
   IndexType   N{0};                      //!< The dimension of the matrix (square)
   IndexType   NNZ{0};                    //!< The NNZ (capacity of the matrix)
   //! @}
@@ -496,20 +502,25 @@ struct SpMatCol {
   /*!
    * Multiplication operator
    *
    * We follow only the non-zero values and multiply only the common indexes.
    *
    * @tparam C   Universal reference for the type of the right hand side column
    *
    * @param c    The right hand side column
    * @return     The value of the inner product of the two vectors
    * @note       The time complexity is \$ O(nnz1+nnz2) \$,
    *             where nnz is the maximum number of non-zero elements in a column of the matrix
    */
   template <typename C>
   DataType operator* (C&& c) {
      static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), "");
      DataType v{};
      while (index() != end() && c.index() != c.end()) {
         if      (index() < c.index())  advance();
         else if (index() > c.index())  ++c;
         if      (index() < c.index())  advance(); // advance me
         else if (index() > c.index())  ++c;       // advance other
         else { //index() == c.index()
            v += get() * *c;
            v += get() * *c;                       // multiply and advance both
            ++c;
            advance();
         }
@@ -597,20 +608,25 @@ struct SpMatRow {
   /*!
    * Multiplication operator
    *
    * We follow only the non-zero values and multiply only the common indexes.
    *
    * @tparam C   Universal reference for the type of the right hand side column
    *
    * @param c    The right hand side column
    * @return     The value of the inner product of the two vectors
    * @note       The time complexity is \$ O(N+nnz2) \$ and way heavier than the ColxCol multiplication,
    *             where nnz is the maximum number of non-zero elements in a column of the matrix
    */
   template <typename C>
   DataType operator* (C&& c) {
      static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), "");
      DataType v{};
      while (index() != end() && c.index() != c.end()) {
         if      (index() < c.index())  advance();
         else if (index() > c.index())  ++c;
         if      (index() < c.index())  advance(); // advance me
         else if (index() > c.index())  ++c;       // advance other
         else { //index() == c.index()
            v += get()* *c;
            v += get() * *c;                       // multiply and advance both
            ++c;
            advance();
         }
--- a/src/elearn.cpp
+++ b/src/elearn.cpp
@@ -51,7 +51,7 @@ static void coo2csc_e(
 */
 uint32_t find_idx(const uint32_t* v, uint32_t begin, uint32_t end, uint32_t match) {
   uint32_t b = begin, e = end-1;
   while (1) {
   while (b <= e) {
      uint32_t m = (b+e)/2;
      if       (v[m] == match)   return  m;
      else if  (b >= e)          return  end;
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -116,18 +116,25 @@ bool get_options(int argc, char* argv[]){
         std::cout << "   --print_graph <size>\n";
         std::cout << "      Prints the first <size> x <size> part of the matrix to stdout.\n\n";
         std::cout << "   -h | --help <size>\n";
         std::cout << "      Prints this and exit.\n";
         std::cout << "      Prints this and exit.\n\n";
         std::cout << "Examples:\n\n";
         std::cout << "   Get the total count of matrix in <MFILE> and calculate the time for vector creation 5 times:\n";
         std::cout << "   > ./tcount -i <MFILE> --timing --print_count -r 5\n\n";
         std::cout << "   Get the vector to <OUTFILE> of matrix in <MFILE> and print the time to stdout using dynamic scheduling\n";
         std::cout << "   > ./tcount -i <MFILE> -o <OUTFILE> --timing --dynamic\n\n";
         std::cout << "   Get the total count of matrix in <MFILE> using <N> workers only\n";
         std::cout << "   > ./tcount -i <MFILE> -n <N> --print_count\n";
         exit(0);
      }
      else {   // parse error
         std::cout << "Invokation error. Try -h for details.\n";
         std::cout << "Invocation error. Try -h for details.\n";
         status = false;
      }
   }
   // Input checkers
   if (session.inputMatrix == InputMatrix::UNSPECIFIED) {
      std::cout << "Invokation error. Try -h for details.\n";
      std::cout << "Invocation error. Try -h for details.\n";
      status = false;
   }
 #if CODE_VERSION == V4
@@ -166,7 +173,7 @@ void prepare_matrix (matrix& A, Timing& timer) {
      timer.print_dt("load matrix");
   }
   if (session.verbose && session.mtx_print) {
   if (session.mtx_print) {
      logger << "\nMatrix:" << logger.endl;
      print_graph (A);
   }
--- a/src/v3.cpp
+++ b/src/v3.cpp
@@ -41,8 +41,8 @@ int nworkers() {
 *    - A lower triangular matrix which updates c[i], c[j], c[k]. This is way faster.
 */
 std::vector<value_t> triang_v(matrix& A) {
   std::vector<std::atomic<value_t>> c(A.size());
   std::vector<value_t> ret(A.size());
   std::vector<std::atomic<value_t>> c(A.size());  // atomic for c[j], c[k] only
   std::vector<value_t> ret(A.size());             // unrestricted c[i] access
   cilk_for (int i=0 ; i<A.size() ; ++i) {
      for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
@@ -61,6 +61,7 @@ std::vector<value_t> triang_v(matrix& A) {
         c[i] = c[i]/2;
      }
   }
   // merge c to ret and return it
   for (index_t i =0 ; i<A.size() ; ++i)   ret[i] += c[i];
   return ret;
 }
@@ -80,7 +81,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e
 */
 value_t sum (std::vector<value_t>& v) {
   int n = nworkers();
   std::vector<value_t> sum_v(n, 0);   // result of each do_sum invokation.
   std::vector<value_t> sum_v(n, 0);   // result of each do_sum invocation.
   // We spawn workers in a more static way.
   for (index_t i =0 ; i < n ; ++i) {
@@ -141,8 +142,8 @@ int nworkers() {
 *    - A lower triangular matrix which updates c[i], c[j], c[k]. This is way faster.
 */
 std::vector<value_t> triang_v(matrix& A) {
   std::vector<std::atomic<value_t>> c(A.size());
   std::vector<value_t> ret(A.size());
   std::vector<std::atomic<value_t>> c(A.size());  // atomic for c[j], c[k] only
   std::vector<value_t> ret(A.size());             // unrestricted c[i] access
   // OMP schedule selection
   if (session.dynamic)    omp_set_schedule (omp_sched_dynamic, 0);
@@ -165,6 +166,7 @@ std::vector<value_t> triang_v(matrix& A) {
         c[i] = c[i]/2;
      }
   }
   // merge c to ret and return it
   for (index_t i =0 ; i<A.size() ; ++i)   ret[i] += c[i];
   return ret;
 }
@@ -210,10 +212,12 @@ std::vector<value_t> triang_v(matrix& A) {
               ++c[i];
               c[j.index()] += (!session.makeSymmetric)? 1:0;
               c[k.index()] += (!session.makeSymmetric)? 1:0;
               //^ We set other nodes in case of lower triangular
            }
         }
      }
      if (session.makeSymmetric) c[i] /= 2;
      //^ We don't have to divide by 2 in case of lower triangular
   }
   return c;
 }
--- a/src/v4.cpp
+++ b/src/v4.cpp
@@ -37,15 +37,16 @@ int nworkers() {
 * vector = --- * (A.* (A*B))*ones_N
 *           2
 * We squeezed all that to one function for performance. The row*column multiplication
 * uses the inner CSC structure of sparse matrix and follows only non-zero members.
 * uses the inner CSC structure of sparse matrix and follows only non-zero members
 * with a time complexity of \$ O(nnz1 + nnz2) \$
 *
 * \param   A  The first matrix to use.
 * \param   B  The second matrix to use (they can be the same).
 * \return  The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
 *    - A full matrix calculation which update only c[i]
 *    - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
 *    - A full matrix calculation
 *    - A lower triangular matrix
 * \warning
 *    The latter (--triangular_only) produces correct results ONLY if we are after the total count.
 */
@@ -76,7 +77,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e
 */
 value_t sum (std::vector<value_t>& v) {
   int n = nworkers();
   std::vector<value_t> sum_v(n, 0);   // result of each do_sum invokation.
   std::vector<value_t> sum_v(n, 0);   // result of each do_sum invocation.
   // We spawn workers in a more static way.
   for (index_t i =0 ; i < n ; ++i) {
@@ -120,15 +121,16 @@ int nworkers() {
 * vector = --- * (A.* (A*B))*ones_N
 *           2
 * We squeezed all that to one function for performance. The row*column multiplication
 * uses the inner CSC structure of sparse matrix and follows only non-zero members.
 * uses the inner CSC structure of sparse matrix and follows only non-zero members
 * with a time complexity of \$ O(nnz1 + nnz2) \$
 *
 * \param   A  The first matrix to use.
 * \param   B  The second matrix to use (they can be the same).
 * \return  The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
 *    - A full matrix calculation which update only c[i]
 *    - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
 *    - A full matrix calculation
 *    - A lower triangular matrix
 * \warning
 *    The latter (--triangular_only) produces correct results ONLY if we are after the total count.
 */
@@ -189,7 +191,8 @@ int nworkers() {
 *                       2
 *
 * We squeezed all that to one function for performance. The row*column multiplication
 * uses the inner CSC structure of sparse matrix and follows only non-zero members.
 * uses the inner CSC structure of sparse matrix and follows only non-zero members
 * with a time complexity of \$ O(nnz1 + nnz2) \$
 *
 * \param   out   Reference to output vector
 * \param   A     The first matrix to use.
@@ -198,8 +201,8 @@ int nworkers() {
 * \return  The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
 *    - A full matrix calculation which update only c[i]
 *    - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
 *    - A full matrix calculation
 *    - A lower triangular matrix
 * \warning
 *    The latter (--triangular_only) produces correct results ONLY if we are after the total count.
 */
@@ -260,7 +263,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e
 */
 value_t sum (std::vector<value_t>& v) {
   int n = nworkers();
   std::vector<value_t> sum_v(n, 0);   // result of each do_sum invokation.
   std::vector<value_t> sum_v(n, 0);   // result of each do_sum invocation.
   std::vector<std::thread> workers;
   // We spawn workers in a more static way.
@@ -288,16 +291,18 @@ int nworkers() { return 1; }
 *           1
 * vector = --- * (A.* (A*B))*ones_N
 *           2
 *
 * We squeezed all that to one function for performance. The row*column multiplication
 * uses the inner CSC structure of sparse matrix and follows only non-zero members.
 * uses the inner CSC structure of sparse matrix and follows only non-zero members
 * with a time complexity of \$ O(nnz1 + nnz2) \$
 *
 * \param   A  The first matrix to use.
 * \param   B  The second matrix to use (they can be the same).
 * \return  The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
 *    - A full matrix calculation which update only c[i]
 *    - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
 *    - A full matrix calculation
 *    - A lower triangular matrix
 * \warning
 *    The latter (--triangular_only) produces correct results ONLY if we are after the total count.
 */