@@ -236,7 +236,9 @@ v4_pthreads: $(BUILD_DIR)/$(TARGET) | |||||
# > make clean | # > make clean | ||||
# 3) for v4 cilk for example: | # 3) for v4 cilk for example: | ||||
# > make csal_v4_cilk | # > make csal_v4_cilk | ||||
# 4) run executables from `bin/` | |||||
# 4) run executables from `bin/`. Examples: | |||||
# > ./bin/tcount_ompv3 -i mtx/NACA0015.mtx --timing -r 3 -o /dev/null | |||||
# > ./bin/tcount_pthv4 -i mtx/com_Youtube.mtx --timing --dynamic --print_count | |||||
csal_v3: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=3 | csal_v3: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=3 | ||||
csal_v3: TARGET := tcount_v3 | csal_v3: TARGET := tcount_v3 | ||||
@@ -22,7 +22,7 @@ | |||||
#define V3 3 | #define V3 3 | ||||
#define V4 4 | #define V4 4 | ||||
// Fail-safe verision selection | |||||
// Fail-safe version selection | |||||
#if !defined CODE_VERSION | #if !defined CODE_VERSION | ||||
#define CODE_VERSION V4 | #define CODE_VERSION V4 | ||||
#endif | #endif | ||||
@@ -39,7 +39,7 @@ enum class MatrixType { | |||||
}; | }; | ||||
/* | /* | ||||
* Forward type declerations | |||||
* Forward type declarations | |||||
*/ | */ | ||||
template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct Matrix; | template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct Matrix; | ||||
template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct SpMat; | template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct SpMat; | ||||
@@ -298,7 +298,7 @@ struct SpMat { | |||||
*/ | */ | ||||
DataType get_lin(IndexType i, IndexType j) { | DataType get_lin(IndexType i, IndexType j) { | ||||
IndexType idx; bool found; | IndexType idx; bool found; | ||||
std::tie(idx, found) =find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i); | |||||
std::tie(idx, found) =find_place_idx(rows, col_ptr[j], col_ptr[j+1], i); | |||||
return (found) ? values[idx] : 0; | return (found) ? values[idx] : 0; | ||||
} | } | ||||
@@ -309,8 +309,9 @@ struct SpMat { | |||||
* If so we just change it to a new value. If not we add the item on the matrix. | * If so we just change it to a new value. If not we add the item on the matrix. | ||||
* | * | ||||
* @note | * @note | ||||
* We don't increase the NNZ value of the struct. We expect the user has already | |||||
* change the NNZ value to the right one using @see capacity() function. | |||||
* When changing a value, we don't increase the NNZ value of the struct. We expect the user has already | |||||
* changed the NNZ value to the right one using @see capacity() function. When adding a value we | |||||
* increase the NNZ. | |||||
* | * | ||||
* @param i The row number | * @param i The row number | ||||
* @param j The column number | * @param j The column number | ||||
@@ -318,7 +319,7 @@ struct SpMat { | |||||
*/ | */ | ||||
DataType set(DataType v, IndexType i, IndexType j) { | DataType set(DataType v, IndexType i, IndexType j) { | ||||
IndexType idx; bool found; | IndexType idx; bool found; | ||||
std::tie(idx, found) = find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i); | |||||
std::tie(idx, found) = find_place_idx(rows, col_ptr[j], col_ptr[j+1], i); | |||||
if (found) | if (found) | ||||
return values[idx] = v; // we don't change NNZ even if we write "0" | return values[idx] = v; // we don't change NNZ even if we write "0" | ||||
else { | else { | ||||
@@ -392,11 +393,13 @@ private: | |||||
* \param begin The vector's index to begin | * \param begin The vector's index to begin | ||||
* \param end The vector's index to end | * \param end The vector's index to end | ||||
* \param match What to search | * \param match What to search | ||||
* @return The index of the item or end on failure. | |||||
* \return An <index, status> pair. | |||||
* index is the index of the item or end if not found | |||||
* status is true if found, false otherwise | |||||
*/ | */ | ||||
std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) { | std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) { | ||||
IndexType b = begin, e = end-1; | IndexType b = begin, e = end-1; | ||||
while (true) { | |||||
while (b <= e) { | |||||
IndexType m = (b+e)/2; | IndexType m = (b+e)/2; | ||||
if (v[m] == match) return std::make_pair(m, true); | if (v[m] == match) return std::make_pair(m, true); | ||||
else if (b >= e) return std::make_pair(end, false); | else if (b >= e) return std::make_pair(end, false); | ||||
@@ -417,8 +420,11 @@ private: | |||||
* \param begin The vector's index to begin | * \param begin The vector's index to begin | ||||
* \param end The vector's index to end | * \param end The vector's index to end | ||||
* \param match What to search | * \param match What to search | ||||
* \return An <index, status> pair. | |||||
* index is the index of the item or end if not found | |||||
* status is true if found, false otherwise | |||||
*/ | */ | ||||
std::pair<IndexType, bool> find_lin_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) { | |||||
std::pair<IndexType, bool> find_place_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) { | |||||
for ( ; begin < end ; ++begin) { | for ( ; begin < end ; ++begin) { | ||||
if (match == v[begin]) return std::make_pair(begin, true); | if (match == v[begin]) return std::make_pair(begin, true); | ||||
else if (match < v[begin]) return std::make_pair(begin, false); | else if (match < v[begin]) return std::make_pair(begin, false); | ||||
@@ -437,7 +443,7 @@ private: | |||||
//! @{ | //! @{ | ||||
std::vector<DataType> values {}; //!< vector to store the values of the matrix | std::vector<DataType> values {}; //!< vector to store the values of the matrix | ||||
std::vector<IndexType> rows{}; //!< vector to store the row information | std::vector<IndexType> rows{}; //!< vector to store the row information | ||||
std::vector<IndexType> col_ptr{1,0}; //!< vector to stor the column pointers | |||||
std::vector<IndexType> col_ptr{1,0}; //!< vector to store the column pointers | |||||
IndexType N{0}; //!< The dimension of the matrix (square) | IndexType N{0}; //!< The dimension of the matrix (square) | ||||
IndexType NNZ{0}; //!< The NNZ (capacity of the matrix) | IndexType NNZ{0}; //!< The NNZ (capacity of the matrix) | ||||
//! @} | //! @} | ||||
@@ -496,20 +502,25 @@ struct SpMatCol { | |||||
/*! | /*! | ||||
* Multiplication operator | * Multiplication operator | ||||
* | |||||
* We follow only the non-zero values and multiply only the common indexes. | |||||
* | |||||
* @tparam C Universal reference for the type of the right hand side column | * @tparam C Universal reference for the type of the right hand side column | ||||
* | * | ||||
* @param c The right hand side matrix | * @param c The right hand side matrix | ||||
* @return The value of the inner product of two vectors | * @return The value of the inner product of two vectors | ||||
* @note The time complexity is \$ O(nnz1+nnz2) \$. | |||||
* Where the nnz is the max NNZ elements of the column of the matrix | |||||
*/ | */ | ||||
template <typename C> | template <typename C> | ||||
DataType operator* (C&& c) { | DataType operator* (C&& c) { | ||||
static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), ""); | static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), ""); | ||||
DataType v{}; | DataType v{}; | ||||
while (index() != end() && c.index() != c.end()) { | while (index() != end() && c.index() != c.end()) { | ||||
if (index() < c.index()) advance(); | |||||
else if (index() > c.index()) ++c; | |||||
if (index() < c.index()) advance(); // advance me | |||||
else if (index() > c.index()) ++c; // advance other | |||||
else { //index() == c.index() | else { //index() == c.index() | ||||
v += get() * *c; | |||||
v += get() * *c; // multiply and advance both | |||||
++c; | ++c; | ||||
advance(); | advance(); | ||||
} | } | ||||
@@ -597,20 +608,25 @@ struct SpMatRow { | |||||
/*! | /*! | ||||
* Multiplication operator | * Multiplication operator | ||||
* | |||||
* We follow only the non-zero values and multiply only the common indexes. | |||||
* | |||||
* @tparam C Universal reference for the type of the right hand side column | * @tparam C Universal reference for the type of the right hand side column | ||||
* | * | ||||
* @param c The right hand side matrix | * @param c The right hand side matrix | ||||
* @return The value of the inner product of two vectors | * @return The value of the inner product of two vectors | ||||
* @note The time complexity is \$ O(N+nnz2) \$ and way heavier than the ColxCol multiplication. | |||||
* Where the nnz is the max NNZ elements of the column of the matrix | |||||
*/ | */ | ||||
template <typename C> | template <typename C> | ||||
DataType operator* (C&& c) { | DataType operator* (C&& c) { | ||||
static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), ""); | static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), ""); | ||||
DataType v{}; | DataType v{}; | ||||
while (index() != end() && c.index() != c.end()) { | while (index() != end() && c.index() != c.end()) { | ||||
if (index() < c.index()) advance(); | |||||
else if (index() > c.index()) ++c; | |||||
if (index() < c.index()) advance(); // advance me | |||||
else if (index() > c.index()) ++c; // advance other | |||||
else { //index() == c.index() | else { //index() == c.index() | ||||
v += get()* *c; | |||||
v += get() * *c; // multiply and advance both | |||||
++c; | ++c; | ||||
advance(); | advance(); | ||||
} | } | ||||
@@ -51,7 +51,7 @@ static void coo2csc_e( | |||||
*/ | */ | ||||
uint32_t find_idx(const uint32_t* v, uint32_t begin, uint32_t end, uint32_t match) { | uint32_t find_idx(const uint32_t* v, uint32_t begin, uint32_t end, uint32_t match) { | ||||
uint32_t b = begin, e = end-1; | uint32_t b = begin, e = end-1; | ||||
while (1) { | |||||
while (b <= e) { | |||||
uint32_t m = (b+e)/2; | uint32_t m = (b+e)/2; | ||||
if (v[m] == match) return m; | if (v[m] == match) return m; | ||||
else if (b >= e) return end; | else if (b >= e) return end; | ||||
@@ -116,18 +116,25 @@ bool get_options(int argc, char* argv[]){ | |||||
std::cout << " --print_graph <size>\n"; | std::cout << " --print_graph <size>\n"; | ||||
std::cout << " Prints the first <size> x <size> part of the matrix to stdout.\n\n"; | std::cout << " Prints the first <size> x <size> part of the matrix to stdout.\n\n"; | ||||
std::cout << " -h | --help <size>\n"; | std::cout << " -h | --help <size>\n"; | ||||
std::cout << " Prints this and exit.\n"; | |||||
std::cout << " Prints this and exit.\n\n"; | |||||
std::cout << "Examples:\n\n"; | |||||
std::cout << " Get the total count of matrix in <MFILE> and calculate the time for vector creation 5 times:\n"; | |||||
std::cout << " > ./tcount -i <MFILE> --timing --print_count -r 5\n\n"; | |||||
std::cout << " Get the vector to <OUTFILE> of matrix in <MFILE> and print the time to stdout using dynamic scheduling\n"; | |||||
std::cout << " > ./tcount -i <MFILE> -o <OUTFILE> --timing --dynamic\n\n"; | |||||
std::cout << " Get the total count of matrix in <MFILE> using <N> workers only\n"; | |||||
std::cout << " > ./tcount -i <MFILE> -n <N> --print_count\n"; | |||||
exit(0); | exit(0); | ||||
} | } | ||||
else { // parse error | else { // parse error | ||||
std::cout << "Invokation error. Try -h for details.\n"; | |||||
std::cout << "Invocation error. Try -h for details.\n"; | |||||
status = false; | status = false; | ||||
} | } | ||||
} | } | ||||
// Input checkers | // Input checkers | ||||
if (session.inputMatrix == InputMatrix::UNSPECIFIED) { | if (session.inputMatrix == InputMatrix::UNSPECIFIED) { | ||||
std::cout << "Invokation error. Try -h for details.\n"; | |||||
std::cout << "Invocation error. Try -h for details.\n"; | |||||
status = false; | status = false; | ||||
} | } | ||||
#if CODE_VERSION == V4 | #if CODE_VERSION == V4 | ||||
@@ -166,7 +173,7 @@ void prepare_matrix (matrix& A, Timing& timer) { | |||||
timer.print_dt("load matrix"); | timer.print_dt("load matrix"); | ||||
} | } | ||||
if (session.verbose && session.mtx_print) { | |||||
if (session.mtx_print) { | |||||
logger << "\nMatrix:" << logger.endl; | logger << "\nMatrix:" << logger.endl; | ||||
print_graph (A); | print_graph (A); | ||||
} | } | ||||
@@ -41,8 +41,8 @@ int nworkers() { | |||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is wayyy faster. | * - A lower triangular matrix which update c[i], c[j], c[k]. This is wayyy faster. | ||||
*/ | */ | ||||
std::vector<value_t> triang_v(matrix& A) { | std::vector<value_t> triang_v(matrix& A) { | ||||
std::vector<std::atomic<value_t>> c(A.size()); | |||||
std::vector<value_t> ret(A.size()); | |||||
std::vector<std::atomic<value_t>> c(A.size()); // atomic for c[j], c[k] only | |||||
std::vector<value_t> ret(A.size()); // unrestricted c[i] access | |||||
cilk_for (int i=0 ; i<A.size() ; ++i) { | cilk_for (int i=0 ; i<A.size() ; ++i) { | ||||
for (auto j = A.getCol(i); j.index() != j.end() ; ++j) { | for (auto j = A.getCol(i); j.index() != j.end() ; ++j) { | ||||
@@ -61,6 +61,7 @@ std::vector<value_t> triang_v(matrix& A) { | |||||
c[i] = c[i]/2; | c[i] = c[i]/2; | ||||
} | } | ||||
} | } | ||||
// merge c to ret and return it | |||||
for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i]; | for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i]; | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -80,7 +81,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e | |||||
*/ | */ | ||||
value_t sum (std::vector<value_t>& v) { | value_t sum (std::vector<value_t>& v) { | ||||
int n = nworkers(); | int n = nworkers(); | ||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invokation. | |||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation. | |||||
// We spawn workers in a more static way. | // We spawn workers in a more static way. | ||||
for (index_t i =0 ; i < n ; ++i) { | for (index_t i =0 ; i < n ; ++i) { | ||||
@@ -141,8 +142,8 @@ int nworkers() { | |||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. | * - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. | ||||
*/ | */ | ||||
std::vector<value_t> triang_v(matrix& A) { | std::vector<value_t> triang_v(matrix& A) { | ||||
std::vector<std::atomic<value_t>> c(A.size()); | |||||
std::vector<value_t> ret(A.size()); | |||||
std::vector<std::atomic<value_t>> c(A.size()); // atomic for c[j], c[k] only | |||||
std::vector<value_t> ret(A.size()); // unrestricted c[i] access | |||||
// OMP schedule selection | // OMP schedule selection | ||||
if (session.dynamic) omp_set_schedule (omp_sched_dynamic, 0); | if (session.dynamic) omp_set_schedule (omp_sched_dynamic, 0); | ||||
@@ -165,6 +166,7 @@ std::vector<value_t> triang_v(matrix& A) { | |||||
c[i] = c[i]/2; | c[i] = c[i]/2; | ||||
} | } | ||||
} | } | ||||
// merge c to ret and return it | |||||
for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i]; | for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i]; | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -210,10 +212,12 @@ std::vector<value_t> triang_v(matrix& A) { | |||||
++c[i]; | ++c[i]; | ||||
c[j.index()] += (!session.makeSymmetric)? 1:0; | c[j.index()] += (!session.makeSymmetric)? 1:0; | ||||
c[k.index()] += (!session.makeSymmetric)? 1:0; | c[k.index()] += (!session.makeSymmetric)? 1:0; | ||||
//^ We set other nodes in case of lower triangular | |||||
} | } | ||||
} | } | ||||
} | } | ||||
if (session.makeSymmetric) c[i] /= 2; | if (session.makeSymmetric) c[i] /= 2; | ||||
//^ We don't have to divide by 2 in case of lower triangular | |||||
} | } | ||||
return c; | return c; | ||||
} | } | ||||
@@ -37,15 +37,16 @@ int nworkers() { | |||||
* vector = --- * (A.* (A*B))*ones_N | * vector = --- * (A.* (A*B))*ones_N | ||||
* 2 | * 2 | ||||
* We squeezed all that to one function for performance. The row*column multiplication | * We squeezed all that to one function for performance. The row*column multiplication | ||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members. | |||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members | |||||
* with a time complexity of \$ O(nnz1 + nnz2) \$ | |||||
* | * | ||||
* \param A The first matrix to use. | * \param A The first matrix to use. | ||||
* \param B The second matrix to use (they can be the same). | * \param B The second matrix to use (they can be the same). | ||||
* \return The count vector. RVO is used here. | * \return The count vector. RVO is used here. | ||||
* \note | * \note | ||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | ||||
* - A full matrix calculation which update only c[i] | |||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. | |||||
* - A full matrix calculation | |||||
* - A lower triangular matrix | |||||
* \warning | * \warning | ||||
* The latter (--triangular_only) produces correct results ONLY if we are after the total count. | * The latter (--triangular_only) produces correct results ONLY if we are after the total count. | ||||
*/ | */ | ||||
@@ -76,7 +77,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e | |||||
*/ | */ | ||||
value_t sum (std::vector<value_t>& v) { | value_t sum (std::vector<value_t>& v) { | ||||
int n = nworkers(); | int n = nworkers(); | ||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invokation. | |||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation. | |||||
// We spawn workers in a more static way. | // We spawn workers in a more static way. | ||||
for (index_t i =0 ; i < n ; ++i) { | for (index_t i =0 ; i < n ; ++i) { | ||||
@@ -120,15 +121,16 @@ int nworkers() { | |||||
* vector = --- * (A.* (A*B))*ones_N | * vector = --- * (A.* (A*B))*ones_N | ||||
* 2 | * 2 | ||||
* We squeezed all that to one function for performance. The row*column multiplication | * We squeezed all that to one function for performance. The row*column multiplication | ||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members. | |||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members | |||||
* with a time complexity of \$ O(nnz1 + nnz2) \$ | |||||
* | * | ||||
* \param A The first matrix to use. | * \param A The first matrix to use. | ||||
* \param B The second matrix to use (they can be the same). | * \param B The second matrix to use (they can be the same). | ||||
* \return The count vector. RVO is used here. | * \return The count vector. RVO is used here. | ||||
* \note | * \note | ||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | ||||
* - A full matrix calculation which update only c[i] | |||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. | |||||
* - A full matrix calculation | |||||
* - A lower triangular matrix | |||||
* \warning | * \warning | ||||
* The latter (--triangular_only) produces correct results ONLY if we are after the total count. | * The latter (--triangular_only) produces correct results ONLY if we are after the total count. | ||||
*/ | */ | ||||
@@ -189,7 +191,8 @@ int nworkers() { | |||||
* 2 | * 2 | ||||
* | * | ||||
* We squeezed all that to one function for performance. The row*column multiplication | * We squeezed all that to one function for performance. The row*column multiplication | ||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members. | |||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members | |||||
* with a time complexity of \$ O(nnz1 + nnz2) \$ | |||||
* | * | ||||
* \param out Reference to output vector | * \param out Reference to output vector | ||||
* \param A The first matrix to use. | * \param A The first matrix to use. | ||||
@@ -198,8 +201,8 @@ int nworkers() { | |||||
* \return The count vector. RVO is used here. | * \return The count vector. RVO is used here. | ||||
* \note | * \note | ||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | ||||
* - A full matrix calculation which update only c[i] | |||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. | |||||
* - A full matrix calculation | |||||
* - A lower triangular matrix | |||||
* \warning | * \warning | ||||
* The latter (--triangular_only) produces correct results ONLY if we are after the total count. | * The latter (--triangular_only) produces correct results ONLY if we are after the total count. | ||||
*/ | */ | ||||
@@ -260,7 +263,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e | |||||
*/ | */ | ||||
value_t sum (std::vector<value_t>& v) { | value_t sum (std::vector<value_t>& v) { | ||||
int n = nworkers(); | int n = nworkers(); | ||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invokation. | |||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation. | |||||
std::vector<std::thread> workers; | std::vector<std::thread> workers; | ||||
// We spawn workers in a more static way. | // We spawn workers in a more static way. | ||||
@@ -288,16 +291,18 @@ int nworkers() { return 1; } | |||||
* 1 | * 1 | ||||
* vector = --- * (A.* (A*B))*ones_N | * vector = --- * (A.* (A*B))*ones_N | ||||
* 2 | * 2 | ||||
* | |||||
* We squeezed all that to one function for performance. The row*column multiplication | * We squeezed all that to one function for performance. The row*column multiplication | ||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members. | |||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members | |||||
* with a time complexity of \$ O(nnz1 + nnz2) \$ | |||||
* | * | ||||
* \param A The first matrix to use. | * \param A The first matrix to use. | ||||
* \param B The second matrix to use (they can be the same). | * \param B The second matrix to use (they can be the same). | ||||
* \return The count vector. RVO is used here. | * \return The count vector. RVO is used here. | ||||
* \note | * \note | ||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | * We use two methods of calculation based on \c --make_symmetric or \c --triangular_only | ||||
* - A full matrix calculation which update only c[i] | |||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster. | |||||
* - A full matrix calculation | |||||
* - A lower triangular matrix | |||||
* \warning | * \warning | ||||
* The latter (--triangular_only) produces correct results ONLY if we are after the total count. | * The latter (--triangular_only) produces correct results ONLY if we are after the total count. | ||||
*/ | */ | ||||