Comment changes
This commit is contained in:
parent
450b73e8d9
commit
225837ed0e
4
Makefile
4
Makefile
@ -236,7 +236,9 @@ v4_pthreads: $(BUILD_DIR)/$(TARGET)
|
|||||||
# > make clean
|
# > make clean
|
||||||
# 3) for v4 cilk for example:
|
# 3) for v4 cilk for example:
|
||||||
# > make csal_v4_cilk
|
# > make csal_v4_cilk
|
||||||
# 4) run executables from `bin/`
|
# 4) run executables from `bin/`. Examples:
|
||||||
|
# > ./bin/tcount_ompv3 -i mtx/NACA0015.mtx --timing -r 3 -o /dev/null
|
||||||
|
# > ./bin/tcount_pthv4 -i mtx/com_Youtube.mtx --timing --dynamic --print_count
|
||||||
|
|
||||||
csal_v3: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=3
|
csal_v3: CFLAGS := $(REL_CFLAGS) -DCODE_VERSION=3
|
||||||
csal_v3: TARGET := tcount_v3
|
csal_v3: TARGET := tcount_v3
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
#define V3 3
|
#define V3 3
|
||||||
#define V4 4
|
#define V4 4
|
||||||
|
|
||||||
// Fail-safe verision selection
|
// Fail-safe version selection
|
||||||
#if !defined CODE_VERSION
|
#if !defined CODE_VERSION
|
||||||
#define CODE_VERSION V4
|
#define CODE_VERSION V4
|
||||||
#endif
|
#endif
|
||||||
|
46
inc/impl.hpp
46
inc/impl.hpp
@ -39,7 +39,7 @@ enum class MatrixType {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Forward type declerations
|
* Forward type declarations
|
||||||
*/
|
*/
|
||||||
template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct Matrix;
|
template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct Matrix;
|
||||||
template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct SpMat;
|
template<typename DataType, typename IndexType, MatrixType Type = MatrixType::SYMMETRIC> struct SpMat;
|
||||||
@ -298,7 +298,7 @@ struct SpMat {
|
|||||||
*/
|
*/
|
||||||
DataType get_lin(IndexType i, IndexType j) {
|
DataType get_lin(IndexType i, IndexType j) {
|
||||||
IndexType idx; bool found;
|
IndexType idx; bool found;
|
||||||
std::tie(idx, found) =find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i);
|
std::tie(idx, found) =find_place_idx(rows, col_ptr[j], col_ptr[j+1], i);
|
||||||
return (found) ? values[idx] : 0;
|
return (found) ? values[idx] : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -309,8 +309,9 @@ struct SpMat {
|
|||||||
* If so we just change it to a new value. If not we add the item on the matrix.
|
* If so we just change it to a new value. If not we add the item on the matrix.
|
||||||
*
|
*
|
||||||
* @note
|
* @note
|
||||||
* We don't increase the NNZ value of the struct. We expect the user has already
|
* When changing a value, we don't increase the NNZ value of the struct. We expect the user has already
|
||||||
* change the NNZ value to the right one using @see capacity() function.
|
* changed the NNZ value to the right one using @see capacity() function. When adding a value we
|
||||||
|
* increase the NNZ.
|
||||||
*
|
*
|
||||||
* @param i The row number
|
* @param i The row number
|
||||||
* @param j The column number
|
* @param j The column number
|
||||||
@ -318,7 +319,7 @@ struct SpMat {
|
|||||||
*/
|
*/
|
||||||
DataType set(DataType v, IndexType i, IndexType j) {
|
DataType set(DataType v, IndexType i, IndexType j) {
|
||||||
IndexType idx; bool found;
|
IndexType idx; bool found;
|
||||||
std::tie(idx, found) = find_lin_idx(rows, col_ptr[j], col_ptr[j+1], i);
|
std::tie(idx, found) = find_place_idx(rows, col_ptr[j], col_ptr[j+1], i);
|
||||||
if (found)
|
if (found)
|
||||||
return values[idx] = v; // we don't change NNZ even if we write "0"
|
return values[idx] = v; // we don't change NNZ even if we write "0"
|
||||||
else {
|
else {
|
||||||
@ -392,11 +393,13 @@ private:
|
|||||||
* \param begin The vector's index to begin
|
* \param begin The vector's index to begin
|
||||||
* \param end The vector's index to end
|
* \param end The vector's index to end
|
||||||
* \param match What to search
|
* \param match What to search
|
||||||
* @return The index of the item or end on failure.
|
* \return An <index, status> pair.
|
||||||
|
* index is the index of the item or end if not found
|
||||||
|
* status is true if found, false otherwise
|
||||||
*/
|
*/
|
||||||
std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
|
std::pair<IndexType, bool> find_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
|
||||||
IndexType b = begin, e = end-1;
|
IndexType b = begin, e = end-1;
|
||||||
while (true) {
|
while (b <= e) {
|
||||||
IndexType m = (b+e)/2;
|
IndexType m = (b+e)/2;
|
||||||
if (v[m] == match) return std::make_pair(m, true);
|
if (v[m] == match) return std::make_pair(m, true);
|
||||||
else if (b >= e) return std::make_pair(end, false);
|
else if (b >= e) return std::make_pair(end, false);
|
||||||
@ -417,8 +420,11 @@ private:
|
|||||||
* \param begin The vector's index to begin
|
* \param begin The vector's index to begin
|
||||||
* \param end The vector's index to end
|
* \param end The vector's index to end
|
||||||
* \param match What to search
|
* \param match What to search
|
||||||
|
* \return An <index, status> pair.
|
||||||
|
* index is the index of the item or end if not found
|
||||||
|
* status is true if found, false otherwise
|
||||||
*/
|
*/
|
||||||
std::pair<IndexType, bool> find_lin_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
|
std::pair<IndexType, bool> find_place_idx(const std::vector<IndexType>& v, IndexType begin, IndexType end, IndexType match) {
|
||||||
for ( ; begin < end ; ++begin) {
|
for ( ; begin < end ; ++begin) {
|
||||||
if (match == v[begin]) return std::make_pair(begin, true);
|
if (match == v[begin]) return std::make_pair(begin, true);
|
||||||
else if (match < v[begin]) return std::make_pair(begin, false);
|
else if (match < v[begin]) return std::make_pair(begin, false);
|
||||||
@ -437,7 +443,7 @@ private:
|
|||||||
//! @{
|
//! @{
|
||||||
std::vector<DataType> values {}; //!< vector to store the values of the matrix
|
std::vector<DataType> values {}; //!< vector to store the values of the matrix
|
||||||
std::vector<IndexType> rows{}; //!< vector to store the row information
|
std::vector<IndexType> rows{}; //!< vector to store the row information
|
||||||
std::vector<IndexType> col_ptr{1,0}; //!< vector to stor the column pointers
|
std::vector<IndexType> col_ptr{1,0}; //!< vector to store the column pointers
|
||||||
IndexType N{0}; //!< The dimension of the matrix (square)
|
IndexType N{0}; //!< The dimension of the matrix (square)
|
||||||
IndexType NNZ{0}; //!< The NNZ (capacity of the matrix)
|
IndexType NNZ{0}; //!< The NNZ (capacity of the matrix)
|
||||||
//! @}
|
//! @}
|
||||||
@ -496,20 +502,25 @@ struct SpMatCol {
|
|||||||
|
|
||||||
/*!
|
/*!
|
||||||
* Multiplication operator
|
* Multiplication operator
|
||||||
|
*
|
||||||
|
* We follow only the non-zero values and multiply only the common indexes.
|
||||||
|
*
|
||||||
* @tparam C Universal reference for the type right half site column
|
* @tparam C Universal reference for the type right half site column
|
||||||
*
|
*
|
||||||
* @param c The right hand site matrix
|
* @param c The right hand site matrix
|
||||||
* @return The value of the inner product of two vectors
|
* @return The value of the inner product of two vectors
|
||||||
|
* @note The time complexity is \f$ O(nnz1+nnz2) \f$.
|
||||||
|
* Where the nnz is the max NNZ elements of the column of the matrix
|
||||||
*/
|
*/
|
||||||
template <typename C>
|
template <typename C>
|
||||||
DataType operator* (C&& c) {
|
DataType operator* (C&& c) {
|
||||||
static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), "");
|
static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), "");
|
||||||
DataType v{};
|
DataType v{};
|
||||||
while (index() != end() && c.index() != c.end()) {
|
while (index() != end() && c.index() != c.end()) {
|
||||||
if (index() < c.index()) advance();
|
if (index() < c.index()) advance(); // advance me
|
||||||
else if (index() > c.index()) ++c;
|
else if (index() > c.index()) ++c; // advance other
|
||||||
else { //index() == c.index()
|
else { //index() == c.index()
|
||||||
v += get() * *c;
|
v += get() * *c; // multiply and advance both
|
||||||
++c;
|
++c;
|
||||||
advance();
|
advance();
|
||||||
}
|
}
|
||||||
@ -597,20 +608,25 @@ struct SpMatRow {
|
|||||||
|
|
||||||
/*!
|
/*!
|
||||||
* Multiplication operator
|
* Multiplication operator
|
||||||
|
*
|
||||||
|
* We follow only the non-zero values and multiply only the common indexes.
|
||||||
|
*
|
||||||
* @tparam C Universal reference for the type right half site column
|
* @tparam C Universal reference for the type right half site column
|
||||||
*
|
*
|
||||||
* @param c The right hand site matrix
|
* @param c The right hand site matrix
|
||||||
* @return The value of the inner product of two vectors
|
* @return The value of the inner product of two vectors
|
||||||
|
* @note The time complexity is \f$ O(N+nnz2) \f$ and way heavier than the ColxCol multiplication.
|
||||||
|
* Where the nnz is the max NNZ elements of the column of the matrix
|
||||||
*/
|
*/
|
||||||
template <typename C>
|
template <typename C>
|
||||||
DataType operator* (C&& c) {
|
DataType operator* (C&& c) {
|
||||||
static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), "");
|
static_assert(std::is_same<remove_cvref_t<C>, SpMatCol<DataType, IndexType>>(), "");
|
||||||
DataType v{};
|
DataType v{};
|
||||||
while (index() != end() && c.index() != c.end()) {
|
while (index() != end() && c.index() != c.end()) {
|
||||||
if (index() < c.index()) advance();
|
if (index() < c.index()) advance(); // advance me
|
||||||
else if (index() > c.index()) ++c;
|
else if (index() > c.index()) ++c; // advance other
|
||||||
else { //index() == c.index()
|
else { //index() == c.index()
|
||||||
v += get()* *c;
|
v += get() * *c; // multiply and advance both
|
||||||
++c;
|
++c;
|
||||||
advance();
|
advance();
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ static void coo2csc_e(
|
|||||||
*/
|
*/
|
||||||
uint32_t find_idx(const uint32_t* v, uint32_t begin, uint32_t end, uint32_t match) {
|
uint32_t find_idx(const uint32_t* v, uint32_t begin, uint32_t end, uint32_t match) {
|
||||||
uint32_t b = begin, e = end-1;
|
uint32_t b = begin, e = end-1;
|
||||||
while (1) {
|
while (b <= e) {
|
||||||
uint32_t m = (b+e)/2;
|
uint32_t m = (b+e)/2;
|
||||||
if (v[m] == match) return m;
|
if (v[m] == match) return m;
|
||||||
else if (b >= e) return end;
|
else if (b >= e) return end;
|
||||||
|
15
src/main.cpp
15
src/main.cpp
@ -116,18 +116,25 @@ bool get_options(int argc, char* argv[]){
|
|||||||
std::cout << " --print_graph <size>\n";
|
std::cout << " --print_graph <size>\n";
|
||||||
std::cout << " Prints the first <size> x <size> part of the matrix to stdout.\n\n";
|
std::cout << " Prints the first <size> x <size> part of the matrix to stdout.\n\n";
|
||||||
std::cout << " -h | --help <size>\n";
|
std::cout << " -h | --help <size>\n";
|
||||||
std::cout << " Prints this and exit.\n";
|
std::cout << " Prints this and exit.\n\n";
|
||||||
|
std::cout << "Examples:\n\n";
|
||||||
|
std::cout << " Get the total count of matrix in <MFILE> and calculate the time for vector creation 5 times:\n";
|
||||||
|
std::cout << " > ./tcount -i <MFILE> --timing --print_count -r 5\n\n";
|
||||||
|
std::cout << " Get the vector to <OUTFILE> of matrix in <MFILE> and print the time to stdout using dynamic scheduling\n";
|
||||||
|
std::cout << " > ./tcount -i <MFILE> -o <OUTFILE> --timing --dynamic\n\n";
|
||||||
|
std::cout << " Get the total count of matrix in <MFILE> using <N> workers only\n";
|
||||||
|
std::cout << " > ./tcount -i <MFILE> -n <N> --print_count\n";
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
else { // parse error
|
else { // parse error
|
||||||
std::cout << "Invokation error. Try -h for details.\n";
|
std::cout << "Invocation error. Try -h for details.\n";
|
||||||
status = false;
|
status = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Input checkers
|
// Input checkers
|
||||||
if (session.inputMatrix == InputMatrix::UNSPECIFIED) {
|
if (session.inputMatrix == InputMatrix::UNSPECIFIED) {
|
||||||
std::cout << "Invokation error. Try -h for details.\n";
|
std::cout << "Invocation error. Try -h for details.\n";
|
||||||
status = false;
|
status = false;
|
||||||
}
|
}
|
||||||
#if CODE_VERSION == V4
|
#if CODE_VERSION == V4
|
||||||
@ -166,7 +173,7 @@ void prepare_matrix (matrix& A, Timing& timer) {
|
|||||||
timer.print_dt("load matrix");
|
timer.print_dt("load matrix");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (session.verbose && session.mtx_print) {
|
if (session.mtx_print) {
|
||||||
logger << "\nMatrix:" << logger.endl;
|
logger << "\nMatrix:" << logger.endl;
|
||||||
print_graph (A);
|
print_graph (A);
|
||||||
}
|
}
|
||||||
|
14
src/v3.cpp
14
src/v3.cpp
@ -41,8 +41,8 @@ int nworkers() {
|
|||||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is wayyy faster.
|
* - A lower triangular matrix which update c[i], c[j], c[k]. This is wayyy faster.
|
||||||
*/
|
*/
|
||||||
std::vector<value_t> triang_v(matrix& A) {
|
std::vector<value_t> triang_v(matrix& A) {
|
||||||
std::vector<std::atomic<value_t>> c(A.size());
|
std::vector<std::atomic<value_t>> c(A.size()); // atomic for c[j], c[k] only
|
||||||
std::vector<value_t> ret(A.size());
|
std::vector<value_t> ret(A.size()); // unrestricted c[i] access
|
||||||
|
|
||||||
cilk_for (int i=0 ; i<A.size() ; ++i) {
|
cilk_for (int i=0 ; i<A.size() ; ++i) {
|
||||||
for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
|
for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
|
||||||
@ -61,6 +61,7 @@ std::vector<value_t> triang_v(matrix& A) {
|
|||||||
c[i] = c[i]/2;
|
c[i] = c[i]/2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// merge c to ret and return it
|
||||||
for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i];
|
for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i];
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -80,7 +81,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e
|
|||||||
*/
|
*/
|
||||||
value_t sum (std::vector<value_t>& v) {
|
value_t sum (std::vector<value_t>& v) {
|
||||||
int n = nworkers();
|
int n = nworkers();
|
||||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invokation.
|
std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation.
|
||||||
|
|
||||||
// We spawn workers in a more statically way.
|
// We spawn workers in a more statically way.
|
||||||
for (index_t i =0 ; i < n ; ++i) {
|
for (index_t i =0 ; i < n ; ++i) {
|
||||||
@ -141,8 +142,8 @@ int nworkers() {
|
|||||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
|
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
|
||||||
*/
|
*/
|
||||||
std::vector<value_t> triang_v(matrix& A) {
|
std::vector<value_t> triang_v(matrix& A) {
|
||||||
std::vector<std::atomic<value_t>> c(A.size());
|
std::vector<std::atomic<value_t>> c(A.size()); // atomic for c[j], c[k] only
|
||||||
std::vector<value_t> ret(A.size());
|
std::vector<value_t> ret(A.size()); // unrestricted c[i] access
|
||||||
|
|
||||||
// OMP schedule selection
|
// OMP schedule selection
|
||||||
if (session.dynamic) omp_set_schedule (omp_sched_dynamic, 0);
|
if (session.dynamic) omp_set_schedule (omp_sched_dynamic, 0);
|
||||||
@ -165,6 +166,7 @@ std::vector<value_t> triang_v(matrix& A) {
|
|||||||
c[i] = c[i]/2;
|
c[i] = c[i]/2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// merge c to ret and return it
|
||||||
for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i];
|
for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i];
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -210,10 +212,12 @@ std::vector<value_t> triang_v(matrix& A) {
|
|||||||
++c[i];
|
++c[i];
|
||||||
c[j.index()] += (!session.makeSymmetric)? 1:0;
|
c[j.index()] += (!session.makeSymmetric)? 1:0;
|
||||||
c[k.index()] += (!session.makeSymmetric)? 1:0;
|
c[k.index()] += (!session.makeSymmetric)? 1:0;
|
||||||
|
//^ We set other nodes in case of lower triangular
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (session.makeSymmetric) c[i] /= 2;
|
if (session.makeSymmetric) c[i] /= 2;
|
||||||
|
//^ We don't have to divide by 2 in case of lower triangular
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
33
src/v4.cpp
33
src/v4.cpp
@ -37,15 +37,16 @@ int nworkers() {
|
|||||||
* vector = --- * (A.* (A*B))*ones_N
|
* vector = --- * (A.* (A*B))*ones_N
|
||||||
* 2
|
* 2
|
||||||
* We squeezed all that to one function for performance. The row*column multiplication
|
* We squeezed all that to one function for performance. The row*column multiplication
|
||||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members.
|
* uses the inner CSC structure of sparse matrix and follows only non-zero members
|
||||||
|
* with a time complexity of \f$ O(nnz1 + nnz2) \f$.
|
||||||
*
|
*
|
||||||
* \param A The first matrix to use.
|
* \param A The first matrix to use.
|
||||||
* \param B The second matrix to use (they can be the same).
|
* \param B The second matrix to use (they can be the same).
|
||||||
* \return The count vector. RVO is used here.
|
* \return The count vector. RVO is used here.
|
||||||
* \note
|
* \note
|
||||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
||||||
* - A full matrix calculation which update only c[i]
|
* - A full matrix calculation
|
||||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
|
* - A lower triangular matrix
|
||||||
* \warning
|
* \warning
|
||||||
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
||||||
*/
|
*/
|
||||||
@ -76,7 +77,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e
|
|||||||
*/
|
*/
|
||||||
value_t sum (std::vector<value_t>& v) {
|
value_t sum (std::vector<value_t>& v) {
|
||||||
int n = nworkers();
|
int n = nworkers();
|
||||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invokation.
|
std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation.
|
||||||
|
|
||||||
// We spawn workers in a more statically way.
|
// We spawn workers in a more statically way.
|
||||||
for (index_t i =0 ; i < n ; ++i) {
|
for (index_t i =0 ; i < n ; ++i) {
|
||||||
@ -120,15 +121,16 @@ int nworkers() {
|
|||||||
* vector = --- * (A.* (A*B))*ones_N
|
* vector = --- * (A.* (A*B))*ones_N
|
||||||
* 2
|
* 2
|
||||||
* We squeezed all that to one function for performance. The row*column multiplication
|
* We squeezed all that to one function for performance. The row*column multiplication
|
||||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members.
|
* uses the inner CSC structure of sparse matrix and follows only non-zero members
|
||||||
|
* with a time complexity of \f$ O(nnz1 + nnz2) \f$.
|
||||||
*
|
*
|
||||||
* \param A The first matrix to use.
|
* \param A The first matrix to use.
|
||||||
* \param B The second matrix to use (they can be the same).
|
* \param B The second matrix to use (they can be the same).
|
||||||
* \return The count vector. RVO is used here.
|
* \return The count vector. RVO is used here.
|
||||||
* \note
|
* \note
|
||||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
||||||
* - A full matrix calculation which update only c[i]
|
* - A full matrix calculation
|
||||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
|
* - A lower triangular matrix
|
||||||
* \warning
|
* \warning
|
||||||
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
||||||
*/
|
*/
|
||||||
@ -189,7 +191,8 @@ int nworkers() {
|
|||||||
* 2
|
* 2
|
||||||
*
|
*
|
||||||
* We squeezed all that to one function for performance. The row*column multiplication
|
* We squeezed all that to one function for performance. The row*column multiplication
|
||||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members.
|
* uses the inner CSC structure of sparse matrix and follows only non-zero members
|
||||||
|
* with a time complexity of \f$ O(nnz1 + nnz2) \f$.
|
||||||
*
|
*
|
||||||
* \param out Reference to output vector
|
* \param out Reference to output vector
|
||||||
* \param A The first matrix to use.
|
* \param A The first matrix to use.
|
||||||
@ -198,8 +201,8 @@ int nworkers() {
|
|||||||
* \return The count vector. RVO is used here.
|
* \return The count vector. RVO is used here.
|
||||||
* \note
|
* \note
|
||||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
||||||
* - A full matrix calculation which update only c[i]
|
* - A full matrix calculation
|
||||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
|
* - A lower triangular matrix
|
||||||
* \warning
|
* \warning
|
||||||
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
||||||
*/
|
*/
|
||||||
@ -260,7 +263,7 @@ void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t e
|
|||||||
*/
|
*/
|
||||||
value_t sum (std::vector<value_t>& v) {
|
value_t sum (std::vector<value_t>& v) {
|
||||||
int n = nworkers();
|
int n = nworkers();
|
||||||
std::vector<value_t> sum_v(n, 0); // result of each do_sum invokation.
|
std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation.
|
||||||
std::vector<std::thread> workers;
|
std::vector<std::thread> workers;
|
||||||
|
|
||||||
// We spawn workers in a more statically way.
|
// We spawn workers in a more statically way.
|
||||||
@ -288,16 +291,18 @@ int nworkers() { return 1; }
|
|||||||
* 1
|
* 1
|
||||||
* vector = --- * (A.* (A*B))*ones_N
|
* vector = --- * (A.* (A*B))*ones_N
|
||||||
* 2
|
* 2
|
||||||
|
*
|
||||||
* We squeezed all that to one function for performance. The row*column multiplication
|
* We squeezed all that to one function for performance. The row*column multiplication
|
||||||
* uses the inner CSC structure of sparse matrix and follows only non-zero members.
|
* uses the inner CSC structure of sparse matrix and follows only non-zero members
|
||||||
|
* with a time complexity of \f$ O(nnz1 + nnz2) \f$.
|
||||||
*
|
*
|
||||||
* \param A The first matrix to use.
|
* \param A The first matrix to use.
|
||||||
* \param B The second matrix to use (they can be the same).
|
* \param B The second matrix to use (they can be the same).
|
||||||
* \return The count vector. RVO is used here.
|
* \return The count vector. RVO is used here.
|
||||||
* \note
|
* \note
|
||||||
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
* We use two methods of calculation based on \c --make_symmetric or \c --triangular_only
|
||||||
* - A full matrix calculation which update only c[i]
|
* - A full matrix calculation
|
||||||
* - A lower triangular matrix which update c[i], c[j], c[k]. This is waaayyy faster.
|
* - A lower triangular matrix
|
||||||
* \warning
|
* \warning
|
||||||
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
* The later(--triangular_only) produce correct results ONLY if we are after the total count.
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user