- /*!
- * \file v3.cpp
- * \brief v3 part of the exercise.
- *
- * \author
- * Christos Choutouridis AEM:8997
- * <cchoutou@ece.auth.gr>
- */
- #include <v3.h>
-
- namespace v3 {
-
- #if defined CILK
-
- /*!
- * Utility function to get/set the number of threads.
- *
- * The number of threads is controlled via the environment variable \c CILK_NWORKERS
- *
- * \return The number of threads used.
- * \note
- * The user can reduce the number with the command-line option \c --max_threads.
- * If so, the smaller of the requested number and the environment's worker count is used.
- */
- int nworkers() {
- if (session.max_threads)
- return (session.max_threads < __cilkrts_get_nworkers()) ?
- session.max_threads : __cilkrts_get_nworkers();
- else
- return __cilkrts_get_nworkers();
- }
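-
- // Example (values chosen for illustration only): with CILK_NWORKERS=8 in the
- // environment and session.max_threads set to 4 via --max_threads, nworkers()
- // returns 4; with session.max_threads set to 16 it returns 8, since the
- // environment provides only 8 workers.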
-
- /*!
- * Calculate and return a vertex-wise count vector.
- *
- * \param A The matrix to use.
- * \return The count vector. RVO is used here.
- * \note
- * We use two calculation methods, selected via \c --make_symmetric or \c --triangular_only:
- * - A full (symmetric) matrix calculation which updates only c[i].
- * - A lower triangular matrix calculation which updates c[i], c[j] and c[k]. This is considerably faster.
- */
- std::vector<value_t> triang_v(matrix& A) {
- std::vector<std::atomic<value_t>> c(A.size());
- std::vector<value_t> ret(A.size());
-
- cilk_for (int i=0 ; i<A.size() ; ++i) {
- for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
- // j iterates over the edges of vertex i
- for (auto k = A.getCol(j.index()); k.index() != k.end() ; ++k) {
- // k iterates over the edges of vertex j
- if (A.get(k.index(), i)) {
- ++ret[i];
- c[j.index()] += (!session.makeSymmetric)? 1:0;
- c[k.index()] += (!session.makeSymmetric)? 1:0;
- }
- }
- }
- if (session.makeSymmetric) {
- ret[i] = ret[i]/2;
- c[i] = c[i]/2;
- }
- }
- for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i];
- return ret;
- }
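-
- /*
- * Worked example (assuming getCol(i) iterates the non-zeros of column i and
- * get(r,c) tests entry (r,c)): for a 3-vertex triangle stored lower-triangular,
- * A(1,0)=A(2,0)=A(2,1)=1, only i=0, j=1, k=2 hits A.get(2,0), giving
- * ret = {1,0,0} and c = {0,1,1}. The merge loop then yields the per-vertex
- * counts {1,1,1}; their sum 3 is divided by 3 in triang_count().
- */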
-
- /*!
- * A sum utility to use as the spawn function for the parallelized sum.
- * Accumulates the sum of \c v over the range [\c begin, \c end) into \c out_sum.
- */
- void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
- for (auto i =begin ; i != end ; ++i)
- out_sum += v[i];
- }
-
- /*!
- * A parallelized version of sum. Just because ;)
- * \return The total sum of vector \c v
- */
- value_t sum (std::vector<value_t>& v) {
- int n = nworkers();
- std::vector<value_t> sum_v(n, 0); // result of each do_sum invocation.
-
- // We split the vector into contiguous chunks and spawn one worker per chunk (static partitioning).
- for (index_t i =0 ; i < n ; ++i) {
- cilk_spawn do_sum(sum_v[i], v, i*v.size()/n, (i+1)*v.size()/n);
- }
- cilk_sync;
-
- // sum the sums (a sum to rule them all)
- value_t s =0; for (auto& it : sum_v) s += it;
- return s;
- }
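-
- // Chunking example: for v.size() == 10 and n == 4 workers the spawned ranges
- // are [0,2), [2,5), [5,7) and [7,10); the integer arithmetic keeps the chunks
- // contiguous and covers every element exactly once.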
-
- #elif defined OMP
-
- /*!
- * A "simple" user defined OpenMP reduction for vector<value_t>
- * \note
- * Not used. Reason: The atomic version of the code performs better.
- */
- #pragma omp declare reduction(vec_value_plus : std::vector<value_t> : \
- std::transform( \
- omp_out.begin(), omp_out.end(), omp_in.begin(), omp_out.begin(), std::plus<value_t>() \
- ) \
- ) \
- initializer(omp_priv = decltype(omp_orig)(omp_orig.size()))
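-
- /*
- * Sketch of how the reduction above would be used if it were enabled (it is
- * intentionally left unused, see the note): 'c' would be a plain
- * std::vector<value_t> and the parallel loop would name it as a reduction
- * variable, e.g.
- *
- *   #pragma omp parallel for schedule(runtime) reduction(vec_value_plus : c)
- *   for (int i=0 ; i<A.size() ; ++i) { ... same body as in triang_v() below ... }
- *
- * Each thread would then accumulate into a private copy created by the
- * initializer clause, and the copies would be merged element-wise by the
- * std::transform / std::plus combiner.
- */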
-
-
- /*!
- * Utility function to get/set the number of threads.
- *
- * The number of threads is controlled via the environment variable \c OMP_NUM_THREADS
- *
- * \return The number of threads used.
- * \note
- * The user can reduce the number with the command-line option \c --max_threads.
- * If so, the smaller of the requested number and the environment's worker count is used.
- */
- int nworkers() {
- if (session.max_threads && session.max_threads < (size_t)omp_get_max_threads()) {
- omp_set_dynamic(0);
- omp_set_num_threads(session.max_threads);
- return session.max_threads;
- }
- else {
- omp_set_dynamic(1);
- return omp_get_max_threads();
- }
- }
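-
- // Example (values chosen for illustration only): with OMP_NUM_THREADS=8 and
- // session.max_threads set to 4, dynamic adjustment is disabled and subsequent
- // parallel regions run with a fixed team of 4 threads; without --max_threads,
- // dynamic adjustment is enabled and the environment's 8 threads are reported.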
-
- /*!
- * Calculate and return a vertex-wise count vector.
- *
- * \param A The matrix to use.
- * \return The count vector. RVO is used here.
- * \note
- * We use two calculation methods, selected via \c --make_symmetric or \c --triangular_only:
- * - A full (symmetric) matrix calculation which updates only c[i].
- * - A lower triangular matrix calculation which updates c[i], c[j] and c[k]. This is considerably faster.
- */
- std::vector<value_t> triang_v(matrix& A) {
- std::vector<std::atomic<value_t>> c(A.size());
- std::vector<value_t> ret(A.size());
-
- // OMP schedule selection
- if (session.dynamic) omp_set_schedule (omp_sched_dynamic, 0);
- else omp_set_schedule (omp_sched_static, 0);
- #pragma omp parallel for schedule(runtime) //reduction(vec_value_plus : c)
- for (int i=0 ; i<A.size() ; ++i) {
- for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
- // j iterates over the edges of vertex i
- for (auto k = A.getCol(j.index()); k.index() != k.end() ; ++k) {
- // k iterates over the edges of vertex j
- if (A.get(k.index(), i)) {
- ++ret[i];
- c[j.index()] += (!session.makeSymmetric)? 1:0;
- c[k.index()] += (!session.makeSymmetric)? 1:0;
- }
- }
- }
- if (session.makeSymmetric) {
- ret[i] = ret[i]/2;
- c[i] = c[i]/2;
- }
- }
- for (index_t i =0 ; i<A.size() ; ++i) ret[i] += c[i];
- return ret;
- }
-
- /*!
- * A parallelized version of sum. Just because ;)
- * \return The total sum of vector \c v
- */
- value_t sum (std::vector<value_t>& v) {
- value_t s =0;
-
- #pragma omp parallel for reduction(+:s)
- for (auto i =0u ; i<v.size() ; ++i)
- s += v[i];
- return s;
- }
-
- #else
-
- //! Return the number of workers.
- //! \note This function is provided just for completeness
- int nworkers() { return 1; }
-
- /*!
- * Calculate and return a vertex-wise count vector.
- *
- * \param A The matrix to use.
- * \return The count vector. RVO is used here.
- * \note
- * We use two calculation methods, selected via \c --make_symmetric or \c --triangular_only:
- * - A full (symmetric) matrix calculation which updates only c[i].
- * - A lower triangular matrix calculation which updates c[i], c[j] and c[k]. This is considerably faster.
- */
- std::vector<value_t> triang_v(matrix& A) {
- std::vector<value_t> c(A.size());
-
- for (int i=0 ; i<A.size() ; ++i) {
- for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
- // j iterates over the edges of vertex i
- for (auto k = A.getCol(j.index()); k.index() != k.end() ; ++k) {
- // k iterates over the edges of vertex j
- if (A.get(k.index(), i)) {
- ++c[i];
- c[j.index()] += (!session.makeSymmetric)? 1:0;
- c[k.index()] += (!session.makeSymmetric)? 1:0;
- }
- }
- }
- if (session.makeSymmetric) c[i] /= 2;
- }
- return c;
- }
-
- /*!
- * Summation functionality.
- * \return The total sum of vector \c v
- */
- value_t sum (std::vector<value_t>& v) {
- value_t s =0;
- for (auto& it : v)
- s += it;
- return s;
- }
-
- #endif
-
- //! Polymorphic interface function for the final triangle count.
- //! Each triangle is counted once per incident vertex, so the summed counts are divided by 3.
- value_t triang_count (std::vector<value_t>& c) {
- return sum(c)/3;
- }
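-
- // Typical call sequence (illustrative): the vertex-wise counts feed the final
- // count, e.g. std::vector<value_t> c = triang_v(A); value_t t = triang_count(c);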
-
- }