|
- /*!
- * \file v4.cpp
- * \brief v4 part of the exercise.
- *
- * \author
- * Christos Choutouridis AEM:8997
- * <cchoutou@ece.auth.gr>
- */
- #include <v4.h>
-
- namespace v4 {
-
- #if defined CILK
-
- // export CILK_NWORKERS=<num>
- int nworkers() {
- if (session.max_threads)
- return (session.max_threads < __cilkrts_get_nworkers()) ?
- session.max_threads : __cilkrts_get_nworkers();
- else
- return __cilkrts_get_nworkers();
- }
-
- std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
- std::vector<value_t> c(A.size());
-
- cilk_for (int i=0 ; i<A.size() ; ++i) {
- for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
- c[i] += A.getRow(i)*B.getCol(j.index());
- }
- }
- if (session.makeSymmetric)
- std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
- return x/2;
- });
- return c;
- }
-
- void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
- for (auto i =begin ; i != end ; ++i)
- out_sum += v[i];
- }
-
- value_t sum (std::vector<value_t>& v) {
- int n = nworkers();
- std::vector<value_t> sum_v(n, 0);
-
- for (index_t i =0 ; i < n ; ++i) {
- cilk_spawn do_sum(sum_v[i], v, i*v.size()/n, (i+1)*v.size()/n);
- }
- cilk_sync;
-
- value_t s =0;
- for (auto& it : sum_v) s += it;
- return s;
- }
-
- #elif defined OMP
-
- /*
- // export OMP_NUM_THREADS=<num>
- */
- int nworkers() {
- if (session.max_threads && session.max_threads < (size_t)omp_get_max_threads()) {
- omp_set_dynamic(0);
- omp_set_num_threads(session.max_threads);
- return session.max_threads;
- }
- else {
- omp_set_dynamic(1);
- return omp_get_max_threads();
- }
- }
-
- std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
- std::vector<value_t> c(A.size());
-
- #pragma omp parallel for shared(c)
- for (int i=0 ; i<A.size() ; ++i) {
- for (auto j = A.getRow(i); j.index() != j.end() ; ++j) {
- c[i] += A.getRow(i)*B.getCol(j.index());
- }
- }
- if (session.makeSymmetric)
- std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
- return x/2;
- });
- return c;
- }
-
- value_t sum (std::vector<value_t>& v) {
- value_t s =0;
-
- #pragma omp parallel for reduction(+:s)
- for (auto i =0u ; i<v.size() ; ++i)
- s += v[i];
- return s;
- }
-
- #elif defined THREADS
-
- /*
- * std::thread::hardware_concurrency()
- */
- int nworkers() {
- if (session.max_threads)
- return (session.max_threads < std::thread::hardware_concurrency()) ?
- session.max_threads : std::thread::hardware_concurrency();
- else
- return std::thread::hardware_concurrency();
- }
-
- std::vector<value_t> mmacc_v_rng(std::vector<value_t>& out, matrix& A, matrix& B, index_t begin, index_t end) {
- for (index_t i=begin ; i<end ; ++i) {
- for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
- out[i] += A.getRow(i)*B.getCol(j.index());
- }
- }
- return out;
- }
-
- std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
- std::vector<std::thread> workers;
- std::vector<value_t> c(A.size());
- int n = nworkers();
-
- for (index_t i=0 ; i<n ; ++i)
- workers.push_back (std::thread (mmacc_v_rng, std::ref(c), std::ref(A), std::ref(B), i*c.size()/n, (i+1)*c.size()/n));
-
- std::for_each(workers.begin(), workers.end(), [](std::thread& t){
- t.join();
- });
- if (session.makeSymmetric)
- std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
- return x/2;
- });
- return c;
- }
-
- void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
- for (auto i =begin ; i != end ; ++i)
- out_sum += v[i];
- }
-
- value_t sum (std::vector<value_t>& v) {
- int n = nworkers();
- std::vector<value_t> sum_v(n, 0);
- std::vector<std::thread> workers;
-
- for (index_t i =0 ; i < n ; ++i)
- workers.push_back (std::thread (do_sum, std::ref(sum_v[i]), std::ref(v), i*v.size()/n, (i+1)*v.size()/n));
-
- std::for_each(workers.begin(), workers.end(), [](std::thread& t){
- t.join();
- });
-
- value_t s =0;
- for (auto& it : sum_v) s += it;
- return s;
- }
-
- #else
-
/*!
 * \brief Worker count for the serial build.
 * \return Always 1.
 */
int nworkers() {
    return 1;
}
-
- std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
- std::vector<value_t> c(A.size());
- for (int i=0 ; i<A.size() ; ++i) {
- for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
- c[i] += A.getRow(i)*B.getCol(j.index());
- }
- }
- if (session.makeSymmetric)
- std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
- return x/2;
- });
- return c;
- }
-
- value_t sum (std::vector<value_t>& v) {
- value_t s =0;
- for (auto& it : v)
- s += it;
- return s;
- }
-
- #endif
-
- std::vector<value_t> triang_v(matrix& A) {
- return mmacc_v(A, A);
- }
-
- value_t triang_count (std::vector<value_t>& c) {
- return (session.makeSymmetric) ? sum(c)/3 : sum(c);
- }
-
- } // namespace v4
|