A triangle-counting assignment for the A.U.Th. Parallel and Distributed Systems class.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

198 lines
4.7 KiB

  1. /*!
  2. * \file v4.cpp
  3. * \brief v4 part of the exercise.
  4. *
  5. * \author
  6. * Christos Choutouridis AEM:8997
  7. * <cchoutou@ece.auth.gr>
  8. */
  9. #include <v4.h>
  10. namespace v4 {
  11. #if defined CILK
  12. // export CILK_NWORKERS=<num>
  13. int nworkers() {
  14. if (session.max_threads)
  15. return (session.max_threads < __cilkrts_get_nworkers()) ?
  16. session.max_threads : __cilkrts_get_nworkers();
  17. else
  18. return __cilkrts_get_nworkers();
  19. }
  20. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  21. std::vector<value_t> c(A.size());
  22. cilk_for (int i=0 ; i<A.size() ; ++i) {
  23. for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
  24. c[i] += A.getRow(i)*B.getCol(j.index());
  25. }
  26. }
  27. if (session.makeSymmetric)
  28. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  29. return x/2;
  30. });
  31. return c;
  32. }
  33. void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
  34. for (auto i =begin ; i != end ; ++i)
  35. out_sum += v[i];
  36. }
  37. value_t sum (std::vector<value_t>& v) {
  38. int n = nworkers();
  39. std::vector<value_t> sum_v(n, 0);
  40. for (index_t i =0 ; i < n ; ++i) {
  41. cilk_spawn do_sum(sum_v[i], v, i*v.size()/n, (i+1)*v.size()/n);
  42. }
  43. cilk_sync;
  44. value_t s =0;
  45. for (auto& it : sum_v) s += it;
  46. return s;
  47. }
  48. #elif defined OMP
  49. /*
  50. // export OMP_NUM_THREADS=<num>
  51. */
  52. int nworkers() {
  53. if (session.max_threads && session.max_threads < (size_t)omp_get_max_threads()) {
  54. omp_set_dynamic(0);
  55. omp_set_num_threads(session.max_threads);
  56. return session.max_threads;
  57. }
  58. else {
  59. omp_set_dynamic(1);
  60. return omp_get_max_threads();
  61. }
  62. }
  63. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  64. std::vector<value_t> c(A.size());
  65. #pragma omp parallel for shared(c)
  66. for (int i=0 ; i<A.size() ; ++i) {
  67. for (auto j = A.getRow(i); j.index() != j.end() ; ++j) {
  68. c[i] += A.getRow(i)*B.getCol(j.index());
  69. }
  70. }
  71. if (session.makeSymmetric)
  72. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  73. return x/2;
  74. });
  75. return c;
  76. }
  77. value_t sum (std::vector<value_t>& v) {
  78. value_t s =0;
  79. #pragma omp parallel for reduction(+:s)
  80. for (auto i =0u ; i<v.size() ; ++i)
  81. s += v[i];
  82. return s;
  83. }
  84. #elif defined THREADS
  85. /*
  86. * std::thread::hardware_concurrency()
  87. */
  88. int nworkers() {
  89. if (session.max_threads)
  90. return (session.max_threads < std::thread::hardware_concurrency()) ?
  91. session.max_threads : std::thread::hardware_concurrency();
  92. else
  93. return std::thread::hardware_concurrency();
  94. }
  95. std::vector<value_t> mmacc_v_rng(std::vector<value_t>& out, matrix& A, matrix& B, index_t begin, index_t end) {
  96. for (index_t i=begin ; i<end ; ++i) {
  97. for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
  98. out[i] += A.getRow(i)*B.getCol(j.index());
  99. }
  100. }
  101. return out;
  102. }
  103. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  104. std::vector<std::thread> workers;
  105. std::vector<value_t> c(A.size());
  106. int n = nworkers();
  107. for (index_t i=0 ; i<n ; ++i)
  108. workers.push_back (std::thread (mmacc_v_rng, std::ref(c), std::ref(A), std::ref(B), i*c.size()/n, (i+1)*c.size()/n));
  109. std::for_each(workers.begin(), workers.end(), [](std::thread& t){
  110. t.join();
  111. });
  112. if (session.makeSymmetric)
  113. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  114. return x/2;
  115. });
  116. return c;
  117. }
  118. void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
  119. for (auto i =begin ; i != end ; ++i)
  120. out_sum += v[i];
  121. }
  122. value_t sum (std::vector<value_t>& v) {
  123. int n = nworkers();
  124. std::vector<value_t> sum_v(n, 0);
  125. std::vector<std::thread> workers;
  126. for (index_t i =0 ; i < n ; ++i)
  127. workers.push_back (std::thread (do_sum, std::ref(sum_v[i]), std::ref(v), i*v.size()/n, (i+1)*v.size()/n));
  128. std::for_each(workers.begin(), workers.end(), [](std::thread& t){
  129. t.join();
  130. });
  131. value_t s =0;
  132. for (auto& it : sum_v) s += it;
  133. return s;
  134. }
  135. #else
  136. int nworkers() { return 1; }
  137. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  138. std::vector<value_t> c(A.size());
  139. for (int i=0 ; i<A.size() ; ++i) {
  140. for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
  141. c[i] += A.getRow(i)*B.getCol(j.index());
  142. }
  143. }
  144. if (session.makeSymmetric)
  145. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  146. return x/2;
  147. });
  148. return c;
  149. }
  150. value_t sum (std::vector<value_t>& v) {
  151. value_t s =0;
  152. for (auto& it : v)
  153. s += it;
  154. return s;
  155. }
  156. #endif
  157. std::vector<value_t> triang_v(matrix& A) {
  158. return mmacc_v(A, A);
  159. }
  160. value_t triang_count (std::vector<value_t>& c) {
  161. return (session.makeSymmetric) ? sum(c)/3 : sum(c);
  162. }
  163. } // namespace v4