A triangle counting assignment for A.U.TH Parallel and distributed systems class.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

v4.cpp 4.7 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /*!
  2. * \file v4.cpp
  3. * \brief v4 part of the exercise.
  4. *
  5. * \author
  6. * Christos Choutouridis AEM:8997
  7. * <cchoutou@ece.auth.gr>
  8. */
  9. #include <v4.h>
  10. namespace v4 {
  11. #if defined CILK
  12. // export CILK_NWORKERS=<num>
  13. int nworkers() {
  14. if (session.max_threads)
  15. return (session.max_threads < __cilkrts_get_nworkers()) ?
  16. session.max_threads : __cilkrts_get_nworkers();
  17. else
  18. return __cilkrts_get_nworkers();
  19. }
  20. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  21. std::vector<value_t> c(A.size());
  22. cilk_for (int i=0 ; i<A.size() ; ++i) {
  23. for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
  24. c[i] += A.getRow(i)*B.getCol(j.index());
  25. }
  26. }
  27. if (session.makeSymmetric)
  28. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  29. return x/2;
  30. });
  31. return c;
  32. }
  33. void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
  34. for (auto i =begin ; i != end ; ++i)
  35. out_sum += v[i];
  36. }
  37. value_t sum (std::vector<value_t>& v) {
  38. int n = nworkers();
  39. std::vector<value_t> sum_v(n, 0);
  40. for (index_t i =0 ; i < n ; ++i) {
  41. cilk_spawn do_sum(sum_v[i], v, i*v.size()/n, (i+1)*v.size()/n);
  42. }
  43. cilk_sync;
  44. value_t s =0;
  45. for (auto& it : sum_v) s += it;
  46. return s;
  47. }
  48. #elif defined OMP
  49. /*
  50. // export OMP_NUM_THREADS=<num>
  51. */
  52. int nworkers() {
  53. if (session.max_threads && session.max_threads < (size_t)omp_get_max_threads()) {
  54. omp_set_dynamic(0);
  55. omp_set_num_threads(session.max_threads);
  56. return session.max_threads;
  57. }
  58. else {
  59. omp_set_dynamic(1);
  60. return omp_get_max_threads();
  61. }
  62. }
  63. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  64. std::vector<value_t> c(A.size());
  65. #pragma omp parallel for shared(c)
  66. for (int i=0 ; i<A.size() ; ++i) {
  67. for (auto j = A.getRow(i); j.index() != j.end() ; ++j) {
  68. c[i] += A.getRow(i)*B.getCol(j.index());
  69. }
  70. }
  71. if (session.makeSymmetric)
  72. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  73. return x/2;
  74. });
  75. return c;
  76. }
  77. value_t sum (std::vector<value_t>& v) {
  78. value_t s =0;
  79. #pragma omp parallel for reduction(+:s)
  80. for (auto i =0u ; i<v.size() ; ++i)
  81. s += v[i];
  82. return s;
  83. }
  84. #elif defined THREADS
  85. /*
  86. * std::thread::hardware_concurrency()
  87. */
  88. int nworkers() {
  89. if (session.max_threads)
  90. return (session.max_threads < std::thread::hardware_concurrency()) ?
  91. session.max_threads : std::thread::hardware_concurrency();
  92. else
  93. return std::thread::hardware_concurrency();
  94. }
  95. std::vector<value_t> mmacc_v_rng(std::vector<value_t>& out, matrix& A, matrix& B, index_t begin, index_t end) {
  96. for (index_t i=begin ; i<end ; ++i) {
  97. for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
  98. out[i] += A.getRow(i)*B.getCol(j.index());
  99. }
  100. }
  101. return out;
  102. }
  103. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  104. std::vector<std::thread> workers;
  105. std::vector<value_t> c(A.size());
  106. int n = nworkers();
  107. for (index_t i=0 ; i<n ; ++i)
  108. workers.push_back (std::thread (mmacc_v_rng, std::ref(c), std::ref(A), std::ref(B), i*c.size()/n, (i+1)*c.size()/n));
  109. std::for_each(workers.begin(), workers.end(), [](std::thread& t){
  110. t.join();
  111. });
  112. if (session.makeSymmetric)
  113. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  114. return x/2;
  115. });
  116. return c;
  117. }
  118. void do_sum (value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
  119. for (auto i =begin ; i != end ; ++i)
  120. out_sum += v[i];
  121. }
  122. value_t sum (std::vector<value_t>& v) {
  123. int n = nworkers();
  124. std::vector<value_t> sum_v(n, 0);
  125. std::vector<std::thread> workers;
  126. for (index_t i =0 ; i < n ; ++i)
  127. workers.push_back (std::thread (do_sum, std::ref(sum_v[i]), std::ref(v), i*v.size()/n, (i+1)*v.size()/n));
  128. std::for_each(workers.begin(), workers.end(), [](std::thread& t){
  129. t.join();
  130. });
  131. value_t s =0;
  132. for (auto& it : sum_v) s += it;
  133. return s;
  134. }
  135. #else
  /*!
   * Fallback used when none of CILK, OMP or THREADS is defined.
   *
   * \return Always 1.
   */
  int nworkers() {
    return 1;
  }
  137. std::vector<value_t> mmacc_v(matrix& A, matrix& B) {
  138. std::vector<value_t> c(A.size());
  139. for (int i=0 ; i<A.size() ; ++i) {
  140. for (auto j = A.getRow(i); j.index() != j.end() ; ++j){
  141. c[i] += A.getRow(i)*B.getCol(j.index());
  142. }
  143. }
  144. if (session.makeSymmetric)
  145. std::transform (c.begin(), c.end(), c.begin(), [] (value_t& x) {
  146. return x/2;
  147. });
  148. return c;
  149. }
  150. value_t sum (std::vector<value_t>& v) {
  151. value_t s =0;
  152. for (auto& it : v)
  153. s += it;
  154. return s;
  155. }
  156. #endif
  157. std::vector<value_t> triang_v(matrix& A) {
  158. return mmacc_v(A, A);
  159. }
  160. value_t triang_count (std::vector<value_t>& c) {
  161. return (session.makeSymmetric) ? sum(c)/3 : sum(c);
  162. }
  163. } // namespace v4