A triangle counting assignment for the A.U.TH Parallel and Distributed Systems class.
/*!
 * \file   v3.cpp
 * \brief  v3 part of the exercise.
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
#include <v3.h>
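
// The rest of the file provides three interchangeable implementations of the
// same interface (nworkers, triang_v, sum): a Cilk one, an OpenMP one and a
// serial fallback. Exactly one is compiled, selected by the CILK or OMP
// preprocessor macro (presumably passed in by the build system, e.g. -DCILK).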

namespace v3 {

#if defined CILK

/*!
 * Utility function to get/set the number of threads.
 *
 * The number of threads is controlled via the environment variable \c CILK_NWORKERS.
 *
 * \return The number of threads used.
 * \note
 *    The user can reduce the number with the command option \c --max_threads.
 *    If so, the requested number is used even if the environment provides more threads.
 */
int nworkers() {
    if (session.max_threads)
        return (session.max_threads < __cilkrts_get_nworkers()) ?
                session.max_threads : __cilkrts_get_nworkers();
    else
        return __cilkrts_get_nworkers();
}
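
// For example (the binary name is illustrative):
//   CILK_NWORKERS=8 ./triangle_v3 --max_threads 4    -> nworkers() == 4
//   CILK_NWORKERS=8 ./triangle_v3                    -> nworkers() == 8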

/*!
 * Calculate and return a vertex-wise triangle count vector.
 *
 * \param A The matrix to use.
 * \return The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation, selected by \c --make_symmetric or \c --triangular_only:
 *    - A full (symmetric) matrix calculation which updates only c[i].
 *    - A lower triangular matrix calculation which updates c[i], c[j], c[k]. This is way faster.
 */
std::vector<value_t> triang_v(matrix& A) {
    std::vector<std::atomic<value_t>> c(A.size());
    std::vector<value_t> ret(A.size());
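    // ret[i] is written only by the cilk_for iteration that owns i, so it can
    // stay a plain vector. c[j] and c[k] are incremented across iterations,
    // hence the std::atomic elements; the two are merged serially at the end.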
    cilk_for (int i=0 ; i<A.size() ; ++i) {
        for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
            // j lists all the edges of i
            for (auto k = A.getCol(j.index()); k.index() != k.end() ; ++k) {
                // k lists all the edges of j
                if (A.get(k.index(), i)) {
                    ++ret[i];
                    c[j.index()] += (!session.makeSymmetric) ? 1 : 0;
                    c[k.index()] += (!session.makeSymmetric) ? 1 : 0;
                }
            }
        }
        if (session.makeSymmetric) {
            ret[i] = ret[i]/2;
            c[i]   = c[i]/2;
        }
    }
    // merge the atomic contributions into the return vector
    for (index_t i=0 ; i<A.size() ; ++i) ret[i] += c[i];
    return ret;
}

/*!
 * A sum utility to use as a spawn function for the parallelized sum.
 * \param out_sum Output: the sum of \c v from \c begin to \c end is accumulated here.
 */
void do_sum(value_t& out_sum, std::vector<value_t>& v, index_t begin, index_t end) {
    for (auto i=begin ; i != end ; ++i)
        out_sum += v[i];
}

/*!
 * A parallelized version of sum. Just because ;)
 * \return The total sum of vector \c v
 */
value_t sum(std::vector<value_t>& v) {
    int n = nworkers();
    std::vector<value_t> sum_v(n, 0);   // result of each do_sum invocation
    // We spawn the workers statically, one contiguous chunk each.
    for (index_t i=0 ; i < n ; ++i) {
        cilk_spawn do_sum(sum_v[i], v, i*v.size()/n, (i+1)*v.size()/n);
    }
    cilk_sync;
    // sum the sums (a sum to rule them all)
    value_t s = 0;
    for (auto& it : sum_v) s += it;
    return s;
}
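
// Note: a Cilk Plus reducer (cilk::reducer_opadd<value_t> from
// <cilk/reducer_opadd.h>) over a plain cilk_for would be an alternative here;
// the manual spawn/sync version above keeps the per-worker chunking explicit.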

#elif defined OMP

/*!
 * A "simple" user-defined OpenMP reduction for vector<value_t>
 * \note
 *    Not used. Reason: The atomic version of the code performs better.
 */
#pragma omp declare reduction(vec_value_plus : std::vector<value_t> :        \
        std::transform(                                                      \
            omp_out.begin(), omp_out.end(), omp_in.begin(), omp_out.begin(), \
            std::plus<value_t>()                                             \
        )                                                                    \
    )                                                                        \
    initializer(omp_priv = decltype(omp_orig)(omp_orig.size()))
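
// If it were enabled, the reduction would be used as in the commented-out
// clause on the parallel for below: reduction(vec_value_plus : c), with c a
// plain std::vector<value_t> instead of a vector of atomics.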

/*!
 * Utility function to get/set the number of threads.
 *
 * The number of threads is controlled via the environment variable \c OMP_NUM_THREADS.
 *
 * \return The number of threads used.
 * \note
 *    The user can reduce the number with the command option \c --max_threads.
 *    If so, the requested number is used even if the environment provides more threads.
 */
int nworkers() {
    if (session.max_threads && session.max_threads < (size_t)omp_get_max_threads()) {
        omp_set_dynamic(0);
        omp_set_num_threads(session.max_threads);
        return session.max_threads;
    }
    else {
        omp_set_dynamic(1);
        return omp_get_max_threads();
    }
}
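
// omp_set_dynamic(0) pins the team size, so a subsequent parallel region uses
// exactly session.max_threads threads; with omp_set_dynamic(1) the runtime may
// still choose fewer than omp_get_max_threads().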

/*!
 * Calculate and return a vertex-wise triangle count vector.
 *
 * \param A The matrix to use.
 * \return The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation, selected by \c --make_symmetric or \c --triangular_only:
 *    - A full (symmetric) matrix calculation which updates only c[i].
 *    - A lower triangular matrix calculation which updates c[i], c[j], c[k]. This is way faster.
 */
std::vector<value_t> triang_v(matrix& A) {
    std::vector<std::atomic<value_t>> c(A.size());
    std::vector<value_t> ret(A.size());
    // OMP schedule selection
    if (session.dynamic) omp_set_schedule(omp_sched_dynamic, 0);
    else                 omp_set_schedule(omp_sched_static, 0);
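    // schedule(runtime) below defers the choice to the omp_set_schedule()
    // calls above, so the schedule can be switched (via session.dynamic,
    // presumably a command-line option) without recompiling.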
    #pragma omp parallel for schedule(runtime) //reduction(vec_value_plus : c)
    for (int i=0 ; i<A.size() ; ++i) {
        for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
            // j lists all the edges of i
            for (auto k = A.getCol(j.index()); k.index() != k.end() ; ++k) {
                // k lists all the edges of j
                if (A.get(k.index(), i)) {
                    ++ret[i];
                    c[j.index()] += (!session.makeSymmetric) ? 1 : 0;
                    c[k.index()] += (!session.makeSymmetric) ? 1 : 0;
                }
            }
        }
        if (session.makeSymmetric) {
            ret[i] = ret[i]/2;
            c[i]   = c[i]/2;
        }
    }
    // merge the atomic contributions into the return vector
    for (index_t i=0 ; i<A.size() ; ++i) ret[i] += c[i];
    return ret;
}

/*!
 * A parallelized version of sum. Just because ;)
 * \return The total sum of vector \c v
 */
value_t sum(std::vector<value_t>& v) {
    value_t s = 0;
    #pragma omp parallel for reduction(+:s)
    for (auto i=0u ; i<v.size() ; ++i)
        s += v[i];
    return s;
}
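
// The built-in reduction(+:s) here is the OpenMP counterpart of the manual
// spawn/sync chunking used in the Cilk version of sum() above.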

#else

//! Return the number of workers.
//! \note This function is here just for completeness.
int nworkers() { return 1; }

/*!
 * Calculate and return a vertex-wise triangle count vector.
 *
 * \param A The matrix to use.
 * \return The count vector. RVO is used here.
 * \note
 *    We use two methods of calculation, selected by \c --make_symmetric or \c --triangular_only:
 *    - A full (symmetric) matrix calculation which updates only c[i].
 *    - A lower triangular matrix calculation which updates c[i], c[j], c[k]. This is way faster.
 */
std::vector<value_t> triang_v(matrix& A) {
    std::vector<value_t> c(A.size());
    for (int i=0 ; i<A.size() ; ++i) {
        for (auto j = A.getCol(i); j.index() != j.end() ; ++j) {
            // j lists all the edges of i
            for (auto k = A.getCol(j.index()); k.index() != k.end() ; ++k) {
                // k lists all the edges of j
                if (A.get(k.index(), i)) {
                    ++c[i];
                    c[j.index()] += (!session.makeSymmetric) ? 1 : 0;
                    c[k.index()] += (!session.makeSymmetric) ? 1 : 0;
                }
            }
        }
        if (session.makeSymmetric) c[i] /= 2;
    }
    return c;
}

/*!
 * Summation functionality.
 * \return The total sum of vector \c v
 */
value_t sum(std::vector<value_t>& v) {
    value_t s = 0;
    for (auto& it : v)
        s += it;
    return s;
}

#endif

//! Common interface function for the final triangle count (uses whichever
//! sum() variant was compiled). Each triangle is counted once per vertex,
//! so the total number of triangles is sum(c)/3.
value_t triang_count(std::vector<value_t>& c) {
    return sum(c)/3;
}

} // namespace v3
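
// Typical call sequence (a sketch; loading the matrix happens elsewhere):
//   matrix A = /* load input matrix */;
//   auto c = v3::triang_v(A);              // per-vertex triangle counts
//   value_t total = v3::triang_count(c);   // total number of triangles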