From f849e8a3090ef626f88f217b1fc2aa029fa79720 Mon Sep 17 00:00:00 2001 From: Christos Choutouridis Date: Thu, 9 Jan 2025 00:30:16 +0200 Subject: [PATCH] HW2: RC4 - [Not tested] The final version --- homework_2/Makefile | 2 +- homework_2/include/config.h | 11 ++- homework_2/include/distsort.hpp | 102 ++++++++++++++++++---- homework_2/include/utils.hpp | 111 ++++++++++++++++++++++-- homework_2/report/homework_2_report.pdf | Bin 98794 -> 98788 bytes homework_2/src/distsort.cpp | 16 ++-- homework_2/src/main.cpp | 91 ++++++++++++++----- 7 files changed, 269 insertions(+), 64 deletions(-) diff --git a/homework_2/Makefile b/homework_2/Makefile index 1905b6e..5d76421 100644 --- a/homework_2/Makefile +++ b/homework_2/Makefile @@ -42,7 +42,7 @@ BUILD_DIR := bin OBJ_DIR := $(BUILD_DIR)/obj DEP_DIR := $(BUILD_DIR)/.dep -OUTPUT_DIR := out-rc3b +OUTPUT_DIR := out # ========== Compiler settings ========== # Compiler flags for debug and release diff --git a/homework_2/include/config.h b/homework_2/include/config.h index e3bfd42..5c62edf 100644 --- a/homework_2/include/config.h +++ b/homework_2/include/config.h @@ -49,14 +49,19 @@ static constexpr size_t MAX_PIPELINE_SIZE = 64UL; using distValue_t = uint32_t; /*! - * Session option for each invocation of the executable + * Session option for each invocation of the executable. + * + * @note + * The values of the members are set from the command line. */ struct config_t { size_t arraySize{DEFAULT_DATA_SIZE}; //!< The array size of the local data to sort. - size_t pipeline{1UL}; //!< Pipeline stages + bool exchangeOpt{false}; //!< Flag to request the exchange optimization + size_t pipeline{1UL}; //!< Pipeline stages (1 to disable) bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. bool ndebug{false}; //!< Skips debug trap on DEBUG builds. - bool perf{false}; //!< Enable performance timing measurements and prints. + size_t perf{1}; //!< Enable performance timing measurements and prints and repeat + //!< the performs the sorting times to average the measurements bool verbose{false}; //!< Flag to enable verbose output to stdout. }; diff --git a/homework_2/include/distsort.hpp b/homework_2/include/distsort.hpp index 3eb3575..f95d21d 100644 --- a/homework_2/include/distsort.hpp +++ b/homework_2/include/distsort.hpp @@ -22,7 +22,15 @@ #include "utils.hpp" -extern Timing TfullSort, Texchange, Tminmax, TelbowSort; // make timers public +/* + * Exported timers + */ +extern Timing Ttotal; +extern Timing TfullSort; +extern Timing Texchange; +extern Timing Tminmax; +extern Timing TelbowSort; + /*! * Enumerator for the different versions of the sorting method @@ -167,6 +175,9 @@ void fullSort(RangeT& data, bool ascending) noexcept { else { __gnu_parallel::sort(data.begin(), data.end(), std::greater<>()); } + + if (config.exchangeOpt) + updateMinMax(localStat, data); } /*! @@ -231,6 +242,43 @@ void elbowSort(ShadowedDataT& data, bool ascending) noexcept { elbowSortCore(data, ascending, std::greater<>()); } +/*! + * Predicate for exchange optimization. Returns true only if an exchange between partners is needed. + * In order to do that we exchange min and max statistics of the partner's data. + * + * @tparam StatT Statistics data type (for min-max) + * + * @param lstat [const StatT] Reference to the local statistic data + * @param rstat [StatT] Reference to the remote statistic data to fill + * @param part [mpi_id_t] The partner for the exchange + * @param tag [int] The tag to use for the exchange of stats + * @param keepSmall [bool] Flag to indicate if the local thread keeps the small ro the large values + * @return True if we need data exchange, false otherwise + */ +template +bool needsExchange(const StatT& lstat, StatT& rstat, mpi_id_t part, int tag, bool keepSmall) { + timeCall(Texchange, mpi.exchange_it, lstat, rstat, part, tag); + return (keepSmall) ? + rstat.min < lstat.max // Lmin: rstat.min - Smax: lstat.max + : lstat.min < rstat.max; // Lmin: lstat.min - Smax: rstat.max +} + + +/*! + * Update stats utility + * + * @tparam RangeT A range type with random access iterator + * @tparam StatT Statistics data type (for min-max) + * + * @param stat [StatT] Reference to the statistic data to update + * @param data [const RangeT] Reference to the sequence to extract stats from + */ +template +void updateMinMax(StatT& stat, const RangeT& data) noexcept { + auto [min, max] = std::minmax_element(data.begin(), data.end()); + stat.min = *min; + stat.max = *max; +} /*! * Takes two sequences and selects either the larger or the smaller items @@ -276,7 +324,11 @@ void keepMinOrMax(ValueT* local, const ValueT* remote, size_t count, bool keepSm size_t tagGenerator(size_t depth, size_t step, size_t stage = 0); /*! - * A pipeline loop for mixing min-max process with mpi data exchange + * An exchange functionality to support both Bubbletonic and Bitonic sort algorithms. + * + * @note + * In case of pipeline request it switches to non-blocking MPI communication for + * pipelining min-max process with mpi data exchange * * @tparam ShadowedDataT A Shadowed buffer type with random access iterator. * @@ -289,28 +341,38 @@ size_t tagGenerator(size_t depth, size_t step, size_t stage = 0); * The @c tag is increased inside the pipeline loop for each different data exchange */ template -void exchangePipeline(ShadowedDataT& data, mpi_id_t partner, bool keepSmall, int tag) { +void exchange(ShadowedDataT& data, mpi_id_t partner, bool keepSmall, int tag) { using Value_t = typename ShadowedDataT::value_type; // Init counters and pointers - size_t count = data.size() / config.pipeline; Value_t* active = data.getActive().data(); Value_t* shadow = data.getShadow().data(); + size_t count = data.size() / config.pipeline; - // Pipeline - Texchange.start(); - mpi.exchange_start(active, shadow, count, partner, tag); - for (size_t stage = 0 ; stage < config.pipeline ; active += count, shadow += count) { - // Wait previous chunk - mpi.exchange_wait(); Texchange.stop(); - if (++stage < config.pipeline) { - // Start next chunk if there is a next one - Texchange.start(); - mpi.exchange_start(active + count, shadow + count, count, partner, ++tag); + if (config.pipeline > 1) { + // Pipeline case - use async MPI + Texchange.start(); + mpi.exchange_start(active, shadow, count, partner, tag); + for (size_t stage = 0; stage < config.pipeline; active += count, shadow += count) { + // Wait previous chunk + mpi.exchange_wait(); + Texchange.stop(); + if (++stage < config.pipeline) { + // Start next chunk if there is a next one + Texchange.start(); + mpi.exchange_start(active + count, shadow + count, count, partner, ++tag); + } + // process the arrived data + timeCall(Tminmax, keepMinOrMax, active, shadow, count, keepSmall); } - // process the arrived data + } + else { + // No pipeline - use blocking MPI + timeCall(Texchange, mpi.exchange, active, shadow, count, partner, tag); timeCall(Tminmax, keepMinOrMax, active, shadow, count, keepSmall); } + if (config.exchangeOpt) + updateMinMax(localStat, data); } /*! @@ -339,8 +401,10 @@ void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { isActive(part, Processes) ) { // Exchange with partner, keep nim-or-max and sort - O(N) int tag = static_cast(tagGenerator(0, step)); - exchangePipeline(data, part, ks, tag); - timeCall(TelbowSort, elbowSort, data, ascending(rank, Processes)); + if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) { + exchange(data, part, ks, tag); + timeCall(TelbowSort, elbowSort, data, ascending(rank, Processes)); + } } } @@ -378,7 +442,9 @@ void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { auto ks = keepSmall(rank, part, depth); // Exchange with partner, keep nim-or-max int tag = static_cast(tagGenerator(depth, step)); - exchangePipeline(data, part, ks, tag); + if (!config.exchangeOpt || needsExchange(localStat, remoteStat, part, tag++, ks)) { + exchange(data, part, ks, tag); + } } // sort - O(N) timeCall(TelbowSort, elbowSort, data, ascending(rank, depth)); diff --git a/homework_2/include/utils.hpp b/homework_2/include/utils.hpp index f1c355e..6936d86 100644 --- a/homework_2/include/utils.hpp +++ b/homework_2/include/utils.hpp @@ -17,6 +17,22 @@ #include "config.h" +/*! + * Min-Max statistics data for exchange optimization + * @tparam Value_t The underlying data type of the sequence data + */ +template +struct Stat_t { + using value_type = Value_t; //!< meta-export the type + + Value_t min{}; //!< The minimum value of the sequence + Value_t max{}; //!< The maximum value of the sequence +}; + +//! Application data selection alias +using distStat_t = Stat_t; +extern distStat_t localStat, remoteStat; // Make stats public + /* * MPI_ dispatcher mechanism */ @@ -78,6 +94,64 @@ struct MPI_t { name_ = std::string (processor_name, name_len); } + /*! + * Exchange one data object of type @c T with partner as part of the sorting network of both + * bubbletonic or bitonic sorting algorithms. + * + * This function matches a transmit and a receive in order for fully exchanged the data object + * between current node and partner. + * + * @tparam T The object type + * + * @param local [const T&] Reference to the local object to send + * @param remote [T&] Reference to the object to receive data from partner + * @param partner [mpi_id_t] The partner for the exchange + * @param tag [int] The tag to use for the MPI communication + */ + template + void exchange_it(const T& local, T& remote, ID_t partner, int tag) { + if (tag < 0) + throw std::runtime_error("(MPI) exchange_it() [tag] - Out of bound"); + MPI_Status status; + int err; + if ((err = MPI_Sendrecv( + &local, sizeof(T), MPI_BYTE, partner, tag, + &remote, sizeof(T), MPI_BYTE, partner, tag, + MPI_COMM_WORLD, &status + )) != MPI_SUCCESS) + mpi_throw(err, "(MPI) MPI_Sendrecv() [item] - "); + } + + /*! + * Exchange data with partner as part of the sorting network of both bubbletonic or bitonic + * sorting algorithms. + * + * This function matches a transmit and a receive in order for fully exchanged data between + * current node and partner. + * + * @tparam T The inner valur type used in buffer + * + * @param ldata [const ValueT*] Pointer to local data to send + * @param rdata [ValueT*] Pointer to buffer to receive data from partner + * @param count [size_t] The number of data to exchange + * @param partner [mpi_id_t] The partner for the exchange + * @param tag [int] The tag to use for the MPI communication + */ + template + void exchange(const ValueT* ldata, ValueT* rdata, size_t count, ID_t partner, int tag) { + if (tag < 0) + throw std::runtime_error("(MPI) exchange_data() [tag] - Out of bound"); + + MPI_Datatype datatype = MPI_TypeMapper::getType(); + MPI_Status status; + int err; + if ((err = MPI_Sendrecv( + ldata, count, datatype, partner, tag, + rdata, count, datatype, partner, tag, + MPI_COMM_WORLD, &status + )) != MPI_SUCCESS) + mpi_throw(err, "(MPI) MPI_Sendrecv() [data] - "); + } /*! * Initiate a data exchange data with partner using non-blocking Isend-Irecv, as part of the @@ -353,33 +427,51 @@ struct Timing { using milliseconds = std::chrono::milliseconds; using seconds = std::chrono::seconds; + //! Setup measurement rounds + void init(size_t rounds) { + duration_.resize(rounds); + for (auto& d : duration_) + d = Tduration::zero(); + } + //! tool to mark the starting point Tpoint start() noexcept { return mark_ = std::chrono::steady_clock::now(); } //! tool to mark the ending point Tpoint stop() noexcept { Tpoint now = std::chrono::steady_clock::now(); - duration_ += dt(now, mark_); + duration_[current_] += dt(now, mark_); return now; } + //! Switch timing slot + void next() noexcept { + ++current_; + current_ %= duration_.size(); + } + + Tduration& median() noexcept { + std::sort(duration_.begin(), duration_.end()); + return duration_[duration_.size()/2]; + } + //! A duration calculation utility static Tduration dt(Tpoint t2, Tpoint t1) noexcept { return std::chrono::duration_cast(t2 - t1); } //! Tool to print the time interval - void print_duration(const char *what, mpi_id_t rank) noexcept { - if (std::chrono::duration_cast(duration_).count() < 10000) + static void print_duration(const Tduration& duration, const char *what, mpi_id_t rank) noexcept { + if (std::chrono::duration_cast(duration).count() < 10000) std::cout << "[Timing] (Rank " << rank << ") " << what << ": " - << std::to_string(std::chrono::duration_cast(duration_).count()) << " [usec]\n"; - else if (std::chrono::duration_cast(duration_).count() < 10000) + << std::to_string(std::chrono::duration_cast(duration).count()) << " [usec]\n"; + else if (std::chrono::duration_cast(duration).count() < 10000) std::cout << "[Timing] (Rank " << rank << ") " << what << ": " - << std::to_string(std::chrono::duration_cast(duration_).count()) << " [msec]\n"; + << std::to_string(std::chrono::duration_cast(duration).count()) << " [msec]\n"; else { char stime[26]; // fit ulong - auto sec = std::chrono::duration_cast(duration_).count(); - auto msec = (std::chrono::duration_cast(duration_).count() % 1000) / 10; // keep 2 digit + auto sec = std::chrono::duration_cast(duration).count(); + auto msec = (std::chrono::duration_cast(duration).count() % 1000) / 10; // keep 2 digit std::sprintf(stime, "%ld.%1ld", sec, msec); std::cout << "[Timing] (Rank " << rank << ") " << what << ": " << stime << " [sec]\n"; } @@ -387,8 +479,9 @@ struct Timing { } private: + size_t current_{0}; Tpoint mark_{}; - Tduration duration_{}; + std::vector duration_{1}; }; /*! diff --git a/homework_2/report/homework_2_report.pdf b/homework_2/report/homework_2_report.pdf index fa35cf2b13d0c06c249c564dd4b35e81c44c34a7..670a638f1f8db5e9c097021662f2cadc58ae9324 100644 GIT binary patch delta 9927 zcmai)LwF^C5@tKLZKsoTY}+@RXA$Swt-=H=ElBff0lKW|wlQYod%RB&LEfD3A8Z~NHB z47~?{kki1@41ClgzS&#uD}Los2g+~(JYA65ru1<}uxqZQ{k!~i>>0`K-XN@187J}# zTN{He;LYvLTwIf&L&SmXf`afauFmGhcJQ9-5{WkQy5_heGiOJSz4?gk7#S{QC&5wl z4fFljs?e;$Ei~j9;c2)nSmnubw5h4J!7_Mh*m;ie@8+xh^eh$7Dn_m5IL7wk<_ReB zB8dvpo8;wee_nI1hXxm>2ARGO{l3P&Uj=3!5<}!A<0=4%j_f)l~*G?TXtFTP_jQZ}G6lmh$+oPJv<{@r2MJRMy1etFqt!)+u8& zv(n8goPW#7$#XK+S~8U_TqGT|iPvb(n5V}q&y$N$rJ4TChz0~J_^La;rWUFeO^pqe zWZE`YCWktWF~N_ut}(tWE6_1X``48ftZwS zi85n5K~l7XCjKwR-EkiW?U-AA3W&F}?$)qUMd{9ubooHP09GNX3>tU|5J-gJ*0n6l z0YD(^aY&=t6-f1@4L=sY97SPK?~5!|)4d4cD#f=`@g+pC6uGAsZeg?*N{$fEk0U{p z?(n01?aE#=80L4^^v*%|uuMQqz`{=7%)u4+ahJCx6w21-^|g`DZrY6l=@J&@nV4#; z*e|F!KT04jpQyhRf!kIp{Lfi(h+ho9N_8C^PG|5*aaea47L!TrWrBkp%k{Z3`~}VD zoZtYvBYS6VZ%Lsjse3>1({?3Fabw?d2x@s+VYqhrTGu!ZBsTm{p9jhu7tUfX#==}Z z@sRt_v^e!~@+<_Q3$9U7handBEj3z-PfZ+Q9?*bIYpdCplr%8yqI(#PwXyTM!I9`n zU9k*x`NJ<9>FL_R_2Fd@NANji9(qKPI-*%wKxh+2hr$Do8vm34b-Bslo_Oqf7dA)W z&qCK<+7U;1a1pO*fh71BS%hw}nXALI*4si{LTvs-ryVD1kJj8T0BaFs#$NM!#yc*V(57o=L2bpI#NThJxLib7XteMB#PGPsNq-5Ms$ao175UV!Rm>bu zSn`qi9Fft>+cS$cAcz@QhX^sZ#RiK(7mnqFwwn1>c&K|A+{=P^DcGbiMOOMrri7J` z3sA9W%5A_I@@Kj+eg|!J$g+P+TC=Kot1|Gq?4@yQ=NaVYPA?`jTORJbRBAGBP&&{t z{ulpw7Rm9lAL^wctgA3U$G_R8^mDWK{E@4P^@VJmCy*WE@ZYYW6*EE&bFjhft75;^ z(=xQ%8zncP0j@g2#R~^uw_dGm`z@u?=Mu3zYOK%G6-`+^qpb|Me6;#?Y4g?5V>U}5-~## zpL0XAX5^E~Z8N^i*hC-w&Z=xa^_uOH2ANvA64%1uRzq1KA7=>xU z!fO?s{sF=kI<%pE@BcAT%t~|gooTuk%-j9`-j5y^m5(GJr5aK6MQgPlo@3@9D{#{L zQF9TLkN&-VUu25=iozDs?avJ$EHf85)g}6i5CZV(W#rZ)UUg|W?@bwj5qkLGhg*db zdZ@iF^T{LTyu+QsZ2>Z~aUTegVF`XLM?LHBeSn!O`K%OvSa4B~w z5@?`RzR8b0M&iV8db1CGoTo`ne5Y?fCA1}pg;u<#YIDOu`zcHpk8d1p>k-rU&hvMk zj+>xt_PE63g?V5QdxVPg%&!a|o0RX$d2D4&0k6{PWOhGMlIir>G&7YqfQoFUB*#$2664Yu5ph|`ITHquJ zKZ*=!Ez1OoEH{BKQDHAB-Y$xrBRHL7*IGxC$iN)!4I6vU+i&?UxWiWoY=Jw!n8qm4 zOk22?)<9z2nq9z)au0zN`9TTW2aTgnhqF!=@Vo!Z@neL~6F!M$TV+e=%bUdS zI&kqd%Vd+GM)P0=+a%H~34WiHabEboo)rPe)DP&y*l=QdFdItpSjr5_yCuX{1ntv% zO@iXSx^E%!^KR1OZLwXE3zy>0beB)C7$B0#c)~?4zm9Tg%p)IbSVGkq71R(ClyTdY zowce{t-z(CdL~^qHyt3wsT1%*D5BVRFk|Yqj9l5Vae`c|Q?J*-uKe>P8c{cmur7gf z19P52lyk&4f>Lg6BdrIm7fR4GA@v;*RiiOn6}nZP7XLbi)%GeNA5}g>jj^W*`_Euz zNcerMJt1eGwm&$r*VlD}{gVS{e1@E!U_Xyq;0Z~9aty-N3441v$Cb~=`CPE}hcP1i z(YrPPa7NY(8PZ)lhrsr>p|Yu>NmAfrNurpK&`mcY8RmnCuprGT3j1QM8_pQa+O9Mc z0!)abXWj=k&`aVBD`7aUj=Edz!&`!=+~KGI$;;ah?seh$`dSvQ;CfCv&l9L99v!~- znKQN(7BW59JG4Ur{zm>5kplGzVC}H)h(oysXL@@{_j-&w;I;vMXOYcn-T;4_Gpr$& zH&Zp|g?66?mbc#Q!zu>Loen=66D1*Jtt{Q!8Q$Qo!t3_wuOC)4vSS94a14y{F>X2S6Zf`ec9ph=r5tD}5w5j_=vq_d4B{!0%&hG(`FmaN_$PPi z{}fA$cKL1e>SIqoo1Txpx&S>GsRSWhUvE=Iv1Ro$O8*@r5iscf{7{h>#O;mBizV)^ zh!;$DJ)agX4!sMS)7I7b)Hus^Uc)&v=8>3W#!u=xJu}p!b^y%h->p3>Yw%``AS;D@ba)azW0yH z2(NfQ=xSSTNne5iySmKuBM>ziWZOMEVKxS322t}Si+`n+sH?TBp9NW;3%{){c8AM` z-b{U=#7|(2Ci%qvKjAB4AIyx}`HtlyudabhK93+OE1lq*ZwK!cP>&O-Zaa|<(No7l z6GnWQqdyZXg@JWBytqsH*6@4rG@l^3iTi8~J_P$+B!L@@2e!py51WFEl8(+^@Fg5| zYwdG9Gp(?4m7}ySt~7dvw0?to$^;dd*k#e;ok18Y?}pzH{X=}mz}eY7a6sZ5JMQ9g zLAELQeD9X#oVmin)Za<`C?Js^#rX~(YZ;NW2hN}+kQhBr*w8QuNB4Teu*JEq_~9tm zurS0%tFY~on4<*tzVFn&w|LOPZDh#5&D~wS-ZB0*fj`QI!lHZO_h(TQylM?uWRFd| zXsR7+nnYyXeKTnwP6mu42`L^IIJn*(BZ)q87QqPM28T&4rx&B(pron*8)nwk6AL|1 zZ1ET7S^ZbC+V^{1a0^u%O*P*)8oAjCSr=g~4NsEV0JqGEg^guc->W1->t+rlx?#4m z{gKQ69d!bl5hKf##FRMOn6y(kjuoSh8roDJ!6_UTakdib`UE-Fzp<(UL2B0K3FFp_ z&Y6DeqTf{vmEz%a+ets)qzrfSn6O{0i8JG#$ukRaZ)PQOh4cm1t~W_NJ_Bb596bJ#vCZw(VS;u(K%P6|<5J2{&0hTJ+<6H~WeTDW8ZdYM#8iI5;hU zF&i##SC!V&HfiFu`lmQR{ybO#y~YhHr<;vza8R}1RHKzMpAQUW-+<1hK+uTBsSl(Z z1=v%;m#>C^Hws6Q`(mJMBxkkE$_wo)2Y>vtSA@Ji0L*}}^6CkH;gtJx}jl0{} z{&Fh4Lp?7CPvI_{UZQ3>Un?fxU&W$K_32r>@Vo$AHqW($4LvoW^9G3-N|IU#HNRKI zbyW-VOl>bHVyh%fZjI1)S|0lN&l>Y!A&z8h3ki`W`StL#g8%w zwJ#)wrhOLY6W6#vWBLUkqW;N(}E4_E~3COyutZ zZpNbPtfi0iboiJ;!p;m$FTG7bE2RQo$ z+CM~h!)qj~F#m$`hE`GjSj?z^67)qv8(H^lM3P|GB*Bwirr zf8BiXeTpH>sDEz!B6(_A{scw&kWv1kPZ-po=!m$3z6ww%wv*+#eV$Lf>=WB?C}sPW z|0*w^_zjOR7p77zg3uMexmyevK;zV*WA2dn0+?3GsAubbF2};oojrcOId?Km z&ls}!wHj2uz;Qnp01q21L(HNs8g!_mOc{KjbR>Ti&S*F_00OrXs*rE=uD2Z7MUnq@ zV8_g`H!k;Z7d8pY%cE9y)6N2a(zoIg8 zi5|nW=E`EdpO&*Bz*s-k{h>H5GtJW@L`5h?Qv&Gtj0t99x3{xi`}U4u;Huyo|1;hi zC=U!&k&R}xww+`v_BDlQ0Vbd((vIKdNjZ0Fq?>ABza9;R6scdr1x+;Gt2#+~0>I(nyImmRALC^BrcrWAF@Dr#J4(0O#Q*VwE) zUj+*1ub-XOKEK8ZwU}q2pDyZ+qy5u;0+o*>1M;-gka+|u?|a+dfFf4+UhRsOm4;S# zL~5S~qBL4^?(^OaKdY?BH1BC>$~Xe^qcC(yFoW?>ll;a_1~Q(5aKbB*O9TQ~eP>{N<2D z`&NcL6MhUWT;wH~;^%LrmH`9H&zGFJuYy%!$U#BayyUgQS%fl1X;WrgrR=u$M+s-* zLEXOxI&01jLh8&(i%2kGIltI~&t%eZ>RRGFU<@yW)IA!J5vV5gH(x<^>=Qf_aHfz^ zr!S(K2rIp>YSAT4Mfn8&wl-!rK;;KV=B$;>miD%>0RIAXxy!erYFkx zxtuQE%AjnZh9KY@U2^(tz)(&OoF0YY=i{6Mb$4bi&J-vzq1xn=PJbf$&|>~2>d6+L zQK5~DW(Syw(%yF^GoZuM7-hA5N1NV$7#i;jWFom|4H^-EI%wd@W<`$M&%zkPoLNT? z>PCJIJ3*($xW`erC~?#K^jZ*upl&o?yA`^Gu#iboPyfGzBzY^gP#87z#w)TrdkjGS z38P>lzb=W+1lWo`fFNyR9D|3v>u)hEADCv1r+HmJR?k_`{?(!ca$9O#s;i}q?Y2m( zWaI-n5bN))tr=GF?gpi3%qfZQ3^6zar})fl>LESzLIJ%q`-@LU%?arvxb(cqn zu}#jtO;;qq2e+Aiqee2G*zL&DM-V5IZ`z6$N%g?q`d>Fry{z7qqkT{Tm-nSFt^Wy; z9oN7!Gp4s#6BJ%N1eG{H1u!vIIwT*HGD00zl@vKBGAMF2uvr|P4xTPC*qzkU+HxG0 zQiMf1Fu$UpBELcn-pIM~>-8%SN~YqhnMzC5q$~vNr2A;#KtebH9~8mzoxl#m6aS+mKV4c` zPqH97LRd`?z9}bzeS9jmmxeH~AZ^~V*o2-2oJW~IYRz)7kuZ6(p9O!3q0&a3iw||4 zeeJ{Z<=zt!kHnrk{^y{-oZSasoY<)iG*(+{>qP)Tx!*Y7Q z_N0bP9w0aCe{G$_E$iU_*(`pMUGlUd9-k3Gu7W$zE6FM(d(_ z$f7Ll^i*gPP2>(>ph^%+{>oYR&;31^PH7Uyo4+|I%0d=JNjgX2RGaZh;P{rY04<39 zx6?|X7P6L>aia+*CJ1SS8l*2|zV``6n>wA9S_J=0Dt7$*C`x3hHmkPQH!tL4IquD-H8*j>%b25S!Xvw$Ge zS;%mr?#vPsfGqH@3_E%ILGSyqX-4vz-K>5Gsotx*QPYJTGev?>=7UvNeu_HOYn*8 z$?x<9!8Lf1*6m6D*d_q!B^SrujLZp#Fz4 zO*%nb72awr!h}4OT_zsB5SA8OTJFAchWRpE_7l+ghzU$E7C+lxYk3iX;^M$ltu~kJ zc7CQRJFN6dFvU#~@7AjIA(IVfC07WkQ+8H8KXmBW zZ`U6SO_NbPd;F$8)WA^L)6KPJwhWmo{?-P@-UjcUfLOba4>#ErxevL^{Y!Zf(!PyO zVGKi8P#`5^wzZO^a(RI)|2-deHvfUak)3ziduLP3JXJf9kY(~wOFlpch?A#7SV6QY z+W-A5=OPM9-5+$@^T6n@az$4IeGKHLu*TJ{YI2>;>cHq_bktZ!prB5{1@yO^;$OTD zb=QZLLL8Hzgw-9q>(&e95o7yjz&DQTa;oI%wLu|U@5nf0}S~hv2VvLf?ROVitine@wlc=QZ zPA~efEJ0zy|N8f44gpLY7q}L$f}xeC0lv*;a_9_GM^Ob{<7yuA5>Rck8UJhBu)tv} z2f6t=kq@s_PqhCK94fKNO~7KZ7A%kPqQ*|z+5gka!0YCcGu{~A#7w4cd!h(Lj#q$d z(a&5!h!HEplz}PlGF5CmOiNAVqQ^p&4wErY&+^BLx>b~7?)OZSk*MWVved;awY+q! z*Wc7})_8a;qJgUT2%rU}+*pWJ=(2NZuQ3Q^d;#LRhYgJ)O!lm>Z~`ealucV>;%8WpTa2aqChVTVa>ZeN1t7XuGjkA*6>qS8(>siur_x+_r~N zICn!UGH5q3BN_yGJNnIwKDx2rk)SGX?M2#=uLpX6B9ajp+CZ`fPn3!20~Cvq$4C1I z=S58+6lks`eW32vrz&D3O^=O3;YE8-2SFZIFsB=Nkhij!l!1{}PAvL8@*a6vd3 zYDFIm>~|AlTGTz4LEb!U6w3H?#ziB23CwAc!FxBv>4`H{)&-sBURN2Z%{;m>6G_{w z2e9p@$Zkk1-T+DCB*4!WC8P}xI_7BJ$*}wdfmP-q?2ODY0N$>q)~wWNsnL`LE2@>! zoFg>U?cooC1!|Ssmzsrjo5SWac4l7s%;ti598S_!%eB}@9Ic@xEUk#WbJjiU{MkZj zgF+%o{Vbf0s_wBdE~STd+!pl(>1!WbvZcP5NtbOFqHtQ57?=Zyrq}r4WytU_aK(## zU@rW%Xu2-OvX2&06j}$|Vr$(%GMv?A*BERa<#Mwb9L&Z!7XV3XCggbJFG3p!7}*g?ird08Fxo$=keLIY=NLK2V^NYz=YD_|qS1(1quDf{ z=2T+z$u+x}UASsGvb4PlMnWu1t`Pa`1Cz&!dM2DU%_+)0DUy`!Le*bPs#7skJrYT; z#EVd?LaVKkN2?{U3O(+i@`X;TE5Ifq!=I9kLoTfqY-jR>Wi3K_U6ALpfloy<*l8+PBcE9kVlKcc<>GJ^>ySL!r~1bhCXii zmhID~4qQ22K3IEj=akeE$JO6^nuqH5sWgAo=m(2upE-A-={;s~sEGBHr41nA`If1b zh0uE2D<7jYT1Kcij@we$aZ__b>3UdjwLdw?;TyLAT+3A5tYt2r5WQa#7$Gf!aX|BY z3fi|5xx!F*o7T%fW_)-chzWlwd}C&WxrLENA^=qoHgh3u4e+Lkbn~>1*=&}IcEjCh z4!(P-LAqNw7+eE+zt48JGIm9N&^_cc{&aL|FJ?$Qkq;c=|1iC8r>S%Jt+%7zxq?VT zZdX{*9tI)`oMy=`C%lXS)=5Df+jpamrOIM3j~p^yF5krgy?G5uWP6o_P?>*u(S@y?S-`Nuvn!wF+oGzj9=XoQlGss zvVuxHWoVb4&x4*vwzv9~WPD z5~$5RU?Ycj-0Nir(gf>Zv^B}1Oo0(}TYIo`3t_pM=BTbR-Ii)@@KUzwp zpelYPW4WS%d?6KCM>TLRpI#exc>66#k(zT943;Cb(ku{1sFJXQ7& zcfqIUCYXfNJ)Gf@2X`7+ift?A3(JN+J_F|srz1asj|!!PhM?Fm*oubF^|0G9Gxbf? zkiMdW0VUVLNTIotS=y1P^QF7J4qx;cY&(>Ft5t8u^e_fStw!OaG1YX8x$RFkY1UP$ z`s}w5(R;ORmI4g13)g1sw?=&6w*S!71ypSMK_#t$;~UO(f!C>^LY{>m8h%UO=t!?_Up0CszM?9OC$cA{PwsAv1c6gH z+8L-z<4H}dw;Vq1oTXEwO1QxmMKi#{&^Q?AE3?35;TRN5wr+`7BQHjNd&?!oz|S`M zuAU(FMp<1Tgr;36$KILUt%JcN4u4F=jK7$4TA2%6N$PysKR7Gny1`k! z)+*YAuu#aI)5!^J^#1yoww~g&;4o<$h7I)_fK%IrC;I9op9EOsXqMR#UwrqOauxEr zX)`SoF3;2*mdoMO;-k#te~>LwK;ejq21$hk1ue=!4fS)yFV+k#m#vfE$XYA2&?5l# z39$QvsWt}@4teOwP>_MK_m5mSdII1q^ZC;>F=DmN`Vot2QcDrAw8|7UHpRLFRH|J6 zcymfzf?aE^8iI#BbQk3pzwR(Cs-#uG<8#XH4NS@B|{M2FZ zRX*#e)sNErk}1AClINt$rPXd#a_xZJ$%#1lUEFIxCFd>hn@OFi_QrLjk$#8E-LZEN ziPCmFZ03tznw^S+Jo~o50k!OdpU+8r0k#heW7a(a8pAz8IJVEv6k+cR!Out=N{>Rq=6BS`$HRohw zH|1n!XX9pP;o;)oU}k0E;5OlA<1*neW@QuP|Nm5^1q6Vx0GaIDjZ-U|L4POHKy*FO2e}`qCmfkjI9Dl;=9yCHA_ z5@tE#Q7jC#IAl8YyCyx%2hqx#LMN>T=B&ort%e4##t3i+bghPcaz}`92bq>`wC@-X ztB-|vbnrc}+En_7s#bZAWfJ>~c~yI6unBy=c!8xkLTM>5l^K8kM(IuozLDxS9N}0P zF+9!ly3uVlj*Kc80c}6D+F?uV^%UCh-1q=WY-viWjCg22AyhDbZvSs9>wl?O|M9aN eOsxMSW*IxXdODi};CZ;&SUBLRs3epm;r|D^k4;zr delta 9873 zcmajkRaBkbvL@i*?(PuWS-88qhT!h*&KKO>3H$_y;E>?%?(Po3Wf5Ruy?1w?G4{SX z_w%Z1)LZq8c^C$N76xB`6bi+eb{t9xNJbA<=UVa2S=b@aq%<>Lga9sLt{4HfCsW|5 z?@6*UM$7KmR1of1K7J`ac{5i{n*l8S3j~FI*umbDrF%Y#96F5#8%TZv;@qRb?ZchF z!374WONgxM2@BRc#LbXfvJv97#k9LPwyxW`@Dx|H2Rn2wdR{kr`spv8KlZl>TXlX-g)K=(y0Cg;texhWN zIL-&^nnm?mqw#dz6wzvZuiwpVSu3$aq zT8aH5gkSS)69r<82%*QwR?e1b(BZNGkEkfJho`%hnG>?_x=gB*vVj%u$o1vKWrPHg zCm-jD^ECp2o@HqSYYD2QdXoxPj`$@0K{Is5on~xl>A9&Ue02@I;o+OrYCj985?;f! z)r!4;GkW|pMRq97_vBU?bsNaV`CjcJt8fcXH#K=~`MO}uPGl_EWeg`HaB zR4!ug6{}dIx|~x*mf5IToT0?9J!SW6eNmcCuV&d@k1njV`m4#%(OHdbn1TzM2vZxh zhg(kH*)H~c!WRcGea+M@XI5-?5!EE(=hSH0eMGoE%v)F`>1f2%WK8-P zW=rBVdE_GDx$sk$K30N#kQlhNSMJ?SEBh9Un?kf#0@il z@h`;lZ66qFsZaPJ;0eo|Xs!W+FU4dD-aRE^$sYnS*LlNiZ4dh}e9Xvt72YTea@8M7 z&qc`RMw$Pe;(^}UcmK`Mhelp^@EBg_hHMJ6JLz*X;me#DPzNQvTV}9sc-9O?Q~@(de6+K5sVOPH&vA-}{e-UHHUl;~~sg^#=3L zmgS6gW;dJmKrh@J%q|kYjlCi$-QXlB-9l?Y%e^Lgov;XhGq@q!yvqAq#46_byrVt@A5mGs8yW}%?>h4N%j|KeFU)#*Xt@zZgd5mIZ zL{Z3xU|D(vk7l5ukZ@PUA=-^xtTBVYWJ&BZlg&xm6wS-~jyh*^&~%Wk&)q_r|0NI z%hP8PH8xgf`WaTAdEyQnSY{YR=qusN%&i?j7QpLSTLv%Op@(n6DE0m+Yzx;N1#A9B zAu_y8<}MPr4Eb7tPYyELqu`RZAc#2H{o~ruJ~ zPq^x8<1TixGvy|vmq3a>&yF_(Q1ll&Flk!Rf&l-227{6MgmX9tYUc@a;O5VtzQ011 z!WXTx@GHSk9TcBp3R4uLCHZe2Rr>8QL}g|iufTGARMOW|8xHqDGiU9J`%x2Z%AA<|7wPw90M@*NG1-Pw z1HX=ac8Df4CXcFHgLT+IQ+7b-{cknuS$UrRz5aD|JKY+8Oq8j=2 z>*W1CjVZzFWL0~?RZ#mH(V(uy=6m1dZv02Mw(q^c%(_p+J%WC6z)V_kj1>SQNsYJ} zpy%q1B^L#1RnV>;ZE|#EK*nY;GTNwd(7jsrW^ccmt3B~{klS!7RcCiZwBWEr^%PDJ zj)w-JV&A056!|2-1!Z)zDwi#Xzb@;$dccB z%ygss{H?F+=3^drR_gKGJn+gLqai=ToD<-X{_%7ESPNIgFS0s4U7!%7T+}LWX+hKaSReruH8C+`(jIM4v~=t-;Y3$HWLt3vhTnErh%Z$uD3_SxtY!fJ`vIA7#2Ddq z#T5n6l_dHoe)g&Yy2|`gdFz0}A>%F&=Y~mOM+x^eCjlTZ=h}|BY|Zg&d>!kspNcKG zal}`B+;{Ywd`x=pzJd8saGjM*itmYDw3Ix_dVU|52|b=nI!NjL;jWh6C=FT8mSoB* ztBjCmKs91$WBxM19a21GaxCC{#RvU4ZPc?rB_h`)c6gPULy$NQrEiT#$@}t~ch5|b z-Y4gJ)0#irZl1AUlH@9Nf|&f)O)#TuQ?rgfu(${THHNX2_U z2Z{~)9ysg-;~xfRW_yR11#l=_~@r${&pA;_NU6cNL*8w%pA|3U_sfzuE=(_>_jLKiZb4 zb&3gnmL0r#pFX^jlC&}3-M_|w3c`9D91jiV%F$`r8bR<0AqWbb6o!nr<#oM$Okvtq z`2aP)G9BTyH?TKmBu&8L+M7OZu;qQUo)$0liDO|?4i3-Fbv~$$5jDGPPIrrX#Ie0e zx#pPW=9Gw7AU`fIYeaRXZ3gygBwrG~v70NK8+w<6;uhRgxW_7rn$}D?<7DbMOFQRk z-)|?oZP|Tms0u0J{x8iT9V~g5Ba12bC#qM5ih=N_cYCZ6Ea~ly$_|qxy!y?mPsQ1R z9N!2;$V39QsDc>&Ear#Eqa45$H#9dws^&$G8rkO+=;o3MJZeu}8p?MELkg)1))bH7 zQxgeH_4df+hra)CM)8!OSYwXUNPS2trbqSfNeOP`_) z=E4=42=C*le-bf_HBu6|l(pjX=VTT;Eqx2u$c18_i2s58u|4lpeEzKFfFyLBs9@(l zhA2b^<2|(JIZ!!J=hj=x-|1#}yWSD|F@$h88pggXSU~kV`5Pu5GoEND&-zo1>-#4I z2k+aot|SG3_dkmglx*~`yOeNbM+(C=^0yaB0!`uq4$p)uEINyZUZizZs`xw{(SW$2 z6v9ZpvlB*on2|v&S#{~<0$K8dLXWC8)`y1E@( zS4>sKb&Br!e&t1}9vhY^4pv`zLimN%{rd zQpuAY4%-WbNzk^<*_0!RnKSsI^+lpVU>9~dm^o#^tF(Z6?-z}81~!+NsXeU z^hS7`RFh*7dYo3lQ44<^fDB%qt6iB)LEk^j%Ry1cA5KSkh_&-pv+QGcJ|@9MtPiuB zPiQ6RS2ad9g-M~`aCP0opeUV>1sFK}AzntJ|6DE|H}weB3iN_lS$%6CX2K~jY9qa& zP0eJ#(c?KH1d?8>RS%u$nr3L^xwWVyuYV7&e~Xxb3FaH_&*h_~zhpEsBy~veNg5FJ zKnm{X{tio<>dqy29ltId5;XSNJcnUAeYc+=U85$GwNND0TUWViU`Fu+hCL%9=-(#% z{}i-J3r=JEZyOpV_t}SYU`Ag1#R@nx4%x0;$%KEw3SZA`&n#`pj^|ZR!`upUF-aRG z_m{t$vOF%>ONwo;&M>5e)aSOe_EKil^Pg88A&Mmazhr8Zdq2kEqG2Mo-d+vI3rjoCm!r_(yGu+UNJ?tn3xWaEg}TJ3$PRt-GbzXd&`;%0n-9A#4&(CN|U!#u0V z(_hG}Utm`^{JI}*5A}ZmZAM=UHy!ErOF7|x09`TI)5GlvhWRkdv`iZNqVVUhV<2jv zXCAyNSpjqbX;#R;_*~V>IGyyJ?A89VxtwoGXg4zh8P%nm*vT+U3_w8e?n-M(cL2&Q zIe7KVa4em#BKnGICya3kv{2DH^ya!v+lu=#ep9J#kpED(i<;!e$6whC=RgcmnzW{g zj6Wsdv5pzea%?B$xt*B4B+pv4HVLLbp*hLhDZoyoPS?CYXGORiG1=O*-SO!h&iuYe zW)Ws(2$a?JGSJEr@hqPZX?8ax8USjDW4K38GZq5dY?BR4aj$pg{TfW(2z}NYU!0nj zcp-uTuVnN_xteR;3YB)Y6+1yBvS#z|+_wm$xN9aiX3 zxyx-9=gZ=h>X%?ootIa7lht1`NS?NAM-}U(kd7rB#ip4m#;81U9Y0&6H=v#yBAj2& zy;QdvM6cbGg*m>Idim?0i*bAX=nP?1#c1g#VM!#pZ7M!H`mPLJd+EvHmTxkx@g&xs z?eRye@oT*lMLTD^)FItj0Xt{~TQKDkmUx}Pmyp+fokPHQX-^F+qP)k3WOd!O9 ziXL~DKyBOfUaCuk_DVlTRj`QG@Reco{q1@P8(|tW&L73W^&~Vo^A4|7=H8`f8t5Gc zmGq4drSY-OGZu}Q8gR7C0#x%0;hdhSu) zQ7b53##85mk-)#N6>v}~GyPO1y*%vD%M|HtDb=QL$VOABCpPm> zaSw*XwaV>O3}U{S80~$R3IhTHO<{J+4GP(v`w_{nrhF84oS`EkNEVF(d7S7;yCer= zxHI}Vq22J$ayB@0ICohp4y8PXZ*U9LZ*)y&dXHj_CpM93s@Z}7-+%6MqjVFOB5~ZI zooC$p)v2pM-)?eT8ht%CjiRr?Qz0{R|cJxl5e z0s+9U>&CIi&o`O3-*m3RYg`_uL7;oKsq2+KBk!q7tMlI@t7l%?-RN)mB11X>o?>Oc zgG>@ar*cHd995a0YtBVChj?J_yQU%!`TdzU#*I3PJcH))@>~9nI1DVW`8o-&Xk+vw zb~^4N?NCE}#8cyM?PaJ16cOvp^B zVa=4b_O|1nXe6lQLw-~iR{p5eLN@jO`Tp`gj^xd6kKDRnF{YLM{qo1J=s7Y$^_vPa zwp|a<==Z2}8QC{QskYd7NCp;y6(uMIOq~oj^aIyYf+#vS9mK;p*A_oedcHgNy98*l z=tN>rU|?h^8eP-ssBj=6gZhFf=twN( zm=Xp12QDr)QtSjnIta=(Q7I%fLZSF#A2Gfki_o7jL6(UQ9*z_W-<3zU zmlL0eOaZVPAvBy;l`8=7{r5SXs^SnP{;#d`9xGCo{-Yz9CKRz<_9nS`akXO(gtyOB zC2+W-YvX9!RNsGpH7Uf!UpASl{2PZ$%+utEqoGXacVt7)ALLacwi?b>Q!fJ*=Dn7) z@ji~D2GZ?u^lU#SRlC~7#8^=C6!YC@=3WmkM=nz@5hApLK7ti zA(xoVf!n-KbIjR0Dtp}D+VeFvqt&ux0j6gs!rIx8+-$3Gs&zN}nllH&s77d`pD2VO zJO#;ce*z_8wBNv28p~~!-AV~_F9Pe0;UsK>0&^h@5{=X}4N~r;H1x7}f2C7LM{=t= zfO~`UIpP*jyxgIKMy*u0sGcjf7L9H)gNCOOHlsWJvbkjskgI&r5U?>a7M{d&IVv0L zYA8A2 z_sE8gTb%E&i}>nEQ`MVg&xpRpgbl}Dpe zD-z6f5A86vR*of`vvS^LG_ecN>Q2Rx4ph+G{oY3k`>Bjo6FsYo>l>?|(ij~>9!(yD zT@=nhJum%)OiN#?%C>>Ga;eDHsqC&;{=r98mm7_|bEh39K^;%`Hqy@Dj}b~c2Zm5JW|E(om1jX0;D3A5FgPoe8n@d7+*twtLOfk#ybJ&bA(W6<(?C8eVkJHVs6cdUz?OxM9rd)*{@!&PXjcCBJ<3zB~*+W&XOu+L`2M~%QGB5^0?G-3&k`hP7}Uac?reLJyQ=j|YxeFyV|PGu~C!BFo1RQzpPEI?lp@z`j_ ze%m$)amgsNz;^kTA73847(*hY`ewUOpC|N7dZIH!CZj0-@EsC>zqp}kVI!O%#SX>d zO@Q$7$u^7J3yKu@KFVu+b-FK)F<8*5h2 zc$)rH_d_bgNNFto(?evT9qDS{Zv9tRKjk6mKV4Q|IEP9BYkV9y;@NVVllWzcVWmVb z3Ebs6mOl8i21FF0w%zz;dOv7zy(-}~%W@2!&?!M`iY#1qgR(`vY4-W21Q?aTeqmgt zek9~yX_L&>tjZzJynXCRPFTr_MB$WRflDN3L{(E#Fu&trmG13PmK)Y}f~*dX$)*Y4 zq9CIjF!TcGY_XrETYUK8_rxR}gr)al>UBoh5!^04gGBN>I!Sq#lDf8?ymqu7#qhd5 zk6(Y^uvPc*@j19JS}9SL=D@H%9YwkLxu7ArO=WoK+1&B6g+k29^zoB!uGNvwN|cFS z46nG4IzYRh8lxqj$Wj`ccDjbxQyr}eVxISi%+C$joZLn=oqw3~MkZd@&n+T^$Sp;; zCt^Z{oJV(d2Wi3jPCRti`V+{c4PRp9DQVWhhkXVy$ z(V;EYm>UE`KM>rOTn(*wiJol@`#hI0+M{ma75s$MeT)i7Z_APvI>I(1H!#5^p|gM%q7jSk? zT*oS`?-^M-j$TLCB5Ta17#)80*D%7<_VEOCBY3=&b)!mQ>{LixRU0+!HliU1)OTNL zl2whu!9490=uOv}dnd*EvhoMe1C$GUrDK=%o}!YqdFae-ycaA<-)?yKOo(`6LPuQ4 zmW`=fRH8!W%s_T@+K_&DSOV7XX18Z$t!;ZuAVVX1!v|c3*h3y$b~`)ci&_XB{SFXV zurgFpm@DhQV}??R?u`4-{TEv`

Ai)9aAqpfdw4N^)jj69NovSAy})+{S6UCHBaR z!U}pTp0ip-?FsCy)bzs`IukVtzEVq37rWqvzX%tkk{K;ZC_$XR+J5H=!K^z_oOvK$ z^Zl^p?}85rc63DvF(Gt$%A879ch(1_!l)J!F8|OIs|uI#Xg&(?NvH9^O!30BP}z4! zZsvw2-zAOv&rjk9g%)ubal227;9gfGI5!?fX+c?2nc6!ti)+lrC7=J{SFrnLollT< z)_sMCBWQ^2BY$$8l4cEw&sHSju~pT?^0amuFPO!u7k;(DP(uq2b ziuvbsiaMEJ3{GAtd^%eKW9)Gl7g}qZjRvQc=lf+!5hcJ5^V#&tyOhAy&SsD`;7vn- z^r-oGhXzo!szVUidQM(noU2Y94deg%59X;>0c)0K0_pnu zRSIt2JE#*_U*z)XpS?hN>d$RG-%Es_uJ*2Kv%7jptqLI$U-XYw~pYfQd&_#N5>K!N1s1em5Qb%i|vKF--ofkB@D% z_=SRaVO=)Yo&voQk~81)ZV`^GnV^xfH5}$Hk4QrM8BQs_c(r&Zt_7||jeAYV!%DDq za>iwmdVk`~UizB-39#K7E6cE3Id$il!_nVu#wQOHBYxGpUq5ReIM>}( zS>ELtgEppGgx*QJ=c}6mB5D^7&_2Cn!TTZ*VAYm02noon-O^^K{3+uoPOj;5d}f;L zI=f;^n%S2Sj+b-8`{)YaA0Ph*xfpMPL%$8Z*d%g}rR~p5KsgYII{ue$H(QuY?j?}a zPRf9?G(+>0mP13Q)PRFl@k%h;e#$1Uy&B@i&bHrkFxDtm%W^fQeul>|C75mi{eIv= zUrtf`!|`Llf|3ldqkJZn>grX`wu+LjwFlvTofnMs#vAan+)2k6%Dgew%Yq7oA_RE2 zXf;nTu}?MN>(~g31J7uT0MmFmiNdPnm7L|u#?K3xgj|;kmS>-2OLiyXRnas#%dEeY zl?kyVXXCMlL^Jkm%DD>oUF=%KjSAO6( zLwy+$fu4fZu*~sjyz~K2sf0h6qXGAD{4*zUKC>iW)G%)SvLfH5OZBnfQVT_lGNc-G zv}3J~1t`TW=a-59%>AvzQkXaxHCEeES?4X3*m*^Lf_>_=U%~HQZ9^?D=N?VwEp4vv zFZ~td%gIQiR)Y!t{D-zeZeZG9glNn*JX}(oLwzxUFw3Mhuf1iRxLvVt&sTjZ&pG{!*8Op?2>k{QZ|mbh+6% zl^J<+tKk%M2ECG*s&W8ESTbN)uVgOpfE009-f1C8A9?)8kriyi^Ir5E`hA6&6ePD) z;yd)mq&3QYDC_-FwFf7XVLP6|OqYAI?Kz8d90xbGsRQvdma6m>6?UxSXJo8B8SA~T z6`NmD4q`Gj#fc>LbXr$PJgHC)~g56>GAKV_5i1{BSDpzR zVrY}tk%`gx&rwa5ZkC*G4{QBaUF4hGbZ+O5CP5gRhu5S0rCg_J`$6{vEaaC+oU&d# z!PgL(?5SzN8r2QqMC|CnY-kg98T zidoc>TZ%zsRHv?SC@~nI)#TI2pPp}%5L{WL$DMOMk%CxlvVK25nmg-;*>r&cX^De$909Jt2{?! z<3GbG$91JCQd)J{=9Rf>;cW&wcq=qJuXzjGbR4hvus)X?6_daJhMAn9HstvPAsx-< z)5?YjENX+tYTo{~c!J#aiTwHKrphZ+$SX5u)H`TD{ZsTcS}u2qi`!*qq?{q^F6D0R zasC4apY?n)>luKOl;9OG;{XX*gDfpAIQT$jR($6C93V3bGfQre6*sR1m#Fan@**qn z8x#oO{7*ab%n7hdYY-0Ko{+up4y--~5utK$_+qta7Aw_D~@%scU$$$yw{9UP&Q-cHREbFrRFsy`D;qN z(Lf?pkJI4_8_+<3pi-*VGtgR@hyHQ-`e8t)Oh8d+XyTW{y%iYc4*ani2j3i>5akW| z8FGk_g}-^wK53$N%XZHk6#coR2S0{B;u&%dbrg=YY_GU2v3$q4&cI>y+u@GmER?42 cTj*K&fD7 0) && - (nodes <= std::numeric_limits::max()) )) + (nodes <= static_cast(std::numeric_limits::max())) )) throw std::runtime_error("(isActive) Non-acceptable value of MPI Nodes\n"); // ^ Assert that mpi_id_t can hold nodes, and thus we can cast without data loss! @@ -24,12 +20,14 @@ bool isActive(mpi_id_t node, size_t nodes) { } size_t tagGenerator(size_t depth, size_t step, size_t stage) { - auto stage_bits = static_cast(std::log2(MAX_PIPELINE_SIZE)); - auto step_bits = static_cast(std::log2(MAX_MPI_SIZE)); + auto stage_bits = static_cast(std::log2(MAX_PIPELINE_SIZE)); + auto step_bits = static_cast(std::log2(MAX_MPI_SIZE)); + uint32_t stat_bit = 1UL; // ^ We use MPI_SIZE room for steps to fit the bubbletonic version + // [ depth | step | stage+stats ] size_t tag = stage - | (step << stage_bits) - | (depth << (stage_bits + step_bits)); + | (step << (stage_bits + stat_bit)) + | (depth << (stage_bits + step_bits + stat_bit)); return tag; } diff --git a/homework_2/src/main.cpp b/homework_2/src/main.cpp index dafa1e0..1eded9f 100644 --- a/homework_2/src/main.cpp +++ b/homework_2/src/main.cpp @@ -22,7 +22,36 @@ config_t config; MPI_t<> mpi; distBuffer_t Data; Log logger; -Timing Ttotal; +distStat_t localStat, remoteStat; + +//! Performance timers for each one of the "costly" functions +Timing Ttotal; +Timing TfullSort; +Timing Texchange; +Timing Tminmax; +Timing TelbowSort; + +//! Init timing objects for extra rounds +void measurements_init() { + if (config.perf > 1) { + Ttotal.init(config.perf); + TfullSort.init(config.perf); + Texchange.init(config.perf); + Tminmax.init(config.perf); + TelbowSort.init(config.perf); + } +} + +//! iterate ot the next round of measurements for all measurement objects +void measurements_next() { + if (config.perf > 1) { + Ttotal.next(); + TfullSort.next(); + Texchange.next(); + Tminmax.next(); + TelbowSort.next(); + } +} /*! * A small command line argument parser @@ -43,6 +72,9 @@ bool get_options(int argc, char* argv[]){ status = false; } } + else if (arg == "-e" || arg == "--exchange-opt") { + config.exchangeOpt = true; + } else if (arg == "--pipeline") { if (i+1 < argc) { auto stages = atoi(argv[++i]); @@ -59,7 +91,12 @@ bool get_options(int argc, char* argv[]){ config.validation = true; } else if (arg == "--perf") { - config.perf = true; + if (i+1 < argc) { + config.perf = atoi(argv[++i]); + } + else { + status = false; + } } else if (arg == "--ndebug") { config.ndebug = true; @@ -68,22 +105,25 @@ bool get_options(int argc, char* argv[]){ config.verbose = true; } else if (arg == "-h" || arg == "--help") { - std::cout << "distbitonic/distbubbletonic - A distributed bitonic sort\n\n"; - std::cout << "distbitonic -q [--pipeline N] [--validation] [--ndebug] [-v]\n"; + std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n"; + std::cout << "distbitonic -q [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n"; std::cout << "distbitonic -h\n"; - std::cout << "distbubbletonic -q [--pipeline N] [--validation] [--ndebug] [-v]\n"; + std::cout << "distbubbletonic -q [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n"; std::cout << "distbubbletonic -h\n"; std::cout << '\n'; std::cout << "Options:\n\n"; std::cout << " -q | --array-size \n"; std::cout << " Selects the array size according to size = 2^N\n\n"; - std::cout << " --pipeline \n"; + std::cout << " -e | --exchange-opt\n"; + std::cout << " Request an MPI data exchange optimization \n\n"; + std::cout << " -p | --pipeline \n"; std::cout << " Request a pipeline of stages for exchange-minmax\n"; std::cout << " N must be power of 2 up to " << MAX_PIPELINE_SIZE << "\n\n"; std::cout << " --validation\n"; std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; - std::cout << " --perf\n"; - std::cout << " Request performance timing measurements to stdout.\n\n"; + std::cout << " --perf \n"; + std::cout << " Enable performance timing measurements and prints, and repeat\n"; + std::cout << " the sorting times to average the measurements\n\n"; std::cout << " --ndebug\n"; std::cout << " Skip debug breakpoint when on debug build.\n\n"; std::cout << " -v | --verbose\n"; @@ -167,8 +207,8 @@ int main(int argc, char* argv[]) try { " Size: " << mpi.size() << logger.endl; -#if defined DEBUG -#if defined TESTING + #if defined DEBUG + #if defined TESTING /* * In case of a debug build we will wait here until sleep_wait * will reset via debugger. In order to do that the user must attach @@ -179,12 +219,12 @@ int main(int argc, char* argv[]) try { * $> gdb */ volatile bool sleep_wait = false; -#else + #else volatile bool sleep_wait = true; -#endif + #endif while (sleep_wait && !config.ndebug) sleep(1); -#endif + #endif // Initialize local data logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; @@ -201,24 +241,27 @@ int main(int argc, char* argv[]) try { // Run distributed sort if (mpi.rank() == 0) logger << "Starting distributed sorting ... "; - Ttotal.start(); + measurements_init(); + for (size_t it = 0 ; it < config.perf ; ++it) { + Ttotal.start(); #if CODE_VERSION == BUBBLETONIC - distBubbletonic(Data, mpi.size(), mpi.rank()); + distBubbletonic(Data, mpi.size(), mpi.rank()); #else - distBitonic (Data, mpi.size(), mpi.rank()); + distBitonic(Data, mpi.size(), mpi.rank()); #endif - Ttotal.stop(); + Ttotal.stop(); + measurements_next(); + } if (mpi.rank() == 0) logger << " Done." << logger.endl; - // Print-outs and validation - if (config.perf) { - Ttotal.print_duration("Total ", mpi.rank()); - TfullSort.print_duration("Full-Sort ", mpi.rank()); - Texchange.print_duration("Exchange ", mpi.rank()); - Tminmax.print_duration("Min-Max ", mpi.rank()); - TelbowSort.print_duration("Elbow-Sort", mpi.rank()); + if (config.perf > 1) { + Timing::print_duration(Ttotal.median(), "Total ", mpi.rank()); + Timing::print_duration(TfullSort.median(), "Full-Sort ", mpi.rank()); + Timing::print_duration(Texchange.median(), "Exchange ", mpi.rank()); + Timing::print_duration(Texchange.median(), "Min-Max ", mpi.rank()); + Timing::print_duration(TelbowSort.median(),"Elbow-Sort", mpi.rank()); } if (config.validation) { // If requested, we have the chance to fail!