@@ -0,0 +1,2 @@
# Assessments
assessments/
@@ -9,8 +9,16 @@ mtx/
exclude
hpc_auth_sync.sh
# IDEs
.idea/
.clangd
# eclipse
.project
.cproject
.settings/
.vs/
.vscode/
@@ -0,0 +1,23 @@
# project
bin/
out/
mat/
mtx/
.unused/
various/
# hpc
# IDEs
.idea/
.clangd
# eclipse
.project
.cproject
.settings/
.vs/
.vscode/
@@ -0,0 +1,33 @@
Parallel & Distributed Computer Systems
December 6, 2024
Write a distributed program that sorts $N$ integers in ascending order, using MPI. The inter-process communications should be defined by the Bitonic sort algorithm as presented in our class notes.
The program must perform the following tasks:
- The user specifies two positive integers $q$ and $p$.
- Start $2^p$ processes, with an array of $2^q$ random integers in each process.
- Sort all $N = 2^{q + p}$ elements in ascending order.
- Check the correctness of the final result.
Your implementation should be based on the following steps:
- Start $p$ processes; each process gets $n/p$ data and sorts them (ascending or descending, depending on the process id) using a sort routine from any library. Use two buffers: sort from one into the other, send from one and receive into the other.
- Repeat $O\left((\log_2 p)^2\right)$ times:
  * Exchange data with the corresponding partner and keep the min or max elements, depending on the process id and the phase of the computation (see the sketch below).
  * Sort the bitonic sequence locally, using a modification of merge sort that starts from the "elbow" of the bitonic sequence.
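For illustration only, a minimal plain-Julia sketch (no MPI) of a single keep-min/keep-max exchange step, assuming the two partner processes hold oppositely sorted blocks; all names and sizes below are illustrative, not part of the hand-out. Element-wise `min`/`max` acts as a half-cleaner on the concatenated bitonic block, so each partner ends up with a bitonic half that can then be sorted locally from its elbow.

```julia
# One compare-exchange step between two partner processes (illustrative sketch).
block_len = 8
local_block   = sort(rand(Int32, block_len))              # this process: ascending
partner_block = sort(rand(Int32, block_len), rev = true)  # partner: descending

# Element-wise min/max is a half-cleaner for the bitonic concatenation:
# every kept-small value is <= every kept-large value, and each half is bitonic.
keep_small = min.(local_block, partner_block)
keep_large = max.(local_block, partner_block)
@assert maximum(keep_small) <= minimum(keep_large)

# Each process then sorts its bitonic half locally (the "elbow" merge step).
sort!(keep_small)
sort!(keep_large)
```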
You may use the C standard library function `qsort()` from `<stdlib.h>` to check the correctness of your results and as the initial local sorting routine for each process.
You must deliver:
- A report (about $3-4$ pages) that describes your parallel algorithm and implementation.
- Your comments on the speed of your parallel program compared to the serial sort, after trying your program on aristotelis for $p = [1:7]$ and $q = [20:27]$.
- The source code of your program, uploaded online.
Ethics: If you use code found on the web or generated by an LLM, you should mention your source and the changes you made. You may work in pairs; both partners must submit a single report with both names.
Deadline: 7 January 2025.
@@ -0,0 +1,114 @@
#
# ---------------------------------------------
# Bitonic v0.5 functionality
#

# Partner of `node` at this step: the neighbour to the right or to the left,
# alternating with the parity of (node + step), odd-even transposition style.
function partner(node, step)
    node + ((((node + step) % 2) == 0) ? 1 : -1)
end

# A node id is active only if it lies inside the valid range [0, p).
function active(id, p)
    (id >= 0) && (id < p)
end

function exchange(localid, remoteid)
    if verbose
        println("Exchange local data from $localid with partner $remoteid")
    end
    nothing  # We have all data here ;)
end

function minmax(data, localid, remoteid, keepsmall)
    # Keep min-max on local data
    temp = copy(data[localid+1, :])
    if keepsmall
        view(data, localid+1, :) .= min.(temp, data[remoteid+1, :])
        view(data, remoteid+1, :) .= max.(temp, data[remoteid+1, :])
    else
        view(data, localid+1, :) .= max.(temp, data[remoteid+1, :])
        view(data, remoteid+1, :) .= min.(temp, data[remoteid+1, :])
    end
end

"""
    distbubletonic!(p, data)

Distributed bitonic v0.5 sort using a "bubble sort"-like functionality to propagate large and small
items between nodes.

p    : The number of processes
data : (p, N/p) array
"""
function distbubletonic!(p, data)
    pid = 0:p-1
    ascending = mod.(pid, 2) .== 0
    if verbose
        println("ascending: $ascending")
    end
    # local full sort here (run on all MPI nodes)
    for i in 1:p
        sort!(view(data, i, :), rev = !ascending[i])
    end
    for step in 0:p-2
        partnerid = partner.(pid, step)
        activeids = active.(partnerid, p)
        keepsmall = pid .< partnerid
        if verbose
            println("step: $step | active ids: $activeids | partner: $partnerid | keepsmall: $keepsmall")
        end
        # exchange with partner and keep small or large (run on all MPI nodes)
        for i in 0:p-1
            l_idx = i + 1
            r_idx = partnerid[l_idx] + 1
            if activeids[l_idx] && i < partnerid[l_idx]
                exchange(i, partnerid[l_idx])
                minmax(data, i, partnerid[l_idx], keepsmall[l_idx])
                sort!(view(data, l_idx, :), rev = !ascending[l_idx])  # elbow sort here
                sort!(view(data, r_idx, :), rev = !ascending[r_idx])  # elbow sort here
            end
        end
    end
    # [optional] re-sort the odd (descending) rows ascending (run on all MPI nodes)
    for i in 1:p
        if !ascending[i]
            sort!(view(data, i, :))
        end
    end
    nothing
end

#
# Homework setup
# ---------------------------------------------
#
p::Int8 = 3        # The order of the number of "processors"
q::Int8 = 8        # The data size order (power of 2) of each "processor"
verbose = false;

# Run Script
# ---------------------------------------------
P::Int = 2^p
Q::Int = 2^q
N::Int = 2^(q + p)

println("Distributed Bubbletonic (v0.5) test")
println("p: $p -> Number of processors: $P")
println("q: $q -> Data length for each node: $Q, Total: $(P*Q)")
println("Create a $P x $Q array")
Data = rand(Int8, P, Q)
println("Sort array with $P (MPI) nodes")
@time distbubletonic!(P, Data)

# Test
if issorted(vec(permutedims(Data)))
    println("Test: Passed")
else
    println("Test: Failed")
end
@@ -0,0 +1,113 @@
#
# ---------------------------------------------
# Bitonic v1 functionality
#

function exchange(localid, remoteid)
    if verbose
        println("Exchange local data from $localid with partner $remoteid")
    end
    nothing  # We have all data here ;)
end

function minmax(data, localid, remoteid, keepsmall)
    # Keep min-max on local data
    temp = copy(data[localid+1, :])
    if keepsmall
        view(data, localid+1, :) .= min.(temp, data[remoteid+1, :])
        view(data, remoteid+1, :) .= max.(temp, data[remoteid+1, :])
    else
        view(data, localid+1, :) .= max.(temp, data[remoteid+1, :])
        view(data, remoteid+1, :) .= min.(temp, data[remoteid+1, :])
    end
end

# One stage of the bitonic network: at each step, pair the nodes that differ in
# the `step`-th bit of their id and keep the small or the large elements,
# depending on the sorting direction of the enclosing block of size 2^depth.
function sort_network!(data, n, depth)
    nodes = 0:n-1
    for step = depth-1:-1:0
        partnerid = nodes .⊻ (1 << step)
        direction = (nodes .& (1 << depth)) .== 0    # nodes in an ascending block at this depth
        keepsmall = ((nodes .< partnerid) .& direction) .| ((nodes .> partnerid) .& .!direction)
        if verbose
            println("depth: $depth | step: $step | partner: $partnerid | keepsmall: $keepsmall")
        end
        # exchange with partner and keep small or large (run on all MPI nodes)
        for i in 0:n-1
            if i < partnerid[i+1]
                exchange(i, partnerid[i+1])
                minmax(data, i, partnerid[i+1], keepsmall[i+1])
            end
        end
    end
end

"""
    distbitonic!(p, data)

Distributed bitonic sort v1 using elbow merge locally, except for the first step.

p    : The number of processes
data : (p, N/p) array
"""
function distbitonic!(p, data)
    q = Int(log2(p))    # CPU order
    pid = 0:p-1
    ascending = mod.(pid, 2) .== 0
    if verbose
        println("ascending: $ascending")
    end
    # local full sort here (run on all MPI nodes)
    for i in 1:p
        sort!(view(data, i, :), rev = !ascending[i])
    end
    for depth = 1:q
        sort_network!(data, p, depth)
        ascending = (pid .& (1 << depth)) .== 0
        if verbose
            println("ascending: $ascending")
        end
        # local elbow merge here (run on all MPI nodes); a full sort stands in for it
        for i in 1:p
            sort!(view(data, i, :), rev = !ascending[i])
        end
    end
    nothing
end

#
# Homework setup
# ---------------------------------------------
#
p::Int8 = 3        # The order of the number of "processors"
q::Int8 = 8        # The data size order (power of 2) of each "processor"
verbose = false;

# Run Script
# ---------------------------------------------
P::Int = 2^p
Q::Int = 2^q
N::Int = 2^(q + p)

println("Distributed bitonic (v1) test")
println("p: $p -> Number of processors: $P")
println("q: $q -> Data length for each node: $Q, Total: $(P*Q)")
println("Create a $P x $Q array")
Data = rand(Int8, P, Q)
println("Sort array with $P (MPI) nodes")
@time distbitonic!(P, Data)

# Test
if issorted(vec(permutedims(Data)))
    println("Test: Passed")
else
    println("Test: Failed")
end
@@ -0,0 +1,27 @@
# Distributed bitonic sort using elbow merge locally, except for the first step.
# This sketch only prints the communication pattern: for every stage k and step j,
# each pid's partner and whether it keeps the small or the large elements.
function distbitonic!(p)
    q = Int(log2(p))
    pid = 0:p-1
    ascending = mod.(pid, 2) .== 0
    println("ascending: $ascending")
    # local full sort here
    for k = 1:q
        kk = 1 << k
        for j = k-1:-1:0
            jj = 1 << j
            partnerid = pid .⊻ jj
            direction = (pid .& kk) .== 0    # nodes in an ascending block at this stage
            keepsmall = ((pid .< partnerid) .& direction) .| ((pid .> partnerid) .& .!direction)
            println("k: $k | j: $j | partner: $partnerid | keepsmall: $keepsmall")
            # exchange with partner and keep small or large
        end
        ascending = (pid .& kk) .== 0
        println("ascending: $ascending")
        # local elbow merge here
    end
    nothing
end
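
# Hypothetical usage (illustrative, not part of the sketch above): with four
# processes, the call below prints the partner and keep-small pattern for
# stages k = 1:2 and steps j = k-1:-1:0.
# distbitonic!(4)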
@@ -0,0 +1,23 @@
# Given a bitonic sequence `b`, merge it into a sorted sequence `s`.
# Two pointers start at the minimum (the "elbow") and move outwards cyclically,
# always copying the smaller of the two candidate elements.
@inbounds function elbowmerge!(s, b)
    n = length(b)
    l = argmin(b)               # position of the elbow (minimum element)
    r = l == n ? 1 : l + 1      # right neighbour, wrapping around
    i = 1
    while i <= n
        if b[l] < b[r]
            s[i] = b[l]
            l = l == 1 ? n : l - 1
        else
            s[i] = b[r]
            r = r == n ? 1 : r + 1
        end
        i += 1
    end
    nothing
end
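
# Hypothetical usage example (not part of the original file): build a bitonic
# sequence from an ascending half followed by a descending half, merge it,
# and check the result against a full sort. Sizes and types are illustrative.
b = vcat(sort(rand(Int8, 8)), sort(rand(Int8, 8), rev = true))
s = similar(b)
elbowmerge!(s, b)
@assert s == sort(b)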