HW2: RC4 - Measurements version
parent f849e8a309
commit 6e301bce99
@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=2
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=2
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=2
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation
+srun ./out/distbitonic -q 25 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=2
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation
+srun ./out/distbitonic -q 27 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation
+srun ./out/distbitonic -q 25 --validation --perf 8
homework_2/hpc/N1P4T4Q26.sh (new file)
@@ -0,0 +1,28 @@
+#! /usr/bin/env bash
+
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --cpus-per-task=4
+#SBATCH --time=5:00
+
+# Use this as following
+# $> sbatch -p batch|rome <this file>
+#
+# NOTE:
+# First compile in aristotle with
+# $> module load gcc/9.2.0 openmpi/4.0.3
+# $> make -j hpc-build
+#
+
+module load gcc/9.2.0 openmpi/4.0.3
+# Note:
+# The above versions are matching w/ my system's
+# versions, thus making compiling/debugging easier.
+
+# Suppress unused UCX_ROOT warning
+export UCX_WARN_UNUSED_ENV_VARS=n
+
+# Suppress CUDA-aware support is disabled warning
+export OMPI_MCA_opal_warn_on_missing_libcuda=0
+
+srun ./out/distbitonic -q 26 --validation --perf 8
@@ -3,7 +3,7 @@
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=1:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation
+srun ./out/distbitonic -q 27 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=2
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=2
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=2
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation
+srun ./out/distbitonic -q 25 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=2
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation
+srun ./out/distbitonic -q 27 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=16
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8 --exchange-opt
homework_2/hpc/N4P16T4Q22.sh (new file)
@@ -0,0 +1,28 @@
+#! /usr/bin/env bash
+
+#SBATCH --nodes=4
+#SBATCH --ntasks-per-node=16
+#SBATCH --cpus-per-task=4
+#SBATCH --time=10:00
+
+# Use this as following
+# $> sbatch -p batch|rome <this file>
+#
+# NOTE:
+# First compile in aristotle with
+# $> module load gcc/9.2.0 openmpi/4.0.3
+# $> make -j hpc-build
+#
+
+module load gcc/9.2.0 openmpi/4.0.3
+# Note:
+# The above versions are matching w/ my system's
+# versions, thus making compiling/debugging easier.
+
+# Suppress unused UCX_ROOT warning
+export UCX_WARN_UNUSED_ENV_VARS=n
+
+# Suppress CUDA-aware support is disabled warning
+export OMPI_MCA_opal_warn_on_missing_libcuda=0
+
+srun ./out/distbitonic -q 22 --validation --perf 8 --exchange-opt
@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=16
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8 --exchange-opt

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=16
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation --pipeline 8
+srun ./out/distbitonic -q 25 --validation --perf 8 --exchange-opt

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=16
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation --pipeline 8
+srun ./out/distbitonic -q 27 --validation --perf 8 --exchange-opt

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=32
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8 --exchange-opt
homework_2/hpc/N4P32T4Q21.sh (new file)
@@ -0,0 +1,28 @@
+#! /usr/bin/env bash
+
+#SBATCH --nodes=4
+#SBATCH --ntasks-per-node=32
+#SBATCH --cpus-per-task=4
+#SBATCH --time=10:00
+
+# Use this as following
+# $> sbatch -p batch|rome <this file>
+#
+# NOTE:
+# First compile in aristotle with
+# $> module load gcc/9.2.0 openmpi/4.0.3
+# $> make -j hpc-build
+#
+
+module load gcc/9.2.0 openmpi/4.0.3
+# Note:
+# The above versions are matching w/ my system's
+# versions, thus making compiling/debugging easier.
+
+# Suppress unused UCX_ROOT warning
+export UCX_WARN_UNUSED_ENV_VARS=n
+
+# Suppress CUDA-aware support is disabled warning
+export OMPI_MCA_opal_warn_on_missing_libcuda=0
+
+srun ./out/distbitonic -q 21 --validation --perf 8 --exchange-opt
@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=32
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8 --exchange-opt

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=32
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation --pipeline 8
+srun ./out/distbitonic -q 25 --validation --perf 8 --exchange-opt

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=32
 #SBATCH --cpus-per-task=4
-#SBATCH --time=5:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation --pipeline 8
+srun ./out/distbitonic -q 27 --validation --perf 8 --exchange-opt

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8
homework_2/hpc/N4P4T4Q24.sh (new file)
@@ -0,0 +1,28 @@
+#! /usr/bin/env bash
+
+#SBATCH --nodes=4
+#SBATCH --ntasks-per-node=4
+#SBATCH --cpus-per-task=4
+#SBATCH --time=5:00
+
+# Use this as following
+# $> sbatch -p batch|rome <this file>
+#
+# NOTE:
+# First compile in aristotle with
+# $> module load gcc/9.2.0 openmpi/4.0.3
+# $> make -j hpc-build
+#
+
+module load gcc/9.2.0 openmpi/4.0.3
+# Note:
+# The above versions are matching w/ my system's
+# versions, thus making compiling/debugging easier.
+
+# Suppress unused UCX_ROOT warning
+export UCX_WARN_UNUSED_ENV_VARS=n
+
+# Suppress CUDA-aware support is disabled warning
+export OMPI_MCA_opal_warn_on_missing_libcuda=0
+
+srun ./out/distbitonic -q 24 --validation --perf 8
@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation
+srun ./out/distbitonic -q 25 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=4
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=5:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation
+srun ./out/distbitonic -q 27 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=8
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 20 --perf --validation
+srun ./out/distbitonic -q 20 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=8
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 23 --perf --validation
+srun ./out/distbitonic -q 23 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=8
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 25 --perf --validation --pipeline 8
+srun ./out/distbitonic -q 25 --validation --perf 8

@@ -3,7 +3,7 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=8
 #SBATCH --cpus-per-task=4
-#SBATCH --time=2:00
+#SBATCH --time=10:00
 
 # Use this as following
 # $> sbatch -p batch|rome <this file>
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n
 # Suppress CUDA-aware support is disabled warning
 export OMPI_MCA_opal_warn_on_missing_libcuda=0
 
-srun ./out-rc3b/distbitonic -q 27 --perf --validation --pipeline 8
+srun ./out/distbitonic -q 27 --validation --perf 8
@@ -27,7 +27,7 @@
 // Default Data size (in case -q <N> is not present)
 static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16;
 
-// The maximum MPI size we support
+// The maximum MPI size we support (in Nodes x Processes)
 static constexpr size_t MAX_MPI_SIZE = 1024UL;
 
 // The maximum pipeline size we support
@@ -61,7 +61,7 @@ struct config_t {
     bool validation{false};    //!< Request a full validation at the end, performed by process rank 0.
     bool ndebug{false};        //!< Skips debug trap on DEBUG builds.
     size_t perf{1};            //!< Enable performance timing measurements and prints and repeat
-                               //!< the performs the sorting <perf> times to average the measurements
+                               //!< the sorting <perf> times.
     bool verbose{false};       //!< Flag to enable verbose output to stdout.
 };
 
@@ -129,7 +129,7 @@ struct MPI_t {
  * This function matches a transmit and a receive in order for fully exchanged data between
  * current node and partner.
  *
- * @tparam T The inner valur type used in buffer
+ * @tparam ValueT The value type used in buffer
  *
  * @param ldata [const ValueT*] Pointer to local data to send
  * @param rdata [ValueT*] Pointer to buffer to receive data from partner
@@ -163,7 +163,7 @@ struct MPI_t {
  * This call MUST paired with exchange_wait() for each MPI_t object.
  * Calling 2 consecutive exchange_start() for the same MPI_t object is undefined.
  *
- * @tparam ValueT The underlying value type used in buffers
+ * @tparam ValueT The value type used in buffers
  *
  * @param ldata [const ValueT*] Pointer to local data to send
  * @param rdata [ValueT*] Pointer to buffer to receive data from partner
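Note: the two doc blocks above describe a blocking exchange and a split exchange_start()/exchange_wait() pair. A rough usage sketch follows; everything beyond the documented ldata/rdata roles (the count/partner/tag arguments and the exact signatures) is an assumption for illustration, not the header's real prototypes.

// Hedged sketch only -- assumed signatures, shown to illustrate the pairing contract.
template <typename ValueT>
void exchange_round(MPI_t<>& mpi, const ValueT* ldata, ValueT* rdata,
                    size_t count, int partner, int tag) {
    mpi.exchange_start(ldata, rdata, count, partner, tag);  // assumed parameter list
    // ... independent local work can overlap here ...
    mpi.exchange_wait();  // must be paired with the exchange_start() above
}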
@@ -267,8 +267,8 @@ using mpi_id_t = MPI_t<>::ID_t;
 /*!
  * @brief A std::vector wrapper with 2 vectors, an active and a shadow.
  *
- * This type exposes the standard vector
- * functionality of the active vector. The shadow can be used when we need to use the vector as mutable
+ * This type exposes the standard vector functionality of the active vector.
+ * The shadow can be used when we need to use the vector as mutable
  * data in algorithms that can not support "in-place" editing (like elbow-sort for example)
  *
  * @tparam Value_t the underlying data type of the vectors
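Note: a conceptual sketch of the active/shadow idea described above; the member names (active_, shadow_, swap_buffers) are assumptions for illustration, not the real ShadowedVec_t API.

#include <vector>
#include <utility>

template <typename Value_t>
struct ShadowedVecSketch {
    std::vector<Value_t> active_;   // what the wrapper's standard vector interface exposes
    std::vector<Value_t> shadow_;   // scratch target for passes that cannot edit in place

    std::vector<Value_t>& scratch() { return shadow_; }   // elbow-sort style passes write here ...
    void swap_buffers() { std::swap(active_, shadow_); }  // ... then the scratch becomes the active data
};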
@@ -418,7 +418,9 @@ private:
 extern Log logger;
 
 /*!
- * A small timing utility based on chrono.
+ * A small timing utility based on chrono that supports timing rounds
+ * and returning the median of them. Time can accumulate to the measurement
+ * for each round.
  */
 struct Timing {
     using Tpoint = std::chrono::steady_clock::time_point;
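Note: how the timing rounds mentioned above are driven in this commit (pattern taken from the main.cpp hunks further down; measurements_init() is now called once from init()). The median extraction itself is not shown in this diff.

for (size_t it = 0; it < config.perf; ++it) {   // one round per --perf repetition
    Ttotal.start();
    // distBitonic(...) or distBubbletonic(...) runs here
    Ttotal.stop();
    measurements_next();                        // close this round
}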
@@ -485,7 +487,7 @@ private:
 };
 
 /*!
- * Utility "high level function"-like macro to forward a function call
+ * A "high level function"-like utility macro to forward a function call
  * and accumulate the execution time to the corresponding timing object.
  *
  * @param Tim The Timing object [Needs to have methods start() and stop()]
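Note: the macro body itself is outside this hunk; a hypothetical shape consistent with the description above (name and details assumed) would be:

#define TIME_FWD(Tim, Func, ...)    \
    do {                            \
        (Tim).start();              \
        Func(__VA_ARGS__);          \
        (Tim).stop();               \
    } while (0)

// e.g. TIME_FWD(TfullSort, distBitonic, Data, mpi.size(), mpi.rank());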
@@ -24,6 +24,10 @@ distBuffer_t Data;
 Log logger;
 distStat_t localStat, remoteStat;
 
+// Mersenne seeded from hw if possible. range: [type_min, type_max]
+std::random_device rd;
+std::mt19937 gen(rd());
+
 //! Performance timers for each one of the "costly" functions
 Timing Ttotal;
 Timing TfullSort;
@@ -106,9 +110,9 @@ bool get_options(int argc, char* argv[]){
         }
         else if (arg == "-h" || arg == "--help") {
             std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n";
-            std::cout << "distbitonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n";
+            std::cout << "distbitonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf <N>] [--ndebug] [-v]\n";
             std::cout << "distbitonic -h\n";
-            std::cout << "distbubbletonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n";
+            std::cout << "distbubbletonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf <N> ] [--ndebug] [-v]\n";
             std::cout << "distbubbletonic -h\n";
             std::cout << '\n';
             std::cout << "Options:\n\n";
@@ -123,7 +127,7 @@ bool get_options(int argc, char* argv[]){
             std::cout << "      Request a full validation at the end, performed by process rank 0\n\n";
             std::cout << "  --perf <N> \n";
             std::cout << "      Enable performance timing measurements and prints, and repeat\n";
-            std::cout << "      the sorting <N> times to average the measurements\n\n";
+            std::cout << "      the sorting <N> times.\n\n";
             std::cout << "  --ndebug\n";
             std::cout << "      Skip debug breakpoint when on debug build.\n\n";
             std::cout << "  -v | --verbose\n";
@@ -190,22 +194,22 @@ bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
     return ret;
 }
 
-#if !defined TESTING
 /*!
- * @return Returns 0, but.... we may throw or exit(1)
+ * Initializes the environment, must called from each process
+ *
+ * @param argc [int*] POINTER to main's argc argument
+ * @param argv [char***] POINTER to main's argv argument
  */
-int main(int argc, char* argv[]) try {
+void init(int* argc, char*** argv) {
     // Initialize MPI environment
-    mpi.init(&argc, &argv);
+    mpi.init(argc, argv);
 
     // try to read command line (after MPI parsing)
-    if (!get_options(argc, argv))
+    if (!get_options(*argc, *argv))
         exit(1);
 
-    logger << "MPI environment initialized." <<
-        " Rank: " << mpi.rank() <<
-        " Size: " << mpi.size() <<
-        logger.endl;
+    logger << "MPI environment initialized." << " Rank: " << mpi.rank() << " Size: " << mpi.size()
+           << logger.endl;
 
 #if defined DEBUG
 #if defined TESTING
@@ -226,23 +230,31 @@ int main(int argc, char* argv[]) try {
     sleep(1);
 #endif
 
+    // Prepare vector and timing data
+    Data.resize(config.arraySize);
+    measurements_init();
+}
+
+#if !defined TESTING
+/*!
+ * @return Returns 0, but.... we may throw or exit(1)
+ */
+int main(int argc, char* argv[]) try {
+
+    // Init everything
+    init(&argc, &argv);
+
+    for (size_t it = 0 ; it < config.perf ; ++it) {
         // Initialize local data
         logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl;
-        std::random_device rd;  // Mersenne seeded from hw if possible. range: [type_min, type_max]
-        std::mt19937 gen(rd());
         std::uniform_int_distribution<distValue_t > dis(
             std::numeric_limits<distValue_t>::min(),
             std::numeric_limits<distValue_t>::max()
         );
-        // Fill vector
-        Data.resize(config.arraySize);
         std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); });
 
         // Run distributed sort
         if (mpi.rank() == 0)
            logger << "Starting distributed sorting ... ";
-        measurements_init();
-        for (size_t it = 0 ; it < config.perf ; ++it) {
         Ttotal.start();
 #if CODE_VERSION == BUBBLETONIC
         distBubbletonic(Data, mpi.size(), mpi.rank());
@@ -251,9 +263,9 @@ int main(int argc, char* argv[]) try {
 #endif
         Ttotal.stop();
         measurements_next();
-    }
         if (mpi.rank() == 0)
             logger << " Done." << logger.endl;
+    }
 
     // Print-outs and validation
     if (config.perf > 1) {
@@ -266,10 +278,10 @@ int main(int argc, char* argv[]) try {
     if (config.validation) {
         // If requested, we have the chance to fail!
         if (mpi.rank() == 0)
-            std::cout << "Results validation ...";
+            std::cout << "[Validation] Results validation ...";
         bool val = validator(Data, mpi.size(), mpi.rank());
         if (mpi.rank() == 0)
-            std::cout << ((val) ? "\x1B[32m [PASS] \x1B[0m\n" : " \x1B[32m [FAIL] \x1B[0m\n");
+            std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[32m [FAILED] \x1B[0m\n");
     }
     mpi.finalize();
     return 0;
@@ -91,3 +91,29 @@ TEST(TdistCommonUT, elbowSort_test3) {
     EXPECT_EQ((ts_data == ts_expected_des), true);
 }
 
+/*
+ * Tag generator test without stage calls
+ */
+TEST(TdistCommonUT, tagGenerator_test1) {
+    // The maximum MPI size we support
+    // static constexpr size_t MAX_MPI_SIZE = 1024UL;
+    // The maximum pipeline size we support
+    // static constexpr size_t MAX_PIPELINE_SIZE = 64UL;
+
+    std::vector<int> ts_tags;
+    auto ts_logSize = static_cast<uint32_t>(std::log2(MAX_MPI_SIZE));
+
+    for (size_t depth = 0; depth <= ts_logSize; ++depth) {
+        for (size_t step = 0 ; step < MAX_MPI_SIZE; ++step) {
+            int tag = static_cast<int>(tagGenerator(depth, step));
+            ts_tags.push_back(tag);         // Exchange optimization
+            for (size_t stage = 0; stage < MAX_PIPELINE_SIZE; ++stage) {
+                ts_tags.push_back(++tag);   // stages
+            }
+        }
+    }
+    std::sort(ts_tags.begin(), ts_tags.end());
+    for (size_t i = 0 ; i < ts_tags.size() - 1 ; ++i)
+        EXPECT_NE(ts_tags[i], ts_tags[i+1]);
+}
+
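Note: the new test above only asserts that no two generated tags collide across (depth, step) pairs and their pipeline stages. A hypothetical generator that satisfies exactly that property, using the constants quoted in the test, is sketched below; it is NOT the repository's tagGenerator().

#include <cstddef>

static constexpr std::size_t MAX_MPI_SIZE      = 1024UL;
static constexpr std::size_t MAX_PIPELINE_SIZE = 64UL;

// Reserve MAX_PIPELINE_SIZE + 1 consecutive tags per (depth, step) pair: the base
// tag plus one per stage, so the ++tag increments in the test never collide.
constexpr std::size_t tagGenerator_sketch(std::size_t depth, std::size_t step) {
    return (depth * MAX_MPI_SIZE + step) * (MAX_PIPELINE_SIZE + 1);
}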
@@ -27,8 +27,8 @@
 MPI_t<> ts_mpi;
 
 // Mersenne seeded from hw if possible. range: [type_min, type_max]
-std::random_device rd;
-std::mt19937 gen(rd());
+std::random_device ts_rd;
+std::mt19937 ts_gen(ts_rd());
 
 class TMPIdistSort : public ::testing::Test {
 protected:
@@ -59,7 +59,7 @@ TEST_F(TMPIdistSort, distBubbletonic_test1) {
         std::numeric_limits<tsValue_t>::max()
     );
     ts_Data.resize(ts_buffer_size);
-    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); });
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
 
     // Execute function under test in all processes
     distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
@@ -100,7 +100,7 @@ TEST_F(TMPIdistSort, distBubbletonic_test2) {
         std::numeric_limits<tsValue_t>::max()
     );
     ts_Data.resize(ts_buffer_size);
-    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); });
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
 
     // Execute function under test in all processes
     distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
@@ -141,7 +141,7 @@ TEST_F(TMPIdistSort, distBubbletonic_test3) {
         std::numeric_limits<tsValue_t>::max()
    );
     ts_Data.resize(ts_buffer_size);
-    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); });
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
 
     // Set pipeline
     config.pipeline = 8;
@@ -170,6 +170,96 @@ TEST_F(TMPIdistSort, distBubbletonic_test3) {
     }
 }
 
+/*
+ * MPI: SysTest (acceptance)
+ * Each process executes distBubbletonic for uin32_t [1 << 16] with exchange optimization
+ */
+TEST_F(TMPIdistSort, distBubbletonic_test4) {
+    // Create and fill vector
+    using tsValue_t = uint32_t;         // Test parameters
+    size_t ts_buffer_size = 1 << 16;
+
+    ShadowedVec_t<tsValue_t> ts_Data;
+    std::uniform_int_distribution<tsValue_t > dis(
+        std::numeric_limits<tsValue_t>::min(),
+        std::numeric_limits<tsValue_t>::max()
+    );
+    ts_Data.resize(ts_buffer_size);
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
+
+    // Set exchange optimization
+    config.exchangeOpt = true;
+
+    // Execute function under test in all processes
+    distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
+
+    // Local min and max
+    auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end());
+    auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end());
+
+    // Gather min/max to rank 0
+    std::vector<tsValue_t> global_mins(ts_mpi.size());
+    std::vector<tsValue_t> global_maxes(ts_mpi.size());
+    MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType();
+
+    MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD);
+    MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD);
+
+    // Check results
+    EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true);
+    if (ts_mpi.rank() == 0) {
+        for (size_t i = 1; i < global_mins.size(); ++i) {
+            EXPECT_LE(global_maxes[i - 1], global_mins[i]);
+        }
+    }
+}
+
+/*
+ * MPI: SysTest (acceptance)
+ * Each process executes distBubbletonic for uin32_t [1 << 16] with
+ * exchange optimization and pipeline
+ */
+TEST_F(TMPIdistSort, distBubbletonic_test5) {
+    // Create and fill vector
+    using tsValue_t = uint32_t;         // Test parameters
+    size_t ts_buffer_size = 1 << 16;
+
+    ShadowedVec_t<tsValue_t> ts_Data;
+    std::uniform_int_distribution<tsValue_t > dis(
+        std::numeric_limits<tsValue_t>::min(),
+        std::numeric_limits<tsValue_t>::max()
+    );
+    ts_Data.resize(ts_buffer_size);
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
+
+    // Set exchange optimization + pipeline
+    config.exchangeOpt = true;
+    config.pipeline = 8;
+
+    // Execute function under test in all processes
+    distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
+
+    // Local min and max
+    auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end());
+    auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end());
+
+    // Gather min/max to rank 0
+    std::vector<tsValue_t> global_mins(ts_mpi.size());
+    std::vector<tsValue_t> global_maxes(ts_mpi.size());
+    MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType();
+
+    MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD);
+    MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD);
+
+    // Check results
+    EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true);
+    if (ts_mpi.rank() == 0) {
+        for (size_t i = 1; i < global_mins.size(); ++i) {
+            EXPECT_LE(global_maxes[i - 1], global_mins[i]);
+        }
+    }
+}
+
 /*
  * MPI: SysTest (acceptance)
  * Each process executes distBitonic for uin8_t [16]
@@ -185,7 +275,7 @@ TEST_F(TMPIdistSort, distBitonic_test1) {
         std::numeric_limits<tsValue_t>::max()
     );
     ts_Data.resize(ts_buffer_size);
-    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); });
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
 
     // Execute function under test in all processes
     distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
@@ -226,7 +316,7 @@ TEST_F(TMPIdistSort, distBitonic_test2) {
         std::numeric_limits<tsValue_t>::max()
     );
     ts_Data.resize(ts_buffer_size);
-    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); });
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
 
     // Execute function under test in all processes
     distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
@@ -267,7 +357,7 @@ TEST_F(TMPIdistSort, distBitonic_test3) {
         std::numeric_limits<tsValue_t>::max()
     );
     ts_Data.resize(ts_buffer_size);
-    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); });
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
 
     // Set pipeline
     config.pipeline = 8;
@@ -295,3 +385,93 @@ TEST_F(TMPIdistSort, distBitonic_test3) {
         }
     }
 }
+
+/*
+ * MPI: SysTest (acceptance)
+ * Each process executes distBitonic for uin32_t [1 << 16] with exchange optimization
+ */
+TEST_F(TMPIdistSort, distBitonic_test4) {
+    // Create and fill vector
+    using tsValue_t = uint32_t;         // Test parameters
+    size_t ts_buffer_size = 1 << 16;
+
+    ShadowedVec_t<tsValue_t> ts_Data;
+    std::uniform_int_distribution<tsValue_t > dis(
+        std::numeric_limits<tsValue_t>::min(),
+        std::numeric_limits<tsValue_t>::max()
+    );
+    ts_Data.resize(ts_buffer_size);
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
+
+    // Set exchange optimization
+    config.exchangeOpt = true;
+
+    // Execute function under test in all processes
+    distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
+
+    // Local min and max
+    auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end());
+    auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end());
+
+    // Gather min/max to rank 0
+    std::vector<tsValue_t> global_mins(ts_mpi.size());
+    std::vector<tsValue_t> global_maxes(ts_mpi.size());
+    MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType();
+
+    MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD);
+    MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD);
+
+    // Check results
+    EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true);
+    if (ts_mpi.rank() == 0) {
+        for (size_t i = 1; i < global_mins.size(); ++i) {
+            EXPECT_LE(global_maxes[i - 1], global_mins[i]);
+        }
+    }
+}
+
+/*
+ * MPI: SysTest (acceptance)
+ * Each process executes distBitonic for uin32_t [1 << 16] with
+ * exchange optimization and pipeline
+ */
+TEST_F(TMPIdistSort, distBitonic_test5) {
+    // Create and fill vector
+    using tsValue_t = uint32_t;         // Test parameters
+    size_t ts_buffer_size = 1 << 16;
+
+    ShadowedVec_t<tsValue_t> ts_Data;
+    std::uniform_int_distribution<tsValue_t > dis(
+        std::numeric_limits<tsValue_t>::min(),
+        std::numeric_limits<tsValue_t>::max()
+    );
+    ts_Data.resize(ts_buffer_size);
+    std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); });
+
+    // Set exchange optimization + pipeline
+    config.exchangeOpt = true;
+    config.pipeline = 8;
+
+    // Execute function under test in all processes
+    distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank());
+
+    // Local min and max
+    auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end());
+    auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end());
+
+    // Gather min/max to rank 0
+    std::vector<tsValue_t> global_mins(ts_mpi.size());
+    std::vector<tsValue_t> global_maxes(ts_mpi.size());
+    MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType();
+
+    MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD);
+    MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD);
+
+    // Check results
+    EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true);
+    if (ts_mpi.rank() == 0) {
+        for (size_t i = 1; i < global_mins.size(); ++i) {
+            EXPECT_LE(global_maxes[i - 1], global_mins[i]);
+        }
+    }
+}