@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=2 | #SBATCH --ntasks-per-node=2 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=2 | #SBATCH --ntasks-per-node=2 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=2 | #SBATCH --ntasks-per-node=2 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=2 | #SBATCH --ntasks-per-node=2 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 |
@@ -0,0 +1,28 @@ | |||||
#! /usr/bin/env bash | |||||
#SBATCH --nodes=1 | |||||
#SBATCH --ntasks-per-node=4 | |||||
#SBATCH --cpus-per-task=4 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | |||||
# $> sbatch -p batch|rome <this file> | |||||
# | |||||
# NOTE: | |||||
# First compile in aristotle with | |||||
# $> module load gcc/9.2.0 openmpi/4.0.3 | |||||
# $> make -j hpc-build | |||||
# | |||||
module load gcc/9.2.0 openmpi/4.0.3 | |||||
# Note: | |||||
# The above versions are matching w/ my system's | |||||
# versions, thus making compiling/debugging easier. | |||||
# Suppress unused UCX_ROOT warning | |||||
export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | |||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | |||||
srun ./out/distbitonic -q 26 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=1:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=2 | #SBATCH --nodes=2 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=2 | #SBATCH --nodes=2 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=2 | #SBATCH --nodes=2 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=2 | #SBATCH --nodes=2 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=16 | #SBATCH --ntasks-per-node=16 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 --exchange-opt |
@@ -0,0 +1,28 @@ | |||||
#! /usr/bin/env bash | |||||
#SBATCH --nodes=4 | |||||
#SBATCH --ntasks-per-node=16 | |||||
#SBATCH --cpus-per-task=4 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | |||||
# $> sbatch -p batch|rome <this file> | |||||
# | |||||
# NOTE: | |||||
# First compile in aristotle with | |||||
# $> module load gcc/9.2.0 openmpi/4.0.3 | |||||
# $> make -j hpc-build | |||||
# | |||||
module load gcc/9.2.0 openmpi/4.0.3 | |||||
# Note: | |||||
# The above versions are matching w/ my system's | |||||
# versions, thus making compiling/debugging easier. | |||||
# Suppress unused UCX_ROOT warning | |||||
export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | |||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | |||||
srun ./out/distbitonic -q 22 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=16 | #SBATCH --ntasks-per-node=16 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=16 | #SBATCH --ntasks-per-node=16 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation --pipeline 8 | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=16 | #SBATCH --ntasks-per-node=16 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation --pipeline 8 | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=32 | #SBATCH --ntasks-per-node=32 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 --exchange-opt |
@@ -0,0 +1,28 @@ | |||||
#! /usr/bin/env bash | |||||
#SBATCH --nodes=4 | |||||
#SBATCH --ntasks-per-node=32 | |||||
#SBATCH --cpus-per-task=4 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | |||||
# $> sbatch -p batch|rome <this file> | |||||
# | |||||
# NOTE: | |||||
# First compile in aristotle with | |||||
# $> module load gcc/9.2.0 openmpi/4.0.3 | |||||
# $> make -j hpc-build | |||||
# | |||||
module load gcc/9.2.0 openmpi/4.0.3 | |||||
# Note: | |||||
# The above versions are matching w/ my system's | |||||
# versions, thus making compiling/debugging easier. | |||||
# Suppress unused UCX_ROOT warning | |||||
export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | |||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | |||||
srun ./out/distbitonic -q 21 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=32 | #SBATCH --ntasks-per-node=32 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=32 | #SBATCH --ntasks-per-node=32 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation --pipeline 8 | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=32 | #SBATCH --ntasks-per-node=32 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=5:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation --pipeline 8 | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 --exchange-opt |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 |
@@ -0,0 +1,28 @@ | |||||
#! /usr/bin/env bash | |||||
#SBATCH --nodes=4 | |||||
#SBATCH --ntasks-per-node=4 | |||||
#SBATCH --cpus-per-task=4 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | |||||
# $> sbatch -p batch|rome <this file> | |||||
# | |||||
# NOTE: | |||||
# First compile in aristotle with | |||||
# $> module load gcc/9.2.0 openmpi/4.0.3 | |||||
# $> make -j hpc-build | |||||
# | |||||
module load gcc/9.2.0 openmpi/4.0.3 | |||||
# Note: | |||||
# The above versions are matching w/ my system's | |||||
# versions, thus making compiling/debugging easier. | |||||
# Suppress unused UCX_ROOT warning | |||||
export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | |||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | |||||
srun ./out/distbitonic -q 24 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=4 | #SBATCH --ntasks-per-node=4 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=5:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=8 | #SBATCH --ntasks-per-node=8 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 20 --perf --validation | |||||
srun ./out/distbitonic -q 20 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=8 | #SBATCH --ntasks-per-node=8 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 23 --perf --validation | |||||
srun ./out/distbitonic -q 23 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=8 | #SBATCH --ntasks-per-node=8 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 25 --perf --validation --pipeline 8 | |||||
srun ./out/distbitonic -q 25 --validation --perf 8 |
@@ -3,7 +3,7 @@ | |||||
#SBATCH --nodes=4 | #SBATCH --nodes=4 | ||||
#SBATCH --ntasks-per-node=8 | #SBATCH --ntasks-per-node=8 | ||||
#SBATCH --cpus-per-task=4 | #SBATCH --cpus-per-task=4 | ||||
#SBATCH --time=2:00 | |||||
#SBATCH --time=10:00 | |||||
# Use this as following | # Use this as following | ||||
# $> sbatch -p batch|rome <this file> | # $> sbatch -p batch|rome <this file> | ||||
@@ -25,4 +25,4 @@ export UCX_WARN_UNUSED_ENV_VARS=n | |||||
# Suppress CUDA-aware support is disabled warning | # Suppress CUDA-aware support is disabled warning | ||||
export OMPI_MCA_opal_warn_on_missing_libcuda=0 | export OMPI_MCA_opal_warn_on_missing_libcuda=0 | ||||
srun ./out-rc3b/distbitonic -q 27 --perf --validation --pipeline 8 | |||||
srun ./out/distbitonic -q 27 --validation --perf 8 |
@@ -27,7 +27,7 @@ | |||||
// Default Data size (in case -q <N> is not present) | // Default Data size (in case -q <N> is not present) | ||||
static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16; | static constexpr size_t DEFAULT_DATA_SIZE = 1 << 16; | ||||
// The maximum MPI size we support | |||||
// The maximum MPI size we support (in Nodes x Processes) | |||||
static constexpr size_t MAX_MPI_SIZE = 1024UL; | static constexpr size_t MAX_MPI_SIZE = 1024UL; | ||||
// The maximum pipeline size we support | // The maximum pipeline size we support | ||||
@@ -61,7 +61,7 @@ struct config_t { | |||||
bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. | bool validation{false}; //!< Request a full validation at the end, performed by process rank 0. | ||||
bool ndebug{false}; //!< Skips debug trap on DEBUG builds. | bool ndebug{false}; //!< Skips debug trap on DEBUG builds. | ||||
size_t perf{1}; //!< Enable performance timing measurements and prints and repeat | size_t perf{1}; //!< Enable performance timing measurements and prints and repeat | ||||
//!< the performs the sorting <perf> times to average the measurements | |||||
//!< the sorting <perf> times. | |||||
bool verbose{false}; //!< Flag to enable verbose output to stdout. | bool verbose{false}; //!< Flag to enable verbose output to stdout. | ||||
}; | }; | ||||
@@ -129,7 +129,7 @@ struct MPI_t { | |||||
* This function matches a transmit and a receive in order for fully exchanged data between | * This function matches a transmit and a receive in order for fully exchanged data between | ||||
* current node and partner. | * current node and partner. | ||||
* | * | ||||
* @tparam T The inner valur type used in buffer | |||||
* @tparam ValueT The value type used in buffer | |||||
* | * | ||||
* @param ldata [const ValueT*] Pointer to local data to send | * @param ldata [const ValueT*] Pointer to local data to send | ||||
* @param rdata [ValueT*] Pointer to buffer to receive data from partner | * @param rdata [ValueT*] Pointer to buffer to receive data from partner | ||||
@@ -163,7 +163,7 @@ struct MPI_t { | |||||
* This call MUST be paired with exchange_wait() for each MPI_t object. | * This call MUST be paired with exchange_wait() for each MPI_t object. | ||||
* Calling 2 consecutive exchange_start() for the same MPI_t object is undefined. | * Calling 2 consecutive exchange_start() for the same MPI_t object is undefined. | ||||
* | * | ||||
* @tparam ValueT The underlying value type used in buffers | |||||
* @tparam ValueT The value type used in buffers | |||||
* | * | ||||
* @param ldata [const ValueT*] Pointer to local data to send | * @param ldata [const ValueT*] Pointer to local data to send | ||||
* @param rdata [ValueT*] Pointer to buffer to receive data from partner | * @param rdata [ValueT*] Pointer to buffer to receive data from partner | ||||
@@ -267,8 +267,8 @@ using mpi_id_t = MPI_t<>::ID_t; | |||||
/*! | /*! | ||||
* @brief A std::vector wrapper with 2 vectors, an active and a shadow. | * @brief A std::vector wrapper with 2 vectors, an active and a shadow. | ||||
* | * | ||||
* This type exposes the standard vector | |||||
* functionality of the active vector. The shadow can be used when we need to use the vector as mutable | |||||
* This type exposes the standard vector functionality of the active vector. | |||||
* The shadow can be used when we need to use the vector as mutable | |||||
* data in algorithms that can not support "in-place" editing (like elbow-sort for example) | * data in algorithms that can not support "in-place" editing (like elbow-sort for example) | ||||
* | * | ||||
* @tparam Value_t the underlying data type of the vectors | * @tparam Value_t the underlying data type of the vectors | ||||
@@ -418,7 +418,9 @@ private: | |||||
extern Log logger; | extern Log logger; | ||||
/*! | /*! | ||||
* A small timing utility based on chrono. | |||||
* A small timing utility based on chrono that supports multiple timing rounds | |||||
* and returns the median of them. Within a round, time can accumulate into | |||||
* that round's measurement. | |||||
*/ | */ | ||||
struct Timing { | struct Timing { | ||||
using Tpoint = std::chrono::steady_clock::time_point; | using Tpoint = std::chrono::steady_clock::time_point; | ||||
@@ -485,7 +487,7 @@ private: | |||||
}; | }; | ||||
/*! | /*! | ||||
* Utility "high level function"-like macro to forward a function call | |||||
* A "high level function"-like utility macro to forward a function call | |||||
* and accumulate the execution time to the corresponding timing object. | * and accumulate the execution time to the corresponding timing object. | ||||
* | * | ||||
* @param Tim The Timing object [Needs to have methods start() and stop()] | * @param Tim The Timing object [Needs to have methods start() and stop()] | ||||
@@ -24,6 +24,10 @@ distBuffer_t Data; | |||||
Log logger; | Log logger; | ||||
distStat_t localStat, remoteStat; | distStat_t localStat, remoteStat; | ||||
// Mersenne seeded from hw if possible. range: [type_min, type_max] | |||||
std::random_device rd; | |||||
std::mt19937 gen(rd()); | |||||
//! Performance timers for each one of the "costly" functions | //! Performance timers for each one of the "costly" functions | ||||
Timing Ttotal; | Timing Ttotal; | ||||
Timing TfullSort; | Timing TfullSort; | ||||
@@ -106,9 +110,9 @@ bool get_options(int argc, char* argv[]){ | |||||
} | } | ||||
else if (arg == "-h" || arg == "--help") { | else if (arg == "-h" || arg == "--help") { | ||||
std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n"; | std::cout << "distbitonic/distbubbletonic - A distributed sort utility\n\n"; | ||||
std::cout << "distbitonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n"; | |||||
std::cout << "distbitonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf <N>] [--ndebug] [-v]\n"; | |||||
std::cout << "distbitonic -h\n"; | std::cout << "distbitonic -h\n"; | ||||
std::cout << "distbubbletonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf] [--ndebug] [-v]\n"; | |||||
std::cout << "distbubbletonic -q <N> [-e] [-p | --pipeline N] [--validation] [--perf <N> ] [--ndebug] [-v]\n"; | |||||
std::cout << "distbubbletonic -h\n"; | std::cout << "distbubbletonic -h\n"; | ||||
std::cout << '\n'; | std::cout << '\n'; | ||||
std::cout << "Options:\n\n"; | std::cout << "Options:\n\n"; | ||||
@@ -123,7 +127,7 @@ bool get_options(int argc, char* argv[]){ | |||||
std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; | std::cout << " Request a full validation at the end, performed by process rank 0\n\n"; | ||||
std::cout << " --perf <N> \n"; | std::cout << " --perf <N> \n"; | ||||
std::cout << " Enable performance timing measurements and prints, and repeat\n"; | std::cout << " Enable performance timing measurements and prints, and repeat\n"; | ||||
std::cout << " the sorting <N> times to average the measurements\n\n"; | |||||
std::cout << " the sorting <N> times.\n\n"; | |||||
std::cout << " --ndebug\n"; | std::cout << " --ndebug\n"; | ||||
std::cout << " Skip debug breakpoint when on debug build.\n\n"; | std::cout << " Skip debug breakpoint when on debug build.\n\n"; | ||||
std::cout << " -v | --verbose\n"; | std::cout << " -v | --verbose\n"; | ||||
@@ -190,59 +194,67 @@ bool validator(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) { | |||||
return ret; | return ret; | ||||
} | } | ||||
#if !defined TESTING | |||||
/*! | /*! | ||||
* @return Returns 0, but.... we may throw or exit(1) | |||||
* Initializes the environment, must be called from each process | |||||
* | |||||
* @param argc [int*] POINTER to main's argc argument | |||||
* @param argv [char***] POINTER to main's argv argument | |||||
*/ | */ | ||||
int main(int argc, char* argv[]) try { | |||||
void init(int* argc, char*** argv) { | |||||
// Initialize MPI environment | // Initialize MPI environment | ||||
mpi.init(&argc, &argv); | |||||
mpi.init(argc, argv); | |||||
// try to read command line (after MPI parsing) | // try to read command line (after MPI parsing) | ||||
if (!get_options(argc, argv)) | |||||
if (!get_options(*argc, *argv)) | |||||
exit(1); | exit(1); | ||||
logger << "MPI environment initialized." << | |||||
" Rank: " << mpi.rank() << | |||||
" Size: " << mpi.size() << | |||||
logger.endl; | |||||
logger << "MPI environment initialized." << " Rank: " << mpi.rank() << " Size: " << mpi.size() | |||||
<< logger.endl; | |||||
#if defined DEBUG | #if defined DEBUG | ||||
#if defined TESTING | #if defined TESTING | ||||
/* | |||||
* In case of a debug build we will wait here until sleep_wait | |||||
* will reset via debugger. In order to do that the user must attach | |||||
* debugger to all processes. For example: | |||||
* $> mpirun -np 2 ./<program path> | |||||
* $> ps aux | grep <program> | |||||
* $> gdb <program> <PID1> | |||||
* $> gdb <program> <PID2> | |||||
*/ | |||||
volatile bool sleep_wait = false; | |||||
/* | |||||
* In case of a debug build we will wait here until sleep_wait | |||||
* will reset via debugger. In order to do that the user must attach | |||||
* debugger to all processes. For example: | |||||
* $> mpirun -np 2 ./<program path> | |||||
* $> ps aux | grep <program> | |||||
* $> gdb <program> <PID1> | |||||
* $> gdb <program> <PID2> | |||||
*/ | |||||
volatile bool sleep_wait = false; | |||||
#else | #else | ||||
volatile bool sleep_wait = true; | |||||
volatile bool sleep_wait = true; | |||||
#endif | #endif | ||||
while (sleep_wait && !config.ndebug) | |||||
sleep(1); | |||||
while (sleep_wait && !config.ndebug) | |||||
sleep(1); | |||||
#endif | #endif | ||||
// Initialize local data | |||||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; | |||||
std::random_device rd; // Mersenne seeded from hw if possible. range: [type_min, type_max] | |||||
std::mt19937 gen(rd()); | |||||
std::uniform_int_distribution<distValue_t > dis( | |||||
std::numeric_limits<distValue_t>::min(), | |||||
std::numeric_limits<distValue_t>::max() | |||||
); | |||||
// Fill vector | |||||
// Prepare vector and timing data | |||||
Data.resize(config.arraySize); | Data.resize(config.arraySize); | ||||
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); }); | |||||
// Run distributed sort | |||||
if (mpi.rank() == 0) | |||||
logger << "Starting distributed sorting ... "; | |||||
measurements_init(); | measurements_init(); | ||||
} | |||||
#if !defined TESTING | |||||
/*! | |||||
* @return Returns 0, but.... we may throw or exit(1) | |||||
*/ | |||||
int main(int argc, char* argv[]) try { | |||||
// Init everything | |||||
init(&argc, &argv); | |||||
for (size_t it = 0 ; it < config.perf ; ++it) { | for (size_t it = 0 ; it < config.perf ; ++it) { | ||||
// Initialize local data | |||||
logger << "Initialize local array of " << config.arraySize << " elements" << logger.endl; | |||||
std::uniform_int_distribution<distValue_t > dis( | |||||
std::numeric_limits<distValue_t>::min(), | |||||
std::numeric_limits<distValue_t>::max() | |||||
); | |||||
std::generate(Data.begin(), Data.end(), [&]() { return dis(gen); }); | |||||
// Run distributed sort | |||||
if (mpi.rank() == 0) | |||||
logger << "Starting distributed sorting ... "; | |||||
Ttotal.start(); | Ttotal.start(); | ||||
#if CODE_VERSION == BUBBLETONIC | #if CODE_VERSION == BUBBLETONIC | ||||
distBubbletonic(Data, mpi.size(), mpi.rank()); | distBubbletonic(Data, mpi.size(), mpi.rank()); | ||||
@@ -251,9 +263,9 @@ int main(int argc, char* argv[]) try { | |||||
#endif | #endif | ||||
Ttotal.stop(); | Ttotal.stop(); | ||||
measurements_next(); | measurements_next(); | ||||
if (mpi.rank() == 0) | |||||
logger << " Done." << logger.endl; | |||||
} | } | ||||
if (mpi.rank() == 0) | |||||
logger << " Done." << logger.endl; | |||||
// Print-outs and validation | // Print-outs and validation | ||||
if (config.perf > 1) { | if (config.perf > 1) { | ||||
@@ -266,10 +278,10 @@ int main(int argc, char* argv[]) try { | |||||
if (config.validation) {
    // If requested, we have the chance to fail!
    // validator() is invoked by every rank; only rank 0 prints the banner
    // and the verdict.
    if (mpi.rank() == 0)
        std::cout << "[Validation] Results validation ...";
    bool val = validator(Data, mpi.size(), mpi.rank());
    if (mpi.rank() == 0) {
        // ANSI SGR colours: 32 = green for a pass, 31 = red for a failure.
        // (The failure branch used to be printed in green as well.)
        std::cout << ((val) ? "\x1B[32m [PASSED] \x1B[0m\n" : " \x1B[31m [FAILED] \x1B[0m\n");
    }
}
mpi.finalize(); | mpi.finalize(); | ||||
return 0; | return 0; | ||||
@@ -91,3 +91,29 @@ TEST(TdistCommonUT, elbowSort_test3) { | |||||
EXPECT_EQ((ts_data == ts_expected_des), true); | EXPECT_EQ((ts_data == ts_expected_des), true); | ||||
} | } | ||||
/* | |||||
* Tag generator test without stage calls | |||||
*/ | |||||
TEST(TdistCommonUT, tagGenerator_test1) { | |||||
// The maximum MPI size we support | |||||
// static constexpr size_t MAX_MPI_SIZE = 1024UL; | |||||
// The maximum pipeline size we support | |||||
// static constexpr size_t MAX_PIPELINE_SIZE = 64UL; | |||||
std::vector<int> ts_tags; | |||||
auto ts_logSize = static_cast<uint32_t>(std::log2(MAX_MPI_SIZE)); | |||||
for (size_t depth = 0; depth <= ts_logSize; ++depth) { | |||||
for (size_t step = 0 ; step < MAX_MPI_SIZE; ++step) { | |||||
int tag = static_cast<int>(tagGenerator(depth, step)); | |||||
ts_tags.push_back(tag); // Exchange optimization | |||||
for (size_t stage = 0; stage < MAX_PIPELINE_SIZE; ++stage) { | |||||
ts_tags.push_back(++tag); // stages | |||||
} | |||||
} | |||||
} | |||||
std::sort(ts_tags.begin(), ts_tags.end()); | |||||
for (size_t i = 0 ; i < ts_tags.size() - 1 ; ++i) | |||||
EXPECT_NE(ts_tags[i], ts_tags[i+1]); | |||||
} | |||||
@@ -27,8 +27,8 @@ | |||||
MPI_t<> ts_mpi; | MPI_t<> ts_mpi; | ||||
// Mersenne seeded from hw if possible. range: [type_min, type_max] | // Mersenne seeded from hw if possible. range: [type_min, type_max] | ||||
std::random_device rd; | |||||
std::mt19937 gen(rd()); | |||||
std::random_device ts_rd; | |||||
std::mt19937 ts_gen(ts_rd()); | |||||
class TMPIdistSort : public ::testing::Test { | class TMPIdistSort : public ::testing::Test { | ||||
protected: | protected: | ||||
@@ -59,7 +59,7 @@ TEST_F(TMPIdistSort, distBubbletonic_test1) { | |||||
std::numeric_limits<tsValue_t>::max() | std::numeric_limits<tsValue_t>::max() | ||||
); | ); | ||||
ts_Data.resize(ts_buffer_size); | ts_Data.resize(ts_buffer_size); | ||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); }); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Execute function under test in all processes | // Execute function under test in all processes | ||||
distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | ||||
@@ -100,7 +100,7 @@ TEST_F(TMPIdistSort, distBubbletonic_test2) { | |||||
std::numeric_limits<tsValue_t>::max() | std::numeric_limits<tsValue_t>::max() | ||||
); | ); | ||||
ts_Data.resize(ts_buffer_size); | ts_Data.resize(ts_buffer_size); | ||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); }); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Execute function under test in all processes | // Execute function under test in all processes | ||||
distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | ||||
@@ -141,7 +141,7 @@ TEST_F(TMPIdistSort, distBubbletonic_test3) { | |||||
std::numeric_limits<tsValue_t>::max() | std::numeric_limits<tsValue_t>::max() | ||||
); | ); | ||||
ts_Data.resize(ts_buffer_size); | ts_Data.resize(ts_buffer_size); | ||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); }); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Set pipeline | // Set pipeline | ||||
config.pipeline = 8; | config.pipeline = 8; | ||||
@@ -170,6 +170,96 @@ TEST_F(TMPIdistSort, distBubbletonic_test3) { | |||||
} | } | ||||
} | } | ||||
/* | |||||
* MPI: SysTest (acceptance) | |||||
* Each process executes distBubbletonic for uin32_t [1 << 16] with exchange optimization | |||||
*/ | |||||
TEST_F(TMPIdistSort, distBubbletonic_test4) { | |||||
// Create and fill vector | |||||
using tsValue_t = uint32_t; // Test parameters | |||||
size_t ts_buffer_size = 1 << 16; | |||||
ShadowedVec_t<tsValue_t> ts_Data; | |||||
std::uniform_int_distribution<tsValue_t > dis( | |||||
std::numeric_limits<tsValue_t>::min(), | |||||
std::numeric_limits<tsValue_t>::max() | |||||
); | |||||
ts_Data.resize(ts_buffer_size); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Set exchange optimization | |||||
config.exchangeOpt = true; | |||||
// Execute function under test in all processes | |||||
distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | |||||
// Local min and max | |||||
auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end()); | |||||
auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end()); | |||||
// Gather min/max to rank 0 | |||||
std::vector<tsValue_t> global_mins(ts_mpi.size()); | |||||
std::vector<tsValue_t> global_maxes(ts_mpi.size()); | |||||
MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType(); | |||||
MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
// Check results | |||||
EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true); | |||||
if (ts_mpi.rank() == 0) { | |||||
for (size_t i = 1; i < global_mins.size(); ++i) { | |||||
EXPECT_LE(global_maxes[i - 1], global_mins[i]); | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* MPI: SysTest (acceptance) | |||||
* Each process executes distBubbletonic for uin32_t [1 << 16] with | |||||
* exchange optimization and pipeline | |||||
*/ | |||||
TEST_F(TMPIdistSort, distBubbletonic_test5) { | |||||
// Create and fill vector | |||||
using tsValue_t = uint32_t; // Test parameters | |||||
size_t ts_buffer_size = 1 << 16; | |||||
ShadowedVec_t<tsValue_t> ts_Data; | |||||
std::uniform_int_distribution<tsValue_t > dis( | |||||
std::numeric_limits<tsValue_t>::min(), | |||||
std::numeric_limits<tsValue_t>::max() | |||||
); | |||||
ts_Data.resize(ts_buffer_size); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Set exchange optimization + pipeline | |||||
config.exchangeOpt = true; | |||||
config.pipeline = 8; | |||||
// Execute function under test in all processes | |||||
distBubbletonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | |||||
// Local min and max | |||||
auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end()); | |||||
auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end()); | |||||
// Gather min/max to rank 0 | |||||
std::vector<tsValue_t> global_mins(ts_mpi.size()); | |||||
std::vector<tsValue_t> global_maxes(ts_mpi.size()); | |||||
MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType(); | |||||
MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
// Check results | |||||
EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true); | |||||
if (ts_mpi.rank() == 0) { | |||||
for (size_t i = 1; i < global_mins.size(); ++i) { | |||||
EXPECT_LE(global_maxes[i - 1], global_mins[i]); | |||||
} | |||||
} | |||||
} | |||||
/* | /* | ||||
* MPI: SysTest (acceptance) | * MPI: SysTest (acceptance) | ||||
* Each process executes distBitonic for uin8_t [16] | * Each process executes distBitonic for uin8_t [16] | ||||
@@ -185,7 +275,7 @@ TEST_F(TMPIdistSort, distBitonic_test1) { | |||||
std::numeric_limits<tsValue_t>::max() | std::numeric_limits<tsValue_t>::max() | ||||
); | ); | ||||
ts_Data.resize(ts_buffer_size); | ts_Data.resize(ts_buffer_size); | ||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); }); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Execute function under test in all processes | // Execute function under test in all processes | ||||
distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | ||||
@@ -226,7 +316,7 @@ TEST_F(TMPIdistSort, distBitonic_test2) { | |||||
std::numeric_limits<tsValue_t>::max() | std::numeric_limits<tsValue_t>::max() | ||||
); | ); | ||||
ts_Data.resize(ts_buffer_size); | ts_Data.resize(ts_buffer_size); | ||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); }); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Execute function under test in all processes | // Execute function under test in all processes | ||||
distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | ||||
@@ -267,7 +357,7 @@ TEST_F(TMPIdistSort, distBitonic_test3) { | |||||
std::numeric_limits<tsValue_t>::max() | std::numeric_limits<tsValue_t>::max() | ||||
); | ); | ||||
ts_Data.resize(ts_buffer_size); | ts_Data.resize(ts_buffer_size); | ||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(gen); }); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Set pipeline | // Set pipeline | ||||
config.pipeline = 8; | config.pipeline = 8; | ||||
@@ -295,3 +385,93 @@ TEST_F(TMPIdistSort, distBitonic_test3) { | |||||
} | } | ||||
} | } | ||||
} | } | ||||
/* | |||||
* MPI: SysTest (acceptance) | |||||
* Each process executes distBitonic for uin32_t [1 << 16] with exchange optimization | |||||
*/ | |||||
TEST_F(TMPIdistSort, distBitonic_test4) { | |||||
// Create and fill vector | |||||
using tsValue_t = uint32_t; // Test parameters | |||||
size_t ts_buffer_size = 1 << 16; | |||||
ShadowedVec_t<tsValue_t> ts_Data; | |||||
std::uniform_int_distribution<tsValue_t > dis( | |||||
std::numeric_limits<tsValue_t>::min(), | |||||
std::numeric_limits<tsValue_t>::max() | |||||
); | |||||
ts_Data.resize(ts_buffer_size); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Set exchange optimization | |||||
config.exchangeOpt = true; | |||||
// Execute function under test in all processes | |||||
distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | |||||
// Local min and max | |||||
auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end()); | |||||
auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end()); | |||||
// Gather min/max to rank 0 | |||||
std::vector<tsValue_t> global_mins(ts_mpi.size()); | |||||
std::vector<tsValue_t> global_maxes(ts_mpi.size()); | |||||
MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType(); | |||||
MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
// Check results | |||||
EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true); | |||||
if (ts_mpi.rank() == 0) { | |||||
for (size_t i = 1; i < global_mins.size(); ++i) { | |||||
EXPECT_LE(global_maxes[i - 1], global_mins[i]); | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* MPI: SysTest (acceptance) | |||||
* Each process executes distBitonic for uin32_t [1 << 16] with | |||||
* exchange optimization and pipeline | |||||
*/ | |||||
TEST_F(TMPIdistSort, distBitonic_test5) { | |||||
// Create and fill vector | |||||
using tsValue_t = uint32_t; // Test parameters | |||||
size_t ts_buffer_size = 1 << 16; | |||||
ShadowedVec_t<tsValue_t> ts_Data; | |||||
std::uniform_int_distribution<tsValue_t > dis( | |||||
std::numeric_limits<tsValue_t>::min(), | |||||
std::numeric_limits<tsValue_t>::max() | |||||
); | |||||
ts_Data.resize(ts_buffer_size); | |||||
std::generate(ts_Data.begin(), ts_Data.end(), [&]() { return dis(ts_gen); }); | |||||
// Set exchange optimization + pipeline | |||||
config.exchangeOpt = true; | |||||
config.pipeline = 8; | |||||
// Execute function under test in all processes | |||||
distBitonic(ts_Data, ts_mpi.size(), ts_mpi.rank()); | |||||
// Local min and max | |||||
auto local_min = *std::min_element(ts_Data.begin(), ts_Data.end()); | |||||
auto local_max = *std::max_element(ts_Data.begin(), ts_Data.end()); | |||||
// Gather min/max to rank 0 | |||||
std::vector<tsValue_t> global_mins(ts_mpi.size()); | |||||
std::vector<tsValue_t> global_maxes(ts_mpi.size()); | |||||
MPI_Datatype datatype = MPI_TypeMapper<tsValue_t>::getType(); | |||||
MPI_Gather(&local_min, 1, datatype, global_mins.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
MPI_Gather(&local_max, 1, datatype, global_maxes.data(), 1, datatype, 0, MPI_COMM_WORLD); | |||||
// Check results | |||||
EXPECT_EQ(std::is_sorted(ts_Data.begin(), ts_Data.end()), true); | |||||
if (ts_mpi.rank() == 0) { | |||||
for (size_t i = 1; i < global_mins.size(); ++i) { | |||||
EXPECT_LE(global_maxes[i - 1], global_mins[i]); | |||||
} | |||||
} | |||||
} |