From f687f30d2fa1010d1c3aa0752b64cddca5f561b8 Mon Sep 17 00:00:00 2001 From: Christos Choutouridis Date: Sun, 16 Feb 2025 17:56:05 +0200 Subject: [PATCH] HW3: RC2 - Ampere results and some data analysis scripts --- .../RC1-7a6f7f5/ampere/slurm-BitncV0Q20.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q21.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q22.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q23.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q24.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q25.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q26.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q27.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q28.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q29.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV0Q30.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q20.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q21.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q22.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q23.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q24.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q25.out | 2 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q26.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q27.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q28.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q29.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV1Q30.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q20.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q21.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q22.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q23.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q24.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q25.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q26.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q27.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q28.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q29.out | 1 + .../RC1-7a6f7f5/ampere/slurm-BitncV2Q30.out | 1 + .../{profreportv2.txt => profReportv2.txt} | 0 .../RC1-7a6f7f5/tesla/slurm-BitncV0Q20.out | 2 + 
.../RC1-7a6f7f5/tesla/slurm-BitncV0Q21.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q22.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q23.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q24.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q25.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q26.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q27.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q28.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q29.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV0Q30.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q20.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q21.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q22.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q23.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q24.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q25.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q26.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q27.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q28.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q29.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV1Q30.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q20.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q21.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q22.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q23.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q24.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q25.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q26.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q27.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q28.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q29.out | 23 + .../RC1-7a6f7f5/tesla/slurm-BitncV2Q30.out | 23 + .../analyse/RC2-f749862/Pending-PIDs-ampere | 67 + .../RC2-f749862/ampere/slurm-1914812.out | 23 + .../RC2-f749862/ampere/slurm-1914813.out | 23 + .../RC2-f749862/ampere/slurm-1914814.out | 23 + .../RC2-f749862/ampere/slurm-1914815.out | 23 + .../RC2-f749862/ampere/slurm-1914816.out | 23 + .../RC2-f749862/ampere/slurm-1914817.out | 23 + .../RC2-f749862/ampere/slurm-1914818.out | 23 + 
.../RC2-f749862/ampere/slurm-1914819.out | 23 + .../RC2-f749862/ampere/slurm-1914820.out | 23 + .../RC2-f749862/ampere/slurm-1914821.out | 23 + .../RC2-f749862/ampere/slurm-1914822.out | 23 + .../RC2-f749862/ampere/slurm-1914823.out | 23 + .../RC2-f749862/ampere/slurm-1914824.out | 23 + .../RC2-f749862/ampere/slurm-1914825.out | 23 + .../RC2-f749862/ampere/slurm-1914826.out | 23 + .../RC2-f749862/ampere/slurm-1914827.out | 23 + .../RC2-f749862/ampere/slurm-1914828.out | 23 + .../RC2-f749862/ampere/slurm-1914829.out | 23 + .../RC2-f749862/ampere/slurm-1914830.out | 23 + .../RC2-f749862/ampere/slurm-1914831.out | 23 + .../RC2-f749862/ampere/slurm-1914832.out | 23 + .../RC2-f749862/ampere/slurm-1914833.out | 23 + .../RC2-f749862/ampere/slurm-1914834.out | 23 + .../RC2-f749862/ampere/slurm-1914835.out | 23 + .../RC2-f749862/ampere/slurm-1914836.out | 23 + .../RC2-f749862/ampere/slurm-1914837.out | 23 + .../RC2-f749862/ampere/slurm-1914838.out | 23 + .../RC2-f749862/ampere/slurm-1914839.out | 23 + .../RC2-f749862/ampere/slurm-1914840.out | 23 + .../RC2-f749862/ampere/slurm-1914841.out | 23 + .../RC2-f749862/ampere/slurm-1914842.out | 23 + .../RC2-f749862/ampere/slurm-1914843.out | 23 + .../RC2-f749862/ampere/slurm-1914844.out | 23 + .../RC2-f749862/profReportv1.txt} | 1374 ++++++++------- .../RC2-f749862/profReportv2.txt} | 1492 +++++++++-------- homework_3/analyse/extract_results.sh | 41 + 104 files changed, 3204 insertions(+), 1301 deletions(-) rename homework_3/analyse/RC1-7a6f7f5/{profreportv2.txt => profReportv2.txt} (100%) create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q20.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q21.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q22.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q23.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q24.out create mode 100644 
homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q25.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q26.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q27.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q28.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q29.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q30.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q20.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q21.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q22.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q23.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q24.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q25.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q26.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q27.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q28.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q29.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q30.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q20.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q21.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q22.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q23.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q24.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q25.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q26.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q27.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q28.out create mode 
100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q29.out create mode 100644 homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q30.out create mode 100644 homework_3/analyse/RC2-f749862/Pending-PIDs-ampere create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914812.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914813.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914814.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914815.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914816.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914817.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914818.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914819.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914820.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914821.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914822.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914823.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914824.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914825.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914826.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914827.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914828.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914829.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914830.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914831.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914832.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914833.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914834.out create mode 100644 
homework_3/analyse/RC2-f749862/ampere/slurm-1914835.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914836.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914837.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914838.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914839.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914840.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914841.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914842.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914843.out create mode 100644 homework_3/analyse/RC2-f749862/ampere/slurm-1914844.out rename homework_3/{reportv1.3 => analyse/RC2-f749862/profReportv1.txt} (78%) rename homework_3/{reportv2.3 => analyse/RC2-f749862/profReportv2.txt} (77%) create mode 100755 homework_3/analyse/extract_results.sh diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q20.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q20.out index 5a2598a..384535a 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q20.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q20.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 1048576 (Q=20) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q21.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q21.out index 79c5b52..f781175 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q21.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q21.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 2097152 (Q=21) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q22.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q22.out index f0e984f..f9d2814 100644 --- 
a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q22.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q22.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 4194304 (Q=22) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q23.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q23.out index f59fd49..a0c61e5 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q23.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q23.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 8388608 (Q=23) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q24.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q24.out index 9177d3f..4fe6acc 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q24.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q24.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 16777216 (Q=24) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q25.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q25.out index 62267c0..3b4b214 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q25.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q25.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 33554432 (Q=25) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q26.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q26.out index 3c11832..c93bd0a 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q26.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q26.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 67108864 (Q=26) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git 
a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q27.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q27.out index f33a2c6..d732a2a 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q27.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q27.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 134217728 (Q=27) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q28.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q28.out index 4ed678b..ee1579a 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q28.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q28.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 268435456 (Q=28) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q29.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q29.out index e1c59bd..aae59bb 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q29.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q29.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 536870912 (Q=29) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q30.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q30.out index 17c3212..a2d54b7 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q30.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV0Q30.out @@ -1,3 +1,4 @@ +[Log]: Code version: V0 [Log]: Array size: 1073741824 (Q=30) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q20.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q20.out index d16038e..fc7f737 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q20.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q20.out 
@@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 1048576 (Q=20) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q21.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q21.out index 73c1ed2..1ca055f 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q21.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q21.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 2097152 (Q=21) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q22.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q22.out index 52c47c9..9bb6586 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q22.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q22.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 4194304 (Q=22) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q23.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q23.out index 37fd860..53a7bad 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q23.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q23.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 8388608 (Q=23) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q24.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q24.out index 6454e1e..13a950d 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q24.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q24.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 16777216 (Q=24) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q25.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q25.out index 4e37052..026e638 
100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q25.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q25.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 33554432 (Q=25) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB @@ -20,3 +21,4 @@ [Timing] Mem-xch : 27 [msec] [Timing] Sorting : 38 [msec] [Validation] Results validation ... [PASSED]  + diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q26.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q26.out index 4e705e8..77eaec4 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q26.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q26.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 67108864 (Q=26) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q27.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q27.out index 78577bc..ca09c6f 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q27.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q27.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 134217728 (Q=27) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q28.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q28.out index cba64c8..06cac21 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q28.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q28.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 268435456 (Q=28) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q29.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q29.out index 3acbf6f..b2b4fc7 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q29.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q29.out @@ -1,3 +1,4 @@ +[Log]: 
Code version: V1 [Log]: Array size: 536870912 (Q=29) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q30.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q30.out index e768588..0d87016 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q30.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV1Q30.out @@ -1,3 +1,4 @@ +[Log]: Code version: V1 [Log]: Array size: 1073741824 (Q=30) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q20.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q20.out index e2403d4..6378ce4 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q20.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q20.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 1048576 (Q=20) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q21.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q21.out index 7e5f117..cbd0085 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q21.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q21.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 2097152 (Q=21) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q22.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q22.out index 792ad6c..9b64760 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q22.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q22.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 4194304 (Q=22) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q23.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q23.out index 14b51c2..a872b00 100644 --- 
a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q23.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q23.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 8388608 (Q=23) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q24.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q24.out index ae5e36e..ff2c5e1 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q24.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q24.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 16777216 (Q=24) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q25.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q25.out index 7b02c36..8358cdc 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q25.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q25.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 33554432 (Q=25) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q26.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q26.out index 5851b86..2f4d0f5 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q26.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q26.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 67108864 (Q=26) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q27.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q27.out index eb944d2..bf3dde1 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q27.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q27.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 134217728 (Q=27) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git 
a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q28.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q28.out index 18cd3d5..c9a9149 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q28.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q28.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 268435456 (Q=28) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q29.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q29.out index 3e51390..cb24ea2 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q29.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q29.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 536870912 (Q=29) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q30.out b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q30.out index ab3960d..085d456 100644 --- a/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q30.out +++ b/homework_3/analyse/RC1-7a6f7f5/ampere/slurm-BitncV2Q30.out @@ -1,3 +1,4 @@ +[Log]: Code version: V2 [Log]: Array size: 1073741824 (Q=30) [Log]: Repeated sorts: 7 [Log]: GPU: NVIDIA A100-SXM4-40GB diff --git a/homework_3/analyse/RC1-7a6f7f5/profreportv2.txt b/homework_3/analyse/RC1-7a6f7f5/profReportv2.txt similarity index 100% rename from homework_3/analyse/RC1-7a6f7f5/profreportv2.txt rename to homework_3/analyse/RC1-7a6f7f5/profReportv2.txt diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q20.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q20.out new file mode 100644 index 0000000..9677edd --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q20.out @@ -0,0 +1,2 @@ +[Log]: Code version: V0 +[Log]: Code version: V0 diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q21.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q21.out new file mode 100644 
index 0000000..fa7f7cb --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q21.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 2097152 (Q=21) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 12 [msec] +[Timing] Mem-xch : 2034 [usec] +[Timing] Sorting : 10 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q22.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q22.out new file mode 100644 index 0000000..5ed43ad --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q22.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 4194304 (Q=22) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 23 [msec] +[Timing] Mem-xch : 3803 [usec] +[Timing] Sorting : 19 [msec] +[Validation] Results validation ... 
[PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q23.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q23.out new file mode 100644 index 0000000..06f12f3 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q23.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 8388608 (Q=23) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 49 [msec] +[Timing] Mem-xch : 8494 [usec] +[Timing] Sorting : 40 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q24.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q24.out new file mode 100644 index 0000000..38f6c1e --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q24.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 16777216 (Q=24) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Timing] Total : 101 [msec] +[Timing] Mem-xch : 15 [msec] +[Timing] Sorting : 85 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q25.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q25.out new file mode 100644 index 0000000..6f699e8 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q25.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 33554432 (Q=25) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 210 [msec] +[Timing] Mem-xch : 29 [msec] +[Timing] Sorting : 181 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q26.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q26.out new file mode 100644 index 0000000..916da9f --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q26.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 67108864 (Q=26) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 442 [msec] +[Timing] Mem-xch : 57 [msec] +[Timing] Sorting : 385 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q27.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q27.out new file mode 100644 index 0000000..56432d2 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q27.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 134217728 (Q=27) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 939 [msec] +[Timing] Mem-xch : 113 [msec] +[Timing] Sorting : 826 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q28.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q28.out new file mode 100644 index 0000000..7c9c47a --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q28.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 268435456 (Q=28) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 1985 [msec] +[Timing] Mem-xch : 223 [msec] +[Timing] Sorting : 1762 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q29.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q29.out new file mode 100644 index 0000000..53d91ce --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q29.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 536870912 (Q=29) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 4209 [msec] +[Timing] Mem-xch : 445 [msec] +[Timing] Sorting : 3764 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q30.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q30.out new file mode 100644 index 0000000..786499f --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV0Q30.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 1073741824 (Q=30) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 9110 [msec] +[Timing] Mem-xch : 1073 [msec] +[Timing] Sorting : 8037 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q20.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q20.out new file mode 100644 index 0000000..3f8aa6f --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q20.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 1048576 (Q=20) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 4630 [usec] +[Timing] Mem-xch : 1147 [usec] +[Timing] Sorting : 3477 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q21.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q21.out new file mode 100644 index 0000000..ebe3735 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q21.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 2097152 (Q=21) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 9057 [usec] +[Timing] Mem-xch : 2023 [usec] +[Timing] Sorting : 7032 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q22.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q22.out new file mode 100644 index 0000000..1071364 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q22.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 4194304 (Q=22) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 18 [msec] +[Timing] Mem-xch : 3817 [usec] +[Timing] Sorting : 14 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q23.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q23.out new file mode 100644 index 0000000..33a8e69 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q23.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 8388608 (Q=23) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 39 [msec] +[Timing] Mem-xch : 8491 [usec] +[Timing] Sorting : 30 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q24.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q24.out new file mode 100644 index 0000000..d2d6cec --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q24.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 33554432 (Q=24) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 169 [msec] +[Timing] Mem-xch : 29 [msec] +[Timing] Sorting : 139 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q25.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q25.out new file mode 100644 index 0000000..94616dd --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q25.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 33554432 (Q=25) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 169 [msec] +[Timing] Mem-xch : 29 [msec] +[Timing] Sorting : 139 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q26.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q26.out new file mode 100644 index 0000000..da94713 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q26.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 67108864 (Q=26) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 358 [msec] +[Timing] Mem-xch : 57 [msec] +[Timing] Sorting : 301 [msec] +[Validation] Results validation ... 
[PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q27.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q27.out new file mode 100644 index 0000000..5d32b5e --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q27.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 134217728 (Q=27) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 761 [msec] +[Timing] Mem-xch : 113 [msec] +[Timing] Sorting : 648 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q28.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q28.out new file mode 100644 index 0000000..b9e69ff --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q28.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 268435456 (Q=28) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Timing] Total : 1618 [msec] +[Timing] Mem-xch : 223 [msec] +[Timing] Sorting : 1394 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q29.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q29.out new file mode 100644 index 0000000..51af9e7 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q29.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 536870912 (Q=29) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 3440 [msec] +[Timing] Mem-xch : 446 [msec] +[Timing] Sorting : 2994 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q30.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q30.out new file mode 100644 index 0000000..e2983c8 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV1Q30.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 1073741824 (Q=30) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 7327 [msec] +[Timing] Mem-xch : 913 [msec] +[Timing] Sorting : 6414 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q20.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q20.out new file mode 100644 index 0000000..272b144 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q20.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 1048576 (Q=20) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 4391 [usec] +[Timing] Mem-xch : 1145 [usec] +[Timing] Sorting : 3242 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q21.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q21.out new file mode 100644 index 0000000..aaa8a26 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q21.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 2097152 (Q=21) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 8644 [usec] +[Timing] Mem-xch : 2033 [usec] +[Timing] Sorting : 6606 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q22.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q22.out new file mode 100644 index 0000000..df7608d --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q22.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 4194304 (Q=22) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 17 [msec] +[Timing] Mem-xch : 3815 [usec] +[Timing] Sorting : 13 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q23.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q23.out new file mode 100644 index 0000000..88fb5d1 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q23.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 8388608 (Q=23) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 37 [msec] +[Timing] Mem-xch : 8515 [usec] +[Timing] Sorting : 28 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q24.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q24.out new file mode 100644 index 0000000..216070c --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q24.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 16777216 (Q=24) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 77 [msec] +[Timing] Mem-xch : 15 [msec] +[Timing] Sorting : 61 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q25.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q25.out new file mode 100644 index 0000000..911356e --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q25.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 33554432 (Q=25) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 169 [msec] +[Timing] Mem-xch : 29 [msec] +[Timing] Sorting : 139 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q26.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q26.out new file mode 100644 index 0000000..a20dc6b --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q26.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 67108864 (Q=26) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 344 [msec] +[Timing] Mem-xch : 57 [msec] +[Timing] Sorting : 286 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q27.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q27.out new file mode 100644 index 0000000..fdf92eb --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q27.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 134217728 (Q=27) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 732 [msec] +[Timing] Mem-xch : 113 [msec] +[Timing] Sorting : 619 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q28.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q28.out new file mode 100644 index 0000000..da208a0 --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q28.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 268435456 (Q=28) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 1560 [msec] +[Timing] Mem-xch : 225 [msec] +[Timing] Sorting : 1334 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q29.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q29.out new file mode 100644 index 0000000..ec6a7ad --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q29.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 536870912 (Q=29) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 3317 [msec] +[Timing] Mem-xch : 447 [msec] +[Timing] Sorting : 2869 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q30.out b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q30.out new file mode 100644 index 0000000..53d414f --- /dev/null +++ b/homework_3/analyse/RC1-7a6f7f5/tesla/slurm-BitncV2Q30.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 1073741824 (Q=30) +[Log]: Repeated sorts: 7 +[Log]: GPU: Tesla P100-PCIE-12GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 7078 [msec] +[Timing] Mem-xch : 920 [msec] +[Timing] Sorting : 6158 [msec] +[Validation] Results validation ... 
[PASSED]  diff --git a/homework_3/analyse/RC2-f749862/Pending-PIDs-ampere b/homework_3/analyse/RC2-f749862/Pending-PIDs-ampere new file mode 100644 index 0000000..111de2a --- /dev/null +++ b/homework_3/analyse/RC2-f749862/Pending-PIDs-ampere @@ -0,0 +1,67 @@ +Submitting: hpc/BitncV0Q20.sh +Submitted batch job 1914812 +Submitting: hpc/BitncV0Q21.sh +Submitted batch job 1914813 +Submitting: hpc/BitncV0Q22.sh +Submitted batch job 1914814 +Submitting: hpc/BitncV0Q23.sh +Submitted batch job 1914815 +Submitting: hpc/BitncV0Q24.sh +Submitted batch job 1914816 +Submitting: hpc/BitncV0Q25.sh +Submitted batch job 1914817 +Submitting: hpc/BitncV0Q26.sh +Submitted batch job 1914818 +Submitting: hpc/BitncV0Q27.sh +Submitted batch job 1914819 +Submitting: hpc/BitncV0Q28.sh +Submitted batch job 1914820 +Submitting: hpc/BitncV0Q29.sh +Submitted batch job 1914821 +Submitting: hpc/BitncV0Q30.sh +Submitted batch job 1914822 +Submitting: hpc/BitncV1Q20.sh +Submitted batch job 1914823 +Submitting: hpc/BitncV1Q21.sh +Submitted batch job 1914824 +Submitting: hpc/BitncV1Q22.sh +Submitted batch job 1914825 +Submitting: hpc/BitncV1Q23.sh +Submitted batch job 1914826 +Submitting: hpc/BitncV1Q24.sh +Submitted batch job 1914827 +Submitting: hpc/BitncV1Q25.sh +Submitted batch job 1914828 +Submitting: hpc/BitncV1Q26.sh +Submitted batch job 1914829 +Submitting: hpc/BitncV1Q27.sh +Submitted batch job 1914830 +Submitting: hpc/BitncV1Q28.sh +Submitted batch job 1914831 +Submitting: hpc/BitncV1Q29.sh +Submitted batch job 1914832 +Submitting: hpc/BitncV1Q30.sh +Submitted batch job 1914833 +Submitting: hpc/BitncV2Q20.sh +Submitted batch job 1914834 +Submitting: hpc/BitncV2Q21.sh +Submitted batch job 1914835 +Submitting: hpc/BitncV2Q22.sh +Submitted batch job 1914836 +Submitting: hpc/BitncV2Q23.sh +Submitted batch job 1914837 +Submitting: hpc/BitncV2Q24.sh +Submitted batch job 1914838 +Submitting: hpc/BitncV2Q25.sh +Submitted batch job 1914839 +Submitting: hpc/BitncV2Q26.sh +Submitted batch job 1914840 
+Submitting: hpc/BitncV2Q27.sh +Submitted batch job 1914841 +Submitting: hpc/BitncV2Q28.sh +Submitted batch job 1914842 +Submitting: hpc/BitncV2Q29.sh +Submitted batch job 1914843 +Submitting: hpc/BitncV2Q30.sh +Submitted batch job 1914844 + diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914812.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914812.out new file mode 100644 index 0000000..5863749 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914812.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 1048576 (Q=20) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 7949 [usec] +[Timing] Mem-xch : 4763 [usec] +[Timing] Sorting : 3142 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914813.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914813.out new file mode 100644 index 0000000..4a84b52 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914813.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 2097152 (Q=21) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... 
Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 11 [msec] +[Timing] Mem-xch : 6943 [usec] +[Timing] Sorting : 4136 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914814.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914814.out new file mode 100644 index 0000000..a80ff46 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914814.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 4194304 (Q=22) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 67 [msec] +[Timing] Mem-xch : 61 [msec] +[Timing] Sorting : 6081 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914815.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914815.out new file mode 100644 index 0000000..d157aaf --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914815.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 8388608 (Q=23) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... 
Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 33 [msec] +[Timing] Mem-xch : 19 [msec] +[Timing] Sorting : 14 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914816.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914816.out new file mode 100644 index 0000000..8d0627c --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914816.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 16777216 (Q=24) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 54 [msec] +[Timing] Mem-xch : 24 [msec] +[Timing] Sorting : 29 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914817.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914817.out new file mode 100644 index 0000000..079757b --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914817.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 33554432 (Q=25) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 93 [msec] +[Timing] Mem-xch : 33 [msec] +[Timing] Sorting : 59 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914818.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914818.out new file mode 100644 index 0000000..5dd3e55 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914818.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 67108864 (Q=26) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 205 [msec] +[Timing] Mem-xch : 84 [msec] +[Timing] Sorting : 120 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914819.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914819.out new file mode 100644 index 0000000..ff8f997 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914819.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 134217728 (Q=27) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. 
+[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 421 [msec] +[Timing] Mem-xch : 166 [msec] +[Timing] Sorting : 255 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914820.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914820.out new file mode 100644 index 0000000..a1ba0b3 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914820.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 268435456 (Q=28) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 845 [msec] +[Timing] Mem-xch : 297 [msec] +[Timing] Sorting : 547 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914821.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914821.out new file mode 100644 index 0000000..d95d4b1 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914821.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 536870912 (Q=29) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... 
Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 1922 [msec] +[Timing] Mem-xch : 758 [msec] +[Timing] Sorting : 1163 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914822.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914822.out new file mode 100644 index 0000000..7e1af6c --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914822.out @@ -0,0 +1,23 @@ +[Log]: Code version: V0 +[Log]: Array size: 1073741824 (Q=30) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 3898 [msec] +[Timing] Mem-xch : 1446 [msec] +[Timing] Sorting : 2455 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914823.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914823.out new file mode 100644 index 0000000..8c687fd --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914823.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 1048576 (Q=20) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 6024 [usec] +[Timing] Mem-xch : 4616 [usec] +[Timing] Sorting : 1416 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914824.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914824.out new file mode 100644 index 0000000..fb54897 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914824.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 2097152 (Q=21) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 5028 [usec] +[Timing] Mem-xch : 2818 [usec] +[Timing] Sorting : 2200 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914825.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914825.out new file mode 100644 index 0000000..5f06513 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914825.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 4194304 (Q=22) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... 
Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 8845 [usec] +[Timing] Mem-xch : 5124 [usec] +[Timing] Sorting : 3719 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914826.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914826.out new file mode 100644 index 0000000..596679f --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914826.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 8388608 (Q=23) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 18 [msec] +[Timing] Mem-xch : 9796 [usec] +[Timing] Sorting : 8340 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914827.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914827.out new file mode 100644 index 0000000..d47f3b2 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914827.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 16777216 (Q=24) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. 
+[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 36 [msec] +[Timing] Mem-xch : 18 [msec] +[Timing] Sorting : 17 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914828.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914828.out new file mode 100644 index 0000000..e6ec7ff --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914828.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 33554432 (Q=25) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 84 [msec] +[Timing] Mem-xch : 46 [msec] +[Timing] Sorting : 37 [msec] +[Validation] Results validation ... 
[PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914829.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914829.out new file mode 100644 index 0000000..59e834c --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914829.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 67108864 (Q=26) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 168 [msec] +[Timing] Mem-xch : 88 [msec] +[Timing] Sorting : 71 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914830.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914830.out new file mode 100644 index 0000000..b26b185 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914830.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 134217728 (Q=27) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Timing] Total : 347 [msec] +[Timing] Mem-xch : 193 [msec] +[Timing] Sorting : 153 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914831.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914831.out new file mode 100644 index 0000000..15a34d7 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914831.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 268435456 (Q=28) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 714 [msec] +[Timing] Mem-xch : 355 [msec] +[Timing] Sorting : 357 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914832.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914832.out new file mode 100644 index 0000000..59bee17 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914832.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 536870912 (Q=29) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... 
Done. +[Log]: Start sorting ... Done. +[Timing] Total : 1415 [msec] +[Timing] Mem-xch : 666 [msec] +[Timing] Sorting : 749 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914833.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914833.out new file mode 100644 index 0000000..addb861 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914833.out @@ -0,0 +1,23 @@ +[Log]: Code version: V1 +[Log]: Array size: 1073741824 (Q=30) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 2894 [msec] +[Timing] Mem-xch : 1313 [msec] +[Timing] Sorting : 1566 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914834.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914834.out new file mode 100644 index 0000000..173c63c --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914834.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 1048576 (Q=20) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. 
+[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 2854 [usec] +[Timing] Mem-xch : 1267 [usec] +[Timing] Sorting : 1585 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914835.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914835.out new file mode 100644 index 0000000..f4dc494 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914835.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 2097152 (Q=21) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 6877 [usec] +[Timing] Mem-xch : 4443 [usec] +[Timing] Sorting : 2394 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914836.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914836.out new file mode 100644 index 0000000..f7a9f8e --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914836.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 4194304 (Q=22) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... 
Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 10 [msec] +[Timing] Mem-xch : 6171 [usec] +[Timing] Sorting : 4100 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914837.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914837.out new file mode 100644 index 0000000..adb3c37 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914837.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 8388608 (Q=23) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 19 [msec] +[Timing] Mem-xch : 9986 [usec] +[Timing] Sorting : 8924 [usec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914838.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914838.out new file mode 100644 index 0000000..86748b7 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914838.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 16777216 (Q=24) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 35 [msec] +[Timing] Mem-xch : 16 [msec] +[Timing] Sorting : 19 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914839.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914839.out new file mode 100644 index 0000000..02aff63 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914839.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 33554432 (Q=25) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 69 [msec] +[Timing] Mem-xch : 28 [msec] +[Timing] Sorting : 40 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914840.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914840.out new file mode 100644 index 0000000..a7675a9 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914840.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 67108864 (Q=26) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. 
+[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 144 [msec] +[Timing] Mem-xch : 68 [msec] +[Timing] Sorting : 76 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914841.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914841.out new file mode 100644 index 0000000..debe01c --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914841.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 134217728 (Q=27) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 319 [msec] +[Timing] Mem-xch : 155 [msec] +[Timing] Sorting : 163 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914842.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914842.out new file mode 100644 index 0000000..b94c27c --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914842.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 268435456 (Q=28) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. 
+[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 689 [msec] +[Timing] Mem-xch : 315 [msec] +[Timing] Sorting : 379 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914843.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914843.out new file mode 100644 index 0000000..08cf06e --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914843.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 536870912 (Q=29) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 1449 [msec] +[Timing] Mem-xch : 675 [msec] +[Timing] Sorting : 790 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/analyse/RC2-f749862/ampere/slurm-1914844.out b/homework_3/analyse/RC2-f749862/ampere/slurm-1914844.out new file mode 100644 index 0000000..18bc6f3 --- /dev/null +++ b/homework_3/analyse/RC2-f749862/ampere/slurm-1914844.out @@ -0,0 +1,23 @@ +[Log]: Code version: V2 +[Log]: Array size: 1073741824 (Q=30) +[Log]: Repeated sorts: 7 +[Log]: GPU: NVIDIA A100-SXM4-40GB +[Log]: Block size: 512 +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... 
Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Log]: Initialize array ... Done. +[Log]: Start sorting ... Done. +[Timing] Total : 3022 [msec] +[Timing] Mem-xch : 1367 [msec] +[Timing] Sorting : 1654 [msec] +[Validation] Results validation ... [PASSED]  diff --git a/homework_3/reportv1.3 b/homework_3/analyse/RC2-f749862/profReportv1.txt similarity index 78% rename from homework_3/reportv1.3 rename to homework_3/analyse/RC2-f749862/profReportv1.txt index 1e77f44..0657f1b 100644 --- a/homework_3/reportv1.3 +++ b/homework_3/analyse/RC2-f749862/profReportv1.txt @@ -1,73 +1,73 @@ -==PROF== Connected to process 19677 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA) -==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes 
-==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 
passes -==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes -==PROF== Disconnected from process 19677 -[19677] bitonicCUDA@127.0.0.1 - void prephase(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7 +==PROF== Connected to process 38811 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA) +==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes 
+==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes 
+==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 
passes +==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes +==PROF== Disconnected from process 38811 +[38811] bitonicCUDA@127.0.0.1 + void prephase(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum msecond 1.06 @@ -75,6 +75,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.22 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.91 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 17.11 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 5.48 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -83,26 +85,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 1,054,215.71 - smsp__average_warp_latency_issue_stalled_barrier.ratio 10,542.16 - smsp__inst_executed.avg inst 770,278.16 - smsp__inst_executed.max inst 770,517 - smsp__inst_executed.min inst 770,078 - smsp__inst_executed.sum inst 49,297,802 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.29 + smsp__average_warp_latency_issue_stalled_barrier.pct % 1,052,474.71 + smsp__average_warp_latency_issue_stalled_barrier.ratio 10,524.75 + smsp__inst_executed.avg inst 770,268.77 + smsp__inst_executed.max inst 770,551 + smsp__inst_executed.min inst 770,034 + smsp__inst_executed.sum inst 49,297,201 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.25 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.23 - smsp__cycles_active.avg cycle 1,464,763.30 - smsp__cycles_active.sum cycle 93,744,851 + smsp__cycles_active.avg cycle 1,463,898.61 + smsp__cycles_active.sum cycle 93,689,511 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, 
Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.59 + gpu__time_duration.sum usecond 58.78 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -113,24 +117,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,434.70 - smsp__inst_executed.max inst 12,627 - smsp__inst_executed.min inst 12,202 - smsp__inst_executed.sum inst 795,821 + smsp__inst_executed.avg inst 12,434.38 + smsp__inst_executed.max inst 13,034 + smsp__inst_executed.min inst 12,078 + smsp__inst_executed.sum inst 795,800 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,275.05 - smsp__cycles_active.sum cycle 4,625,603 + smsp__cycles_active.avg cycle 72,392.50 + smsp__cycles_active.sum cycle 4,633,120 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, 
Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 185.54 + gpu__time_duration.sum usecond 182.78 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -139,26 +145,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 160,167.14 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,601.67 - smsp__inst_executed.avg inst 132,203.41 - smsp__inst_executed.max inst 134,386 - smsp__inst_executed.min inst 130,079 - smsp__inst_executed.sum inst 8,461,018 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86 + smsp__average_warp_latency_issue_stalled_barrier.pct % 158,915.20 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,589.15 + smsp__inst_executed.avg inst 132,204.09 + smsp__inst_executed.max inst 134,291 + smsp__inst_executed.min inst 130,034 + smsp__inst_executed.sum inst 8,461,062 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.76 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 253,150.12 - smsp__cycles_active.sum cycle 16,201,608 + smsp__cycles_active.avg cycle 252,696.25 + 
smsp__cycles_active.sum cycle 16,172,560 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.34 + gpu__time_duration.sum usecond 57.86 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -169,24 +177,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,300.38 - smsp__inst_executed.max inst 12,564 - smsp__inst_executed.min inst 12,036 - smsp__inst_executed.sum inst 787,224 + smsp__inst_executed.avg inst 12,300.02 + smsp__inst_executed.max inst 12,527 + smsp__inst_executed.min inst 11,906 + smsp__inst_executed.sum inst 787,201 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,561.48 - smsp__cycles_active.sum cycle 4,579,935 + smsp__cycles_active.avg cycle 71,419.20 
+ smsp__cycles_active.sum cycle 4,570,829 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.62 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -197,24 +207,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,308.33 - smsp__inst_executed.max inst 12,555 - smsp__inst_executed.min inst 12,038 - smsp__inst_executed.sum inst 787,733 + smsp__inst_executed.avg inst 12,309.58 + smsp__inst_executed.max inst 12,592 + smsp__inst_executed.min inst 12,022 + smsp__inst_executed.sum inst 787,813 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,921.98 - smsp__cycles_active.sum cycle 4,667,007 + smsp__cycles_active.avg cycle 
71,582.45 + smsp__cycles_active.sum cycle 4,581,277 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 183.49 + gpu__time_duration.sum usecond 185.60 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -223,26 +235,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 160,010.27 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.10 - smsp__inst_executed.avg inst 132,209.20 - smsp__inst_executed.max inst 134,250 - smsp__inst_executed.min inst 130,144 - smsp__inst_executed.sum inst 8,461,389 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.92 + smsp__average_warp_latency_issue_stalled_barrier.pct % 160,972.19 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,609.72 + smsp__inst_executed.avg inst 132,207.95 + 
smsp__inst_executed.max inst 134,315 + smsp__inst_executed.min inst 130,093 + smsp__inst_executed.sum inst 8,461,309 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.08 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 252,547.31 - smsp__cycles_active.sum cycle 16,163,028 + smsp__cycles_active.avg cycle 251,877 + smsp__cycles_active.sum cycle 16,120,128 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.59 + gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -253,24 +267,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,294.86 - smsp__inst_executed.max inst 12,694 - smsp__inst_executed.min inst 12,054 - smsp__inst_executed.sum inst 786,871 + smsp__inst_executed.avg inst 12,294.97 + smsp__inst_executed.max inst 12,632 + smsp__inst_executed.min inst 11,676 + smsp__inst_executed.sum inst 786,878 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,596.58 - smsp__cycles_active.sum cycle 4,582,181 + smsp__cycles_active.avg cycle 72,658.42 + smsp__cycles_active.sum cycle 4,650,139 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.98 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -281,24 +297,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,299 - smsp__inst_executed.max inst 12,638 - smsp__inst_executed.min inst 11,881 - smsp__inst_executed.sum inst 787,136 + smsp__inst_executed.avg inst 12,298.64 + smsp__inst_executed.max inst 12,609 + smsp__inst_executed.min inst 11,896 + smsp__inst_executed.sum inst 787,113 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,894.47 - smsp__cycles_active.sum cycle 4,601,246 + smsp__cycles_active.avg cycle 71,467.41 + smsp__cycles_active.sum cycle 4,573,914 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.53 + gpu__time_duration.sum usecond 58.46 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -309,24 +327,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.91 - smsp__inst_executed.max inst 12,636 - smsp__inst_executed.min inst 11,910 - smsp__inst_executed.sum inst 787,834 + smsp__inst_executed.avg inst 12,308.77 + smsp__inst_executed.max inst 12,684 + smsp__inst_executed.min inst 11,895 + smsp__inst_executed.sum inst 787,761 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,313.89 - smsp__cycles_active.sum cycle 4,564,089 + smsp__cycles_active.avg cycle 72,717.34 + smsp__cycles_active.sum cycle 4,653,910 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 184.90 + gpu__time_duration.sum usecond 186.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -335,26 +355,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 158,555.84 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,585.56 - smsp__inst_executed.avg inst 132,207.33 - smsp__inst_executed.max inst 134,301 - smsp__inst_executed.min inst 130,116 - smsp__inst_executed.sum inst 8,461,269 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.73 + smsp__average_warp_latency_issue_stalled_barrier.pct % 159,835.16 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,598.35 + smsp__inst_executed.avg inst 132,204.20 + smsp__inst_executed.max inst 134,182 + smsp__inst_executed.min inst 130,077 + smsp__inst_executed.sum inst 8,461,069 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.87 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 252,473.81 - smsp__cycles_active.sum cycle 16,158,324 + smsp__cycles_active.avg cycle 252,623.66 + smsp__cycles_active.sum cycle 16,167,914 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 
1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.98 + gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -365,24 +387,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,291.09 - smsp__inst_executed.max inst 12,593 - smsp__inst_executed.min inst 11,856 - smsp__inst_executed.sum inst 786,630 + smsp__inst_executed.avg inst 12,291.53 + smsp__inst_executed.max inst 12,717 + smsp__inst_executed.min inst 11,826 + smsp__inst_executed.sum inst 786,658 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,576.33 - smsp__cycles_active.sum cycle 4,516,885 + smsp__cycles_active.avg cycle 71,072.03 + smsp__cycles_active.sum cycle 4,548,610 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, 
Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.18 + gpu__time_duration.sum usecond 58.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -393,24 +417,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.56 - smsp__inst_executed.max inst 12,684 - smsp__inst_executed.min inst 11,908 - smsp__inst_executed.sum inst 786,788 + smsp__inst_executed.avg inst 12,293.39 + smsp__inst_executed.max inst 12,679 + smsp__inst_executed.min inst 11,917 + smsp__inst_executed.sum inst 786,777 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,507.47 - smsp__cycles_active.sum cycle 4,576,478 + smsp__cycles_active.avg cycle 72,284.91 + smsp__cycles_active.sum cycle 4,626,234 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 
16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.73 + gpu__time_duration.sum usecond 57.95 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -421,24 +447,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.97 - smsp__inst_executed.max inst 12,689 - smsp__inst_executed.min inst 11,912 - smsp__inst_executed.sum inst 787,134 + smsp__inst_executed.avg inst 12,298.83 + smsp__inst_executed.max inst 12,502 + smsp__inst_executed.min inst 12,048 + smsp__inst_executed.sum inst 787,125 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,018.25 - smsp__cycles_active.sum cycle 4,545,168 + smsp__cycles_active.avg cycle 71,602.17 + smsp__cycles_active.sum cycle 4,582,539 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 
2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.72 + gpu__time_duration.sum usecond 58.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -449,24 +477,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,308.84 - smsp__inst_executed.max inst 12,686 - smsp__inst_executed.min inst 12,079 - smsp__inst_executed.sum inst 787,766 + smsp__inst_executed.avg inst 12,309.36 + smsp__inst_executed.max inst 12,725 + smsp__inst_executed.min inst 11,813 + smsp__inst_executed.sum inst 787,799 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,024.20 - smsp__cycles_active.sum cycle 4,609,549 + smsp__cycles_active.avg cycle 71,676.52 + smsp__cycles_active.sum cycle 4,587,297 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 
2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 185.95 + gpu__time_duration.sum usecond 185.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -475,26 +505,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 157,276.34 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.76 - smsp__inst_executed.avg inst 132,205.28 - smsp__inst_executed.max inst 134,358 - smsp__inst_executed.min inst 130,024 - smsp__inst_executed.sum inst 8,461,138 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.55 + smsp__average_warp_latency_issue_stalled_barrier.pct % 157,267.79 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.68 + smsp__inst_executed.avg inst 132,200.03 + smsp__inst_executed.max inst 134,283 + smsp__inst_executed.min inst 130,070 + smsp__inst_executed.sum inst 8,460,802 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.56 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 252,593.19 - smsp__cycles_active.sum cycle 16,165,964 + smsp__cycles_active.avg 
cycle 252,497.38 + smsp__cycles_active.sum cycle 16,159,832 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.30 + gpu__time_duration.sum usecond 57.98 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -505,24 +537,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.47 - smsp__inst_executed.max inst 12,560 - smsp__inst_executed.min inst 12,088 - smsp__inst_executed.sum inst 786,526 + smsp__inst_executed.avg inst 12,290.03 + smsp__inst_executed.max inst 12,632 + smsp__inst_executed.min inst 12,096 + smsp__inst_executed.sum inst 786,562 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,795.55 - smsp__cycles_active.sum cycle 4,530,915 + 
smsp__cycles_active.avg cycle 70,852.95 + smsp__cycles_active.sum cycle 4,534,589 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.76 + gpu__time_duration.sum usecond 58.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -533,24 +567,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.78 - smsp__inst_executed.max inst 12,745 - smsp__inst_executed.min inst 11,874 - smsp__inst_executed.sum inst 786,610 + smsp__inst_executed.avg inst 12,291.22 + smsp__inst_executed.max inst 12,768 + smsp__inst_executed.min inst 11,984 + smsp__inst_executed.sum inst 786,638 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,441.03 - smsp__cycles_active.sum cycle 4,508,226 
+ smsp__cycles_active.avg cycle 70,387.03 + smsp__cycles_active.sum cycle 4,504,770 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.56 + gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -561,17 +597,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.70 - smsp__inst_executed.max inst 12,566 - smsp__inst_executed.min inst 12,056 - smsp__inst_executed.sum inst 786,797 + smsp__inst_executed.avg inst 12,293.27 + smsp__inst_executed.max inst 12,615 + smsp__inst_executed.min inst 11,964 + smsp__inst_executed.sum inst 786,769 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,597.62 - smsp__cycles_active.sum cycle 
4,582,248 + smsp__cycles_active.avg cycle 72,163 + smsp__cycles_active.sum cycle 4,618,432 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.95 @@ -579,6 +615,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -589,24 +627,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,299.03 - smsp__inst_executed.max inst 12,648 - smsp__inst_executed.min inst 11,910 - smsp__inst_executed.sum inst 787,138 + smsp__inst_executed.avg inst 12,298.78 + smsp__inst_executed.max inst 12,912 + smsp__inst_executed.min inst 11,741 + smsp__inst_executed.sum inst 787,122 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,625.34 - smsp__cycles_active.sum cycle 4,520,022 + smsp__cycles_active.avg cycle 71,542.72 + smsp__cycles_active.sum 
cycle 4,578,734 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.53 + gpu__time_duration.sum usecond 58.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -617,24 +657,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,308.55 - smsp__inst_executed.max inst 12,690 - smsp__inst_executed.min inst 12,090 - smsp__inst_executed.sum inst 787,747 + smsp__inst_executed.avg inst 12,309.25 + smsp__inst_executed.max inst 12,746 + smsp__inst_executed.min inst 11,941 + smsp__inst_executed.sum inst 787,792 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,911.34 - smsp__cycles_active.sum cycle 4,602,326 + smsp__cycles_active.avg cycle 71,521.23 + 
smsp__cycles_active.sum cycle 4,577,359 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 184.93 + gpu__time_duration.sum usecond 185.66 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -643,26 +685,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 159,654.44 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,596.54 - smsp__inst_executed.avg inst 132,204.97 - smsp__inst_executed.max inst 134,424 - smsp__inst_executed.min inst 129,985 - smsp__inst_executed.sum inst 8,461,118 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86 + smsp__average_warp_latency_issue_stalled_barrier.pct % 160,534.14 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,605.34 + smsp__inst_executed.avg inst 132,205.30 + 
smsp__inst_executed.max inst 134,375 + smsp__inst_executed.min inst 130,047 + smsp__inst_executed.sum inst 8,461,139 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.03 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 252,486.12 - smsp__cycles_active.sum cycle 16,159,112 + smsp__cycles_active.avg cycle 251,948.69 + smsp__cycles_active.sum cycle 16,124,716 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.24 + gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -673,24 +717,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.47 - smsp__inst_executed.max inst 12,834 - smsp__inst_executed.min inst 11,932 - smsp__inst_executed.sum inst 786,526 + smsp__inst_executed.avg inst 12,289.38 + smsp__inst_executed.max inst 12,640 + smsp__inst_executed.min inst 11,952 + smsp__inst_executed.sum inst 786,520 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,577.20 - smsp__cycles_active.sum cycle 4,516,941 + smsp__cycles_active.avg cycle 71,646.14 + smsp__cycles_active.sum cycle 4,585,353 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.24 + gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -701,24 +747,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.45 - smsp__inst_executed.max inst 12,702 - smsp__inst_executed.min inst 11,912 - smsp__inst_executed.sum inst 786,525 + smsp__inst_executed.avg inst 12,289.72 + smsp__inst_executed.max inst 12,532 + smsp__inst_executed.min inst 11,872 + smsp__inst_executed.sum inst 786,542 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,559.42 - smsp__cycles_active.sum cycle 4,579,803 + smsp__cycles_active.avg cycle 71,515.61 + smsp__cycles_active.sum cycle 4,576,999 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.02 + gpu__time_duration.sum usecond 57.82 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -729,24 +777,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,291.50 - smsp__inst_executed.max inst 12,638 - smsp__inst_executed.min inst 12,088 - smsp__inst_executed.sum inst 786,656 + smsp__inst_executed.avg inst 12,290.48 + smsp__inst_executed.max inst 12,775 + smsp__inst_executed.min inst 11,898 + smsp__inst_executed.sum inst 786,591 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,387.86 - smsp__cycles_active.sum cycle 4,568,823 + smsp__cycles_active.avg cycle 71,662.92 + smsp__cycles_active.sum cycle 4,586,427 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.56 + gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 
0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -757,24 +807,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.50 - smsp__inst_executed.max inst 12,785 - smsp__inst_executed.min inst 11,630 - smsp__inst_executed.sum inst 786,784 + smsp__inst_executed.avg inst 12,293.81 + smsp__inst_executed.max inst 12,753 + smsp__inst_executed.min inst 11,862 + smsp__inst_executed.sum inst 786,804 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,530.47 - smsp__cycles_active.sum cycle 4,577,950 + smsp__cycles_active.avg cycle 72,455.17 + smsp__cycles_active.sum cycle 4,637,131 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.98 + gpu__time_duration.sum usecond 58.02 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -785,24 +837,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.41 - smsp__inst_executed.max inst 12,716 - smsp__inst_executed.min inst 11,883 - smsp__inst_executed.sum inst 787,098 + smsp__inst_executed.avg inst 12,299.31 + smsp__inst_executed.max inst 12,673 + smsp__inst_executed.min inst 12,034 + smsp__inst_executed.sum inst 787,156 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,755.27 - smsp__cycles_active.sum cycle 4,528,337 + smsp__cycles_active.avg cycle 71,010.81 + smsp__cycles_active.sum cycle 4,544,692 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.53 + gpu__time_duration.sum usecond 
58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -813,24 +867,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,310.11 - smsp__inst_executed.max inst 12,496 - smsp__inst_executed.min inst 11,901 - smsp__inst_executed.sum inst 787,847 + smsp__inst_executed.avg inst 12,307.36 + smsp__inst_executed.max inst 12,544 + smsp__inst_executed.min inst 11,923 + smsp__inst_executed.sum inst 787,671 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,085.02 - smsp__cycles_active.sum cycle 4,613,441 + smsp__cycles_active.avg cycle 72,317.72 + smsp__cycles_active.sum cycle 4,628,334 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 185.02 + gpu__time_duration.sum usecond 
183.36 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.70 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -839,26 +895,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 158,201.12 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,582.01 - smsp__inst_executed.avg inst 132,195.78 - smsp__inst_executed.max inst 134,319 - smsp__inst_executed.min inst 130,101 - smsp__inst_executed.sum inst 8,460,530 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.71 + smsp__average_warp_latency_issue_stalled_barrier.pct % 160,035.27 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.35 + smsp__inst_executed.avg inst 132,214.05 + smsp__inst_executed.max inst 134,326 + smsp__inst_executed.min inst 130,109 + smsp__inst_executed.sum inst 8,461,699 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.87 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 251,923.50 - smsp__cycles_active.sum cycle 16,123,104 + smsp__cycles_active.avg cycle 252,974.50 + smsp__cycles_active.sum cycle 16,190,368 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, 
Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.66 + gpu__time_duration.sum usecond 58.69 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -869,24 +927,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.02 - smsp__inst_executed.max inst 12,668 - smsp__inst_executed.min inst 11,912 - smsp__inst_executed.sum inst 786,497 + smsp__inst_executed.avg inst 12,288.52 + smsp__inst_executed.max inst 12,488 + smsp__inst_executed.min inst 11,936 + smsp__inst_executed.sum inst 786,465 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,635.66 - smsp__cycles_active.sum cycle 4,648,682 + smsp__cycles_active.avg cycle 72,501.95 + smsp__cycles_active.sum cycle 4,640,125 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 
13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.37 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -897,24 +957,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.31 - smsp__inst_executed.max inst 12,508 - smsp__inst_executed.min inst 11,924 - smsp__inst_executed.sum inst 786,452 + smsp__inst_executed.avg inst 12,287.92 + smsp__inst_executed.max inst 12,484 + smsp__inst_executed.min inst 12,100 + smsp__inst_executed.sum inst 786,427 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,858 - smsp__cycles_active.sum cycle 4,534,912 + smsp__cycles_active.avg cycle 71,503.14 + smsp__cycles_active.sum cycle 4,576,201 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 
13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.30 + gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -925,24 +987,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.89 - smsp__inst_executed.max inst 12,659 - smsp__inst_executed.min inst 11,942 - smsp__inst_executed.sum inst 786,489 + smsp__inst_executed.avg inst 12,289.14 + smsp__inst_executed.max inst 12,869 + smsp__inst_executed.min inst 11,892 + smsp__inst_executed.sum inst 786,505 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,747.28 - smsp__cycles_active.sum cycle 4,527,826 + smsp__cycles_active.avg cycle 71,524.88 + smsp__cycles_active.sum cycle 4,577,592 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 
2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.98 + gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -953,17 +1017,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,291.03 - smsp__inst_executed.max inst 12,683 - smsp__inst_executed.min inst 11,982 - smsp__inst_executed.sum inst 786,626 + smsp__inst_executed.avg inst 12,290.83 + smsp__inst_executed.max inst 12,572 + smsp__inst_executed.min inst 12,020 + smsp__inst_executed.sum inst 786,613 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,881.03 - smsp__cycles_active.sum cycle 4,536,386 + smsp__cycles_active.avg cycle 70,350.59 + smsp__cycles_active.sum cycle 4,502,438 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned 
long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.43 @@ -971,6 +1035,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -981,24 +1047,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.31 - smsp__inst_executed.max inst 12,752 - smsp__inst_executed.min inst 11,612 - smsp__inst_executed.sum inst 786,772 + smsp__inst_executed.avg inst 12,292.59 + smsp__inst_executed.max inst 12,681 + smsp__inst_executed.min inst 11,988 + smsp__inst_executed.sum inst 786,726 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,166.55 - smsp__cycles_active.sum cycle 4,554,659 + smsp__cycles_active.avg cycle 71,302.25 + smsp__cycles_active.sum cycle 4,563,344 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, 
unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.05 + gpu__time_duration.sum usecond 57.89 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1010,23 +1078,25 @@ smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.28 - smsp__inst_executed.max inst 12,667 - smsp__inst_executed.min inst 11,870 + smsp__inst_executed.max inst 12,708 + smsp__inst_executed.min inst 11,898 smsp__inst_executed.sum inst 787,090 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,893.25 - smsp__cycles_active.sum cycle 4,537,168 + smsp__cycles_active.avg cycle 71,771.03 + smsp__cycles_active.sum cycle 4,593,346 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics 
---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.43 + gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1037,24 +1107,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,311.05 - smsp__inst_executed.max inst 12,751 - smsp__inst_executed.min inst 12,075 - smsp__inst_executed.sum inst 787,907 + smsp__inst_executed.avg inst 12,310.67 + smsp__inst_executed.max inst 12,575 + smsp__inst_executed.min inst 12,060 + smsp__inst_executed.sum inst 787,883 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,743.69 - smsp__cycles_active.sum cycle 4,591,596 + smsp__cycles_active.avg cycle 73,137 + smsp__cycles_active.sum cycle 4,680,768 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics 
---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 185.66 + gpu__time_duration.sum usecond 184.67 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1063,26 +1135,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 161,553.58 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,615.54 - smsp__inst_executed.avg inst 132,193.28 - smsp__inst_executed.max inst 134,294 - smsp__inst_executed.min inst 130,087 - smsp__inst_executed.sum inst 8,460,370 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.09 + smsp__average_warp_latency_issue_stalled_barrier.pct % 161,865.73 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,618.66 + smsp__inst_executed.avg inst 132,202.36 + smsp__inst_executed.max inst 134,344 + smsp__inst_executed.min inst 130,057 + smsp__inst_executed.sum inst 8,460,951 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.11 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 252,649.62 - smsp__cycles_active.sum cycle 16,169,576 + smsp__cycles_active.avg cycle 252,870.69 + smsp__cycles_active.sum cycle 16,183,724 
---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.17 + gpu__time_duration.sum usecond 59.23 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1093,24 +1167,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.12 - smsp__inst_executed.max inst 12,484 - smsp__inst_executed.min inst 12,084 - smsp__inst_executed.sum inst 786,504 + smsp__inst_executed.avg inst 12,287.84 + smsp__inst_executed.max inst 12,700 + smsp__inst_executed.min inst 11,884 + smsp__inst_executed.sum inst 786,422 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,022.83 - smsp__cycles_active.sum cycle 4,609,461 + smsp__cycles_active.avg cycle 71,958.33 + smsp__cycles_active.sum cycle 
4,605,333 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.82 + gpu__time_duration.sum usecond 58.91 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1121,24 +1197,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.45 - smsp__inst_executed.max inst 12,672 - smsp__inst_executed.min inst 11,900 - smsp__inst_executed.sum inst 786,461 + smsp__inst_executed.avg inst 12,288.59 + smsp__inst_executed.max inst 12,836 + smsp__inst_executed.min inst 11,892 + smsp__inst_executed.sum inst 786,470 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,587.06 - smsp__cycles_active.sum cycle 4,581,572 + smsp__cycles_active.avg cycle 72,352.05 + smsp__cycles_active.sum 
cycle 4,630,531 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.53 + gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1149,24 +1227,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.42 - smsp__inst_executed.max inst 12,632 - smsp__inst_executed.min inst 12,096 - smsp__inst_executed.sum inst 786,459 + smsp__inst_executed.avg inst 12,288.62 + smsp__inst_executed.max inst 12,636 + smsp__inst_executed.min inst 11,942 + smsp__inst_executed.sum inst 786,472 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,582.89 - smsp__cycles_active.sum cycle 4,517,305 + smsp__cycles_active.avg cycle 71,507.44 + 
smsp__cycles_active.sum cycle 4,576,476 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.02 + gpu__time_duration.sum usecond 58.11 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1177,24 +1257,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.80 - smsp__inst_executed.max inst 12,500 - smsp__inst_executed.min inst 11,924 - smsp__inst_executed.sum inst 786,483 + smsp__inst_executed.avg inst 12,289.72 + smsp__inst_executed.max inst 12,672 + smsp__inst_executed.min inst 11,662 + smsp__inst_executed.sum inst 786,542 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,332.38 - smsp__cycles_active.sum cycle 4,565,272 + smsp__cycles_active.avg cycle 
71,165.02 + smsp__cycles_active.sum cycle 4,554,561 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.92 + gpu__time_duration.sum usecond 58.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1205,24 +1287,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.08 - smsp__inst_executed.max inst 12,636 - smsp__inst_executed.min inst 11,868 - smsp__inst_executed.sum inst 786,565 + smsp__inst_executed.avg inst 12,291 + smsp__inst_executed.max inst 12,677 + smsp__inst_executed.min inst 11,882 + smsp__inst_executed.sum inst 786,624 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,497.30 - smsp__cycles_active.sum cycle 4,575,827 + smsp__cycles_active.avg cycle 
70,224.42 + smsp__cycles_active.sum cycle 4,494,363 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.43 + gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1233,17 +1317,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,292.33 - smsp__inst_executed.max inst 12,709 - smsp__inst_executed.min inst 11,780 - smsp__inst_executed.sum inst 786,709 + smsp__inst_executed.avg inst 12,294.19 + smsp__inst_executed.max inst 12,761 + smsp__inst_executed.min inst 11,776 + smsp__inst_executed.sum inst 786,828 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,223.28 - smsp__cycles_active.sum cycle 4,622,290 + smsp__cycles_active.avg 
cycle 72,352.88 + smsp__cycles_active.sum cycle 4,630,584 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.08 @@ -1251,6 +1335,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1261,24 +1347,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,296.56 - smsp__inst_executed.max inst 12,676 - smsp__inst_executed.min inst 11,885 - smsp__inst_executed.sum inst 786,980 + smsp__inst_executed.avg inst 12,300.17 + smsp__inst_executed.max inst 12,699 + smsp__inst_executed.min inst 11,741 + smsp__inst_executed.sum inst 787,211 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,705.17 - smsp__cycles_active.sum cycle 4,525,131 + smsp__cycles_active.avg cycle 71,852.11 + smsp__cycles_active.sum cycle 4,598,535 
---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.72 + gpu__time_duration.sum usecond 58.69 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1289,24 +1377,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,311.58 - smsp__inst_executed.max inst 12,710 - smsp__inst_executed.min inst 11,851 - smsp__inst_executed.sum inst 787,941 + smsp__inst_executed.avg inst 12,305.34 + smsp__inst_executed.max inst 12,557 + smsp__inst_executed.min inst 12,098 + smsp__inst_executed.sum inst 787,542 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,827.20 - smsp__cycles_active.sum cycle 4,596,941 + smsp__cycles_active.avg cycle 72,448.42 + smsp__cycles_active.sum cycle 
4,636,699 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 185.02 + gpu__time_duration.sum usecond 183.71 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1315,26 +1405,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 159,138.76 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,591.39 - smsp__inst_executed.avg inst 132,167.45 - smsp__inst_executed.max inst 134,248 - smsp__inst_executed.min inst 130,050 - smsp__inst_executed.sum inst 8,458,717 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.84 + smsp__average_warp_latency_issue_stalled_barrier.pct % 159,410.38 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.10 + smsp__inst_executed.avg inst 132,239.30 + smsp__inst_executed.max inst 134,389 + 
smsp__inst_executed.min inst 130,159 + smsp__inst_executed.sum inst 8,463,315 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.78 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 251,922.81 - smsp__cycles_active.sum cycle 16,123,060 + smsp__cycles_active.avg cycle 253,259.25 + smsp__cycles_active.sum cycle 16,208,592 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.58 + gpu__time_duration.sum usecond 59.68 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1345,24 +1437,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,287.83 - smsp__inst_executed.max inst 12,872 - smsp__inst_executed.min inst 11,524 - smsp__inst_executed.sum inst 786,421 + smsp__inst_executed.avg inst 12,287.97 + smsp__inst_executed.max inst 12,696 + smsp__inst_executed.min inst 11,904 + smsp__inst_executed.sum inst 786,430 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 73,310.11 - smsp__cycles_active.sum cycle 4,691,847 + smsp__cycles_active.avg cycle 73,186.48 + smsp__cycles_active.sum cycle 4,683,935 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.10 + gpu__time_duration.sum usecond 59.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1373,24 +1467,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.67 - smsp__inst_executed.max inst 12,488 - smsp__inst_executed.min inst 12,092 - smsp__inst_executed.sum inst 786,475 + smsp__inst_executed.avg inst 12,288.12 + smsp__inst_executed.max inst 12,680 + smsp__inst_executed.min inst 12,068 + smsp__inst_executed.sum inst 786,440 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,120.91 - smsp__cycles_active.sum cycle 4,615,738 + smsp__cycles_active.avg cycle 71,841.56 + smsp__cycles_active.sum cycle 4,597,860 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.85 + gpu__time_duration.sum usecond 59.20 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1401,24 +1497,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.33 - smsp__inst_executed.max inst 12,500 - smsp__inst_executed.min inst 11,728 - smsp__inst_executed.sum inst 786,453 + smsp__inst_executed.avg inst 12,288.52 + smsp__inst_executed.max inst 12,664 + smsp__inst_executed.min inst 11,916 + smsp__inst_executed.sum inst 786,465 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,726.75 - smsp__cycles_active.sum cycle 4,590,512 + smsp__cycles_active.avg cycle 71,395.02 + smsp__cycles_active.sum cycle 4,569,281 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.62 + gpu__time_duration.sum usecond 58.37 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1429,24 +1527,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.86 - smsp__inst_executed.max inst 12,522 - smsp__inst_executed.min inst 11,924 - smsp__inst_executed.sum inst 786,487 + smsp__inst_executed.avg inst 12,288.83 + smsp__inst_executed.max inst 12,652 + smsp__inst_executed.min inst 12,092 + smsp__inst_executed.sum inst 786,485 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,432.41 - smsp__cycles_active.sum cycle 4,571,674 + smsp__cycles_active.avg cycle 70,939.75 + smsp__cycles_active.sum cycle 4,540,144 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.18 + gpu__time_duration.sum usecond 
58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1457,24 +1557,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.25 - smsp__inst_executed.max inst 12,672 - smsp__inst_executed.min inst 11,898 - smsp__inst_executed.sum inst 786,512 + smsp__inst_executed.avg inst 12,289.27 + smsp__inst_executed.max inst 12,671 + smsp__inst_executed.min inst 11,948 + smsp__inst_executed.sum inst 786,513 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,698.97 - smsp__cycles_active.sum cycle 4,588,734 + smsp__cycles_active.avg cycle 70,398.56 + smsp__cycles_active.sum cycle 4,505,508 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.95 + gpu__time_duration.sum 
usecond 57.89 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1485,24 +1587,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.50 - smsp__inst_executed.max inst 12,484 - smsp__inst_executed.min inst 12,008 - smsp__inst_executed.sum inst 786,592 + smsp__inst_executed.avg inst 12,290.94 + smsp__inst_executed.max inst 12,634 + smsp__inst_executed.min inst 11,856 + smsp__inst_executed.sum inst 786,620 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,943.89 - smsp__cycles_active.sum cycle 4,604,409 + smsp__cycles_active.avg cycle 71,430.73 + smsp__cycles_active.sum cycle 4,571,567 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.40 + 
gpu__time_duration.sum usecond 58.91 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1513,24 +1617,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.12 - smsp__inst_executed.max inst 12,713 - smsp__inst_executed.min inst 11,621 - smsp__inst_executed.sum inst 786,760 + smsp__inst_executed.avg inst 12,294.67 + smsp__inst_executed.max inst 12,714 + smsp__inst_executed.min inst 11,973 + smsp__inst_executed.sum inst 786,859 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,649.19 - smsp__cycles_active.sum cycle 4,585,548 + smsp__cycles_active.avg cycle 72,485.34 + smsp__cycles_active.sum cycle 4,639,062 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 
58.05 + gpu__time_duration.sum usecond 58.11 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1541,24 +1647,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.70 - smsp__inst_executed.max inst 12,725 - smsp__inst_executed.min inst 11,966 - smsp__inst_executed.sum inst 787,117 + smsp__inst_executed.avg inst 12,300.77 + smsp__inst_executed.max inst 12,715 + smsp__inst_executed.min inst 11,880 + smsp__inst_executed.sum inst 787,249 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,102.67 - smsp__cycles_active.sum cycle 4,550,571 + smsp__cycles_active.avg cycle 71,482.33 + smsp__cycles_active.sum cycle 4,574,869 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum 
usecond 58.50 + gpu__time_duration.sum usecond 58.40 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1569,24 +1677,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.14 - smsp__inst_executed.max inst 12,737 - smsp__inst_executed.min inst 12,018 - smsp__inst_executed.sum inst 787,785 + smsp__inst_executed.avg inst 12,305.27 + smsp__inst_executed.max inst 12,723 + smsp__inst_executed.min inst 11,888 + smsp__inst_executed.sum inst 787,537 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,724.20 - smsp__cycles_active.sum cycle 4,590,349 + smsp__cycles_active.avg cycle 71,478.97 + smsp__cycles_active.sum cycle 4,574,654 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum 
usecond 185.76 + gpu__time_duration.sum usecond 182.66 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1595,26 +1705,28 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 159,061.95 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,590.62 - smsp__inst_executed.avg inst 132,213.64 - smsp__inst_executed.max inst 134,321 - smsp__inst_executed.min inst 130,119 - smsp__inst_executed.sum inst 8,461,673 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.80 + smsp__average_warp_latency_issue_stalled_barrier.pct % 159,442.50 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.42 + smsp__inst_executed.avg inst 132,250.77 + smsp__inst_executed.max inst 134,372 + smsp__inst_executed.min inst 130,113 + smsp__inst_executed.sum inst 8,464,049 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.82 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 252,290.12 - smsp__cycles_active.sum cycle 16,146,568 + smsp__cycles_active.avg cycle 252,682.78 + smsp__cycles_active.sum cycle 16,171,698 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, 
unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 56.96 + gpu__time_duration.sum usecond 56.86 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1625,24 +1737,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.03 + smsp__inst_executed.avg inst 12,288.31 smsp__inst_executed.max inst 12,684 smsp__inst_executed.min inst 12,072 - smsp__inst_executed.sum inst 786,434 + smsp__inst_executed.sum inst 786,452 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 68,948.02 - smsp__cycles_active.sum cycle 4,412,673 + smsp__cycles_active.avg cycle 69,556.61 + smsp__cycles_active.sum cycle 4,451,623 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 + 
void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.71 + gpu__time_duration.sum usecond 59.84 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1653,24 +1767,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.23 - smsp__inst_executed.max inst 12,712 - smsp__inst_executed.min inst 11,696 - smsp__inst_executed.sum inst 786,447 + smsp__inst_executed.avg inst 12,287.48 + smsp__inst_executed.max inst 12,868 + smsp__inst_executed.min inst 11,704 + smsp__inst_executed.sum inst 786,399 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,574.20 - smsp__cycles_active.sum cycle 4,644,749 + smsp__cycles_active.avg cycle 73,568.34 + smsp__cycles_active.sum cycle 4,708,374 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 
7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.01 + gpu__time_duration.sum usecond 58.88 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1681,24 +1797,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.03 - smsp__inst_executed.max inst 12,668 - smsp__inst_executed.min inst 12,068 - smsp__inst_executed.sum inst 786,434 + smsp__inst_executed.avg inst 12,288.06 + smsp__inst_executed.max inst 12,672 + smsp__inst_executed.min inst 11,712 + smsp__inst_executed.sum inst 786,436 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,415.67 - smsp__cycles_active.sum cycle 4,634,603 + smsp__cycles_active.avg cycle 73,217.27 + smsp__cycles_active.sum cycle 4,685,905 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, 
Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.14 + gpu__time_duration.sum usecond 59.39 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1709,24 +1827,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.58 - smsp__inst_executed.max inst 12,676 - smsp__inst_executed.min inst 11,938 - smsp__inst_executed.sum inst 786,469 + smsp__inst_executed.avg inst 12,288.70 + smsp__inst_executed.max inst 12,684 + smsp__inst_executed.min inst 12,060 + smsp__inst_executed.sum inst 786,477 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,550.84 - smsp__cycles_active.sum cycle 4,579,254 + smsp__cycles_active.avg cycle 72,586.44 + smsp__cycles_active.sum cycle 4,645,532 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 
1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.34 + gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1737,17 +1857,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.86 - smsp__inst_executed.max inst 12,476 - smsp__inst_executed.min inst 12,078 - smsp__inst_executed.sum inst 786,487 + smsp__inst_executed.avg inst 12,287.72 + smsp__inst_executed.max inst 12,496 + smsp__inst_executed.min inst 12,076 + smsp__inst_executed.sum inst 786,414 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,923.06 - smsp__cycles_active.sum cycle 4,539,076 + smsp__cycles_active.avg cycle 71,872.52 + smsp__cycles_active.sum cycle 4,599,841 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, 
Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.14 @@ -1755,6 +1875,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1765,24 +1887,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.56 - smsp__inst_executed.max inst 12,503 - smsp__inst_executed.min inst 11,928 - smsp__inst_executed.sum inst 786,532 + smsp__inst_executed.avg inst 12,289.17 + smsp__inst_executed.max inst 12,692 + smsp__inst_executed.min inst 11,950 + smsp__inst_executed.sum inst 786,507 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,782.67 - smsp__cycles_active.sum cycle 4,530,091 + smsp__cycles_active.avg cycle 71,569.31 + smsp__cycles_active.sum cycle 4,580,436 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned 
long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.79 + gpu__time_duration.sum usecond 57.98 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1793,24 +1917,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.62 - smsp__inst_executed.max inst 12,556 - smsp__inst_executed.min inst 12,068 - smsp__inst_executed.sum inst 786,600 + smsp__inst_executed.avg inst 12,290.83 + smsp__inst_executed.max inst 12,676 + smsp__inst_executed.min inst 12,066 + smsp__inst_executed.sum inst 786,613 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,737.78 - smsp__cycles_active.sum cycle 4,527,218 + smsp__cycles_active.avg cycle 70,942.83 + smsp__cycles_active.sum cycle 4,540,341 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, 
unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.75 + gpu__time_duration.sum usecond 58.78 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1821,24 +1947,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,294.31 - smsp__inst_executed.max inst 12,661 - smsp__inst_executed.min inst 11,903 - smsp__inst_executed.sum inst 786,836 + smsp__inst_executed.avg inst 12,292.48 + smsp__inst_executed.max inst 12,668 + smsp__inst_executed.min inst 11,864 + smsp__inst_executed.sum inst 786,719 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,611.56 - smsp__cycles_active.sum cycle 4,583,140 + smsp__cycles_active.avg cycle 71,652.72 + smsp__cycles_active.sum cycle 4,585,774 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned 
long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.89 + gpu__time_duration.sum usecond 57.92 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1849,24 +1977,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,300.94 - smsp__inst_executed.max inst 12,703 - smsp__inst_executed.min inst 11,887 - smsp__inst_executed.sum inst 787,260 + smsp__inst_executed.avg inst 12,297.83 + smsp__inst_executed.max inst 12,507 + smsp__inst_executed.min inst 12,080 + smsp__inst_executed.sum inst 787,061 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,911.81 - smsp__cycles_active.sum cycle 4,538,356 + smsp__cycles_active.avg cycle 71,989.52 + smsp__cycles_active.sum cycle 4,607,329 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 + void interBlockStep(T1 *, 
unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.11 + gpu__time_duration.sum usecond 58.56 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1877,24 +2007,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,305.08 - smsp__inst_executed.max inst 12,731 - smsp__inst_executed.min inst 11,780 - smsp__inst_executed.sum inst 787,525 + smsp__inst_executed.avg inst 12,309.70 + smsp__inst_executed.max inst 12,667 + smsp__inst_executed.min inst 12,088 + smsp__inst_executed.sum inst 787,821 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,733.38 - smsp__cycles_active.sum cycle 4,590,936 + smsp__cycles_active.avg cycle 72,280.86 + smsp__cycles_active.sum cycle 4,625,975 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 + void inBlockStep(T1 *, 
unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 184.58 + gpu__time_duration.sum usecond 183.58 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.70 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.70 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1903,15 +2035,15 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 - smsp__average_warp_latency_issue_stalled_barrier.pct % 161,368.32 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,613.68 - smsp__inst_executed.avg inst 131,997.05 - smsp__inst_executed.max inst 134,093 - smsp__inst_executed.min inst 129,868 - smsp__inst_executed.sum inst 8,447,811 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.12 + smsp__average_warp_latency_issue_stalled_barrier.pct % 159,496.29 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.96 + smsp__inst_executed.avg inst 131,966.41 + smsp__inst_executed.max inst 134,088 + smsp__inst_executed.min inst 129,856 + smsp__inst_executed.sum inst 8,445,850 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.79 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 - smsp__cycles_active.avg cycle 251,939.98 - 
smsp__cycles_active.sum cycle 16,124,159 + smsp__cycles_active.avg cycle 253,055.50 + smsp__cycles_active.sum cycle 16,195,552 ---------------------------------------------------------------------- --------------- ------------------------------ diff --git a/homework_3/reportv2.3 b/homework_3/analyse/RC2-f749862/profReportv2.txt similarity index 77% rename from homework_3/reportv2.3 rename to homework_3/analyse/RC2-f749862/profReportv2.txt index 1f50b79..d0436d9 100644 --- a/homework_3/reportv2.3 +++ b/homework_3/analyse/RC2-f749862/profReportv2.txt @@ -1,108 +1,112 @@ -==PROF== Connected to process 20279 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA) -==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% 
- 5 passes -==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 47: 
0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes -==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes -==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes -==PROF== Disconnected from process 20279 -[20279] bitonicCUDA@127.0.0.1 - void prephase(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 - Section: Command line profiler metrics - ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum msecond 1.20 - l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 - l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a - l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 - l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 +==PROF== Connected to process 38875 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA) +==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 
25: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes +==PROF== Profiling 
"interBlockStep" - 53: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes +==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes +==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes +==PROF== Disconnected from process 38875 +[38875] bitonicCUDA@127.0.0.1 + void prephase(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 + Section: Command line profiler metrics + ---------------------------------------------------------------------- --------------- ------------------------------ + gpu__time_duration.sum msecond 1.21 + l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 + l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,946.88 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,116 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,795 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,150 - smsp__average_warp_latency_issue_stalled_barrier.pct % 644,345.26 - smsp__average_warp_latency_issue_stalled_barrier.ratio 6,443.45 - smsp__inst_executed.avg inst 1,030,868.94 - smsp__inst_executed.max inst 1,031,062 - smsp__inst_executed.min inst 1,030,675 - smsp__inst_executed.sum inst 65,975,612 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,970.88 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,151 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,739 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,534 + smsp__average_warp_latency_issue_stalled_barrier.pct % 637,077.10 + smsp__average_warp_latency_issue_stalled_barrier.ratio 6,370.77 + smsp__inst_executed.avg inst 1,030,627.31 + smsp__inst_executed.max inst 1,030,849 + smsp__inst_executed.min inst 1,030,423 + smsp__inst_executed.sum inst 65,960,148 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.38 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 - smsp__cycles_active.avg cycle 1,666,829.12 - smsp__cycles_active.sum 
cycle 106,677,064 + smsp__cycles_active.avg cycle 1,665,720.27 + smsp__cycles_active.sum cycle 106,606,097 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.84 + gpu__time_duration.sum usecond 59.87 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -113,52 +117,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,308.59 - smsp__inst_executed.max inst 12,538 - smsp__inst_executed.min inst 11,945 - smsp__inst_executed.sum inst 787,750 + smsp__inst_executed.avg inst 12,309.31 + smsp__inst_executed.max inst 12,551 + smsp__inst_executed.min inst 12,072 + smsp__inst_executed.sum inst 787,796 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 73,268.67 - 
smsp__cycles_active.sum cycle 4,689,195 + smsp__cycles_active.avg cycle 72,981.36 + smsp__cycles_active.sum cycle 4,670,807 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.30 + gpu__time_duration.sum usecond 230.75 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,642.38 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,963 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,322 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,278 - smsp__average_warp_latency_issue_stalled_barrier.pct % 123,392.55 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,233.93 - smsp__inst_executed.avg inst 189,292.45 - smsp__inst_executed.max inst 192,372 - smsp__inst_executed.min inst 186,246 - smsp__inst_executed.sum inst 12,114,717 - 
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,267.31 - smsp__cycles_active.sum cycle 20,241,108 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677.81 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,960 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,401 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,845 + smsp__average_warp_latency_issue_stalled_barrier.pct % 116,386.30 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,163.86 + smsp__inst_executed.avg inst 189,038.39 + smsp__inst_executed.max inst 192,131 + smsp__inst_executed.min inst 185,953 + smsp__inst_executed.sum inst 12,098,457 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.15 + smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 315,800.30 + smsp__cycles_active.sum cycle 20,211,219 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.34 + gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -169,24 +177,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.58 - smsp__inst_executed.max inst 12,667 - smsp__inst_executed.min inst 11,936 - smsp__inst_executed.sum inst 787,109 + smsp__inst_executed.avg inst 12,299.28 + smsp__inst_executed.max inst 12,618 + smsp__inst_executed.min inst 11,935 + smsp__inst_executed.sum inst 787,154 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,505.30 - smsp__cycles_active.sum cycle 4,512,339 + smsp__cycles_active.avg cycle 70,789.86 + smsp__cycles_active.sum cycle 4,530,551 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.55 + gpu__time_duration.sum usecond 59.81 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.20 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.90 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -197,52 +207,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.17 - smsp__inst_executed.max inst 12,702 - smsp__inst_executed.min inst 11,606 - smsp__inst_executed.sum inst 787,787 + smsp__inst_executed.avg inst 12,309.86 + smsp__inst_executed.max inst 12,524 + smsp__inst_executed.min inst 11,866 + smsp__inst_executed.sum inst 787,831 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,897.17 - smsp__cycles_active.sum cycle 4,665,419 + smsp__cycles_active.avg cycle 74,000.58 + smsp__cycles_active.sum cycle 4,736,037 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 230.91 + gpu__time_duration.sum usecond 227.78 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,680 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,009 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,334 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,880 - smsp__average_warp_latency_issue_stalled_barrier.pct % 123,674.16 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,236.74 - smsp__inst_executed.avg inst 189,294.36 - smsp__inst_executed.max inst 192,238 - smsp__inst_executed.min inst 186,252 - smsp__inst_executed.sum inst 12,114,839 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,040.81 - smsp__cycles_active.sum cycle 20,226,612 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,678.44 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,985 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,420 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,855 + smsp__average_warp_latency_issue_stalled_barrier.pct % 116,807.09 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,168.07 + smsp__inst_executed.avg inst 189,034.89 + smsp__inst_executed.max inst 191,946 + smsp__inst_executed.min inst 186,150 + smsp__inst_executed.sum inst 12,098,233 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.15 + 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 316,724.77 + smsp__cycles_active.sum cycle 20,270,385 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.72 + gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -253,24 +267,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.78 - smsp__inst_executed.max inst 12,542 - smsp__inst_executed.min inst 11,960 - smsp__inst_executed.sum inst 786,802 + smsp__inst_executed.avg inst 12,292.47 + smsp__inst_executed.max inst 12,744 + smsp__inst_executed.min inst 12,048 + smsp__inst_executed.sum inst 786,718 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - 
smsp__cycles_active.avg cycle 71,235.28 - smsp__cycles_active.sum cycle 4,559,058 + smsp__cycles_active.avg cycle 70,511.59 + smsp__cycles_active.sum cycle 4,512,742 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.56 + gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -281,24 +297,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.95 - smsp__inst_executed.max inst 12,560 - smsp__inst_executed.min inst 12,096 - smsp__inst_executed.sum inst 787,133 + smsp__inst_executed.avg inst 12,298.75 + smsp__inst_executed.max inst 12,734 + smsp__inst_executed.min inst 11,912 + smsp__inst_executed.sum inst 787,120 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 
- smsp__cycles_active.avg cycle 70,575.53 - smsp__cycles_active.sum cycle 4,516,834 + smsp__cycles_active.avg cycle 71,007.67 + smsp__cycles_active.sum cycle 4,544,491 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.42 + gpu__time_duration.sum usecond 59.55 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -309,52 +327,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,308.61 - smsp__inst_executed.max inst 12,640 - smsp__inst_executed.min inst 12,096 - smsp__inst_executed.sum inst 787,751 + smsp__inst_executed.avg inst 12,309.34 + smsp__inst_executed.max inst 12,774 + smsp__inst_executed.min inst 11,741 + smsp__inst_executed.sum inst 787,798 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 
0 - smsp__cycles_active.avg cycle 72,641.39 - smsp__cycles_active.sum cycle 4,649,049 + smsp__cycles_active.avg cycle 73,984.34 + smsp__cycles_active.sum cycle 4,734,998 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.87 + gpu__time_duration.sum usecond 228.67 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,674.75 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,354 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,796 - smsp__average_warp_latency_issue_stalled_barrier.pct % 123,483.94 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,234.84 - smsp__inst_executed.avg inst 189,288.14 - smsp__inst_executed.max inst 192,081 - smsp__inst_executed.min inst 186,477 - 
smsp__inst_executed.sum inst 12,114,441 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 315,433.75 - smsp__cycles_active.sum cycle 20,187,760 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,686.81 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,018 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,390 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,989 + smsp__average_warp_latency_issue_stalled_barrier.pct % 117,091.55 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,170.92 + smsp__inst_executed.avg inst 189,041.44 + smsp__inst_executed.max inst 191,914 + smsp__inst_executed.min inst 186,023 + smsp__inst_executed.sum inst 12,098,652 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.19 + smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 316,433.83 + smsp__cycles_active.sum cycle 20,251,765 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.14 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -365,24 +387,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.34 - smsp__inst_executed.max inst 12,724 - smsp__inst_executed.min inst 12,076 - smsp__inst_executed.sum inst 786,582 + smsp__inst_executed.avg inst 12,290.77 + smsp__inst_executed.max inst 12,524 + smsp__inst_executed.min inst 12,028 + smsp__inst_executed.sum inst 786,609 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,402.61 - smsp__cycles_active.sum cycle 4,505,767 + smsp__cycles_active.avg cycle 71,664.83 + smsp__cycles_active.sum cycle 4,586,549 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.56 + gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -393,24 +417,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,294.27 - smsp__inst_executed.max inst 12,717 - smsp__inst_executed.min inst 11,988 - smsp__inst_executed.sum inst 786,833 + smsp__inst_executed.avg inst 12,292.66 + smsp__inst_executed.max inst 12,890 + smsp__inst_executed.min inst 11,524 + smsp__inst_executed.sum inst 786,730 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,681.59 - smsp__cycles_active.sum cycle 4,523,622 + smsp__cycles_active.avg cycle 71,510.14 + smsp__cycles_active.sum cycle 4,576,649 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.05 + gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -421,24 +447,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.42 - smsp__inst_executed.max inst 12,663 - smsp__inst_executed.min inst 11,882 - smsp__inst_executed.sum inst 787,099 + smsp__inst_executed.avg inst 12,298.09 + smsp__inst_executed.max inst 12,729 + smsp__inst_executed.min inst 11,878 + smsp__inst_executed.sum inst 787,078 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,688.28 - smsp__cycles_active.sum cycle 4,524,050 + smsp__cycles_active.avg cycle 70,087.36 + smsp__cycles_active.sum cycle 4,485,591 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.49 + gpu__time_duration.sum usecond 59.65 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 
0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -449,52 +477,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.03 - smsp__inst_executed.max inst 12,686 - smsp__inst_executed.min inst 11,852 - smsp__inst_executed.sum inst 787,778 + smsp__inst_executed.avg inst 12,308.77 + smsp__inst_executed.max inst 12,772 + smsp__inst_executed.min inst 11,917 + smsp__inst_executed.sum inst 787,761 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,892.83 - smsp__cycles_active.sum cycle 4,665,141 + smsp__cycles_active.avg cycle 72,945.83 + smsp__cycles_active.sum cycle 4,668,533 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.33 + gpu__time_duration.sum usecond 231.20 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,976 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,331 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,832 - smsp__average_warp_latency_issue_stalled_barrier.pct % 123,882.24 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,238.82 - smsp__inst_executed.avg inst 189,292.19 - smsp__inst_executed.max inst 192,340 - smsp__inst_executed.min inst 186,215 - smsp__inst_executed.sum inst 12,114,700 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,203.25 - smsp__cycles_active.sum cycle 20,237,008 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,679.50 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,958 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,366 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,872 + smsp__average_warp_latency_issue_stalled_barrier.pct % 117,303.63 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,173.04 + smsp__inst_executed.avg inst 189,034.44 + smsp__inst_executed.max inst 192,047 + smsp__inst_executed.min inst 186,006 + smsp__inst_executed.sum inst 12,098,204 + 
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.29 + smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 314,652.92 + smsp__cycles_active.sum cycle 20,137,787 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.08 + gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -505,24 +537,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.06 - smsp__inst_executed.max inst 12,694 - smsp__inst_executed.min inst 11,900 - smsp__inst_executed.sum inst 786,500 + smsp__inst_executed.avg inst 12,289.17 + smsp__inst_executed.max inst 12,704 + smsp__inst_executed.min inst 11,772 + smsp__inst_executed.sum inst 786,507 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,488.72 - smsp__cycles_active.sum cycle 4,511,278 + smsp__cycles_active.avg cycle 70,558.91 + smsp__cycles_active.sum cycle 4,515,770 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.27 + gpu__time_duration.sum usecond 58.18 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -533,24 +567,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,291.25 - smsp__inst_executed.max inst 12,681 - smsp__inst_executed.min inst 12,008 - smsp__inst_executed.sum inst 786,640 + smsp__inst_executed.avg inst 12,290.81 + smsp__inst_executed.max inst 12,711 + smsp__inst_executed.min inst 11,886 + smsp__inst_executed.sum inst 786,612 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 
0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,605.89 - smsp__cycles_active.sum cycle 4,518,777 + smsp__cycles_active.avg cycle 71,430.86 + smsp__cycles_active.sum cycle 4,571,575 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.34 + gpu__time_duration.sum usecond 58.46 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -561,24 +597,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,292.84 - smsp__inst_executed.max inst 12,543 - smsp__inst_executed.min inst 11,998 - smsp__inst_executed.sum inst 786,742 + smsp__inst_executed.avg inst 12,293.34 + smsp__inst_executed.max inst 12,505 + smsp__inst_executed.min inst 11,800 + smsp__inst_executed.sum inst 786,774 smsp__warp_issue_stalled_barrier_per_warp_active.pct 
% 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,795.58 - smsp__cycles_active.sum cycle 4,530,917 + smsp__cycles_active.avg cycle 71,942.39 + smsp__cycles_active.sum cycle 4,604,313 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.02 + gpu__time_duration.sum usecond 58.18 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -589,24 +627,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,299.95 - smsp__inst_executed.max inst 12,683 - smsp__inst_executed.min inst 11,720 - smsp__inst_executed.sum inst 787,197 + smsp__inst_executed.avg inst 12,298.81 + smsp__inst_executed.max inst 12,649 + smsp__inst_executed.min inst 12,114 + smsp__inst_executed.sum inst 787,124 
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,136.48 - smsp__cycles_active.sum cycle 4,488,735 + smsp__cycles_active.avg cycle 71,234.89 + smsp__cycles_active.sum cycle 4,559,033 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.52 + gpu__time_duration.sum usecond 59.39 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -617,52 +657,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.09 - smsp__inst_executed.max inst 12,613 - smsp__inst_executed.min inst 11,865 - smsp__inst_executed.sum inst 787,782 + smsp__inst_executed.avg inst 12,308.92 + smsp__inst_executed.max inst 12,739 + smsp__inst_executed.min inst 11,660 + smsp__inst_executed.sum inst 
787,771 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,887.53 - smsp__cycles_active.sum cycle 4,664,802 + smsp__cycles_active.avg cycle 73,053.95 + smsp__cycles_active.sum cycle 4,675,453 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.30 + gpu__time_duration.sum usecond 228.06 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.56 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,315 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,921 - smsp__average_warp_latency_issue_stalled_barrier.pct % 124,910.64 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.11 - 
smsp__inst_executed.avg inst 189,291.42 - smsp__inst_executed.max inst 192,361 - smsp__inst_executed.min inst 186,192 - smsp__inst_executed.sum inst 12,114,651 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,146.12 - smsp__cycles_active.sum cycle 20,233,352 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,672.12 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,021 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,352 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,754 + smsp__average_warp_latency_issue_stalled_barrier.pct % 116,983.48 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,169.83 + smsp__inst_executed.avg inst 189,031.84 + smsp__inst_executed.max inst 192,116 + smsp__inst_executed.min inst 185,910 + smsp__inst_executed.sum inst 12,098,038 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.18 + smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 316,447.16 + smsp__cycles_active.sum cycle 20,252,618 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 60.03 + gpu__time_duration.sum usecond 60.10 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -673,24 +717,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.48 - smsp__inst_executed.max inst 12,672 - smsp__inst_executed.min inst 11,868 - smsp__inst_executed.sum inst 786,463 + smsp__inst_executed.avg inst 12,288.73 + smsp__inst_executed.max inst 12,512 + smsp__inst_executed.min inst 12,088 + smsp__inst_executed.sum inst 786,479 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 73,004.22 - smsp__cycles_active.sum cycle 4,672,270 + smsp__cycles_active.avg cycle 74,125.95 + smsp__cycles_active.sum cycle 4,744,061 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.08 + gpu__time_duration.sum usecond 58.21 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -701,24 +747,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.81 - smsp__inst_executed.max inst 12,480 - smsp__inst_executed.min inst 12,068 - smsp__inst_executed.sum inst 786,548 + smsp__inst_executed.avg inst 12,289.52 + smsp__inst_executed.max inst 12,659 + smsp__inst_executed.min inst 12,064 + smsp__inst_executed.sum inst 786,529 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,790.83 - smsp__cycles_active.sum cycle 4,530,613 + smsp__cycles_active.avg cycle 71,326.66 + smsp__cycles_active.sum cycle 4,564,906 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.46 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -729,24 +777,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.59 - smsp__inst_executed.max inst 12,701 - smsp__inst_executed.min inst 12,068 - smsp__inst_executed.sum inst 786,598 + smsp__inst_executed.avg inst 12,290.06 + smsp__inst_executed.max inst 12,821 + smsp__inst_executed.min inst 11,676 + smsp__inst_executed.sum inst 786,564 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,847.19 - smsp__cycles_active.sum cycle 4,534,220 + smsp__cycles_active.avg cycle 71,406.72 + smsp__cycles_active.sum cycle 4,570,030 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.27 + gpu__time_duration.sum usecond 58.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 
0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -757,17 +807,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.72 - smsp__inst_executed.max inst 12,656 - smsp__inst_executed.min inst 12,038 - smsp__inst_executed.sum inst 786,798 + smsp__inst_executed.avg inst 12,293.75 + smsp__inst_executed.max inst 12,525 + smsp__inst_executed.min inst 11,886 + smsp__inst_executed.sum inst 786,800 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,747 - smsp__cycles_active.sum cycle 4,527,808 + smsp__cycles_active.avg cycle 72,203.08 + smsp__cycles_active.sum cycle 4,620,997 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.95 @@ -775,6 +825,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -785,24 +837,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.14 - smsp__inst_executed.max inst 12,645 - smsp__inst_executed.min inst 12,029 - smsp__inst_executed.sum inst 787,081 + smsp__inst_executed.avg inst 12,298.52 + smsp__inst_executed.max inst 12,534 + smsp__inst_executed.min inst 12,060 + smsp__inst_executed.sum inst 787,105 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,059.03 - smsp__cycles_active.sum cycle 4,483,778 + smsp__cycles_active.avg cycle 70,465.11 + smsp__cycles_active.sum cycle 4,509,767 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.58 + gpu__time_duration.sum usecond 59.39 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -813,52 +867,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,308.86 - smsp__inst_executed.max inst 12,724 - smsp__inst_executed.min inst 11,654 - smsp__inst_executed.sum inst 787,767 + smsp__inst_executed.avg inst 12,309.41 + smsp__inst_executed.max inst 12,967 + smsp__inst_executed.min inst 11,668 + smsp__inst_executed.sum inst 787,802 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,813.80 - smsp__cycles_active.sum cycle 4,660,083 + smsp__cycles_active.avg cycle 73,798.33 + smsp__cycles_active.sum cycle 4,723,093 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.90 + gpu__time_duration.sum usecond 228.83 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.44 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,942 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,386 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,711 - smsp__average_warp_latency_issue_stalled_barrier.pct % 125,049.38 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.49 - smsp__inst_executed.avg inst 189,291.03 - smsp__inst_executed.max inst 192,313 - smsp__inst_executed.min inst 186,310 - smsp__inst_executed.sum inst 12,114,626 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,608.81 - smsp__cycles_active.sum cycle 20,262,964 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.75 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,971 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,352 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,716 + smsp__average_warp_latency_issue_stalled_barrier.pct % 118,094.62 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,180.95 + smsp__inst_executed.avg inst 189,037.22 + smsp__inst_executed.max inst 192,039 + smsp__inst_executed.min inst 186,030 + smsp__inst_executed.sum inst 12,098,382 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.36 + 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 315,017.08 + smsp__cycles_active.sum cycle 20,161,093 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.78 + gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -869,24 +927,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,287.95 - smsp__inst_executed.max inst 12,856 + smsp__inst_executed.avg inst 12,288.30 + smsp__inst_executed.max inst 12,664 smsp__inst_executed.min inst 11,904 - smsp__inst_executed.sum inst 786,429 + smsp__inst_executed.sum inst 786,451 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,331.70 - 
smsp__cycles_active.sum cycle 4,565,229 + smsp__cycles_active.avg cycle 72,385.16 + smsp__cycles_active.sum cycle 4,632,650 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.94 + gpu__time_duration.sum usecond 60.03 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -897,24 +957,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.03 - smsp__inst_executed.max inst 12,488 - smsp__inst_executed.min inst 11,888 - smsp__inst_executed.sum inst 786,434 + smsp__inst_executed.avg inst 12,288.97 + smsp__inst_executed.max inst 12,828 + smsp__inst_executed.min inst 11,696 + smsp__inst_executed.sum inst 786,494 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 73,232.05 
- smsp__cycles_active.sum cycle 4,686,851 + smsp__cycles_active.avg cycle 74,209.61 + smsp__cycles_active.sum cycle 4,749,415 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.27 + gpu__time_duration.sum usecond 57.89 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -925,24 +987,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.50 - smsp__inst_executed.max inst 12,488 - smsp__inst_executed.min inst 12,072 - smsp__inst_executed.sum inst 786,528 + smsp__inst_executed.avg inst 12,289.61 + smsp__inst_executed.max inst 12,704 + smsp__inst_executed.min inst 11,952 + smsp__inst_executed.sum inst 786,535 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 
70,846.25 - smsp__cycles_active.sum cycle 4,534,160 + smsp__cycles_active.avg cycle 71,674.52 + smsp__cycles_active.sum cycle 4,587,169 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.11 + gpu__time_duration.sum usecond 58.37 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -953,24 +1017,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.84 - smsp__inst_executed.max inst 12,564 - smsp__inst_executed.min inst 12,104 - smsp__inst_executed.sum inst 786,614 + smsp__inst_executed.avg inst 12,291.02 + smsp__inst_executed.max inst 12,520 + smsp__inst_executed.min inst 12,108 + smsp__inst_executed.sum inst 786,625 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg 
cycle 70,881.05 - smsp__cycles_active.sum cycle 4,536,387 + smsp__cycles_active.avg cycle 71,742.84 + smsp__cycles_active.sum cycle 4,591,542 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.40 + gpu__time_duration.sum usecond 58.62 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -981,24 +1047,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.73 - smsp__inst_executed.max inst 12,757 - smsp__inst_executed.min inst 11,970 - smsp__inst_executed.sum inst 786,799 + smsp__inst_executed.avg inst 12,293.81 + smsp__inst_executed.max inst 12,676 + smsp__inst_executed.min inst 12,021 + smsp__inst_executed.sum inst 786,804 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - 
smsp__cycles_active.avg cycle 71,142.94 - smsp__cycles_active.sum cycle 4,553,148 + smsp__cycles_active.avg cycle 71,224 + smsp__cycles_active.sum cycle 4,558,336 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.95 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1009,24 +1077,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.62 - smsp__inst_executed.max inst 12,553 - smsp__inst_executed.min inst 12,119 - smsp__inst_executed.sum inst 787,112 + smsp__inst_executed.avg inst 12,298.45 + smsp__inst_executed.max inst 12,492 + smsp__inst_executed.min inst 11,896 + smsp__inst_executed.sum inst 787,101 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - 
smsp__cycles_active.avg cycle 70,189.52 - smsp__cycles_active.sum cycle 4,492,129 + smsp__cycles_active.avg cycle 71,438.62 + smsp__cycles_active.sum cycle 4,572,072 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.71 + gpu__time_duration.sum usecond 59.46 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1038,51 +1108,55 @@ smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.52 - smsp__inst_executed.max inst 12,538 - smsp__inst_executed.min inst 12,074 + smsp__inst_executed.max inst 12,762 + smsp__inst_executed.min inst 11,951 smsp__inst_executed.sum inst 787,809 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,879.23 - smsp__cycles_active.sum cycle 4,664,271 + smsp__cycles_active.avg cycle 73,469.19 + 
smsp__cycles_active.sum cycle 4,702,028 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.42 + gpu__time_duration.sum usecond 228 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,007 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,299 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,768 - smsp__average_warp_latency_issue_stalled_barrier.pct % 124,557.10 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.57 - smsp__inst_executed.avg inst 189,303.22 - smsp__inst_executed.max inst 192,317 - smsp__inst_executed.min inst 186,277 - smsp__inst_executed.sum inst 12,115,406 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96 - 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 315,741.19 - smsp__cycles_active.sum cycle 20,207,436 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,690.44 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,977 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,373 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,047 + smsp__average_warp_latency_issue_stalled_barrier.pct % 116,796.43 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,167.96 + smsp__inst_executed.avg inst 189,032.39 + smsp__inst_executed.max inst 192,016 + smsp__inst_executed.min inst 186,010 + smsp__inst_executed.sum inst 12,098,073 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.20 + smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 315,448.41 + smsp__cycles_active.sum cycle 20,188,698 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.40 + gpu__time_duration.sum usecond 58.66 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1093,24 +1167,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,287.92 - smsp__inst_executed.max inst 12,648 - smsp__inst_executed.min inst 11,912 - smsp__inst_executed.sum inst 786,427 + smsp__inst_executed.avg inst 12,288.34 + smsp__inst_executed.max inst 12,672 + smsp__inst_executed.min inst 12,084 + smsp__inst_executed.sum inst 786,454 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,978.88 - smsp__cycles_active.sum cycle 4,606,648 + smsp__cycles_active.avg cycle 72,637.31 + smsp__cycles_active.sum cycle 4,648,788 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.62 + gpu__time_duration.sum usecond 59.23 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1121,24 +1197,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.30 - smsp__inst_executed.max inst 12,848 + smsp__inst_executed.avg inst 12,288.91 + smsp__inst_executed.max inst 12,665 smsp__inst_executed.min inst 11,904 - smsp__inst_executed.sum inst 786,451 + smsp__inst_executed.sum inst 786,490 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,708.22 - smsp__cycles_active.sum cycle 4,589,326 + smsp__cycles_active.avg cycle 72,738.33 + smsp__cycles_active.sum cycle 4,655,253 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 60.19 + gpu__time_duration.sum usecond 60.22 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1149,24 +1227,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.11 - smsp__inst_executed.max inst 12,876 - smsp__inst_executed.min inst 11,688 - smsp__inst_executed.sum inst 786,503 + smsp__inst_executed.avg inst 12,289.70 + smsp__inst_executed.max inst 12,664 + smsp__inst_executed.min inst 11,496 + smsp__inst_executed.sum inst 786,541 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 73,332.14 - smsp__cycles_active.sum cycle 4,693,257 + smsp__cycles_active.avg cycle 74,202.39 + smsp__cycles_active.sum cycle 4,748,953 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.50 + gpu__time_duration.sum usecond 58.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1177,17 +1257,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.89 - smsp__inst_executed.max inst 12,507 - smsp__inst_executed.min inst 12,092 - smsp__inst_executed.sum inst 786,489 + smsp__inst_executed.avg inst 12,289.03 + smsp__inst_executed.max inst 12,656 + smsp__inst_executed.min inst 11,886 + smsp__inst_executed.sum inst 786,498 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,441.14 - smsp__cycles_active.sum cycle 4,508,233 + smsp__cycles_active.avg cycle 71,574 + smsp__cycles_active.sum cycle 4,580,736 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.30 @@ -1195,6 +1275,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1205,24 +1287,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,290.69 - smsp__inst_executed.max inst 12,682 - smsp__inst_executed.min inst 11,866 - smsp__inst_executed.sum inst 786,604 + smsp__inst_executed.avg inst 12,290.56 + smsp__inst_executed.max inst 12,493 + smsp__inst_executed.min inst 12,052 + smsp__inst_executed.sum inst 786,596 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,768.55 - smsp__cycles_active.sum cycle 4,529,187 + smsp__cycles_active.avg cycle 71,559.62 + smsp__cycles_active.sum cycle 4,579,816 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.62 + gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1233,24 +1317,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.67 - smsp__inst_executed.max inst 12,534 - smsp__inst_executed.min inst 11,732 - smsp__inst_executed.sum inst 786,795 + smsp__inst_executed.avg inst 12,293.20 + smsp__inst_executed.max inst 12,558 + smsp__inst_executed.min inst 12,110 + smsp__inst_executed.sum inst 786,765 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,007.56 - smsp__cycles_active.sum cycle 4,544,484 + smsp__cycles_active.avg cycle 71,728.27 + smsp__cycles_active.sum cycle 4,590,609 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.05 + gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1261,24 +1347,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,299.09 - smsp__inst_executed.max inst 12,656 - smsp__inst_executed.min inst 11,912 - smsp__inst_executed.sum inst 787,142 + smsp__inst_executed.avg inst 12,297.78 + smsp__inst_executed.max inst 12,685 + smsp__inst_executed.min inst 12,030 + smsp__inst_executed.sum inst 787,058 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,781.25 - smsp__cycles_active.sum cycle 4,530,000 + smsp__cycles_active.avg cycle 71,361.31 + smsp__cycles_active.sum cycle 4,567,124 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.14 + gpu__time_duration.sum usecond 59.58 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1289,52 +1377,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.02 - smsp__inst_executed.max inst 12,707 - smsp__inst_executed.min inst 11,847 - smsp__inst_executed.sum inst 787,777 + smsp__inst_executed.avg inst 12,308.97 + smsp__inst_executed.max inst 12,728 + smsp__inst_executed.min inst 12,067 + smsp__inst_executed.sum inst 787,774 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,505.88 - smsp__cycles_active.sum cycle 4,640,376 + smsp__cycles_active.avg cycle 73,261.38 + smsp__cycles_active.sum cycle 4,688,728 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.14 + gpu__time_duration.sum usecond 228.22 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,666.06 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,013 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,348 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,657 - smsp__average_warp_latency_issue_stalled_barrier.pct % 124,275.15 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,242.75 - smsp__inst_executed.avg inst 189,315.86 - smsp__inst_executed.max inst 192,371 - smsp__inst_executed.min inst 186,294 - smsp__inst_executed.sum inst 12,116,215 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.90 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,297.72 - smsp__cycles_active.sum cycle 20,243,054 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,686.12 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,951 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,409 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,978 + smsp__average_warp_latency_issue_stalled_barrier.pct % 117,160.55 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,171.61 + smsp__inst_executed.avg inst 189,022.41 + smsp__inst_executed.max inst 192,049 + smsp__inst_executed.min inst 186,033 
+ smsp__inst_executed.sum inst 12,097,434 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.23 + smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 315,736.44 + smsp__cycles_active.sum cycle 20,207,132 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 60.42 + gpu__time_duration.sum usecond 60.86 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1345,24 +1437,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.20 - smsp__inst_executed.max inst 12,484 - smsp__inst_executed.min inst 12,092 - smsp__inst_executed.sum inst 786,445 + smsp__inst_executed.avg inst 12,288.12 + smsp__inst_executed.max inst 12,680 + smsp__inst_executed.min inst 11,716 + smsp__inst_executed.sum inst 786,440 
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 74,382.31 - smsp__cycles_active.sum cycle 4,760,468 + smsp__cycles_active.avg cycle 74,287.59 + smsp__cycles_active.sum cycle 4,754,406 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.88 + gpu__time_duration.sum usecond 58.78 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1373,24 +1467,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.11 - smsp__inst_executed.max inst 12,484 - smsp__inst_executed.min inst 11,716 - smsp__inst_executed.sum inst 786,439 + smsp__inst_executed.avg inst 12,288.20 + smsp__inst_executed.max inst 12,672 + smsp__inst_executed.min inst 12,092 + smsp__inst_executed.sum inst 
786,445 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,860.06 - smsp__cycles_active.sum cycle 4,599,044 + smsp__cycles_active.avg cycle 72,712.78 + smsp__cycles_active.sum cycle 4,653,618 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.04 + gpu__time_duration.sum usecond 58.56 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1401,24 +1497,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.05 - smsp__inst_executed.max inst 12,664 - smsp__inst_executed.min inst 11,700 - smsp__inst_executed.sum inst 786,435 + smsp__inst_executed.avg inst 12,287.97 + smsp__inst_executed.max inst 12,860 + smsp__inst_executed.min inst 12,094 + smsp__inst_executed.sum 
inst 786,430 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,882.38 - smsp__cycles_active.sum cycle 4,600,472 + smsp__cycles_active.avg cycle 71,055.23 + smsp__cycles_active.sum cycle 4,547,535 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 60.13 + gpu__time_duration.sum usecond 60.32 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1429,24 +1527,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.81 - smsp__inst_executed.max inst 12,870 - smsp__inst_executed.min inst 11,908 - smsp__inst_executed.sum inst 786,484 + smsp__inst_executed.avg inst 12,288.66 + smsp__inst_executed.max inst 12,509 + smsp__inst_executed.min inst 11,904 + 
smsp__inst_executed.sum inst 786,474 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 73,247.75 - smsp__cycles_active.sum cycle 4,687,856 + smsp__cycles_active.avg cycle 72,694.91 + smsp__cycles_active.sum cycle 4,652,474 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.89 + gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1457,24 +1557,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.59 - smsp__inst_executed.max inst 12,494 - smsp__inst_executed.min inst 11,898 - smsp__inst_executed.sum inst 786,534 + smsp__inst_executed.avg inst 12,289.78 + smsp__inst_executed.max inst 12,493 + smsp__inst_executed.min inst 12,050 + smsp__inst_executed.sum inst 786,546 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,630.66 - smsp__cycles_active.sum cycle 4,520,362 + smsp__cycles_active.avg cycle 70,318.98 + smsp__cycles_active.sum cycle 4,500,415 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.14 + gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1485,24 +1587,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,291.27 - smsp__inst_executed.max inst 12,510 - smsp__inst_executed.min inst 12,082 - smsp__inst_executed.sum inst 786,641 + smsp__inst_executed.avg inst 12,290.84 + smsp__inst_executed.max inst 12,691 + smsp__inst_executed.min inst 12,005 + smsp__inst_executed.sum inst 786,614 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,548.77 - smsp__cycles_active.sum cycle 4,515,121 + smsp__cycles_active.avg cycle 70,641.02 + smsp__cycles_active.sum cycle 4,521,025 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.66 + gpu__time_duration.sum usecond 58.59 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1514,23 +1618,25 @@ smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,294.64 - smsp__inst_executed.max inst 12,656 - smsp__inst_executed.min inst 11,924 + smsp__inst_executed.max inst 12,527 + smsp__inst_executed.min inst 12,057 smsp__inst_executed.sum inst 786,857 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,171.45 - smsp__cycles_active.sum cycle 4,554,973 + smsp__cycles_active.avg cycle 72,218 + smsp__cycles_active.sum cycle 4,621,952 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 57.86 + gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) 
n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1541,24 +1647,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,301.05 - smsp__inst_executed.max inst 12,725 - smsp__inst_executed.min inst 11,871 - smsp__inst_executed.sum inst 787,267 + smsp__inst_executed.avg inst 12,300.36 + smsp__inst_executed.max inst 12,681 + smsp__inst_executed.min inst 11,918 + smsp__inst_executed.sum inst 787,223 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,490.50 - smsp__cycles_active.sum cycle 4,511,392 + smsp__cycles_active.avg cycle 71,114.91 + smsp__cycles_active.sum cycle 4,551,354 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 59.17 + gpu__time_duration.sum usecond 59.42 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1569,52 +1677,56 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,316.05 - smsp__inst_executed.max inst 12,594 - smsp__inst_executed.min inst 11,865 - smsp__inst_executed.sum inst 788,227 + smsp__inst_executed.avg inst 12,315.06 + smsp__inst_executed.max inst 12,754 + smsp__inst_executed.min inst 11,731 + smsp__inst_executed.sum inst 788,164 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,533.61 - smsp__cycles_active.sum cycle 4,642,151 + smsp__cycles_active.avg cycle 73,447.34 + smsp__cycles_active.sum cycle 4,700,630 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 231.55 + gpu__time_duration.sum usecond 231.01 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum 
(!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,681.88 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,120 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,332 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,910 - smsp__average_warp_latency_issue_stalled_barrier.pct % 123,982.60 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.83 - smsp__inst_executed.avg inst 189,283.48 - smsp__inst_executed.max inst 192,309 - smsp__inst_executed.min inst 186,242 - smsp__inst_executed.sum inst 12,114,143 - smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.88 - smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 - smsp__cycles_active.avg cycle 316,209.50 - smsp__cycles_active.sum cycle 20,237,408 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,667.44 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,990 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,305 + l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,679 + smsp__average_warp_latency_issue_stalled_barrier.pct % 118,065.21 + smsp__average_warp_latency_issue_stalled_barrier.ratio 1,180.65 + smsp__inst_executed.avg inst 189,024.22 + smsp__inst_executed.max inst 192,046 + smsp__inst_executed.min inst 186,039 + smsp__inst_executed.sum inst 12,097,550 + smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.35 + 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 + smsp__cycles_active.avg cycle 315,105.62 + smsp__cycles_active.sum cycle 20,166,760 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 56.70 + gpu__time_duration.sum usecond 56.86 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1625,24 +1737,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,287.97 - smsp__inst_executed.max inst 12,492 + smsp__inst_executed.avg inst 12,288.06 + smsp__inst_executed.max inst 12,680 smsp__inst_executed.min inst 11,896 - smsp__inst_executed.sum inst 786,430 + smsp__inst_executed.sum inst 786,436 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 68,714 - 
smsp__cycles_active.sum cycle 4,397,696 + smsp__cycles_active.avg cycle 69,539.08 + smsp__cycles_active.sum cycle 4,450,501 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 60.64 + gpu__time_duration.sum usecond 60.74 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1653,24 +1767,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.14 - smsp__inst_executed.max inst 12,844 - smsp__inst_executed.min inst 11,528 - smsp__inst_executed.sum inst 786,441 + smsp__inst_executed.avg inst 12,288.19 + smsp__inst_executed.max inst 12,488 + smsp__inst_executed.min inst 11,700 + smsp__inst_executed.sum inst 786,444 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 
74,171.33 - smsp__cycles_active.sum cycle 4,746,965 + smsp__cycles_active.avg cycle 75,362.86 + smsp__cycles_active.sum cycle 4,823,223 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.72 + gpu__time_duration.sum usecond 58.75 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1681,24 +1797,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.55 - smsp__inst_executed.max inst 12,684 - smsp__inst_executed.min inst 11,884 - smsp__inst_executed.sum inst 786,467 + smsp__inst_executed.avg inst 12,288.42 + smsp__inst_executed.max inst 12,668 + smsp__inst_executed.min inst 11,916 + smsp__inst_executed.sum inst 786,459 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg 
cycle 71,696.42 - smsp__cycles_active.sum cycle 4,588,571 + smsp__cycles_active.avg cycle 72,588.78 + smsp__cycles_active.sum cycle 4,645,682 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.94 + gpu__time_duration.sum usecond 59.01 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1709,24 +1827,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.08 - smsp__inst_executed.max inst 12,660 - smsp__inst_executed.min inst 11,724 - smsp__inst_executed.sum inst 786,437 + smsp__inst_executed.avg inst 12,287.92 + smsp__inst_executed.max inst 12,489 + smsp__inst_executed.min inst 11,942 + smsp__inst_executed.sum inst 786,427 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - 
smsp__cycles_active.avg cycle 71,640.89 - smsp__cycles_active.sum cycle 4,585,017 + smsp__cycles_active.avg cycle 72,949.64 + smsp__cycles_active.sum cycle 4,668,777 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 60.06 + gpu__time_duration.sum usecond 59.97 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1737,24 +1857,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,288.06 - smsp__inst_executed.max inst 12,524 - smsp__inst_executed.min inst 11,900 - smsp__inst_executed.sum inst 786,436 + smsp__inst_executed.avg inst 12,288.47 + smsp__inst_executed.max inst 12,707 + smsp__inst_executed.min inst 11,904 + smsp__inst_executed.sum inst 786,462 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 
0 - smsp__cycles_active.avg cycle 73,132.61 - smsp__cycles_active.sum cycle 4,680,487 + smsp__cycles_active.avg cycle 74,444.11 + smsp__cycles_active.sum cycle 4,764,423 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.08 + gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1765,24 +1887,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,289.61 - smsp__inst_executed.max inst 12,634 - smsp__inst_executed.min inst 11,884 - smsp__inst_executed.sum inst 786,535 + smsp__inst_executed.avg inst 12,289.03 + smsp__inst_executed.max inst 12,510 + smsp__inst_executed.min inst 11,724 + smsp__inst_executed.sum inst 786,498 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,620.73 - smsp__cycles_active.sum cycle 4,519,727 + smsp__cycles_active.avg cycle 70,381.17 + smsp__cycles_active.sum cycle 4,504,395 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.24 + gpu__time_duration.sum usecond 58.34 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1793,24 +1917,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,291.28 - smsp__inst_executed.max inst 12,704 - smsp__inst_executed.min inst 11,892 - smsp__inst_executed.sum inst 786,642 + smsp__inst_executed.avg inst 12,290.17 + smsp__inst_executed.max inst 12,557 + smsp__inst_executed.min inst 12,092 + smsp__inst_executed.sum inst 786,571 smsp__warp_issue_stalled_barrier_per_warp_active.pct 
% 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 71,037.52 - smsp__cycles_active.sum cycle 4,546,401 + smsp__cycles_active.avg cycle 70,414.30 + smsp__cycles_active.sum cycle 4,506,515 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.82 + gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1821,24 +1947,26 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,293.23 - smsp__inst_executed.max inst 12,931 - smsp__inst_executed.min inst 11,840 - smsp__inst_executed.sum inst 786,767 + smsp__inst_executed.avg inst 12,293.06 + smsp__inst_executed.max inst 12,927 + smsp__inst_executed.min inst 11,694 + smsp__inst_executed.sum inst 786,756 
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,840.56 - smsp__cycles_active.sum cycle 4,533,796 + smsp__cycles_active.avg cycle 70,712.52 + smsp__cycles_active.sum cycle 4,525,601 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 58.24 + gpu__time_duration.sum usecond 57.92 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1849,17 +1977,17 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,298.42 - smsp__inst_executed.max inst 12,587 - smsp__inst_executed.min inst 11,966 - smsp__inst_executed.sum inst 787,099 + smsp__inst_executed.avg inst 12,298.02 + smsp__inst_executed.max inst 12,742 + smsp__inst_executed.min inst 11,876 + smsp__inst_executed.sum inst 
787,073 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 70,543.30 - smsp__cycles_active.sum cycle 4,514,771 + smsp__cycles_active.avg cycle 70,394.77 + smsp__cycles_active.sum cycle 4,505,265 ---------------------------------------------------------------------- --------------- ------------------------------ - void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.39 @@ -1867,6 +1995,8 @@ l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.23 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 @@ -1877,41 +2007,43 @@ l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 - smsp__inst_executed.avg inst 12,309.44 - smsp__inst_executed.max inst 12,751 - smsp__inst_executed.min inst 11,714 - smsp__inst_executed.sum inst 787,804 + smsp__inst_executed.avg inst 12,308.81 + smsp__inst_executed.max inst 12,825 + smsp__inst_executed.min inst 11,716 + smsp__inst_executed.sum inst 787,764 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 - smsp__cycles_active.avg cycle 72,313.14 - smsp__cycles_active.sum cycle 4,628,041 + smsp__cycles_active.avg cycle 72,582.98 + smsp__cycles_active.sum cycle 4,645,311 ---------------------------------------------------------------------- --------------- ------------------------------ - void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 + void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ - gpu__time_duration.sum usecond 228.54 + gpu__time_duration.sum usecond 227.52 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50 + l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,691.25 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,988 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,367 - l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,060 - smsp__average_warp_latency_issue_stalled_barrier.pct % 123,962.42 - smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.62 - smsp__inst_executed.avg inst 189,051.73 - smsp__inst_executed.max inst 192,054 - 
#!/bin/bash
#
# extract_results.sh - tabulate timing results from a directory of run logs.
#
# Usage: extract_results.sh <log-directory>
#
# Scans the files directly inside <log-directory>. Prints the GPU name and
# block size taken from the first log line mentioning them, then, for every
# code version (V0, V1, V2) and every Q in 20..30, one row with the "Total",
# "Mem-xch" and "Sorting" timings of the matching log. "N/A" is printed when
# no log matches or the log has no such timing line.
#
# Exit status: 0 on success, 1 on a missing or invalid directory argument.

# Print the usage line on stderr.
usage() {
  printf 'Usage: %s <log-directory>\n' "$0" >&2
}

#######################################
# Print the third ": "-separated field of the first line containing a label.
# Arguments: $1 - label to search for (e.g. "GPU:")
#            $2 - directory holding the logs
# Outputs:   the value on stdout (nothing when the label never appears)
#######################################
log_property() {
  local label=$1 dir=$2
  grep -h -- "$label" "$dir"/* | head -n 1 | awk -F": " '{print $3}'
}

#######################################
# Locate the log written for one code version and one Q value.
# Files are tested one by one (no xargs), so names containing whitespace
# are safe, and only the first match in glob order is reported, so
# duplicated runs cannot produce a multi-file result.
# Arguments: $1 - directory holding the logs
#            $2 - code version (e.g. V0)
#            $3 - Q value (e.g. 20)
# Outputs:   path of the first matching file on stdout
# Returns:   0 when a file matched, 1 otherwise
#######################################
find_log() {
  local dir=$1 version=$2 q=$3
  local candidate
  for candidate in "$dir"/*; do
    # Skip sub-directories and the literal pattern left by an unmatched glob.
    [[ -f "$candidate" ]] || continue
    if grep -q -- "\[Log\]: Code version: $version" "$candidate" \
      && grep -q -- "Q=$q" "$candidate"; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

#######################################
# Print fields 4 and 5 of the first "[Timing] <label>" line of a log.
# Only the first such line is used so a repeated timing line cannot inject
# a multi-line value into the table.
# Arguments: $1 - timing label (Total, Mem-xch or Sorting)
#            $2 - log file
# Outputs:   "<field 4> <field 5>" on stdout, or nothing when absent
#######################################
timing_value() {
  local label=$1 file=$2
  awk -v tag="[Timing] $label" 'index($0, tag) { print $4, $5; exit }' "$file"
}

#######################################
# Entry point: validate the argument and print the report.
# Arguments: $1 - directory holding the logs
# Returns:   0 on success, 1 on a usage error
#######################################
main() {
  # Check if a directory argument is given
  if [[ -z "${1:-}" ]]; then
    usage
    return 1
  fi

  local log_dir=$1
  if [[ ! -d "$log_dir" ]]; then
    printf 'Error: %s is not a directory\n' "$log_dir" >&2
    usage
    return 1
  fi

  # Extract GPU name and block size (only once)
  local gpu block_size
  gpu=$(log_property "GPU:" "$log_dir")
  block_size=$(log_property "Block size:" "$log_dir")

  # Print Header
  printf "%-20s %s\n" "GPU:" "${gpu:-Unknown}"
  printf "%-20s %s\n" "Block size:" "${block_size:-Unknown}"
  printf "%s\n" "--------------------------------------"

  # Loop through code versions
  local version q log total mem_xch sorting
  for version in V0 V1 V2; do
    printf "\n%-15s %s\n" "Code version:" "$version"
    printf "%-10s %-18s %-18s %-18s\n" "" "Total" "Mem-xch" "Sorting"

    for q in {20..30}; do
      total=""
      mem_xch=""
      sorting=""
      if log=$(find_log "$log_dir" "$version" "$q"); then
        total=$(timing_value "Total" "$log")
        mem_xch=$(timing_value "Mem-xch" "$log")
        sorting=$(timing_value "Sorting" "$log")
      fi

      # Empty values (no log, or no timing line in it) are shown as N/A.
      printf "Q%-2d: %-18s %-18s %-18s\n" \
        "$q" "${total:-N/A}" "${mem_xch:-N/A}" "${sorting:-N/A}"
    done
  done
  return 0
}

# Last command on purpose: the script's exit status is main's return value.
main "$@"