2050 lines
228 KiB
Plaintext
2050 lines
228 KiB
Plaintext
==PROF== Connected to process 38811 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA)
|
|
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes
|
|
==PROF== Disconnected from process 38811
|
|
[38811] bitonicCUDA@127.0.0.1
|
|
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum msecond 1.06
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.91
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 17.11
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 5.48
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 1,052,474.71
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 10,524.75
|
|
smsp__inst_executed.avg inst 770,268.77
|
|
smsp__inst_executed.max inst 770,551
|
|
smsp__inst_executed.min inst 770,034
|
|
smsp__inst_executed.sum inst 49,297,201
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.25
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.23
|
|
smsp__cycles_active.avg cycle 1,463,898.61
|
|
smsp__cycles_active.sum cycle 93,689,511
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.78
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,434.38
|
|
smsp__inst_executed.max inst 13,034
|
|
smsp__inst_executed.min inst 12,078
|
|
smsp__inst_executed.sum inst 795,800
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,392.50
|
|
smsp__cycles_active.sum cycle 4,633,120
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 182.78
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 158,915.20
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,589.15
|
|
smsp__inst_executed.avg inst 132,204.09
|
|
smsp__inst_executed.max inst 134,291
|
|
smsp__inst_executed.min inst 130,034
|
|
smsp__inst_executed.sum inst 8,461,062
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.76
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,696.25
|
|
smsp__cycles_active.sum cycle 16,172,560
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.86
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,300.02
|
|
smsp__inst_executed.max inst 12,527
|
|
smsp__inst_executed.min inst 11,906
|
|
smsp__inst_executed.sum inst 787,201
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,419.20
|
|
smsp__cycles_active.sum cycle 4,570,829
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.58
|
|
smsp__inst_executed.max inst 12,592
|
|
smsp__inst_executed.min inst 12,022
|
|
smsp__inst_executed.sum inst 787,813
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,582.45
|
|
smsp__cycles_active.sum cycle 4,581,277
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.60
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,972.19
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,609.72
|
|
smsp__inst_executed.avg inst 132,207.95
|
|
smsp__inst_executed.max inst 134,315
|
|
smsp__inst_executed.min inst 130,093
|
|
smsp__inst_executed.sum inst 8,461,309
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.08
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 251,877
|
|
smsp__cycles_active.sum cycle 16,120,128
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.97
|
|
smsp__inst_executed.max inst 12,632
|
|
smsp__inst_executed.min inst 11,676
|
|
smsp__inst_executed.sum inst 786,878
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,658.42
|
|
smsp__cycles_active.sum cycle 4,650,139
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.64
|
|
smsp__inst_executed.max inst 12,609
|
|
smsp__inst_executed.min inst 11,896
|
|
smsp__inst_executed.sum inst 787,113
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,467.41
|
|
smsp__cycles_active.sum cycle 4,573,914
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.46
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.77
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 11,895
|
|
smsp__inst_executed.sum inst 787,761
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,717.34
|
|
smsp__cycles_active.sum cycle 4,653,910
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 186.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,835.16
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,598.35
|
|
smsp__inst_executed.avg inst 132,204.20
|
|
smsp__inst_executed.max inst 134,182
|
|
smsp__inst_executed.min inst 130,077
|
|
smsp__inst_executed.sum inst 8,461,069
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.87
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,623.66
|
|
smsp__cycles_active.sum cycle 16,167,914
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.53
|
|
smsp__inst_executed.max inst 12,717
|
|
smsp__inst_executed.min inst 11,826
|
|
smsp__inst_executed.sum inst 786,658
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,072.03
|
|
smsp__cycles_active.sum cycle 4,548,610
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.39
|
|
smsp__inst_executed.max inst 12,679
|
|
smsp__inst_executed.min inst 11,917
|
|
smsp__inst_executed.sum inst 786,777
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,284.91
|
|
smsp__cycles_active.sum cycle 4,626,234
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.83
|
|
smsp__inst_executed.max inst 12,502
|
|
smsp__inst_executed.min inst 12,048
|
|
smsp__inst_executed.sum inst 787,125
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,602.17
|
|
smsp__cycles_active.sum cycle 4,582,539
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.36
|
|
smsp__inst_executed.max inst 12,725
|
|
smsp__inst_executed.min inst 11,813
|
|
smsp__inst_executed.sum inst 787,799
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,676.52
|
|
smsp__cycles_active.sum cycle 4,587,297
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 157,267.79
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.68
|
|
smsp__inst_executed.avg inst 132,200.03
|
|
smsp__inst_executed.max inst 134,283
|
|
smsp__inst_executed.min inst 130,070
|
|
smsp__inst_executed.sum inst 8,460,802
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.56
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,497.38
|
|
smsp__cycles_active.sum cycle 16,159,832
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.03
|
|
smsp__inst_executed.max inst 12,632
|
|
smsp__inst_executed.min inst 12,096
|
|
smsp__inst_executed.sum inst 786,562
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,852.95
|
|
smsp__cycles_active.sum cycle 4,534,589
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.22
|
|
smsp__inst_executed.max inst 12,768
|
|
smsp__inst_executed.min inst 11,984
|
|
smsp__inst_executed.sum inst 786,638
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,387.03
|
|
smsp__cycles_active.sum cycle 4,504,770
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.27
|
|
smsp__inst_executed.max inst 12,615
|
|
smsp__inst_executed.min inst 11,964
|
|
smsp__inst_executed.sum inst 786,769
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,163
|
|
smsp__cycles_active.sum cycle 4,618,432
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.78
|
|
smsp__inst_executed.max inst 12,912
|
|
smsp__inst_executed.min inst 11,741
|
|
smsp__inst_executed.sum inst 787,122
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,542.72
|
|
smsp__cycles_active.sum cycle 4,578,734
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.25
|
|
smsp__inst_executed.max inst 12,746
|
|
smsp__inst_executed.min inst 11,941
|
|
smsp__inst_executed.sum inst 787,792
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,521.23
|
|
smsp__cycles_active.sum cycle 4,577,359
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.66
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,534.14
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,605.34
|
|
smsp__inst_executed.avg inst 132,205.30
|
|
smsp__inst_executed.max inst 134,375
|
|
smsp__inst_executed.min inst 130,047
|
|
smsp__inst_executed.sum inst 8,461,139
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.03
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 251,948.69
|
|
smsp__cycles_active.sum cycle 16,124,716
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.38
|
|
smsp__inst_executed.max inst 12,640
|
|
smsp__inst_executed.min inst 11,952
|
|
smsp__inst_executed.sum inst 786,520
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,646.14
|
|
smsp__cycles_active.sum cycle 4,585,353
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.72
|
|
smsp__inst_executed.max inst 12,532
|
|
smsp__inst_executed.min inst 11,872
|
|
smsp__inst_executed.sum inst 786,542
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,515.61
|
|
smsp__cycles_active.sum cycle 4,576,999
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.82
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.48
|
|
smsp__inst_executed.max inst 12,775
|
|
smsp__inst_executed.min inst 11,898
|
|
smsp__inst_executed.sum inst 786,591
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,662.92
|
|
smsp__cycles_active.sum cycle 4,586,427
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.81
|
|
smsp__inst_executed.max inst 12,753
|
|
smsp__inst_executed.min inst 11,862
|
|
smsp__inst_executed.sum inst 786,804
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,455.17
|
|
smsp__cycles_active.sum cycle 4,637,131
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,299.31
|
|
smsp__inst_executed.max inst 12,673
|
|
smsp__inst_executed.min inst 12,034
|
|
smsp__inst_executed.sum inst 787,156
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,010.81
|
|
smsp__cycles_active.sum cycle 4,544,692
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,307.36
|
|
smsp__inst_executed.max inst 12,544
|
|
smsp__inst_executed.min inst 11,923
|
|
smsp__inst_executed.sum inst 787,671
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,317.72
|
|
smsp__cycles_active.sum cycle 4,628,334
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 183.36
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.70
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,035.27
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.35
|
|
smsp__inst_executed.avg inst 132,214.05
|
|
smsp__inst_executed.max inst 134,326
|
|
smsp__inst_executed.min inst 130,109
|
|
smsp__inst_executed.sum inst 8,461,699
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.87
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,974.50
|
|
smsp__cycles_active.sum cycle 16,190,368
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.69
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.52
|
|
smsp__inst_executed.max inst 12,488
|
|
smsp__inst_executed.min inst 11,936
|
|
smsp__inst_executed.sum inst 786,465
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,501.95
|
|
smsp__cycles_active.sum cycle 4,640,125
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.92
|
|
smsp__inst_executed.max inst 12,484
|
|
smsp__inst_executed.min inst 12,100
|
|
smsp__inst_executed.sum inst 786,427
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,503.14
|
|
smsp__cycles_active.sum cycle 4,576,201
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.14
|
|
smsp__inst_executed.max inst 12,869
|
|
smsp__inst_executed.min inst 11,892
|
|
smsp__inst_executed.sum inst 786,505
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,524.88
|
|
smsp__cycles_active.sum cycle 4,577,592
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.83
|
|
smsp__inst_executed.max inst 12,572
|
|
smsp__inst_executed.min inst 12,020
|
|
smsp__inst_executed.sum inst 786,613
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,350.59
|
|
smsp__cycles_active.sum cycle 4,502,438
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.59
|
|
smsp__inst_executed.max inst 12,681
|
|
smsp__inst_executed.min inst 11,988
|
|
smsp__inst_executed.sum inst 786,726
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,302.25
|
|
smsp__cycles_active.sum cycle 4,563,344
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.89
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.28
|
|
smsp__inst_executed.max inst 12,708
|
|
smsp__inst_executed.min inst 11,898
|
|
smsp__inst_executed.sum inst 787,090
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,771.03
|
|
smsp__cycles_active.sum cycle 4,593,346
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,310.67
|
|
smsp__inst_executed.max inst 12,575
|
|
smsp__inst_executed.min inst 12,060
|
|
smsp__inst_executed.sum inst 787,883
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,137
|
|
smsp__cycles_active.sum cycle 4,680,768
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 184.67
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 161,865.73
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,618.66
|
|
smsp__inst_executed.avg inst 132,202.36
|
|
smsp__inst_executed.max inst 134,344
|
|
smsp__inst_executed.min inst 130,057
|
|
smsp__inst_executed.sum inst 8,460,951
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.11
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,870.69
|
|
smsp__cycles_active.sum cycle 16,183,724
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.23
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.84
|
|
smsp__inst_executed.max inst 12,700
|
|
smsp__inst_executed.min inst 11,884
|
|
smsp__inst_executed.sum inst 786,422
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,958.33
|
|
smsp__cycles_active.sum cycle 4,605,333
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.91
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.59
|
|
smsp__inst_executed.max inst 12,836
|
|
smsp__inst_executed.min inst 11,892
|
|
smsp__inst_executed.sum inst 786,470
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,352.05
|
|
smsp__cycles_active.sum cycle 4,630,531
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.62
|
|
smsp__inst_executed.max inst 12,636
|
|
smsp__inst_executed.min inst 11,942
|
|
smsp__inst_executed.sum inst 786,472
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,507.44
|
|
smsp__cycles_active.sum cycle 4,576,476
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.72
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,662
|
|
smsp__inst_executed.sum inst 786,542
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,165.02
|
|
smsp__cycles_active.sum cycle 4,554,561
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291
|
|
smsp__inst_executed.max inst 12,677
|
|
smsp__inst_executed.min inst 11,882
|
|
smsp__inst_executed.sum inst 786,624
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,224.42
|
|
smsp__cycles_active.sum cycle 4,494,363
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.19
|
|
smsp__inst_executed.max inst 12,761
|
|
smsp__inst_executed.min inst 11,776
|
|
smsp__inst_executed.sum inst 786,828
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,352.88
|
|
smsp__cycles_active.sum cycle 4,630,584
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,300.17
|
|
smsp__inst_executed.max inst 12,699
|
|
smsp__inst_executed.min inst 11,741
|
|
smsp__inst_executed.sum inst 787,211
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,852.11
|
|
smsp__cycles_active.sum cycle 4,598,535
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.69
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,305.34
|
|
smsp__inst_executed.max inst 12,557
|
|
smsp__inst_executed.min inst 12,098
|
|
smsp__inst_executed.sum inst 787,542
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,448.42
|
|
smsp__cycles_active.sum cycle 4,636,699
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 183.71
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,410.38
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.10
|
|
smsp__inst_executed.avg inst 132,239.30
|
|
smsp__inst_executed.max inst 134,389
|
|
smsp__inst_executed.min inst 130,159
|
|
smsp__inst_executed.sum inst 8,463,315
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.78
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 253,259.25
|
|
smsp__cycles_active.sum cycle 16,208,592
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.68
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.97
|
|
smsp__inst_executed.max inst 12,696
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,430
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,186.48
|
|
smsp__cycles_active.sum cycle 4,683,935
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.12
|
|
smsp__inst_executed.max inst 12,680
|
|
smsp__inst_executed.min inst 12,068
|
|
smsp__inst_executed.sum inst 786,440
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,841.56
|
|
smsp__cycles_active.sum cycle 4,597,860
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.20
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.52
|
|
smsp__inst_executed.max inst 12,664
|
|
smsp__inst_executed.min inst 11,916
|
|
smsp__inst_executed.sum inst 786,465
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,395.02
|
|
smsp__cycles_active.sum cycle 4,569,281
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.37
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.83
|
|
smsp__inst_executed.max inst 12,652
|
|
smsp__inst_executed.min inst 12,092
|
|
smsp__inst_executed.sum inst 786,485
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,939.75
|
|
smsp__cycles_active.sum cycle 4,540,144
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.27
|
|
smsp__inst_executed.max inst 12,671
|
|
smsp__inst_executed.min inst 11,948
|
|
smsp__inst_executed.sum inst 786,513
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,398.56
|
|
smsp__cycles_active.sum cycle 4,505,508
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.89
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.94
|
|
smsp__inst_executed.max inst 12,634
|
|
smsp__inst_executed.min inst 11,856
|
|
smsp__inst_executed.sum inst 786,620
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,430.73
|
|
smsp__cycles_active.sum cycle 4,571,567
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.91
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.67
|
|
smsp__inst_executed.max inst 12,714
|
|
smsp__inst_executed.min inst 11,973
|
|
smsp__inst_executed.sum inst 786,859
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,485.34
|
|
smsp__cycles_active.sum cycle 4,639,062
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,300.77
|
|
smsp__inst_executed.max inst 12,715
|
|
smsp__inst_executed.min inst 11,880
|
|
smsp__inst_executed.sum inst 787,249
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,482.33
|
|
smsp__cycles_active.sum cycle 4,574,869
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.40
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,305.27
|
|
smsp__inst_executed.max inst 12,723
|
|
smsp__inst_executed.min inst 11,888
|
|
smsp__inst_executed.sum inst 787,537
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,478.97
|
|
smsp__cycles_active.sum cycle 4,574,654
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 182.66
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,442.50
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.42
|
|
smsp__inst_executed.avg inst 132,250.77
|
|
smsp__inst_executed.max inst 134,372
|
|
smsp__inst_executed.min inst 130,113
|
|
smsp__inst_executed.sum inst 8,464,049
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.82
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,682.78
|
|
smsp__cycles_active.sum cycle 16,171,698
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 56.86
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.31
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 12,072
|
|
smsp__inst_executed.sum inst 786,452
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 69,556.61
|
|
smsp__cycles_active.sum cycle 4,451,623
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.84
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.48
|
|
smsp__inst_executed.max inst 12,868
|
|
smsp__inst_executed.min inst 11,704
|
|
smsp__inst_executed.sum inst 786,399
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,568.34
|
|
smsp__cycles_active.sum cycle 4,708,374
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.88
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.06
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,712
|
|
smsp__inst_executed.sum inst 786,436
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,217.27
|
|
smsp__cycles_active.sum cycle 4,685,905
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.39
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.70
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 12,060
|
|
smsp__inst_executed.sum inst 786,477
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,586.44
|
|
smsp__cycles_active.sum cycle 4,645,532
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.72
|
|
smsp__inst_executed.max inst 12,496
|
|
smsp__inst_executed.min inst 12,076
|
|
smsp__inst_executed.sum inst 786,414
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,872.52
|
|
smsp__cycles_active.sum cycle 4,599,841
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.17
|
|
smsp__inst_executed.max inst 12,692
|
|
smsp__inst_executed.min inst 11,950
|
|
smsp__inst_executed.sum inst 786,507
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,569.31
|
|
smsp__cycles_active.sum cycle 4,580,436
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.83
|
|
smsp__inst_executed.max inst 12,676
|
|
smsp__inst_executed.min inst 12,066
|
|
smsp__inst_executed.sum inst 786,613
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,942.83
|
|
smsp__cycles_active.sum cycle 4,540,341
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.78
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.48
|
|
smsp__inst_executed.max inst 12,668
|
|
smsp__inst_executed.min inst 11,864
|
|
smsp__inst_executed.sum inst 786,719
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,652.72
|
|
smsp__cycles_active.sum cycle 4,585,774
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.92
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,297.83
|
|
smsp__inst_executed.max inst 12,507
|
|
smsp__inst_executed.min inst 12,080
|
|
smsp__inst_executed.sum inst 787,061
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,989.52
|
|
smsp__cycles_active.sum cycle 4,607,329
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.70
|
|
smsp__inst_executed.max inst 12,667
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 787,821
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,280.86
|
|
smsp__cycles_active.sum cycle 4,625,975
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 183.58
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.70
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.70
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,496.29
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.96
|
|
smsp__inst_executed.avg inst 131,966.41
|
|
smsp__inst_executed.max inst 134,088
|
|
smsp__inst_executed.min inst 129,856
|
|
smsp__inst_executed.sum inst 8,445,850
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.79
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 253,055.50
|
|
smsp__cycles_active.sum cycle 16,195,552
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|