2050 lines
228 KiB
Plaintext
2050 lines
228 KiB
Plaintext
==PROF== Connected to process 38875 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
|
|
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes
|
|
==PROF== Disconnected from process 38875
|
|
[38875] bitonicCUDA@127.0.0.1
|
|
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum msecond 1.21
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,970.88
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,151
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,739
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,534
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 637,077.10
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 6,370.77
|
|
smsp__inst_executed.avg inst 1,030,627.31
|
|
smsp__inst_executed.max inst 1,030,849
|
|
smsp__inst_executed.min inst 1,030,423
|
|
smsp__inst_executed.sum inst 65,960,148
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.38
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 1,665,720.27
|
|
smsp__cycles_active.sum cycle 106,606,097
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.87
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.31
|
|
smsp__inst_executed.max inst 12,551
|
|
smsp__inst_executed.min inst 12,072
|
|
smsp__inst_executed.sum inst 787,796
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,981.36
|
|
smsp__cycles_active.sum cycle 4,670,807
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 230.75
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677.81
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,960
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,401
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,845
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 116,386.30
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,163.86
|
|
smsp__inst_executed.avg inst 189,038.39
|
|
smsp__inst_executed.max inst 192,131
|
|
smsp__inst_executed.min inst 185,953
|
|
smsp__inst_executed.sum inst 12,098,457
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.15
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 315,800.30
|
|
smsp__cycles_active.sum cycle 20,211,219
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,299.28
|
|
smsp__inst_executed.max inst 12,618
|
|
smsp__inst_executed.min inst 11,935
|
|
smsp__inst_executed.sum inst 787,154
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,789.86
|
|
smsp__cycles_active.sum cycle 4,530,551
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.81
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.20
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.90
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.86
|
|
smsp__inst_executed.max inst 12,524
|
|
smsp__inst_executed.min inst 11,866
|
|
smsp__inst_executed.sum inst 787,831
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,000.58
|
|
smsp__cycles_active.sum cycle 4,736,037
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 227.78
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,678.44
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,985
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,420
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,855
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 116,807.09
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,168.07
|
|
smsp__inst_executed.avg inst 189,034.89
|
|
smsp__inst_executed.max inst 191,946
|
|
smsp__inst_executed.min inst 186,150
|
|
smsp__inst_executed.sum inst 12,098,233
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.15
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 316,724.77
|
|
smsp__cycles_active.sum cycle 20,270,385
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:22, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.47
|
|
smsp__inst_executed.max inst 12,744
|
|
smsp__inst_executed.min inst 12,048
|
|
smsp__inst_executed.sum inst 786,718
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,511.59
|
|
smsp__cycles_active.sum cycle 4,512,742
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.75
|
|
smsp__inst_executed.max inst 12,734
|
|
smsp__inst_executed.min inst 11,912
|
|
smsp__inst_executed.sum inst 787,120
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,007.67
|
|
smsp__cycles_active.sum cycle 4,544,491
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.55
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.34
|
|
smsp__inst_executed.max inst 12,774
|
|
smsp__inst_executed.min inst 11,741
|
|
smsp__inst_executed.sum inst 787,798
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,984.34
|
|
smsp__cycles_active.sum cycle 4,734,998
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.67
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,686.81
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,018
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,390
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,989
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 117,091.55
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,170.92
|
|
smsp__inst_executed.avg inst 189,041.44
|
|
smsp__inst_executed.max inst 191,914
|
|
smsp__inst_executed.min inst 186,023
|
|
smsp__inst_executed.sum inst 12,098,652
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.19
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 316,433.83
|
|
smsp__cycles_active.sum cycle 20,251,765
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.77
|
|
smsp__inst_executed.max inst 12,524
|
|
smsp__inst_executed.min inst 12,028
|
|
smsp__inst_executed.sum inst 786,609
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,664.83
|
|
smsp__cycles_active.sum cycle 4,586,549
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.66
|
|
smsp__inst_executed.max inst 12,890
|
|
smsp__inst_executed.min inst 11,524
|
|
smsp__inst_executed.sum inst 786,730
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,510.14
|
|
smsp__cycles_active.sum cycle 4,576,649
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.09
|
|
smsp__inst_executed.max inst 12,729
|
|
smsp__inst_executed.min inst 11,878
|
|
smsp__inst_executed.sum inst 787,078
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,087.36
|
|
smsp__cycles_active.sum cycle 4,485,591
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.65
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.77
|
|
smsp__inst_executed.max inst 12,772
|
|
smsp__inst_executed.min inst 11,917
|
|
smsp__inst_executed.sum inst 787,761
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,945.83
|
|
smsp__cycles_active.sum cycle 4,668,533
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 231.20
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,679.50
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,958
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,366
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,872
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 117,303.63
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,173.04
|
|
smsp__inst_executed.avg inst 189,034.44
|
|
smsp__inst_executed.max inst 192,047
|
|
smsp__inst_executed.min inst 186,006
|
|
smsp__inst_executed.sum inst 12,098,204
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.29
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 314,652.92
|
|
smsp__cycles_active.sum cycle 20,137,787
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.17
|
|
smsp__inst_executed.max inst 12,704
|
|
smsp__inst_executed.min inst 11,772
|
|
smsp__inst_executed.sum inst 786,507
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,558.91
|
|
smsp__cycles_active.sum cycle 4,515,770
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.81
|
|
smsp__inst_executed.max inst 12,711
|
|
smsp__inst_executed.min inst 11,886
|
|
smsp__inst_executed.sum inst 786,612
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,430.86
|
|
smsp__cycles_active.sum cycle 4,571,575
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:23, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.46
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.34
|
|
smsp__inst_executed.max inst 12,505
|
|
smsp__inst_executed.min inst 11,800
|
|
smsp__inst_executed.sum inst 786,774
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,942.39
|
|
smsp__cycles_active.sum cycle 4,604,313
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.81
|
|
smsp__inst_executed.max inst 12,649
|
|
smsp__inst_executed.min inst 12,114
|
|
smsp__inst_executed.sum inst 787,124
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,234.89
|
|
smsp__cycles_active.sum cycle 4,559,033
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.39
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.92
|
|
smsp__inst_executed.max inst 12,739
|
|
smsp__inst_executed.min inst 11,660
|
|
smsp__inst_executed.sum inst 787,771
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,053.95
|
|
smsp__cycles_active.sum cycle 4,675,453
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.06
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,672.12
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,021
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,352
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,754
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 116,983.48
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,169.83
|
|
smsp__inst_executed.avg inst 189,031.84
|
|
smsp__inst_executed.max inst 192,116
|
|
smsp__inst_executed.min inst 185,910
|
|
smsp__inst_executed.sum inst 12,098,038
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.18
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 316,447.16
|
|
smsp__cycles_active.sum cycle 20,252,618
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.10
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.73
|
|
smsp__inst_executed.max inst 12,512
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 786,479
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,125.95
|
|
smsp__cycles_active.sum cycle 4,744,061
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.21
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.52
|
|
smsp__inst_executed.max inst 12,659
|
|
smsp__inst_executed.min inst 12,064
|
|
smsp__inst_executed.sum inst 786,529
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,326.66
|
|
smsp__cycles_active.sum cycle 4,564,906
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.06
|
|
smsp__inst_executed.max inst 12,821
|
|
smsp__inst_executed.min inst 11,676
|
|
smsp__inst_executed.sum inst 786,564
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,406.72
|
|
smsp__cycles_active.sum cycle 4,570,030
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.75
|
|
smsp__inst_executed.max inst 12,525
|
|
smsp__inst_executed.min inst 11,886
|
|
smsp__inst_executed.sum inst 786,800
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,203.08
|
|
smsp__cycles_active.sum cycle 4,620,997
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.52
|
|
smsp__inst_executed.max inst 12,534
|
|
smsp__inst_executed.min inst 12,060
|
|
smsp__inst_executed.sum inst 787,105
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,465.11
|
|
smsp__cycles_active.sum cycle 4,509,767
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.39
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.41
|
|
smsp__inst_executed.max inst 12,967
|
|
smsp__inst_executed.min inst 11,668
|
|
smsp__inst_executed.sum inst 787,802
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,798.33
|
|
smsp__cycles_active.sum cycle 4,723,093
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:24, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.83
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.75
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,971
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,352
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,716
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 118,094.62
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,180.95
|
|
smsp__inst_executed.avg inst 189,037.22
|
|
smsp__inst_executed.max inst 192,039
|
|
smsp__inst_executed.min inst 186,030
|
|
smsp__inst_executed.sum inst 12,098,382
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.36
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 315,017.08
|
|
smsp__cycles_active.sum cycle 20,161,093
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.30
|
|
smsp__inst_executed.max inst 12,664
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,451
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,385.16
|
|
smsp__cycles_active.sum cycle 4,632,650
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.03
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.97
|
|
smsp__inst_executed.max inst 12,828
|
|
smsp__inst_executed.min inst 11,696
|
|
smsp__inst_executed.sum inst 786,494
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,209.61
|
|
smsp__cycles_active.sum cycle 4,749,415
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.89
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.61
|
|
smsp__inst_executed.max inst 12,704
|
|
smsp__inst_executed.min inst 11,952
|
|
smsp__inst_executed.sum inst 786,535
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,674.52
|
|
smsp__cycles_active.sum cycle 4,587,169
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.37
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.02
|
|
smsp__inst_executed.max inst 12,520
|
|
smsp__inst_executed.min inst 12,108
|
|
smsp__inst_executed.sum inst 786,625
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,742.84
|
|
smsp__cycles_active.sum cycle 4,591,542
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.62
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.81
|
|
smsp__inst_executed.max inst 12,676
|
|
smsp__inst_executed.min inst 12,021
|
|
smsp__inst_executed.sum inst 786,804
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,224
|
|
smsp__cycles_active.sum cycle 4,558,336
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.45
|
|
smsp__inst_executed.max inst 12,492
|
|
smsp__inst_executed.min inst 11,896
|
|
smsp__inst_executed.sum inst 787,101
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,438.62
|
|
smsp__cycles_active.sum cycle 4,572,072
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.46
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.52
|
|
smsp__inst_executed.max inst 12,762
|
|
smsp__inst_executed.min inst 11,951
|
|
smsp__inst_executed.sum inst 787,809
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,469.19
|
|
smsp__cycles_active.sum cycle 4,702,028
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,690.44
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,977
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,373
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,047
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 116,796.43
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,167.96
|
|
smsp__inst_executed.avg inst 189,032.39
|
|
smsp__inst_executed.max inst 192,016
|
|
smsp__inst_executed.min inst 186,010
|
|
smsp__inst_executed.sum inst 12,098,073
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.20
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 315,448.41
|
|
smsp__cycles_active.sum cycle 20,188,698
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.66
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.34
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 12,084
|
|
smsp__inst_executed.sum inst 786,454
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,637.31
|
|
smsp__cycles_active.sum cycle 4,648,788
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:25, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.23
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.91
|
|
smsp__inst_executed.max inst 12,665
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,490
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,738.33
|
|
smsp__cycles_active.sum cycle 4,655,253
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.22
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.70
|
|
smsp__inst_executed.max inst 12,664
|
|
smsp__inst_executed.min inst 11,496
|
|
smsp__inst_executed.sum inst 786,541
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,202.39
|
|
smsp__cycles_active.sum cycle 4,748,953
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.03
|
|
smsp__inst_executed.max inst 12,656
|
|
smsp__inst_executed.min inst 11,886
|
|
smsp__inst_executed.sum inst 786,498
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,574
|
|
smsp__cycles_active.sum cycle 4,580,736
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.56
|
|
smsp__inst_executed.max inst 12,493
|
|
smsp__inst_executed.min inst 12,052
|
|
smsp__inst_executed.sum inst 786,596
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,559.62
|
|
smsp__cycles_active.sum cycle 4,579,816
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.20
|
|
smsp__inst_executed.max inst 12,558
|
|
smsp__inst_executed.min inst 12,110
|
|
smsp__inst_executed.sum inst 786,765
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,728.27
|
|
smsp__cycles_active.sum cycle 4,590,609
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,297.78
|
|
smsp__inst_executed.max inst 12,685
|
|
smsp__inst_executed.min inst 12,030
|
|
smsp__inst_executed.sum inst 787,058
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,361.31
|
|
smsp__cycles_active.sum cycle 4,567,124
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.58
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.97
|
|
smsp__inst_executed.max inst 12,728
|
|
smsp__inst_executed.min inst 12,067
|
|
smsp__inst_executed.sum inst 787,774
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,261.38
|
|
smsp__cycles_active.sum cycle 4,688,728
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.22
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,686.12
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,951
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,409
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,978
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 117,160.55
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,171.61
|
|
smsp__inst_executed.avg inst 189,022.41
|
|
smsp__inst_executed.max inst 192,049
|
|
smsp__inst_executed.min inst 186,033
|
|
smsp__inst_executed.sum inst 12,097,434
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.23
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 315,736.44
|
|
smsp__cycles_active.sum cycle 20,207,132
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.86
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.12
|
|
smsp__inst_executed.max inst 12,680
|
|
smsp__inst_executed.min inst 11,716
|
|
smsp__inst_executed.sum inst 786,440
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,287.59
|
|
smsp__cycles_active.sum cycle 4,754,406
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.78
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.20
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 12,092
|
|
smsp__inst_executed.sum inst 786,445
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,712.78
|
|
smsp__cycles_active.sum cycle 4,653,618
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:26, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.97
|
|
smsp__inst_executed.max inst 12,860
|
|
smsp__inst_executed.min inst 12,094
|
|
smsp__inst_executed.sum inst 786,430
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,055.23
|
|
smsp__cycles_active.sum cycle 4,547,535
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.32
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.66
|
|
smsp__inst_executed.max inst 12,509
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,474
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,694.91
|
|
smsp__cycles_active.sum cycle 4,652,474
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.78
|
|
smsp__inst_executed.max inst 12,493
|
|
smsp__inst_executed.min inst 12,050
|
|
smsp__inst_executed.sum inst 786,546
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,318.98
|
|
smsp__cycles_active.sum cycle 4,500,415
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.84
|
|
smsp__inst_executed.max inst 12,691
|
|
smsp__inst_executed.min inst 12,005
|
|
smsp__inst_executed.sum inst 786,614
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,641.02
|
|
smsp__cycles_active.sum cycle 4,521,025
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.64
|
|
smsp__inst_executed.max inst 12,527
|
|
smsp__inst_executed.min inst 12,057
|
|
smsp__inst_executed.sum inst 786,857
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,218
|
|
smsp__cycles_active.sum cycle 4,621,952
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,300.36
|
|
smsp__inst_executed.max inst 12,681
|
|
smsp__inst_executed.min inst 11,918
|
|
smsp__inst_executed.sum inst 787,223
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,114.91
|
|
smsp__cycles_active.sum cycle 4,551,354
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.42
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,315.06
|
|
smsp__inst_executed.max inst 12,754
|
|
smsp__inst_executed.min inst 11,731
|
|
smsp__inst_executed.sum inst 788,164
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,447.34
|
|
smsp__cycles_active.sum cycle 4,700,630
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 231.01
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,667.44
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,990
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,305
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,679
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 118,065.21
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,180.65
|
|
smsp__inst_executed.avg inst 189,024.22
|
|
smsp__inst_executed.max inst 192,046
|
|
smsp__inst_executed.min inst 186,039
|
|
smsp__inst_executed.sum inst 12,097,550
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.35
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 315,105.62
|
|
smsp__cycles_active.sum cycle 20,166,760
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 56.86
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.06
|
|
smsp__inst_executed.max inst 12,680
|
|
smsp__inst_executed.min inst 11,896
|
|
smsp__inst_executed.sum inst 786,436
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 69,539.08
|
|
smsp__cycles_active.sum cycle 4,450,501
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.74
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.19
|
|
smsp__inst_executed.max inst 12,488
|
|
smsp__inst_executed.min inst 11,700
|
|
smsp__inst_executed.sum inst 786,444
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 75,362.86
|
|
smsp__cycles_active.sum cycle 4,823,223
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.75
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.42
|
|
smsp__inst_executed.max inst 12,668
|
|
smsp__inst_executed.min inst 11,916
|
|
smsp__inst_executed.sum inst 786,459
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,588.78
|
|
smsp__cycles_active.sum cycle 4,645,682
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:27, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.01
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.92
|
|
smsp__inst_executed.max inst 12,489
|
|
smsp__inst_executed.min inst 11,942
|
|
smsp__inst_executed.sum inst 786,427
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,949.64
|
|
smsp__cycles_active.sum cycle 4,668,777
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.97
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.47
|
|
smsp__inst_executed.max inst 12,707
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,462
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,444.11
|
|
smsp__cycles_active.sum cycle 4,764,423
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.03
|
|
smsp__inst_executed.max inst 12,510
|
|
smsp__inst_executed.min inst 11,724
|
|
smsp__inst_executed.sum inst 786,498
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,381.17
|
|
smsp__cycles_active.sum cycle 4,504,395
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.34
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.17
|
|
smsp__inst_executed.max inst 12,557
|
|
smsp__inst_executed.min inst 12,092
|
|
smsp__inst_executed.sum inst 786,571
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,414.30
|
|
smsp__cycles_active.sum cycle 4,506,515
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.06
|
|
smsp__inst_executed.max inst 12,927
|
|
smsp__inst_executed.min inst 11,694
|
|
smsp__inst_executed.sum inst 786,756
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,712.52
|
|
smsp__cycles_active.sum cycle 4,525,601
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.92
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.02
|
|
smsp__inst_executed.max inst 12,742
|
|
smsp__inst_executed.min inst 11,876
|
|
smsp__inst_executed.sum inst 787,073
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,394.77
|
|
smsp__cycles_active.sum cycle 4,505,265
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.39
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.23
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.81
|
|
smsp__inst_executed.max inst 12,825
|
|
smsp__inst_executed.min inst 11,716
|
|
smsp__inst_executed.sum inst 787,764
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,582.98
|
|
smsp__cycles_active.sum cycle 4,645,311
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:28, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 227.52
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,692.19
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,059
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,488
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,075
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 118,035.67
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,180.36
|
|
smsp__inst_executed.avg inst 188,792.84
|
|
smsp__inst_executed.max inst 191,758
|
|
smsp__inst_executed.min inst 185,797
|
|
smsp__inst_executed.sum inst 12,082,742
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.34
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
|
|
smsp__cycles_active.avg cycle 315,152.44
|
|
smsp__cycles_active.sum cycle 20,169,756
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|