2050 lines
228 KiB
Plaintext
2050 lines
228 KiB
Plaintext
==PROF== Connected to process 23012 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
|
|
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes
|
|
==PROF== Disconnected from process 23012
|
|
[23012] bitonicCUDA@127.0.0.1
|
|
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum msecond 1.20
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,954.62
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,106
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,827
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,274
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 644,639.76
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 6,446.40
|
|
smsp__inst_executed.avg inst 1,030,883.66
|
|
smsp__inst_executed.max inst 1,031,104
|
|
smsp__inst_executed.min inst 1,030,650
|
|
smsp__inst_executed.sum inst 65,976,554
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 1,667,322.50
|
|
smsp__cycles_active.sum cycle 106,708,640
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.94
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.62
|
|
smsp__inst_executed.max inst 12,930
|
|
smsp__inst_executed.min inst 12,094
|
|
smsp__inst_executed.sum inst 787,752
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,839.28
|
|
smsp__cycles_active.sum cycle 4,725,714
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 231.58
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673.56
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,005
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,385
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,777
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,076.57
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,230.77
|
|
smsp__inst_executed.avg inst 189,293.97
|
|
smsp__inst_executed.max inst 192,369
|
|
smsp__inst_executed.min inst 186,352
|
|
smsp__inst_executed.sum inst 12,114,814
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.78
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,320.25
|
|
smsp__cycles_active.sum cycle 20,244,496
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.77
|
|
smsp__inst_executed.max inst 12,539
|
|
smsp__inst_executed.min inst 12,060
|
|
smsp__inst_executed.sum inst 787,121
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,962.83
|
|
smsp__cycles_active.sum cycle 4,541,621
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.71
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.31
|
|
smsp__inst_executed.max inst 12,697
|
|
smsp__inst_executed.min inst 11,822
|
|
smsp__inst_executed.sum inst 787,796
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,821.19
|
|
smsp__cycles_active.sum cycle 4,660,556
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 232.45
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,692.06
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,412
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,073
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,072.53
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,240.73
|
|
smsp__inst_executed.avg inst 189,295.75
|
|
smsp__inst_executed.max inst 192,417
|
|
smsp__inst_executed.min inst 186,276
|
|
smsp__inst_executed.sum inst 12,114,928
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.89
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,238.97
|
|
smsp__cycles_active.sum cycle 20,239,294
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:31, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.37
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.88
|
|
smsp__inst_executed.max inst 12,554
|
|
smsp__inst_executed.min inst 11,832
|
|
smsp__inst_executed.sum inst 786,744
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,835.05
|
|
smsp__cycles_active.sum cycle 4,597,443
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.89
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.73
|
|
smsp__inst_executed.max inst 12,788
|
|
smsp__inst_executed.min inst 11,840
|
|
smsp__inst_executed.sum inst 787,119
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,345.14
|
|
smsp__cycles_active.sum cycle 4,566,089
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.71
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.23
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.28
|
|
smsp__inst_executed.max inst 12,596
|
|
smsp__inst_executed.min inst 11,926
|
|
smsp__inst_executed.sum inst 787,730
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,894.73
|
|
smsp__cycles_active.sum cycle 4,729,263
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 229.41
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,675.88
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,994
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,371
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,814
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,117.44
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,241.17
|
|
smsp__inst_executed.avg inst 189,295.52
|
|
smsp__inst_executed.max inst 192,256
|
|
smsp__inst_executed.min inst 186,332
|
|
smsp__inst_executed.sum inst 12,114,913
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 317,343.97
|
|
smsp__cycles_active.sum cycle 20,310,014
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.78
|
|
smsp__inst_executed.max inst 12,480
|
|
smsp__inst_executed.min inst 11,928
|
|
smsp__inst_executed.sum inst 786,610
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,846.03
|
|
smsp__cycles_active.sum cycle 4,534,146
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.23
|
|
smsp__inst_executed.max inst 12,604
|
|
smsp__inst_executed.min inst 11,836
|
|
smsp__inst_executed.sum inst 786,767
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,106.22
|
|
smsp__cycles_active.sum cycle 4,550,798
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.36
|
|
smsp__inst_executed.max inst 12,513
|
|
smsp__inst_executed.min inst 11,712
|
|
smsp__inst_executed.sum inst 787,095
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,202.59
|
|
smsp__cycles_active.sum cycle 4,492,966
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.52
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.52
|
|
smsp__inst_executed.max inst 12,682
|
|
smsp__inst_executed.min inst 11,859
|
|
smsp__inst_executed.sum inst 787,745
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,621.48
|
|
smsp__cycles_active.sum cycle 4,711,775
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 229.09
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,686.12
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,974
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,355
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,978
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,010.98
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,240.11
|
|
smsp__inst_executed.avg inst 189,293.06
|
|
smsp__inst_executed.max inst 192,343
|
|
smsp__inst_executed.min inst 186,209
|
|
smsp__inst_executed.sum inst 12,114,756
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 317,996.44
|
|
smsp__cycles_active.sum cycle 20,351,772
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.22
|
|
smsp__inst_executed.max inst 12,699
|
|
smsp__inst_executed.min inst 11,910
|
|
smsp__inst_executed.sum inst 786,510
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,268.28
|
|
smsp__cycles_active.sum cycle 4,561,170
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:32, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.45
|
|
smsp__inst_executed.max inst 12,669
|
|
smsp__inst_executed.min inst 11,950
|
|
smsp__inst_executed.sum inst 786,589
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,528.27
|
|
smsp__cycles_active.sum cycle 4,513,809
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.23
|
|
smsp__inst_executed.max inst 12,648
|
|
smsp__inst_executed.min inst 11,996
|
|
smsp__inst_executed.sum inst 786,767
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,717.06
|
|
smsp__cycles_active.sum cycle 4,525,892
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.44
|
|
smsp__inst_executed.max inst 12,776
|
|
smsp__inst_executed.min inst 11,972
|
|
smsp__inst_executed.sum inst 787,100
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,171.77
|
|
smsp__cycles_active.sum cycle 4,490,993
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.58
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.16
|
|
smsp__inst_executed.max inst 12,776
|
|
smsp__inst_executed.min inst 12,048
|
|
smsp__inst_executed.sum inst 787,786
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,678.91
|
|
smsp__cycles_active.sum cycle 4,715,450
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.96
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,689.06
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,011
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,382
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,025
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 125,081.44
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.81
|
|
smsp__inst_executed.avg inst 189,292.86
|
|
smsp__inst_executed.max inst 192,415
|
|
smsp__inst_executed.min inst 186,212
|
|
smsp__inst_executed.sum inst 12,114,743
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,855.81
|
|
smsp__cycles_active.sum cycle 20,278,772
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.97
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.30
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 11,920
|
|
smsp__inst_executed.sum inst 786,451
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,684.17
|
|
smsp__cycles_active.sum cycle 4,715,787
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.70
|
|
smsp__inst_executed.max inst 12,656
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,477
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,366.16
|
|
smsp__cycles_active.sum cycle 4,567,434
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.27
|
|
smsp__inst_executed.max inst 12,693
|
|
smsp__inst_executed.min inst 12,042
|
|
smsp__inst_executed.sum inst 786,577
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,688.91
|
|
smsp__cycles_active.sum cycle 4,524,090
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.41
|
|
smsp__inst_executed.max inst 12,585
|
|
smsp__inst_executed.min inst 11,776
|
|
smsp__inst_executed.sum inst 786,778
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,063.67
|
|
smsp__cycles_active.sum cycle 4,548,075
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.46
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.94
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 11,776
|
|
smsp__inst_executed.sum inst 787,132
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,037.02
|
|
smsp__cycles_active.sum cycle 4,546,369
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.74
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.58
|
|
smsp__inst_executed.max inst 12,726
|
|
smsp__inst_executed.min inst 12,072
|
|
smsp__inst_executed.sum inst 787,813
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,656
|
|
smsp__cycles_active.sum cycle 4,713,984
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 231.42
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.75
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,995
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,336
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,924
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,533.56
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.34
|
|
smsp__inst_executed.avg inst 189,299.56
|
|
smsp__inst_executed.max inst 192,317
|
|
smsp__inst_executed.min inst 186,295
|
|
smsp__inst_executed.sum inst 12,115,172
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.91
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,917.50
|
|
smsp__cycles_active.sum cycle 20,282,720
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.72
|
|
smsp__inst_executed.max inst 12,664
|
|
smsp__inst_executed.min inst 11,916
|
|
smsp__inst_executed.sum inst 786,414
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,857.64
|
|
smsp__cycles_active.sum cycle 4,598,889
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.97
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.48
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 11,892
|
|
smsp__inst_executed.sum inst 786,463
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,070.30
|
|
smsp__cycles_active.sum cycle 4,676,499
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.53
|
|
smsp__inst_executed.max inst 12,851
|
|
smsp__inst_executed.min inst 11,908
|
|
smsp__inst_executed.sum inst 786,530
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,288.86
|
|
smsp__cycles_active.sum cycle 4,498,487
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.92
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.62
|
|
smsp__inst_executed.max inst 12,632
|
|
smsp__inst_executed.min inst 12,036
|
|
smsp__inst_executed.sum inst 786,600
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,317.92
|
|
smsp__cycles_active.sum cycle 4,564,347
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.17
|
|
smsp__inst_executed.max inst 12,673
|
|
smsp__inst_executed.min inst 11,880
|
|
smsp__inst_executed.sum inst 786,763
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,760.25
|
|
smsp__cycles_active.sum cycle 4,592,656
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.34
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.23
|
|
smsp__inst_executed.max inst 12,716
|
|
smsp__inst_executed.min inst 11,876
|
|
smsp__inst_executed.sum inst 787,087
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,216.77
|
|
smsp__cycles_active.sum cycle 4,557,873
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.39
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.23
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.69
|
|
smsp__inst_executed.max inst 12,831
|
|
smsp__inst_executed.min inst 11,757
|
|
smsp__inst_executed.sum inst 787,756
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,196.75
|
|
smsp__cycles_active.sum cycle 4,684,592
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.77
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,694.50
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,913
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,365
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,112
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,085.89
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,230.86
|
|
smsp__inst_executed.avg inst 189,320.77
|
|
smsp__inst_executed.max inst 192,334
|
|
smsp__inst_executed.min inst 186,279
|
|
smsp__inst_executed.sum inst 12,116,529
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.78
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,332
|
|
smsp__cycles_active.sum cycle 20,245,248
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.30
|
|
smsp__inst_executed.max inst 12,488
|
|
smsp__inst_executed.min inst 11,916
|
|
smsp__inst_executed.sum inst 786,451
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,574.31
|
|
smsp__cycles_active.sum cycle 4,580,756
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.20
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.91
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,910
|
|
smsp__inst_executed.sum inst 786,490
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,645.88
|
|
smsp__cycles_active.sum cycle 4,585,336
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.29
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.03
|
|
smsp__inst_executed.max inst 12,492
|
|
smsp__inst_executed.min inst 11,892
|
|
smsp__inst_executed.sum inst 786,498
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,041.34
|
|
smsp__cycles_active.sum cycle 4,674,646
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.23
|
|
smsp__inst_executed.max inst 12,678
|
|
smsp__inst_executed.min inst 12,066
|
|
smsp__inst_executed.sum inst 786,511
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,712.69
|
|
smsp__cycles_active.sum cycle 4,525,612
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.21
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.75
|
|
smsp__inst_executed.max inst 12,526
|
|
smsp__inst_executed.min inst 12,050
|
|
smsp__inst_executed.sum inst 786,544
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,762.81
|
|
smsp__cycles_active.sum cycle 4,592,820
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.14
|
|
smsp__inst_executed.max inst 12,740
|
|
smsp__inst_executed.min inst 11,704
|
|
smsp__inst_executed.sum inst 786,761
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,097.56
|
|
smsp__cycles_active.sum cycle 4,614,244
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.39
|
|
smsp__inst_executed.max inst 12,695
|
|
smsp__inst_executed.min inst 11,854
|
|
smsp__inst_executed.sum inst 787,097
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,856.70
|
|
smsp__cycles_active.sum cycle 4,534,829
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.20
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.80
|
|
smsp__inst_executed.max inst 12,711
|
|
smsp__inst_executed.min inst 11,932
|
|
smsp__inst_executed.sum inst 787,827
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,244.53
|
|
smsp__cycles_active.sum cycle 4,623,650
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 228.90
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,686.06
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,967
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,333
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,977
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,987.84
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.88
|
|
smsp__inst_executed.avg inst 189,274.94
|
|
smsp__inst_executed.max inst 192,335
|
|
smsp__inst_executed.min inst 186,200
|
|
smsp__inst_executed.sum inst 12,113,596
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,573.25
|
|
smsp__cycles_active.sum cycle 20,260,688
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.42
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.78
|
|
smsp__inst_executed.max inst 12,852
|
|
smsp__inst_executed.min inst 11,520
|
|
smsp__inst_executed.sum inst 786,418
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 75,112.75
|
|
smsp__cycles_active.sum cycle 4,807,216
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.91
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.53
|
|
smsp__inst_executed.max inst 12,679
|
|
smsp__inst_executed.min inst 11,900
|
|
smsp__inst_executed.sum inst 786,466
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,599.36
|
|
smsp__cycles_active.sum cycle 4,646,359
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.06
|
|
smsp__inst_executed.max inst 12,478
|
|
smsp__inst_executed.min inst 12,100
|
|
smsp__inst_executed.sum inst 786,436
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,386.41
|
|
smsp__cycles_active.sum cycle 4,568,730
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.22
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.20
|
|
smsp__inst_executed.max inst 12,496
|
|
smsp__inst_executed.min inst 11,920
|
|
smsp__inst_executed.sum inst 786,509
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,866.02
|
|
smsp__cycles_active.sum cycle 4,727,425
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.89
|
|
smsp__inst_executed.max inst 12,675
|
|
smsp__inst_executed.min inst 11,914
|
|
smsp__inst_executed.sum inst 786,553
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,482.77
|
|
smsp__cycles_active.sum cycle 4,574,897
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.69
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.39
|
|
smsp__inst_executed.max inst 12,501
|
|
smsp__inst_executed.min inst 12,038
|
|
smsp__inst_executed.sum inst 786,649
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,637.31
|
|
smsp__cycles_active.sum cycle 4,584,788
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.56
|
|
smsp__inst_executed.max inst 12,543
|
|
smsp__inst_executed.min inst 11,712
|
|
smsp__inst_executed.sum inst 786,852
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,770.67
|
|
smsp__cycles_active.sum cycle 4,593,323
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,301.70
|
|
smsp__inst_executed.max inst 12,527
|
|
smsp__inst_executed.min inst 12,071
|
|
smsp__inst_executed.sum inst 787,309
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,540.92
|
|
smsp__cycles_active.sum cycle 4,578,619
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.23
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.23
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,314.02
|
|
smsp__inst_executed.max inst 12,699
|
|
smsp__inst_executed.min inst 11,912
|
|
smsp__inst_executed.sum inst 788,097
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,509.19
|
|
smsp__cycles_active.sum cycle 4,640,588
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 232.10
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,689.25
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,069
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,389
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,028
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,193.57
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,241.94
|
|
smsp__inst_executed.avg inst 189,278.17
|
|
smsp__inst_executed.max inst 192,324
|
|
smsp__inst_executed.min inst 186,272
|
|
smsp__inst_executed.sum inst 12,113,803
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.87
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,996.25
|
|
smsp__cycles_active.sum cycle 20,287,760
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 56.90
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.95
|
|
smsp__inst_executed.max inst 12,680
|
|
smsp__inst_executed.min inst 11,896
|
|
smsp__inst_executed.sum inst 786,429
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 68,417.62
|
|
smsp__cycles_active.sum cycle 4,378,728
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.45
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.66
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,908
|
|
smsp__inst_executed.sum inst 786,410
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,576.05
|
|
smsp__cycles_active.sum cycle 4,772,867
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.85
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.80
|
|
smsp__inst_executed.max inst 12,492
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 786,419
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,754.67
|
|
smsp__cycles_active.sum cycle 4,656,299
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.85
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.94
|
|
smsp__inst_executed.max inst 12,660
|
|
smsp__inst_executed.min inst 12,090
|
|
smsp__inst_executed.sum inst 786,428
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,405.38
|
|
smsp__cycles_active.sum cycle 4,633,944
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.90
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.11
|
|
smsp__inst_executed.max inst 12,516
|
|
smsp__inst_executed.min inst 11,872
|
|
smsp__inst_executed.sum inst 786,503
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,029.31
|
|
smsp__cycles_active.sum cycle 4,737,876
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.77
|
|
smsp__inst_executed.max inst 12,648
|
|
smsp__inst_executed.min inst 11,890
|
|
smsp__inst_executed.sum inst 786,481
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,229.30
|
|
smsp__cycles_active.sum cycle 4,494,675
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.27
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.17
|
|
smsp__inst_executed.max inst 12,491
|
|
smsp__inst_executed.min inst 12,062
|
|
smsp__inst_executed.sum inst 786,571
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,537.89
|
|
smsp__cycles_active.sum cycle 4,578,425
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.46
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.28
|
|
smsp__inst_executed.max inst 12,521
|
|
smsp__inst_executed.min inst 12,037
|
|
smsp__inst_executed.sum inst 786,834
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,773.67
|
|
smsp__cycles_active.sum cycle 4,593,515
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.47
|
|
smsp__inst_executed.max inst 12,737
|
|
smsp__inst_executed.min inst 11,886
|
|
smsp__inst_executed.sum inst 787,102
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,021.17
|
|
smsp__cycles_active.sum cycle 4,545,355
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.26
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.88
|
|
smsp__inst_executed.max inst 12,759
|
|
smsp__inst_executed.min inst 12,026
|
|
smsp__inst_executed.sum inst 787,832
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,721.27
|
|
smsp__cycles_active.sum cycle 4,654,161
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 14:04:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 231.97
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,679.75
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,990
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,275
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,876
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 125,400.79
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,254.01
|
|
smsp__inst_executed.avg inst 189,032.33
|
|
smsp__inst_executed.max inst 192,028
|
|
smsp__inst_executed.min inst 186,044
|
|
smsp__inst_executed.sum inst 12,098,069
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 13.03
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
|
|
smsp__cycles_active.avg cycle 316,085.12
|
|
smsp__cycles_active.sum cycle 20,229,448
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|