==PROF== Connected to process 20279 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA) ==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes ==PROF== Disconnected from process 20279 [20279] bitonicCUDA@127.0.0.1 void prephase(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum msecond 1.20 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,946.88 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,116 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,795 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,150 smsp__average_warp_latency_issue_stalled_barrier.pct % 644,345.26 smsp__average_warp_latency_issue_stalled_barrier.ratio 6,443.45 smsp__inst_executed.avg inst 1,030,868.94 smsp__inst_executed.max inst 1,031,062 smsp__inst_executed.min inst 1,030,675 smsp__inst_executed.sum inst 65,975,612 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12 smsp__cycles_active.avg cycle 1,666,829.12 smsp__cycles_active.sum cycle 106,677,064 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.84 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,308.59 smsp__inst_executed.max inst 12,538 smsp__inst_executed.min inst 11,945 smsp__inst_executed.sum inst 787,750 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,268.67 smsp__cycles_active.sum cycle 4,689,195 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,642.38 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,963 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,322 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,278 smsp__average_warp_latency_issue_stalled_barrier.pct % 123,392.55 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,233.93 smsp__inst_executed.avg inst 189,292.45 smsp__inst_executed.max inst 192,372 smsp__inst_executed.min inst 186,246 smsp__inst_executed.sum inst 12,114,717 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,267.31 smsp__cycles_active.sum cycle 20,241,108 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.34 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.58 smsp__inst_executed.max inst 12,667 smsp__inst_executed.min inst 11,936 smsp__inst_executed.sum inst 787,109 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,505.30 smsp__cycles_active.sum cycle 4,512,339 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.55 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.17 smsp__inst_executed.max inst 12,702 smsp__inst_executed.min inst 11,606 smsp__inst_executed.sum inst 787,787 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,897.17 smsp__cycles_active.sum cycle 4,665,419 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 230.91 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,680 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,009 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,334 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,880 smsp__average_warp_latency_issue_stalled_barrier.pct % 123,674.16 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,236.74 smsp__inst_executed.avg inst 189,294.36 smsp__inst_executed.max inst 192,238 smsp__inst_executed.min inst 186,252 smsp__inst_executed.sum inst 12,114,839 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,040.81 smsp__cycles_active.sum cycle 20,226,612 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.78 smsp__inst_executed.max inst 12,542 smsp__inst_executed.min inst 11,960 smsp__inst_executed.sum inst 786,802 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,235.28 smsp__cycles_active.sum cycle 4,559,058 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.56 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.95 smsp__inst_executed.max inst 12,560 smsp__inst_executed.min inst 12,096 smsp__inst_executed.sum inst 787,133 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,575.53 smsp__cycles_active.sum cycle 4,516,834 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.42 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,308.61 smsp__inst_executed.max inst 12,640 smsp__inst_executed.min inst 12,096 smsp__inst_executed.sum inst 787,751 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,641.39 smsp__cycles_active.sum cycle 4,649,049 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.87 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,674.75 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,354 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,796 smsp__average_warp_latency_issue_stalled_barrier.pct % 123,483.94 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,234.84 smsp__inst_executed.avg inst 189,288.14 smsp__inst_executed.max inst 192,081 smsp__inst_executed.min inst 186,477 smsp__inst_executed.sum inst 12,114,441 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 315,433.75 smsp__cycles_active.sum cycle 20,187,760 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.34 smsp__inst_executed.max inst 12,724 smsp__inst_executed.min inst 12,076 smsp__inst_executed.sum inst 786,582 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,402.61 smsp__cycles_active.sum cycle 4,505,767 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.56 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,294.27 smsp__inst_executed.max inst 12,717 smsp__inst_executed.min inst 11,988 smsp__inst_executed.sum inst 786,833 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,681.59 smsp__cycles_active.sum cycle 4,523,622 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.42 smsp__inst_executed.max inst 12,663 smsp__inst_executed.min inst 11,882 smsp__inst_executed.sum inst 787,099 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,688.28 smsp__cycles_active.sum cycle 4,524,050 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.49 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.03 smsp__inst_executed.max inst 12,686 smsp__inst_executed.min inst 11,852 smsp__inst_executed.sum inst 787,778 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,892.83 smsp__cycles_active.sum cycle 4,665,141 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.33 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,976 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,331 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,832 smsp__average_warp_latency_issue_stalled_barrier.pct % 123,882.24 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,238.82 smsp__inst_executed.avg inst 189,292.19 smsp__inst_executed.max inst 192,340 smsp__inst_executed.min inst 186,215 smsp__inst_executed.sum inst 12,114,700 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,203.25 smsp__cycles_active.sum cycle 20,237,008 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.06 smsp__inst_executed.max inst 12,694 smsp__inst_executed.min inst 11,900 smsp__inst_executed.sum inst 786,500 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,488.72 smsp__cycles_active.sum cycle 4,511,278 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,291.25 smsp__inst_executed.max inst 12,681 smsp__inst_executed.min inst 12,008 smsp__inst_executed.sum inst 786,640 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,605.89 smsp__cycles_active.sum cycle 4,518,777 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.34 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,292.84 smsp__inst_executed.max inst 12,543 smsp__inst_executed.min inst 11,998 smsp__inst_executed.sum inst 786,742 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,795.58 smsp__cycles_active.sum cycle 4,530,917 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,299.95 smsp__inst_executed.max inst 12,683 smsp__inst_executed.min inst 11,720 smsp__inst_executed.sum inst 787,197 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,136.48 smsp__cycles_active.sum cycle 4,488,735 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.52 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.09 smsp__inst_executed.max inst 12,613 smsp__inst_executed.min inst 11,865 smsp__inst_executed.sum inst 787,782 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,887.53 smsp__cycles_active.sum cycle 4,664,802 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.56 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,315 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,921 smsp__average_warp_latency_issue_stalled_barrier.pct % 124,910.64 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.11 smsp__inst_executed.avg inst 189,291.42 smsp__inst_executed.max inst 192,361 smsp__inst_executed.min inst 186,192 smsp__inst_executed.sum inst 12,114,651 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,146.12 smsp__cycles_active.sum cycle 20,233,352 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 60.03 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.48 smsp__inst_executed.max inst 12,672 smsp__inst_executed.min inst 11,868 smsp__inst_executed.sum inst 786,463 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,004.22 smsp__cycles_active.sum cycle 4,672,270 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.81 smsp__inst_executed.max inst 12,480 smsp__inst_executed.min inst 12,068 smsp__inst_executed.sum inst 786,548 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,790.83 smsp__cycles_active.sum cycle 4,530,613 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.46 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.59 smsp__inst_executed.max inst 12,701 smsp__inst_executed.min inst 12,068 smsp__inst_executed.sum inst 786,598 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,847.19 smsp__cycles_active.sum cycle 4,534,220 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.72 smsp__inst_executed.max inst 12,656 smsp__inst_executed.min inst 12,038 smsp__inst_executed.sum inst 786,798 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,747 smsp__cycles_active.sum cycle 4,527,808 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.95 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.14 smsp__inst_executed.max inst 12,645 smsp__inst_executed.min inst 12,029 smsp__inst_executed.sum inst 787,081 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,059.03 smsp__cycles_active.sum cycle 4,483,778 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.58 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,308.86 smsp__inst_executed.max inst 12,724 smsp__inst_executed.min inst 11,654 smsp__inst_executed.sum inst 787,767 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,813.80 smsp__cycles_active.sum cycle 4,660,083 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.90 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.44 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,942 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,386 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,711 smsp__average_warp_latency_issue_stalled_barrier.pct % 125,049.38 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.49 smsp__inst_executed.avg inst 189,291.03 smsp__inst_executed.max inst 192,313 smsp__inst_executed.min inst 186,310 smsp__inst_executed.sum inst 12,114,626 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,608.81 smsp__cycles_active.sum cycle 20,262,964 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.78 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,287.95 smsp__inst_executed.max inst 12,856 smsp__inst_executed.min inst 11,904 smsp__inst_executed.sum inst 786,429 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,331.70 smsp__cycles_active.sum cycle 4,565,229 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.94 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.03 smsp__inst_executed.max inst 12,488 smsp__inst_executed.min inst 11,888 smsp__inst_executed.sum inst 786,434 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,232.05 smsp__cycles_active.sum cycle 4,686,851 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.27 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.50 smsp__inst_executed.max inst 12,488 smsp__inst_executed.min inst 12,072 smsp__inst_executed.sum inst 786,528 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,846.25 smsp__cycles_active.sum cycle 4,534,160 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.11 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.84 smsp__inst_executed.max inst 12,564 smsp__inst_executed.min inst 12,104 smsp__inst_executed.sum inst 786,614 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,881.05 smsp__cycles_active.sum cycle 4,536,387 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.40 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.73 smsp__inst_executed.max inst 12,757 smsp__inst_executed.min inst 11,970 smsp__inst_executed.sum inst 786,799 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,142.94 smsp__cycles_active.sum cycle 4,553,148 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.95 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.62 smsp__inst_executed.max inst 12,553 smsp__inst_executed.min inst 12,119 smsp__inst_executed.sum inst 787,112 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,189.52 smsp__cycles_active.sum cycle 4,492,129 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.71 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.52 smsp__inst_executed.max inst 12,538 smsp__inst_executed.min inst 12,074 smsp__inst_executed.sum inst 787,809 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,879.23 smsp__cycles_active.sum cycle 4,664,271 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.42 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,007 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,299 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,768 smsp__average_warp_latency_issue_stalled_barrier.pct % 124,557.10 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.57 smsp__inst_executed.avg inst 189,303.22 smsp__inst_executed.max inst 192,317 smsp__inst_executed.min inst 186,277 smsp__inst_executed.sum inst 12,115,406 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 315,741.19 smsp__cycles_active.sum cycle 20,207,436 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.40 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,287.92 smsp__inst_executed.max inst 12,648 smsp__inst_executed.min inst 11,912 smsp__inst_executed.sum inst 786,427 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,978.88 smsp__cycles_active.sum cycle 4,606,648 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.62 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.30 smsp__inst_executed.max inst 12,848 smsp__inst_executed.min inst 11,904 smsp__inst_executed.sum inst 786,451 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,708.22 smsp__cycles_active.sum cycle 4,589,326 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 60.19 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.11 smsp__inst_executed.max inst 12,876 smsp__inst_executed.min inst 11,688 smsp__inst_executed.sum inst 786,503 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,332.14 smsp__cycles_active.sum cycle 4,693,257 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.89 smsp__inst_executed.max inst 12,507 smsp__inst_executed.min inst 12,092 smsp__inst_executed.sum inst 786,489 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,441.14 smsp__cycles_active.sum cycle 4,508,233 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.69 smsp__inst_executed.max inst 12,682 smsp__inst_executed.min inst 11,866 smsp__inst_executed.sum inst 786,604 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,768.55 smsp__cycles_active.sum cycle 4,529,187 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.62 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.67 smsp__inst_executed.max inst 12,534 smsp__inst_executed.min inst 11,732 smsp__inst_executed.sum inst 786,795 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,007.56 smsp__cycles_active.sum cycle 4,544,484 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,299.09 smsp__inst_executed.max inst 12,656 smsp__inst_executed.min inst 11,912 smsp__inst_executed.sum inst 787,142 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,781.25 smsp__cycles_active.sum cycle 4,530,000 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.02 smsp__inst_executed.max inst 12,707 smsp__inst_executed.min inst 11,847 smsp__inst_executed.sum inst 787,777 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,505.88 smsp__cycles_active.sum cycle 4,640,376 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,666.06 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,013 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,348 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,657 smsp__average_warp_latency_issue_stalled_barrier.pct % 124,275.15 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,242.75 smsp__inst_executed.avg inst 189,315.86 smsp__inst_executed.max inst 192,371 smsp__inst_executed.min inst 186,294 smsp__inst_executed.sum inst 12,116,215 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.90 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,297.72 smsp__cycles_active.sum cycle 20,243,054 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 60.42 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.20 smsp__inst_executed.max inst 12,484 smsp__inst_executed.min inst 12,092 smsp__inst_executed.sum inst 786,445 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 74,382.31 smsp__cycles_active.sum cycle 4,760,468 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.88 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.11 smsp__inst_executed.max inst 12,484 smsp__inst_executed.min inst 11,716 smsp__inst_executed.sum inst 786,439 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,860.06 smsp__cycles_active.sum cycle 4,599,044 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.04 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.05 smsp__inst_executed.max inst 12,664 smsp__inst_executed.min inst 11,700 smsp__inst_executed.sum inst 786,435 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,882.38 smsp__cycles_active.sum cycle 4,600,472 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 60.13 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.81 smsp__inst_executed.max inst 12,870 smsp__inst_executed.min inst 11,908 smsp__inst_executed.sum inst 786,484 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,247.75 smsp__cycles_active.sum cycle 4,687,856 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.89 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.59 smsp__inst_executed.max inst 12,494 smsp__inst_executed.min inst 11,898 smsp__inst_executed.sum inst 786,534 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,630.66 smsp__cycles_active.sum cycle 4,520,362 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,291.27 smsp__inst_executed.max inst 12,510 smsp__inst_executed.min inst 12,082 smsp__inst_executed.sum inst 786,641 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,548.77 smsp__cycles_active.sum cycle 4,515,121 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.66 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,294.64 smsp__inst_executed.max inst 12,656 smsp__inst_executed.min inst 11,924 smsp__inst_executed.sum inst 786,857 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,171.45 smsp__cycles_active.sum cycle 4,554,973 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.86 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,301.05 smsp__inst_executed.max inst 12,725 smsp__inst_executed.min inst 11,871 smsp__inst_executed.sum inst 787,267 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,490.50 smsp__cycles_active.sum cycle 4,511,392 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.17 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,316.05 smsp__inst_executed.max inst 12,594 smsp__inst_executed.min inst 11,865 smsp__inst_executed.sum inst 788,227 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,533.61 smsp__cycles_active.sum cycle 4,642,151 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 231.55 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,681.88 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,120 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,332 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,910 smsp__average_warp_latency_issue_stalled_barrier.pct % 123,982.60 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.83 smsp__inst_executed.avg inst 189,283.48 smsp__inst_executed.max inst 192,309 smsp__inst_executed.min inst 186,242 smsp__inst_executed.sum inst 12,114,143 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.88 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 316,209.50 smsp__cycles_active.sum cycle 20,237,408 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 56.70 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,287.97 smsp__inst_executed.max inst 12,492 smsp__inst_executed.min inst 11,896 smsp__inst_executed.sum inst 786,430 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 68,714 smsp__cycles_active.sum cycle 4,397,696 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 60.64 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.14 smsp__inst_executed.max inst 12,844 smsp__inst_executed.min inst 11,528 smsp__inst_executed.sum inst 786,441 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 74,171.33 smsp__cycles_active.sum cycle 4,746,965 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.55 smsp__inst_executed.max inst 12,684 smsp__inst_executed.min inst 11,884 smsp__inst_executed.sum inst 786,467 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,696.42 smsp__cycles_active.sum cycle 4,588,571 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.94 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.08 smsp__inst_executed.max inst 12,660 smsp__inst_executed.min inst 11,724 smsp__inst_executed.sum inst 786,437 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,640.89 smsp__cycles_active.sum cycle 4,585,017 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 60.06 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.06 smsp__inst_executed.max inst 12,524 smsp__inst_executed.min inst 11,900 smsp__inst_executed.sum inst 786,436 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,132.61 smsp__cycles_active.sum cycle 4,680,487 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.61 smsp__inst_executed.max inst 12,634 smsp__inst_executed.min inst 11,884 smsp__inst_executed.sum inst 786,535 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,620.73 smsp__cycles_active.sum cycle 4,519,727 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,291.28 smsp__inst_executed.max inst 12,704 smsp__inst_executed.min inst 11,892 smsp__inst_executed.sum inst 786,642 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,037.52 smsp__cycles_active.sum cycle 4,546,401 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.82 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.23 smsp__inst_executed.max inst 12,931 smsp__inst_executed.min inst 11,840 smsp__inst_executed.sum inst 786,767 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,840.56 smsp__cycles_active.sum cycle 4,533,796 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.42 smsp__inst_executed.max inst 12,587 smsp__inst_executed.min inst 11,966 smsp__inst_executed.sum inst 787,099 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,543.30 smsp__cycles_active.sum cycle 4,514,771 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.39 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.44 smsp__inst_executed.max inst 12,751 smsp__inst_executed.min inst 11,714 smsp__inst_executed.sum inst 787,804 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,313.14 smsp__cycles_active.sum cycle 4,628,041 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 228.54 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,691.25 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,988 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,367 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,060 smsp__average_warp_latency_issue_stalled_barrier.pct % 123,962.42 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.62 smsp__inst_executed.avg inst 189,051.73 smsp__inst_executed.max inst 192,054 smsp__inst_executed.min inst 186,060 smsp__inst_executed.sum inst 12,099,311 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.83 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13 smsp__cycles_active.avg cycle 317,268.88 smsp__cycles_active.sum cycle 20,305,208 ---------------------------------------------------------------------- --------------- ------------------------------