AUTH's THMMY "Parallel and distributed systems" course assignments.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1918 lines
212 KiB

  1. ==PROF== Connected to process 20279 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
  2. ==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes
  3. ==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes
  4. ==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes
  5. ==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes
  6. ==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes
  7. ==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes
  8. ==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes
  9. ==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes
  10. ==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes
  11. ==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes
  12. ==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes
  13. ==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes
  14. ==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes
  15. ==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes
  16. ==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes
  17. ==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes
  18. ==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes
  19. ==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes
  20. ==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes
  21. ==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes
  22. ==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes
  23. ==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes
  24. ==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes
  25. ==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes
  26. ==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes
  27. ==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes
  28. ==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes
  29. ==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes
  30. ==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes
  31. ==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes
  32. ==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes
  33. ==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes
  34. ==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes
  35. ==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes
  36. ==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes
  37. ==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes
  38. ==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes
  39. ==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes
  40. ==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes
  41. ==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes
  42. ==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes
  43. ==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes
  44. ==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes
  45. ==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes
  46. ==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes
  47. ==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes
  48. ==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes
  49. ==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes
  50. ==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes
  51. ==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes
  52. ==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes
  53. ==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes
  54. ==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes
  55. ==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes
  56. ==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes
  57. ==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes
  58. ==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes
  59. ==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes
  60. ==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes
  61. ==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes
  62. ==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes
  63. ==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes
  64. ==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes
  65. ==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes
  66. ==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes
  67. ==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes
  68. ==PROF== Disconnected from process 20279
  69. [20279] bitonicCUDA@127.0.0.1
  70. void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  71. Section: Command line profiler metrics
  72. ---------------------------------------------------------------------- --------------- ------------------------------
  73. gpu__time_duration.sum msecond 1.20
  74. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  75. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  76. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  77. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  78. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368
  79. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368
  80. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368
  81. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888
  82. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,946.88
  83. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,116
  84. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,795
  85. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,150
  86. smsp__average_warp_latency_issue_stalled_barrier.pct % 644,345.26
  87. smsp__average_warp_latency_issue_stalled_barrier.ratio 6,443.45
  88. smsp__inst_executed.avg inst 1,030,868.94
  89. smsp__inst_executed.max inst 1,031,062
  90. smsp__inst_executed.min inst 1,030,675
  91. smsp__inst_executed.sum inst 65,975,612
  92. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50
  93. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
  94. smsp__cycles_active.avg cycle 1,666,829.12
  95. smsp__cycles_active.sum cycle 106,677,064
  96. ---------------------------------------------------------------------- --------------- ------------------------------
  97. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  98. Section: Command line profiler metrics
  99. ---------------------------------------------------------------------- --------------- ------------------------------
  100. gpu__time_duration.sum usecond 59.84
  101. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  102. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  103. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  104. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  105. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  106. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  107. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  108. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  109. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  110. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  111. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  112. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  113. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  114. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  115. smsp__inst_executed.avg inst 12,308.59
  116. smsp__inst_executed.max inst 12,538
  117. smsp__inst_executed.min inst 11,945
  118. smsp__inst_executed.sum inst 787,750
  119. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  120. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  121. smsp__cycles_active.avg cycle 73,268.67
  122. smsp__cycles_active.sum cycle 4,689,195
  123. ---------------------------------------------------------------------- --------------- ------------------------------
  124. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  125. Section: Command line profiler metrics
  126. ---------------------------------------------------------------------- --------------- ------------------------------
  127. gpu__time_duration.sum usecond 231.30
  128. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  129. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  130. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  131. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  132. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  133. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  134. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  135. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  136. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,642.38
  137. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,963
  138. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,322
  139. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,278
  140. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,392.55
  141. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,233.93
  142. smsp__inst_executed.avg inst 189,292.45
  143. smsp__inst_executed.max inst 192,372
  144. smsp__inst_executed.min inst 186,246
  145. smsp__inst_executed.sum inst 12,114,717
  146. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81
  147. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  148. smsp__cycles_active.avg cycle 316,267.31
  149. smsp__cycles_active.sum cycle 20,241,108
  150. ---------------------------------------------------------------------- --------------- ------------------------------
  151. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  152. Section: Command line profiler metrics
  153. ---------------------------------------------------------------------- --------------- ------------------------------
  154. gpu__time_duration.sum usecond 58.34
  155. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  156. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  157. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  158. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  159. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  160. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  161. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  162. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  163. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  164. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  165. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  166. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  167. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  168. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  169. smsp__inst_executed.avg inst 12,298.58
  170. smsp__inst_executed.max inst 12,667
  171. smsp__inst_executed.min inst 11,936
  172. smsp__inst_executed.sum inst 787,109
  173. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  174. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  175. smsp__cycles_active.avg cycle 70,505.30
  176. smsp__cycles_active.sum cycle 4,512,339
  177. ---------------------------------------------------------------------- --------------- ------------------------------
  178. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  179. Section: Command line profiler metrics
  180. ---------------------------------------------------------------------- --------------- ------------------------------
  181. gpu__time_duration.sum usecond 59.55
  182. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  183. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  184. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  185. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  186. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  187. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  188. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  189. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  190. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  191. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  192. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  193. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  194. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  195. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  196. smsp__inst_executed.avg inst 12,309.17
  197. smsp__inst_executed.max inst 12,702
  198. smsp__inst_executed.min inst 11,606
  199. smsp__inst_executed.sum inst 787,787
  200. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  201. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  202. smsp__cycles_active.avg cycle 72,897.17
  203. smsp__cycles_active.sum cycle 4,665,419
  204. ---------------------------------------------------------------------- --------------- ------------------------------
  205. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  206. Section: Command line profiler metrics
  207. ---------------------------------------------------------------------- --------------- ------------------------------
  208. gpu__time_duration.sum usecond 230.91
  209. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  210. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  211. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  212. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  213. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  214. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  215. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  216. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  217. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,680
  218. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,009
  219. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,334
  220. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,880
  221. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,674.16
  222. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,236.74
  223. smsp__inst_executed.avg inst 189,294.36
  224. smsp__inst_executed.max inst 192,238
  225. smsp__inst_executed.min inst 186,252
  226. smsp__inst_executed.sum inst 12,114,839
  227. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85
  228. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  229. smsp__cycles_active.avg cycle 316,040.81
  230. smsp__cycles_active.sum cycle 20,226,612
  231. ---------------------------------------------------------------------- --------------- ------------------------------
  232. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  233. Section: Command line profiler metrics
  234. ---------------------------------------------------------------------- --------------- ------------------------------
  235. gpu__time_duration.sum usecond 58.72
  236. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  237. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  238. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  239. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  240. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  241. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  242. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  243. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  244. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  245. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  246. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  247. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  248. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  249. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  250. smsp__inst_executed.avg inst 12,293.78
  251. smsp__inst_executed.max inst 12,542
  252. smsp__inst_executed.min inst 11,960
  253. smsp__inst_executed.sum inst 786,802
  254. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  255. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  256. smsp__cycles_active.avg cycle 71,235.28
  257. smsp__cycles_active.sum cycle 4,559,058
  258. ---------------------------------------------------------------------- --------------- ------------------------------
  259. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  260. Section: Command line profiler metrics
  261. ---------------------------------------------------------------------- --------------- ------------------------------
  262. gpu__time_duration.sum usecond 58.56
  263. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  264. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  265. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  266. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  267. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  268. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  269. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  270. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  271. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  272. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  273. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  274. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  275. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  276. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  277. smsp__inst_executed.avg inst 12,298.95
  278. smsp__inst_executed.max inst 12,560
  279. smsp__inst_executed.min inst 12,096
  280. smsp__inst_executed.sum inst 787,133
  281. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  282. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  283. smsp__cycles_active.avg cycle 70,575.53
  284. smsp__cycles_active.sum cycle 4,516,834
  285. ---------------------------------------------------------------------- --------------- ------------------------------
  286. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  287. Section: Command line profiler metrics
  288. ---------------------------------------------------------------------- --------------- ------------------------------
  289. gpu__time_duration.sum usecond 59.42
  290. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  291. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  292. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  293. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  294. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  295. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  296. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  297. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  298. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  299. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  300. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  301. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  302. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  303. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  304. smsp__inst_executed.avg inst 12,308.61
  305. smsp__inst_executed.max inst 12,640
  306. smsp__inst_executed.min inst 12,096
  307. smsp__inst_executed.sum inst 787,751
  308. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  309. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  310. smsp__cycles_active.avg cycle 72,641.39
  311. smsp__cycles_active.sum cycle 4,649,049
  312. ---------------------------------------------------------------------- --------------- ------------------------------
  313. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  314. Section: Command line profiler metrics
  315. ---------------------------------------------------------------------- --------------- ------------------------------
  316. gpu__time_duration.sum usecond 231.87
  317. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  318. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  319. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  320. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  321. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  322. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  323. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  324. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  325. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,674.75
  326. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
  327. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,354
  328. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,796
  329. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,483.94
  330. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,234.84
  331. smsp__inst_executed.avg inst 189,288.14
  332. smsp__inst_executed.max inst 192,081
  333. smsp__inst_executed.min inst 186,477
  334. smsp__inst_executed.sum inst 12,114,441
  335. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
  336. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  337. smsp__cycles_active.avg cycle 315,433.75
  338. smsp__cycles_active.sum cycle 20,187,760
  339. ---------------------------------------------------------------------- --------------- ------------------------------
  340. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  341. Section: Command line profiler metrics
  342. ---------------------------------------------------------------------- --------------- ------------------------------
  343. gpu__time_duration.sum usecond 58.14
  344. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  345. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  346. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  347. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  348. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  349. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  350. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  351. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  352. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  353. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  354. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  355. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  356. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  357. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  358. smsp__inst_executed.avg inst 12,290.34
  359. smsp__inst_executed.max inst 12,724
  360. smsp__inst_executed.min inst 12,076
  361. smsp__inst_executed.sum inst 786,582
  362. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  363. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  364. smsp__cycles_active.avg cycle 70,402.61
  365. smsp__cycles_active.sum cycle 4,505,767
  366. ---------------------------------------------------------------------- --------------- ------------------------------
  367. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  368. Section: Command line profiler metrics
  369. ---------------------------------------------------------------------- --------------- ------------------------------
  370. gpu__time_duration.sum usecond 58.56
  371. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  372. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  373. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  374. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  375. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  376. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  377. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  378. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  379. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  380. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  381. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  382. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  383. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  384. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  385. smsp__inst_executed.avg inst 12,294.27
  386. smsp__inst_executed.max inst 12,717
  387. smsp__inst_executed.min inst 11,988
  388. smsp__inst_executed.sum inst 786,833
  389. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  390. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  391. smsp__cycles_active.avg cycle 70,681.59
  392. smsp__cycles_active.sum cycle 4,523,622
  393. ---------------------------------------------------------------------- --------------- ------------------------------
  394. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  395. Section: Command line profiler metrics
  396. ---------------------------------------------------------------------- --------------- ------------------------------
  397. gpu__time_duration.sum usecond 58.05
  398. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  399. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  400. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  401. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  402. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  403. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  404. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  405. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  406. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  407. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  408. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  409. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  410. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  411. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  412. smsp__inst_executed.avg inst 12,298.42
  413. smsp__inst_executed.max inst 12,663
  414. smsp__inst_executed.min inst 11,882
  415. smsp__inst_executed.sum inst 787,099
  416. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  417. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  418. smsp__cycles_active.avg cycle 70,688.28
  419. smsp__cycles_active.sum cycle 4,524,050
  420. ---------------------------------------------------------------------- --------------- ------------------------------
  421. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  422. Section: Command line profiler metrics
  423. ---------------------------------------------------------------------- --------------- ------------------------------
  424. gpu__time_duration.sum usecond 59.49
  425. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  426. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  427. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  428. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  429. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  430. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  431. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  432. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  433. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  434. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  435. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  436. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  437. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  438. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  439. smsp__inst_executed.avg inst 12,309.03
  440. smsp__inst_executed.max inst 12,686
  441. smsp__inst_executed.min inst 11,852
  442. smsp__inst_executed.sum inst 787,778
  443. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  444. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  445. smsp__cycles_active.avg cycle 72,892.83
  446. smsp__cycles_active.sum cycle 4,665,141
  447. ---------------------------------------------------------------------- --------------- ------------------------------
  448. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  449. Section: Command line profiler metrics
  450. ---------------------------------------------------------------------- --------------- ------------------------------
  451. gpu__time_duration.sum usecond 231.33
  452. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  453. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  454. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  455. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  456. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  457. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  458. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  459. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  460. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677
  461. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,976
  462. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,331
  463. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,832
  464. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,882.24
  465. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,238.82
  466. smsp__inst_executed.avg inst 189,292.19
  467. smsp__inst_executed.max inst 192,340
  468. smsp__inst_executed.min inst 186,215
  469. smsp__inst_executed.sum inst 12,114,700
  470. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
  471. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  472. smsp__cycles_active.avg cycle 316,203.25
  473. smsp__cycles_active.sum cycle 20,237,008
  474. ---------------------------------------------------------------------- --------------- ------------------------------
  475. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  476. Section: Command line profiler metrics
  477. ---------------------------------------------------------------------- --------------- ------------------------------
  478. gpu__time_duration.sum usecond 58.08
  479. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  480. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  481. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  482. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  483. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  484. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  485. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  486. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  487. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  488. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  489. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  490. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  491. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  492. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  493. smsp__inst_executed.avg inst 12,289.06
  494. smsp__inst_executed.max inst 12,694
  495. smsp__inst_executed.min inst 11,900
  496. smsp__inst_executed.sum inst 786,500
  497. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  498. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  499. smsp__cycles_active.avg cycle 70,488.72
  500. smsp__cycles_active.sum cycle 4,511,278
  501. ---------------------------------------------------------------------- --------------- ------------------------------
  502. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  503. Section: Command line profiler metrics
  504. ---------------------------------------------------------------------- --------------- ------------------------------
  505. gpu__time_duration.sum usecond 58.27
  506. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  507. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  508. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  509. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  510. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  511. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  512. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  513. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  514. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  515. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  516. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  517. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  518. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  519. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  520. smsp__inst_executed.avg inst 12,291.25
  521. smsp__inst_executed.max inst 12,681
  522. smsp__inst_executed.min inst 12,008
  523. smsp__inst_executed.sum inst 786,640
  524. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  525. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  526. smsp__cycles_active.avg cycle 70,605.89
  527. smsp__cycles_active.sum cycle 4,518,777
  528. ---------------------------------------------------------------------- --------------- ------------------------------
  529. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  530. Section: Command line profiler metrics
  531. ---------------------------------------------------------------------- --------------- ------------------------------
  532. gpu__time_duration.sum usecond 58.34
  533. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  534. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  535. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  536. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  537. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  538. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  539. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  540. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  541. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  542. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  543. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  544. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  545. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  546. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  547. smsp__inst_executed.avg inst 12,292.84
  548. smsp__inst_executed.max inst 12,543
  549. smsp__inst_executed.min inst 11,998
  550. smsp__inst_executed.sum inst 786,742
  551. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  552. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  553. smsp__cycles_active.avg cycle 70,795.58
  554. smsp__cycles_active.sum cycle 4,530,917
  555. ---------------------------------------------------------------------- --------------- ------------------------------
  556. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  557. Section: Command line profiler metrics
  558. ---------------------------------------------------------------------- --------------- ------------------------------
  559. gpu__time_duration.sum usecond 58.02
  560. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  561. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  562. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  563. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  564. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  565. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  566. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  567. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  568. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  569. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  570. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  571. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  572. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  573. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  574. smsp__inst_executed.avg inst 12,299.95
  575. smsp__inst_executed.max inst 12,683
  576. smsp__inst_executed.min inst 11,720
  577. smsp__inst_executed.sum inst 787,197
  578. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  579. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  580. smsp__cycles_active.avg cycle 70,136.48
  581. smsp__cycles_active.sum cycle 4,488,735
  582. ---------------------------------------------------------------------- --------------- ------------------------------
  583. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  584. Section: Command line profiler metrics
  585. ---------------------------------------------------------------------- --------------- ------------------------------
  586. gpu__time_duration.sum usecond 59.52
  587. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  588. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  589. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  590. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  591. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  592. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  593. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  594. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  595. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  596. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  597. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  598. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  599. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  600. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  601. smsp__inst_executed.avg inst 12,309.09
  602. smsp__inst_executed.max inst 12,613
  603. smsp__inst_executed.min inst 11,865
  604. smsp__inst_executed.sum inst 787,782
  605. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  606. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  607. smsp__cycles_active.avg cycle 72,887.53
  608. smsp__cycles_active.sum cycle 4,664,802
  609. ---------------------------------------------------------------------- --------------- ------------------------------
  610. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  611. Section: Command line profiler metrics
  612. ---------------------------------------------------------------------- --------------- ------------------------------
  613. gpu__time_duration.sum usecond 231.30
  614. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  615. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  616. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  617. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  618. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  619. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  620. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  621. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  622. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.56
  623. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
  624. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,315
  625. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,921
  626. smsp__average_warp_latency_issue_stalled_barrier.pct % 124,910.64
  627. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.11
  628. smsp__inst_executed.avg inst 189,291.42
  629. smsp__inst_executed.max inst 192,361
  630. smsp__inst_executed.min inst 186,192
  631. smsp__inst_executed.sum inst 12,114,651
  632. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
  633. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  634. smsp__cycles_active.avg cycle 316,146.12
  635. smsp__cycles_active.sum cycle 20,233,352
  636. ---------------------------------------------------------------------- --------------- ------------------------------
  637. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  638. Section: Command line profiler metrics
  639. ---------------------------------------------------------------------- --------------- ------------------------------
  640. gpu__time_duration.sum usecond 60.03
  641. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  642. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  643. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  644. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  645. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  646. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  647. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  648. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  649. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  650. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  651. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  652. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  653. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  654. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  655. smsp__inst_executed.avg inst 12,288.48
  656. smsp__inst_executed.max inst 12,672
  657. smsp__inst_executed.min inst 11,868
  658. smsp__inst_executed.sum inst 786,463
  659. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  660. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  661. smsp__cycles_active.avg cycle 73,004.22
  662. smsp__cycles_active.sum cycle 4,672,270
  663. ---------------------------------------------------------------------- --------------- ------------------------------
  664. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  665. Section: Command line profiler metrics
  666. ---------------------------------------------------------------------- --------------- ------------------------------
  667. gpu__time_duration.sum usecond 58.08
  668. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  669. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  670. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  671. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  672. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  673. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  674. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  675. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  676. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  677. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  678. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  679. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  680. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  681. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  682. smsp__inst_executed.avg inst 12,289.81
  683. smsp__inst_executed.max inst 12,480
  684. smsp__inst_executed.min inst 12,068
  685. smsp__inst_executed.sum inst 786,548
  686. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  687. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  688. smsp__cycles_active.avg cycle 70,790.83
  689. smsp__cycles_active.sum cycle 4,530,613
  690. ---------------------------------------------------------------------- --------------- ------------------------------
  691. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  692. Section: Command line profiler metrics
  693. ---------------------------------------------------------------------- --------------- ------------------------------
  694. gpu__time_duration.sum usecond 58.46
  695. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  696. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  697. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  698. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  699. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  700. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  701. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  702. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  703. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  704. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  705. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  706. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  707. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  708. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  709. smsp__inst_executed.avg inst 12,290.59
  710. smsp__inst_executed.max inst 12,701
  711. smsp__inst_executed.min inst 12,068
  712. smsp__inst_executed.sum inst 786,598
  713. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  714. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  715. smsp__cycles_active.avg cycle 70,847.19
  716. smsp__cycles_active.sum cycle 4,534,220
  717. ---------------------------------------------------------------------- --------------- ------------------------------
  718. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  719. Section: Command line profiler metrics
  720. ---------------------------------------------------------------------- --------------- ------------------------------
  721. gpu__time_duration.sum usecond 58.27
  722. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  723. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  724. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  725. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  726. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  727. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  728. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  729. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  730. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  731. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  732. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  733. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  734. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  735. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  736. smsp__inst_executed.avg inst 12,293.72
  737. smsp__inst_executed.max inst 12,656
  738. smsp__inst_executed.min inst 12,038
  739. smsp__inst_executed.sum inst 786,798
  740. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  741. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  742. smsp__cycles_active.avg cycle 70,747
  743. smsp__cycles_active.sum cycle 4,527,808
  744. ---------------------------------------------------------------------- --------------- ------------------------------
  745. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  746. Section: Command line profiler metrics
  747. ---------------------------------------------------------------------- --------------- ------------------------------
  748. gpu__time_duration.sum usecond 57.95
  749. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  750. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  751. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  752. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  753. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  754. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  755. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  756. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  757. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  758. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  759. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  760. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  761. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  762. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  763. smsp__inst_executed.avg inst 12,298.14
  764. smsp__inst_executed.max inst 12,645
  765. smsp__inst_executed.min inst 12,029
  766. smsp__inst_executed.sum inst 787,081
  767. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  768. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  769. smsp__cycles_active.avg cycle 70,059.03
  770. smsp__cycles_active.sum cycle 4,483,778
  771. ---------------------------------------------------------------------- --------------- ------------------------------
  772. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  773. Section: Command line profiler metrics
  774. ---------------------------------------------------------------------- --------------- ------------------------------
  775. gpu__time_duration.sum usecond 59.58
  776. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  777. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  778. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  779. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  780. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  781. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  782. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  783. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  784. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  785. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  786. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  787. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  788. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  789. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  790. smsp__inst_executed.avg inst 12,308.86
  791. smsp__inst_executed.max inst 12,724
  792. smsp__inst_executed.min inst 11,654
  793. smsp__inst_executed.sum inst 787,767
  794. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  795. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  796. smsp__cycles_active.avg cycle 72,813.80
  797. smsp__cycles_active.sum cycle 4,660,083
  798. ---------------------------------------------------------------------- --------------- ------------------------------
  799. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  800. Section: Command line profiler metrics
  801. ---------------------------------------------------------------------- --------------- ------------------------------
  802. gpu__time_duration.sum usecond 231.90
  803. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  804. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  805. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  806. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  807. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  808. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  809. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  810. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  811. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.44
  812. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,942
  813. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,386
  814. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,711
  815. smsp__average_warp_latency_issue_stalled_barrier.pct % 125,049.38
  816. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.49
  817. smsp__inst_executed.avg inst 189,291.03
  818. smsp__inst_executed.max inst 192,313
  819. smsp__inst_executed.min inst 186,310
  820. smsp__inst_executed.sum inst 12,114,626
  821. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
  822. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  823. smsp__cycles_active.avg cycle 316,608.81
  824. smsp__cycles_active.sum cycle 20,262,964
  825. ---------------------------------------------------------------------- --------------- ------------------------------
  826. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  827. Section: Command line profiler metrics
  828. ---------------------------------------------------------------------- --------------- ------------------------------
  829. gpu__time_duration.sum usecond 58.78
  830. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  831. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  832. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  833. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  834. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  835. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  836. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  837. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  838. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  839. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  840. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  841. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  842. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  843. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  844. smsp__inst_executed.avg inst 12,287.95
  845. smsp__inst_executed.max inst 12,856
  846. smsp__inst_executed.min inst 11,904
  847. smsp__inst_executed.sum inst 786,429
  848. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  849. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  850. smsp__cycles_active.avg cycle 71,331.70
  851. smsp__cycles_active.sum cycle 4,565,229
  852. ---------------------------------------------------------------------- --------------- ------------------------------
  853. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  854. Section: Command line profiler metrics
  855. ---------------------------------------------------------------------- --------------- ------------------------------
  856. gpu__time_duration.sum usecond 59.94
  857. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  858. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  859. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  860. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  861. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  862. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  863. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  864. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  865. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  866. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  867. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  868. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  869. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  870. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  871. smsp__inst_executed.avg inst 12,288.03
  872. smsp__inst_executed.max inst 12,488
  873. smsp__inst_executed.min inst 11,888
  874. smsp__inst_executed.sum inst 786,434
  875. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  876. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  877. smsp__cycles_active.avg cycle 73,232.05
  878. smsp__cycles_active.sum cycle 4,686,851
  879. ---------------------------------------------------------------------- --------------- ------------------------------
  880. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  881. Section: Command line profiler metrics
  882. ---------------------------------------------------------------------- --------------- ------------------------------
  883. gpu__time_duration.sum usecond 58.27
  884. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  885. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  886. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  887. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  888. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  889. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  890. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  891. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  892. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  893. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  894. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  895. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  896. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  897. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  898. smsp__inst_executed.avg inst 12,289.50
  899. smsp__inst_executed.max inst 12,488
  900. smsp__inst_executed.min inst 12,072
  901. smsp__inst_executed.sum inst 786,528
  902. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  903. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  904. smsp__cycles_active.avg cycle 70,846.25
  905. smsp__cycles_active.sum cycle 4,534,160
  906. ---------------------------------------------------------------------- --------------- ------------------------------
  907. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  908. Section: Command line profiler metrics
  909. ---------------------------------------------------------------------- --------------- ------------------------------
  910. gpu__time_duration.sum usecond 58.11
  911. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  912. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  913. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  914. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  915. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  916. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  917. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  918. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  919. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  920. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  921. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  922. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  923. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  924. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  925. smsp__inst_executed.avg inst 12,290.84
  926. smsp__inst_executed.max inst 12,564
  927. smsp__inst_executed.min inst 12,104
  928. smsp__inst_executed.sum inst 786,614
  929. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  930. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  931. smsp__cycles_active.avg cycle 70,881.05
  932. smsp__cycles_active.sum cycle 4,536,387
  933. ---------------------------------------------------------------------- --------------- ------------------------------
  934. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  935. Section: Command line profiler metrics
  936. ---------------------------------------------------------------------- --------------- ------------------------------
  937. gpu__time_duration.sum usecond 58.40
  938. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  939. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  940. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  941. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  942. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  943. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  944. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  945. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  946. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  947. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  948. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  949. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  950. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  951. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  952. smsp__inst_executed.avg inst 12,293.73
  953. smsp__inst_executed.max inst 12,757
  954. smsp__inst_executed.min inst 11,970
  955. smsp__inst_executed.sum inst 786,799
  956. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  957. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  958. smsp__cycles_active.avg cycle 71,142.94
  959. smsp__cycles_active.sum cycle 4,553,148
  960. ---------------------------------------------------------------------- --------------- ------------------------------
  961. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  962. Section: Command line profiler metrics
  963. ---------------------------------------------------------------------- --------------- ------------------------------
  964. gpu__time_duration.sum usecond 57.95
  965. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  966. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  967. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  968. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  969. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  970. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  971. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  972. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  973. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  974. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  975. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  976. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  977. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  978. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  979. smsp__inst_executed.avg inst 12,298.62
  980. smsp__inst_executed.max inst 12,553
  981. smsp__inst_executed.min inst 12,119
  982. smsp__inst_executed.sum inst 787,112
  983. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  984. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  985. smsp__cycles_active.avg cycle 70,189.52
  986. smsp__cycles_active.sum cycle 4,492,129
  987. ---------------------------------------------------------------------- --------------- ------------------------------
  988. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  989. Section: Command line profiler metrics
  990. ---------------------------------------------------------------------- --------------- ------------------------------
  991. gpu__time_duration.sum usecond 59.71
  992. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  993. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  994. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  995. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  996. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  997. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  998. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  999. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1000. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1001. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1002. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1003. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1004. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1005. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1006. smsp__inst_executed.avg inst 12,309.52
  1007. smsp__inst_executed.max inst 12,538
  1008. smsp__inst_executed.min inst 12,074
  1009. smsp__inst_executed.sum inst 787,809
  1010. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1011. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1012. smsp__cycles_active.avg cycle 72,879.23
  1013. smsp__cycles_active.sum cycle 4,664,271
  1014. ---------------------------------------------------------------------- --------------- ------------------------------
  1015. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1016. Section: Command line profiler metrics
  1017. ---------------------------------------------------------------------- --------------- ------------------------------
  1018. gpu__time_duration.sum usecond 231.42
  1019. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1020. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1021. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1022. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1023. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1024. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1025. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1026. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1027. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673
  1028. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,007
  1029. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,299
  1030. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,768
  1031. smsp__average_warp_latency_issue_stalled_barrier.pct % 124,557.10
  1032. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.57
  1033. smsp__inst_executed.avg inst 189,303.22
  1034. smsp__inst_executed.max inst 192,317
  1035. smsp__inst_executed.min inst 186,277
  1036. smsp__inst_executed.sum inst 12,115,406
  1037. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96
  1038. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1039. smsp__cycles_active.avg cycle 315,741.19
  1040. smsp__cycles_active.sum cycle 20,207,436
  1041. ---------------------------------------------------------------------- --------------- ------------------------------
  1042. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1043. Section: Command line profiler metrics
  1044. ---------------------------------------------------------------------- --------------- ------------------------------
  1045. gpu__time_duration.sum usecond 58.40
  1046. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1047. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1048. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1049. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1050. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1051. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1052. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1053. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1054. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1055. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1056. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1057. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1058. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1059. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1060. smsp__inst_executed.avg inst 12,287.92
  1061. smsp__inst_executed.max inst 12,648
  1062. smsp__inst_executed.min inst 11,912
  1063. smsp__inst_executed.sum inst 786,427
  1064. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1065. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1066. smsp__cycles_active.avg cycle 71,978.88
  1067. smsp__cycles_active.sum cycle 4,606,648
  1068. ---------------------------------------------------------------------- --------------- ------------------------------
  1069. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1070. Section: Command line profiler metrics
  1071. ---------------------------------------------------------------------- --------------- ------------------------------
  1072. gpu__time_duration.sum usecond 58.62
  1073. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1074. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1075. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1076. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1077. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1078. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1079. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1080. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1081. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1082. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1083. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1084. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1085. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1086. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1087. smsp__inst_executed.avg inst 12,288.30
  1088. smsp__inst_executed.max inst 12,848
  1089. smsp__inst_executed.min inst 11,904
  1090. smsp__inst_executed.sum inst 786,451
  1091. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1092. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1093. smsp__cycles_active.avg cycle 71,708.22
  1094. smsp__cycles_active.sum cycle 4,589,326
  1095. ---------------------------------------------------------------------- --------------- ------------------------------
  1096. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1097. Section: Command line profiler metrics
  1098. ---------------------------------------------------------------------- --------------- ------------------------------
  1099. gpu__time_duration.sum usecond 60.19
  1100. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1101. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1102. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1103. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1104. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1105. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1106. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1107. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1108. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1109. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1110. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1111. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1112. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1113. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1114. smsp__inst_executed.avg inst 12,289.11
  1115. smsp__inst_executed.max inst 12,876
  1116. smsp__inst_executed.min inst 11,688
  1117. smsp__inst_executed.sum inst 786,503
  1118. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1119. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1120. smsp__cycles_active.avg cycle 73,332.14
  1121. smsp__cycles_active.sum cycle 4,693,257
  1122. ---------------------------------------------------------------------- --------------- ------------------------------
  1123. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1124. Section: Command line profiler metrics
  1125. ---------------------------------------------------------------------- --------------- ------------------------------
  1126. gpu__time_duration.sum usecond 58.50
  1127. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1128. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1129. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1130. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1131. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1132. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1133. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1134. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1135. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1136. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1137. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1138. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1139. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1140. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1141. smsp__inst_executed.avg inst 12,288.89
  1142. smsp__inst_executed.max inst 12,507
  1143. smsp__inst_executed.min inst 12,092
  1144. smsp__inst_executed.sum inst 786,489
  1145. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1146. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1147. smsp__cycles_active.avg cycle 70,441.14
  1148. smsp__cycles_active.sum cycle 4,508,233
  1149. ---------------------------------------------------------------------- --------------- ------------------------------
  1150. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1151. Section: Command line profiler metrics
  1152. ---------------------------------------------------------------------- --------------- ------------------------------
  1153. gpu__time_duration.sum usecond 58.30
  1154. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1155. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1156. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1157. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1158. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1159. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1160. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1161. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1162. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1163. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1164. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1165. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1166. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1167. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1168. smsp__inst_executed.avg inst 12,290.69
  1169. smsp__inst_executed.max inst 12,682
  1170. smsp__inst_executed.min inst 11,866
  1171. smsp__inst_executed.sum inst 786,604
  1172. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1173. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1174. smsp__cycles_active.avg cycle 70,768.55
  1175. smsp__cycles_active.sum cycle 4,529,187
  1176. ---------------------------------------------------------------------- --------------- ------------------------------
  1177. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1178. Section: Command line profiler metrics
  1179. ---------------------------------------------------------------------- --------------- ------------------------------
  1180. gpu__time_duration.sum usecond 58.62
  1181. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1182. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1183. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1184. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1185. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1186. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1187. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1188. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1189. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1190. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1191. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1192. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1193. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1194. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1195. smsp__inst_executed.avg inst 12,293.67
  1196. smsp__inst_executed.max inst 12,534
  1197. smsp__inst_executed.min inst 11,732
  1198. smsp__inst_executed.sum inst 786,795
  1199. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1200. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1201. smsp__cycles_active.avg cycle 71,007.56
  1202. smsp__cycles_active.sum cycle 4,544,484
  1203. ---------------------------------------------------------------------- --------------- ------------------------------
  1204. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1205. Section: Command line profiler metrics
  1206. ---------------------------------------------------------------------- --------------- ------------------------------
  1207. gpu__time_duration.sum usecond 58.05
  1208. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1209. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1210. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1211. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1212. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1213. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1214. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1215. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1216. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1217. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1218. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1219. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1220. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1221. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1222. smsp__inst_executed.avg inst 12,299.09
  1223. smsp__inst_executed.max inst 12,656
  1224. smsp__inst_executed.min inst 11,912
  1225. smsp__inst_executed.sum inst 787,142
  1226. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1227. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1228. smsp__cycles_active.avg cycle 70,781.25
  1229. smsp__cycles_active.sum cycle 4,530,000
  1230. ---------------------------------------------------------------------- --------------- ------------------------------
  1231. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1232. Section: Command line profiler metrics
  1233. ---------------------------------------------------------------------- --------------- ------------------------------
  1234. gpu__time_duration.sum usecond 59.14
  1235. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1236. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1237. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1238. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1239. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1240. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1241. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1242. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1243. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1244. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1245. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1246. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1247. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1248. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1249. smsp__inst_executed.avg inst 12,309.02
  1250. smsp__inst_executed.max inst 12,707
  1251. smsp__inst_executed.min inst 11,847
  1252. smsp__inst_executed.sum inst 787,777
  1253. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1254. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1255. smsp__cycles_active.avg cycle 72,505.88
  1256. smsp__cycles_active.sum cycle 4,640,376
  1257. ---------------------------------------------------------------------- --------------- ------------------------------
  1258. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1259. Section: Command line profiler metrics
  1260. ---------------------------------------------------------------------- --------------- ------------------------------
  1261. gpu__time_duration.sum usecond 231.14
  1262. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1263. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1264. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1265. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1266. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1267. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1268. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1269. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1270. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,666.06
  1271. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,013
  1272. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,348
  1273. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,657
  1274. smsp__average_warp_latency_issue_stalled_barrier.pct % 124,275.15
  1275. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,242.75
  1276. smsp__inst_executed.avg inst 189,315.86
  1277. smsp__inst_executed.max inst 192,371
  1278. smsp__inst_executed.min inst 186,294
  1279. smsp__inst_executed.sum inst 12,116,215
  1280. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.90
  1281. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1282. smsp__cycles_active.avg cycle 316,297.72
  1283. smsp__cycles_active.sum cycle 20,243,054
  1284. ---------------------------------------------------------------------- --------------- ------------------------------
  1285. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1286. Section: Command line profiler metrics
  1287. ---------------------------------------------------------------------- --------------- ------------------------------
  1288. gpu__time_duration.sum usecond 60.42
  1289. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1290. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1291. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1292. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1293. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1294. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1295. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1296. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1297. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1298. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1299. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1300. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1301. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1302. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1303. smsp__inst_executed.avg inst 12,288.20
  1304. smsp__inst_executed.max inst 12,484
  1305. smsp__inst_executed.min inst 12,092
  1306. smsp__inst_executed.sum inst 786,445
  1307. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1308. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1309. smsp__cycles_active.avg cycle 74,382.31
  1310. smsp__cycles_active.sum cycle 4,760,468
  1311. ---------------------------------------------------------------------- --------------- ------------------------------
  1312. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1313. Section: Command line profiler metrics
  1314. ---------------------------------------------------------------------- --------------- ------------------------------
  1315. gpu__time_duration.sum usecond 58.88
  1316. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1317. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1318. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1319. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1320. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1321. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1322. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1323. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1324. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1325. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1326. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1327. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1328. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1329. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1330. smsp__inst_executed.avg inst 12,288.11
  1331. smsp__inst_executed.max inst 12,484
  1332. smsp__inst_executed.min inst 11,716
  1333. smsp__inst_executed.sum inst 786,439
  1334. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1335. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1336. smsp__cycles_active.avg cycle 71,860.06
  1337. smsp__cycles_active.sum cycle 4,599,044
  1338. ---------------------------------------------------------------------- --------------- ------------------------------
  1339. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1340. Section: Command line profiler metrics
  1341. ---------------------------------------------------------------------- --------------- ------------------------------
  1342. gpu__time_duration.sum usecond 59.04
  1343. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1344. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1345. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1346. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1347. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1348. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1349. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1350. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1351. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1352. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1353. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1354. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1355. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1356. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1357. smsp__inst_executed.avg inst 12,288.05
  1358. smsp__inst_executed.max inst 12,664
  1359. smsp__inst_executed.min inst 11,700
  1360. smsp__inst_executed.sum inst 786,435
  1361. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1362. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1363. smsp__cycles_active.avg cycle 71,882.38
  1364. smsp__cycles_active.sum cycle 4,600,472
  1365. ---------------------------------------------------------------------- --------------- ------------------------------
  1366. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1367. Section: Command line profiler metrics
  1368. ---------------------------------------------------------------------- --------------- ------------------------------
  1369. gpu__time_duration.sum usecond 60.13
  1370. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1371. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1372. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1373. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1374. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1375. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1376. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1377. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1378. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1379. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1380. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1381. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1382. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1383. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1384. smsp__inst_executed.avg inst 12,288.81
  1385. smsp__inst_executed.max inst 12,870
  1386. smsp__inst_executed.min inst 11,908
  1387. smsp__inst_executed.sum inst 786,484
  1388. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1389. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1390. smsp__cycles_active.avg cycle 73,247.75
  1391. smsp__cycles_active.sum cycle 4,687,856
  1392. ---------------------------------------------------------------------- --------------- ------------------------------
  1393. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1394. Section: Command line profiler metrics
  1395. ---------------------------------------------------------------------- --------------- ------------------------------
  1396. gpu__time_duration.sum usecond 57.89
  1397. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1398. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1399. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1400. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1401. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1402. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1403. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1404. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1405. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1406. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1407. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1408. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1409. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1410. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1411. smsp__inst_executed.avg inst 12,289.59
  1412. smsp__inst_executed.max inst 12,494
  1413. smsp__inst_executed.min inst 11,898
  1414. smsp__inst_executed.sum inst 786,534
  1415. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1416. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1417. smsp__cycles_active.avg cycle 70,630.66
  1418. smsp__cycles_active.sum cycle 4,520,362
  1419. ---------------------------------------------------------------------- --------------- ------------------------------
  1420. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1421. Section: Command line profiler metrics
  1422. ---------------------------------------------------------------------- --------------- ------------------------------
  1423. gpu__time_duration.sum usecond 58.14
  1424. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1425. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1426. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1427. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1428. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1429. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1430. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1431. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1432. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1433. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1434. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1435. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1436. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1437. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1438. smsp__inst_executed.avg inst 12,291.27
  1439. smsp__inst_executed.max inst 12,510
  1440. smsp__inst_executed.min inst 12,082
  1441. smsp__inst_executed.sum inst 786,641
  1442. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1443. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1444. smsp__cycles_active.avg cycle 70,548.77
  1445. smsp__cycles_active.sum cycle 4,515,121
  1446. ---------------------------------------------------------------------- --------------- ------------------------------
  1447. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1448. Section: Command line profiler metrics
  1449. ---------------------------------------------------------------------- --------------- ------------------------------
  1450. gpu__time_duration.sum usecond 58.66
  1451. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1452. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1453. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1454. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1455. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1456. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1457. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1458. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1459. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1460. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1461. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1462. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1463. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1464. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1465. smsp__inst_executed.avg inst 12,294.64
  1466. smsp__inst_executed.max inst 12,656
  1467. smsp__inst_executed.min inst 11,924
  1468. smsp__inst_executed.sum inst 786,857
  1469. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1470. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1471. smsp__cycles_active.avg cycle 71,171.45
  1472. smsp__cycles_active.sum cycle 4,554,973
  1473. ---------------------------------------------------------------------- --------------- ------------------------------
  1474. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1475. Section: Command line profiler metrics
  1476. ---------------------------------------------------------------------- --------------- ------------------------------
  1477. gpu__time_duration.sum usecond 57.86
  1478. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1479. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1480. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1481. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1482. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1483. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1484. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1485. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1486. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1487. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1488. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1489. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1490. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1491. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1492. smsp__inst_executed.avg inst 12,301.05
  1493. smsp__inst_executed.max inst 12,725
  1494. smsp__inst_executed.min inst 11,871
  1495. smsp__inst_executed.sum inst 787,267
  1496. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1497. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1498. smsp__cycles_active.avg cycle 70,490.50
  1499. smsp__cycles_active.sum cycle 4,511,392
  1500. ---------------------------------------------------------------------- --------------- ------------------------------
  1501. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1502. Section: Command line profiler metrics
  1503. ---------------------------------------------------------------------- --------------- ------------------------------
  1504. gpu__time_duration.sum usecond 59.17
  1505. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1506. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1507. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1508. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1509. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1510. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1511. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1512. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1513. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1514. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1515. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1516. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1517. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1518. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1519. smsp__inst_executed.avg inst 12,316.05
  1520. smsp__inst_executed.max inst 12,594
  1521. smsp__inst_executed.min inst 11,865
  1522. smsp__inst_executed.sum inst 788,227
  1523. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1524. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1525. smsp__cycles_active.avg cycle 72,533.61
  1526. smsp__cycles_active.sum cycle 4,642,151
  1527. ---------------------------------------------------------------------- --------------- ------------------------------
  1528. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1529. Section: Command line profiler metrics
  1530. ---------------------------------------------------------------------- --------------- ------------------------------
  1531. gpu__time_duration.sum usecond 231.55
  1532. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1533. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1534. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1535. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1536. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1537. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1538. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1539. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1540. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,681.88
  1541. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,120
  1542. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,332
  1543. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,910
  1544. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,982.60
  1545. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.83
  1546. smsp__inst_executed.avg inst 189,283.48
  1547. smsp__inst_executed.max inst 192,309
  1548. smsp__inst_executed.min inst 186,242
  1549. smsp__inst_executed.sum inst 12,114,143
  1550. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.88
  1551. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1552. smsp__cycles_active.avg cycle 316,209.50
  1553. smsp__cycles_active.sum cycle 20,237,408
  1554. ---------------------------------------------------------------------- --------------- ------------------------------
  1555. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1556. Section: Command line profiler metrics
  1557. ---------------------------------------------------------------------- --------------- ------------------------------
  1558. gpu__time_duration.sum usecond 56.70
  1559. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1560. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1561. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1562. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1563. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1564. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1565. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1566. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1567. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1568. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1569. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1570. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1571. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1572. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1573. smsp__inst_executed.avg inst 12,287.97
  1574. smsp__inst_executed.max inst 12,492
  1575. smsp__inst_executed.min inst 11,896
  1576. smsp__inst_executed.sum inst 786,430
  1577. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1578. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1579. smsp__cycles_active.avg cycle 68,714
  1580. smsp__cycles_active.sum cycle 4,397,696
  1581. ---------------------------------------------------------------------- --------------- ------------------------------
  1582. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1583. Section: Command line profiler metrics
  1584. ---------------------------------------------------------------------- --------------- ------------------------------
  1585. gpu__time_duration.sum usecond 60.64
  1586. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1587. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1588. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1589. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1590. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1591. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1592. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1593. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1594. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1595. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1596. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1597. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1598. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1599. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1600. smsp__inst_executed.avg inst 12,288.14
  1601. smsp__inst_executed.max inst 12,844
  1602. smsp__inst_executed.min inst 11,528
  1603. smsp__inst_executed.sum inst 786,441
  1604. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1605. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1606. smsp__cycles_active.avg cycle 74,171.33
  1607. smsp__cycles_active.sum cycle 4,746,965
  1608. ---------------------------------------------------------------------- --------------- ------------------------------
  1609. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1610. Section: Command line profiler metrics
  1611. ---------------------------------------------------------------------- --------------- ------------------------------
  1612. gpu__time_duration.sum usecond 58.72
  1613. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1614. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1615. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1616. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1617. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1618. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1619. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1620. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1621. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1622. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1623. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1624. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1625. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1626. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1627. smsp__inst_executed.avg inst 12,288.55
  1628. smsp__inst_executed.max inst 12,684
  1629. smsp__inst_executed.min inst 11,884
  1630. smsp__inst_executed.sum inst 786,467
  1631. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1632. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1633. smsp__cycles_active.avg cycle 71,696.42
  1634. smsp__cycles_active.sum cycle 4,588,571
  1635. ---------------------------------------------------------------------- --------------- ------------------------------
  1636. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1637. Section: Command line profiler metrics
  1638. ---------------------------------------------------------------------- --------------- ------------------------------
  1639. gpu__time_duration.sum usecond 58.94
  1640. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1641. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1642. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1643. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1644. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1645. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1646. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1647. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1648. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1649. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1650. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1651. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1652. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1653. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1654. smsp__inst_executed.avg inst 12,288.08
  1655. smsp__inst_executed.max inst 12,660
  1656. smsp__inst_executed.min inst 11,724
  1657. smsp__inst_executed.sum inst 786,437
  1658. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1659. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1660. smsp__cycles_active.avg cycle 71,640.89
  1661. smsp__cycles_active.sum cycle 4,585,017
  1662. ---------------------------------------------------------------------- --------------- ------------------------------
  1663. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1664. Section: Command line profiler metrics
  1665. ---------------------------------------------------------------------- --------------- ------------------------------
  1666. gpu__time_duration.sum usecond 60.06
  1667. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1668. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1669. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1670. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1671. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1672. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1673. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1674. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1675. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1676. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1677. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1678. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1679. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1680. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1681. smsp__inst_executed.avg inst 12,288.06
  1682. smsp__inst_executed.max inst 12,524
  1683. smsp__inst_executed.min inst 11,900
  1684. smsp__inst_executed.sum inst 786,436
  1685. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1686. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1687. smsp__cycles_active.avg cycle 73,132.61
  1688. smsp__cycles_active.sum cycle 4,680,487
  1689. ---------------------------------------------------------------------- --------------- ------------------------------
  1690. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1691. Section: Command line profiler metrics
  1692. ---------------------------------------------------------------------- --------------- ------------------------------
  1693. gpu__time_duration.sum usecond 58.08
  1694. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1695. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1696. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1697. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1698. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1699. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1700. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1701. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1702. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1703. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1704. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1705. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1706. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1707. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1708. smsp__inst_executed.avg inst 12,289.61
  1709. smsp__inst_executed.max inst 12,634
  1710. smsp__inst_executed.min inst 11,884
  1711. smsp__inst_executed.sum inst 786,535
  1712. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1713. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1714. smsp__cycles_active.avg cycle 70,620.73
  1715. smsp__cycles_active.sum cycle 4,519,727
  1716. ---------------------------------------------------------------------- --------------- ------------------------------
  1717. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1718. Section: Command line profiler metrics
  1719. ---------------------------------------------------------------------- --------------- ------------------------------
  1720. gpu__time_duration.sum usecond 58.24
  1721. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1722. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1723. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1724. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1725. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1726. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1727. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1728. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1729. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1730. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1731. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1732. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1733. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1734. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1735. smsp__inst_executed.avg inst 12,291.28
  1736. smsp__inst_executed.max inst 12,704
  1737. smsp__inst_executed.min inst 11,892
  1738. smsp__inst_executed.sum inst 786,642
  1739. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1740. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1741. smsp__cycles_active.avg cycle 71,037.52
  1742. smsp__cycles_active.sum cycle 4,546,401
  1743. ---------------------------------------------------------------------- --------------- ------------------------------
  1744. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1745. Section: Command line profiler metrics
  1746. ---------------------------------------------------------------------- --------------- ------------------------------
  1747. gpu__time_duration.sum usecond 58.82
  1748. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1749. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1750. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1751. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1752. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1753. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1754. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1755. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1756. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1757. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1758. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1759. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1760. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1761. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1762. smsp__inst_executed.avg inst 12,293.23
  1763. smsp__inst_executed.max inst 12,931
  1764. smsp__inst_executed.min inst 11,840
  1765. smsp__inst_executed.sum inst 786,767
  1766. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1767. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1768. smsp__cycles_active.avg cycle 70,840.56
  1769. smsp__cycles_active.sum cycle 4,533,796
  1770. ---------------------------------------------------------------------- --------------- ------------------------------
  1771. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1772. Section: Command line profiler metrics
  1773. ---------------------------------------------------------------------- --------------- ------------------------------
  1774. gpu__time_duration.sum usecond 58.24
  1775. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1776. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1777. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1778. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1779. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1780. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1781. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1782. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1783. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1784. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1785. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1786. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1787. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1788. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1789. smsp__inst_executed.avg inst 12,298.42
  1790. smsp__inst_executed.max inst 12,587
  1791. smsp__inst_executed.min inst 11,966
  1792. smsp__inst_executed.sum inst 787,099
  1793. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1794. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1795. smsp__cycles_active.avg cycle 70,543.30
  1796. smsp__cycles_active.sum cycle 4,514,771
  1797. ---------------------------------------------------------------------- --------------- ------------------------------
  1798. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1799. Section: Command line profiler metrics
  1800. ---------------------------------------------------------------------- --------------- ------------------------------
  1801. gpu__time_duration.sum usecond 59.39
  1802. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1803. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1804. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1805. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1806. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1807. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1808. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1809. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1810. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1811. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1812. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1813. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1814. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1815. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1816. smsp__inst_executed.avg inst 12,309.44
  1817. smsp__inst_executed.max inst 12,751
  1818. smsp__inst_executed.min inst 11,714
  1819. smsp__inst_executed.sum inst 787,804
  1820. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1821. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1822. smsp__cycles_active.avg cycle 72,313.14
  1823. smsp__cycles_active.sum cycle 4,628,041
  1824. ---------------------------------------------------------------------- --------------- ------------------------------
  1825. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1826. Section: Command line profiler metrics
  1827. ---------------------------------------------------------------------- --------------- ------------------------------
  1828. gpu__time_duration.sum usecond 228.54
  1829. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1830. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1831. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1832. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1833. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1834. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1835. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1836. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1837. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,691.25
  1838. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,988
  1839. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,367
  1840. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,060
  1841. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,962.42
  1842. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.62
  1843. smsp__inst_executed.avg inst 189,051.73
  1844. smsp__inst_executed.max inst 192,054
  1845. smsp__inst_executed.min inst 186,060
  1846. smsp__inst_executed.sum inst 12,099,311
  1847. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.83
  1848. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1849. smsp__cycles_active.avg cycle 317,268.88
  1850. smsp__cycles_active.sum cycle 20,305,208
  1851. ---------------------------------------------------------------------- --------------- ------------------------------