AUTH's THMMY "Parallel and distributed systems" course assignments.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 
 
 
 

1918 lignes
212 KiB

  1. ==PROF== Connected to process 20279 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
  2. ==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes
  3. ==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes
  4. ==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes
  5. ==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes
  6. ==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes
  7. ==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes
  8. ==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes
  9. ==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes
  10. ==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes
  11. ==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes
  12. ==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes
  13. ==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes
  14. ==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes
  15. ==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes
  16. ==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes
  17. ==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes
  18. ==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes
  19. ==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes
  20. ==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes
  21. ==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes
  22. ==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes
  23. ==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes
  24. ==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes
  25. ==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes
  26. ==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes
  27. ==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes
  28. ==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes
  29. ==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes
  30. ==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes
  31. ==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes
  32. ==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes
  33. ==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes
  34. ==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes
  35. ==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes
  36. ==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes
  37. ==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes
  38. ==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes
  39. ==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes
  40. ==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes
  41. ==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes
  42. ==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes
  43. ==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes
  44. ==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes
  45. ==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes
  46. ==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes
  47. ==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes
  48. ==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes
  49. ==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes
  50. ==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes
  51. ==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes
  52. ==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes
  53. ==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes
  54. ==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes
  55. ==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes
  56. ==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes
  57. ==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes
  58. ==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes
  59. ==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes
  60. ==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes
  61. ==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes
  62. ==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes
  63. ==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes
  64. ==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes
  65. ==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes
  66. ==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes
  67. ==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes
  68. ==PROF== Disconnected from process 20279
  69. [20279] bitonicCUDA@127.0.0.1
  70. void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  71. Section: Command line profiler metrics
  72. ---------------------------------------------------------------------- --------------- ------------------------------
  73. gpu__time_duration.sum msecond 1.20
  74. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  75. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  76. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  77. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  78. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368
  79. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368
  80. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368
  81. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888
  82. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,946.88
  83. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,116
  84. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,795
  85. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,150
  86. smsp__average_warp_latency_issue_stalled_barrier.pct % 644,345.26
  87. smsp__average_warp_latency_issue_stalled_barrier.ratio 6,443.45
  88. smsp__inst_executed.avg inst 1,030,868.94
  89. smsp__inst_executed.max inst 1,031,062
  90. smsp__inst_executed.min inst 1,030,675
  91. smsp__inst_executed.sum inst 65,975,612
  92. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50
  93. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
  94. smsp__cycles_active.avg cycle 1,666,829.12
  95. smsp__cycles_active.sum cycle 106,677,064
  96. ---------------------------------------------------------------------- --------------- ------------------------------
  97. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  98. Section: Command line profiler metrics
  99. ---------------------------------------------------------------------- --------------- ------------------------------
  100. gpu__time_duration.sum usecond 59.84
  101. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  102. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  103. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  104. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  105. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  106. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  107. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  108. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  109. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  110. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  111. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  112. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  113. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  114. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  115. smsp__inst_executed.avg inst 12,308.59
  116. smsp__inst_executed.max inst 12,538
  117. smsp__inst_executed.min inst 11,945
  118. smsp__inst_executed.sum inst 787,750
  119. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  120. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  121. smsp__cycles_active.avg cycle 73,268.67
  122. smsp__cycles_active.sum cycle 4,689,195
  123. ---------------------------------------------------------------------- --------------- ------------------------------
  124. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  125. Section: Command line profiler metrics
  126. ---------------------------------------------------------------------- --------------- ------------------------------
  127. gpu__time_duration.sum usecond 231.30
  128. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  129. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  130. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  131. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  132. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  133. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  134. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  135. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  136. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,642.38
  137. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,963
  138. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,322
  139. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,278
  140. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,392.55
  141. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,233.93
  142. smsp__inst_executed.avg inst 189,292.45
  143. smsp__inst_executed.max inst 192,372
  144. smsp__inst_executed.min inst 186,246
  145. smsp__inst_executed.sum inst 12,114,717
  146. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81
  147. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  148. smsp__cycles_active.avg cycle 316,267.31
  149. smsp__cycles_active.sum cycle 20,241,108
  150. ---------------------------------------------------------------------- --------------- ------------------------------
  151. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  152. Section: Command line profiler metrics
  153. ---------------------------------------------------------------------- --------------- ------------------------------
  154. gpu__time_duration.sum usecond 58.34
  155. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  156. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  157. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  158. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  159. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  160. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  161. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  162. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  163. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  164. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  165. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  166. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  167. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  168. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  169. smsp__inst_executed.avg inst 12,298.58
  170. smsp__inst_executed.max inst 12,667
  171. smsp__inst_executed.min inst 11,936
  172. smsp__inst_executed.sum inst 787,109
  173. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  174. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  175. smsp__cycles_active.avg cycle 70,505.30
  176. smsp__cycles_active.sum cycle 4,512,339
  177. ---------------------------------------------------------------------- --------------- ------------------------------
  178. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  179. Section: Command line profiler metrics
  180. ---------------------------------------------------------------------- --------------- ------------------------------
  181. gpu__time_duration.sum usecond 59.55
  182. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  183. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  184. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  185. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  186. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  187. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  188. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  189. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  190. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  191. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  192. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  193. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  194. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  195. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  196. smsp__inst_executed.avg inst 12,309.17
  197. smsp__inst_executed.max inst 12,702
  198. smsp__inst_executed.min inst 11,606
  199. smsp__inst_executed.sum inst 787,787
  200. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  201. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  202. smsp__cycles_active.avg cycle 72,897.17
  203. smsp__cycles_active.sum cycle 4,665,419
  204. ---------------------------------------------------------------------- --------------- ------------------------------
  205. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  206. Section: Command line profiler metrics
  207. ---------------------------------------------------------------------- --------------- ------------------------------
  208. gpu__time_duration.sum usecond 230.91
  209. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  210. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  211. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  212. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  213. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  214. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  215. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  216. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  217. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,680
  218. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,009
  219. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,334
  220. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,880
  221. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,674.16
  222. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,236.74
  223. smsp__inst_executed.avg inst 189,294.36
  224. smsp__inst_executed.max inst 192,238
  225. smsp__inst_executed.min inst 186,252
  226. smsp__inst_executed.sum inst 12,114,839
  227. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85
  228. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  229. smsp__cycles_active.avg cycle 316,040.81
  230. smsp__cycles_active.sum cycle 20,226,612
  231. ---------------------------------------------------------------------- --------------- ------------------------------
  232. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  233. Section: Command line profiler metrics
  234. ---------------------------------------------------------------------- --------------- ------------------------------
  235. gpu__time_duration.sum usecond 58.72
  236. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  237. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  238. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  239. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  240. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  241. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  242. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  243. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  244. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  245. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  246. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  247. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  248. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  249. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  250. smsp__inst_executed.avg inst 12,293.78
  251. smsp__inst_executed.max inst 12,542
  252. smsp__inst_executed.min inst 11,960
  253. smsp__inst_executed.sum inst 786,802
  254. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  255. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  256. smsp__cycles_active.avg cycle 71,235.28
  257. smsp__cycles_active.sum cycle 4,559,058
  258. ---------------------------------------------------------------------- --------------- ------------------------------
  259. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  260. Section: Command line profiler metrics
  261. ---------------------------------------------------------------------- --------------- ------------------------------
  262. gpu__time_duration.sum usecond 58.56
  263. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  264. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  265. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  266. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  267. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  268. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  269. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  270. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  271. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  272. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  273. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  274. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  275. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  276. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  277. smsp__inst_executed.avg inst 12,298.95
  278. smsp__inst_executed.max inst 12,560
  279. smsp__inst_executed.min inst 12,096
  280. smsp__inst_executed.sum inst 787,133
  281. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  282. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  283. smsp__cycles_active.avg cycle 70,575.53
  284. smsp__cycles_active.sum cycle 4,516,834
  285. ---------------------------------------------------------------------- --------------- ------------------------------
  286. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  287. Section: Command line profiler metrics
  288. ---------------------------------------------------------------------- --------------- ------------------------------
  289. gpu__time_duration.sum usecond 59.42
  290. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  291. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  292. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  293. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  294. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  295. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  296. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  297. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  298. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  299. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  300. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  301. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  302. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  303. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  304. smsp__inst_executed.avg inst 12,308.61
  305. smsp__inst_executed.max inst 12,640
  306. smsp__inst_executed.min inst 12,096
  307. smsp__inst_executed.sum inst 787,751
  308. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  309. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  310. smsp__cycles_active.avg cycle 72,641.39
  311. smsp__cycles_active.sum cycle 4,649,049
  312. ---------------------------------------------------------------------- --------------- ------------------------------
  313. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
  314. Section: Command line profiler metrics
  315. ---------------------------------------------------------------------- --------------- ------------------------------
  316. gpu__time_duration.sum usecond 231.87
  317. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  318. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  319. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  320. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  321. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  322. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  323. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  324. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  325. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,674.75
  326. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
  327. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,354
  328. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,796
  329. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,483.94
  330. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,234.84
  331. smsp__inst_executed.avg inst 189,288.14
  332. smsp__inst_executed.max inst 192,081
  333. smsp__inst_executed.min inst 186,477
  334. smsp__inst_executed.sum inst 12,114,441
  335. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
  336. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  337. smsp__cycles_active.avg cycle 315,433.75
  338. smsp__cycles_active.sum cycle 20,187,760
  339. ---------------------------------------------------------------------- --------------- ------------------------------
  340. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  341. Section: Command line profiler metrics
  342. ---------------------------------------------------------------------- --------------- ------------------------------
  343. gpu__time_duration.sum usecond 58.14
  344. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  345. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  346. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  347. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  348. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  349. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  350. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  351. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  352. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  353. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  354. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  355. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  356. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  357. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  358. smsp__inst_executed.avg inst 12,290.34
  359. smsp__inst_executed.max inst 12,724
  360. smsp__inst_executed.min inst 12,076
  361. smsp__inst_executed.sum inst 786,582
  362. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  363. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  364. smsp__cycles_active.avg cycle 70,402.61
  365. smsp__cycles_active.sum cycle 4,505,767
  366. ---------------------------------------------------------------------- --------------- ------------------------------
  367. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  368. Section: Command line profiler metrics
  369. ---------------------------------------------------------------------- --------------- ------------------------------
  370. gpu__time_duration.sum usecond 58.56
  371. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  372. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  373. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  374. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  375. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  376. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  377. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  378. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  379. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  380. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  381. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  382. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  383. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  384. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  385. smsp__inst_executed.avg inst 12,294.27
  386. smsp__inst_executed.max inst 12,717
  387. smsp__inst_executed.min inst 11,988
  388. smsp__inst_executed.sum inst 786,833
  389. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  390. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  391. smsp__cycles_active.avg cycle 70,681.59
  392. smsp__cycles_active.sum cycle 4,523,622
  393. ---------------------------------------------------------------------- --------------- ------------------------------
  394. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  395. Section: Command line profiler metrics
  396. ---------------------------------------------------------------------- --------------- ------------------------------
  397. gpu__time_duration.sum usecond 58.05
  398. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  399. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  400. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  401. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  402. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  403. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  404. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  405. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  406. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  407. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  408. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  409. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  410. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  411. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  412. smsp__inst_executed.avg inst 12,298.42
  413. smsp__inst_executed.max inst 12,663
  414. smsp__inst_executed.min inst 11,882
  415. smsp__inst_executed.sum inst 787,099
  416. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  417. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  418. smsp__cycles_active.avg cycle 70,688.28
  419. smsp__cycles_active.sum cycle 4,524,050
  420. ---------------------------------------------------------------------- --------------- ------------------------------
  421. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  422. Section: Command line profiler metrics
  423. ---------------------------------------------------------------------- --------------- ------------------------------
  424. gpu__time_duration.sum usecond 59.49
  425. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  426. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  427. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  428. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  429. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  430. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  431. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  432. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  433. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  434. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  435. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  436. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  437. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  438. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  439. smsp__inst_executed.avg inst 12,309.03
  440. smsp__inst_executed.max inst 12,686
  441. smsp__inst_executed.min inst 11,852
  442. smsp__inst_executed.sum inst 787,778
  443. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  444. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  445. smsp__cycles_active.avg cycle 72,892.83
  446. smsp__cycles_active.sum cycle 4,665,141
  447. ---------------------------------------------------------------------- --------------- ------------------------------
  448. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  449. Section: Command line profiler metrics
  450. ---------------------------------------------------------------------- --------------- ------------------------------
  451. gpu__time_duration.sum usecond 231.33
  452. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  453. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  454. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  455. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  456. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  457. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  458. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  459. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  460. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677
  461. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,976
  462. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,331
  463. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,832
  464. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,882.24
  465. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,238.82
  466. smsp__inst_executed.avg inst 189,292.19
  467. smsp__inst_executed.max inst 192,340
  468. smsp__inst_executed.min inst 186,215
  469. smsp__inst_executed.sum inst 12,114,700
  470. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
  471. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  472. smsp__cycles_active.avg cycle 316,203.25
  473. smsp__cycles_active.sum cycle 20,237,008
  474. ---------------------------------------------------------------------- --------------- ------------------------------
  475. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  476. Section: Command line profiler metrics
  477. ---------------------------------------------------------------------- --------------- ------------------------------
  478. gpu__time_duration.sum usecond 58.08
  479. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  480. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  481. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  482. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  483. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  484. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  485. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  486. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  487. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  488. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  489. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  490. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  491. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  492. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  493. smsp__inst_executed.avg inst 12,289.06
  494. smsp__inst_executed.max inst 12,694
  495. smsp__inst_executed.min inst 11,900
  496. smsp__inst_executed.sum inst 786,500
  497. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  498. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  499. smsp__cycles_active.avg cycle 70,488.72
  500. smsp__cycles_active.sum cycle 4,511,278
  501. ---------------------------------------------------------------------- --------------- ------------------------------
  502. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  503. Section: Command line profiler metrics
  504. ---------------------------------------------------------------------- --------------- ------------------------------
  505. gpu__time_duration.sum usecond 58.27
  506. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  507. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  508. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  509. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  510. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  511. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  512. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  513. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  514. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  515. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  516. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  517. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  518. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  519. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  520. smsp__inst_executed.avg inst 12,291.25
  521. smsp__inst_executed.max inst 12,681
  522. smsp__inst_executed.min inst 12,008
  523. smsp__inst_executed.sum inst 786,640
  524. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  525. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  526. smsp__cycles_active.avg cycle 70,605.89
  527. smsp__cycles_active.sum cycle 4,518,777
  528. ---------------------------------------------------------------------- --------------- ------------------------------
  529. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  530. Section: Command line profiler metrics
  531. ---------------------------------------------------------------------- --------------- ------------------------------
  532. gpu__time_duration.sum usecond 58.34
  533. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  534. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  535. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  536. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  537. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  538. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  539. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  540. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  541. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  542. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  543. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  544. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  545. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  546. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  547. smsp__inst_executed.avg inst 12,292.84
  548. smsp__inst_executed.max inst 12,543
  549. smsp__inst_executed.min inst 11,998
  550. smsp__inst_executed.sum inst 786,742
  551. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  552. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  553. smsp__cycles_active.avg cycle 70,795.58
  554. smsp__cycles_active.sum cycle 4,530,917
  555. ---------------------------------------------------------------------- --------------- ------------------------------
  556. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  557. Section: Command line profiler metrics
  558. ---------------------------------------------------------------------- --------------- ------------------------------
  559. gpu__time_duration.sum usecond 58.02
  560. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  561. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  562. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  563. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  564. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  565. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  566. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  567. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  568. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  569. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  570. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  571. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  572. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  573. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  574. smsp__inst_executed.avg inst 12,299.95
  575. smsp__inst_executed.max inst 12,683
  576. smsp__inst_executed.min inst 11,720
  577. smsp__inst_executed.sum inst 787,197
  578. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  579. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  580. smsp__cycles_active.avg cycle 70,136.48
  581. smsp__cycles_active.sum cycle 4,488,735
  582. ---------------------------------------------------------------------- --------------- ------------------------------
  583. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  584. Section: Command line profiler metrics
  585. ---------------------------------------------------------------------- --------------- ------------------------------
  586. gpu__time_duration.sum usecond 59.52
  587. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  588. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  589. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  590. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  591. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  592. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  593. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  594. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  595. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  596. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  597. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  598. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  599. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  600. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  601. smsp__inst_executed.avg inst 12,309.09
  602. smsp__inst_executed.max inst 12,613
  603. smsp__inst_executed.min inst 11,865
  604. smsp__inst_executed.sum inst 787,782
  605. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  606. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  607. smsp__cycles_active.avg cycle 72,887.53
  608. smsp__cycles_active.sum cycle 4,664,802
  609. ---------------------------------------------------------------------- --------------- ------------------------------
  610. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  611. Section: Command line profiler metrics
  612. ---------------------------------------------------------------------- --------------- ------------------------------
  613. gpu__time_duration.sum usecond 231.30
  614. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  615. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  616. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  617. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  618. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  619. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  620. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  621. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  622. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.56
  623. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
  624. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,315
  625. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,921
  626. smsp__average_warp_latency_issue_stalled_barrier.pct % 124,910.64
  627. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.11
  628. smsp__inst_executed.avg inst 189,291.42
  629. smsp__inst_executed.max inst 192,361
  630. smsp__inst_executed.min inst 186,192
  631. smsp__inst_executed.sum inst 12,114,651
  632. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
  633. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  634. smsp__cycles_active.avg cycle 316,146.12
  635. smsp__cycles_active.sum cycle 20,233,352
  636. ---------------------------------------------------------------------- --------------- ------------------------------
  637. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
  638. Section: Command line profiler metrics
  639. ---------------------------------------------------------------------- --------------- ------------------------------
  640. gpu__time_duration.sum usecond 60.03
  641. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  642. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  643. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  644. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  645. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  646. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  647. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  648. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  649. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  650. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  651. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  652. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  653. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  654. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  655. smsp__inst_executed.avg inst 12,288.48
  656. smsp__inst_executed.max inst 12,672
  657. smsp__inst_executed.min inst 11,868
  658. smsp__inst_executed.sum inst 786,463
  659. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  660. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  661. smsp__cycles_active.avg cycle 73,004.22
  662. smsp__cycles_active.sum cycle 4,672,270
  663. ---------------------------------------------------------------------- --------------- ------------------------------
  664. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  665. Section: Command line profiler metrics
  666. ---------------------------------------------------------------------- --------------- ------------------------------
  667. gpu__time_duration.sum usecond 58.08
  668. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  669. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  670. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  671. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  672. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  673. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  674. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  675. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  676. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  677. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  678. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  679. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  680. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  681. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  682. smsp__inst_executed.avg inst 12,289.81
  683. smsp__inst_executed.max inst 12,480
  684. smsp__inst_executed.min inst 12,068
  685. smsp__inst_executed.sum inst 786,548
  686. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  687. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  688. smsp__cycles_active.avg cycle 70,790.83
  689. smsp__cycles_active.sum cycle 4,530,613
  690. ---------------------------------------------------------------------- --------------- ------------------------------
  691. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  692. Section: Command line profiler metrics
  693. ---------------------------------------------------------------------- --------------- ------------------------------
  694. gpu__time_duration.sum usecond 58.46
  695. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  696. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  697. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  698. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  699. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  700. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  701. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  702. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  703. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  704. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  705. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  706. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  707. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  708. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  709. smsp__inst_executed.avg inst 12,290.59
  710. smsp__inst_executed.max inst 12,701
  711. smsp__inst_executed.min inst 12,068
  712. smsp__inst_executed.sum inst 786,598
  713. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  714. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  715. smsp__cycles_active.avg cycle 70,847.19
  716. smsp__cycles_active.sum cycle 4,534,220
  717. ---------------------------------------------------------------------- --------------- ------------------------------
  718. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  719. Section: Command line profiler metrics
  720. ---------------------------------------------------------------------- --------------- ------------------------------
  721. gpu__time_duration.sum usecond 58.27
  722. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  723. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  724. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  725. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  726. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  727. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  728. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  729. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  730. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  731. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  732. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  733. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  734. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  735. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  736. smsp__inst_executed.avg inst 12,293.72
  737. smsp__inst_executed.max inst 12,656
  738. smsp__inst_executed.min inst 12,038
  739. smsp__inst_executed.sum inst 786,798
  740. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  741. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  742. smsp__cycles_active.avg cycle 70,747
  743. smsp__cycles_active.sum cycle 4,527,808
  744. ---------------------------------------------------------------------- --------------- ------------------------------
  745. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  746. Section: Command line profiler metrics
  747. ---------------------------------------------------------------------- --------------- ------------------------------
  748. gpu__time_duration.sum usecond 57.95
  749. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  750. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  751. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  752. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  753. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  754. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  755. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  756. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  757. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  758. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  759. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  760. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  761. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  762. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  763. smsp__inst_executed.avg inst 12,298.14
  764. smsp__inst_executed.max inst 12,645
  765. smsp__inst_executed.min inst 12,029
  766. smsp__inst_executed.sum inst 787,081
  767. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  768. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  769. smsp__cycles_active.avg cycle 70,059.03
  770. smsp__cycles_active.sum cycle 4,483,778
  771. ---------------------------------------------------------------------- --------------- ------------------------------
  772. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  773. Section: Command line profiler metrics
  774. ---------------------------------------------------------------------- --------------- ------------------------------
  775. gpu__time_duration.sum usecond 59.58
  776. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  777. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  778. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  779. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  780. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  781. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  782. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  783. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  784. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  785. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  786. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  787. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  788. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  789. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  790. smsp__inst_executed.avg inst 12,308.86
  791. smsp__inst_executed.max inst 12,724
  792. smsp__inst_executed.min inst 11,654
  793. smsp__inst_executed.sum inst 787,767
  794. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  795. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  796. smsp__cycles_active.avg cycle 72,813.80
  797. smsp__cycles_active.sum cycle 4,660,083
  798. ---------------------------------------------------------------------- --------------- ------------------------------
  799. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  800. Section: Command line profiler metrics
  801. ---------------------------------------------------------------------- --------------- ------------------------------
  802. gpu__time_duration.sum usecond 231.90
  803. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  804. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  805. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  806. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  807. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  808. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  809. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  810. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  811. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.44
  812. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,942
  813. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,386
  814. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,711
  815. smsp__average_warp_latency_issue_stalled_barrier.pct % 125,049.38
  816. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.49
  817. smsp__inst_executed.avg inst 189,291.03
  818. smsp__inst_executed.max inst 192,313
  819. smsp__inst_executed.min inst 186,310
  820. smsp__inst_executed.sum inst 12,114,626
  821. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
  822. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  823. smsp__cycles_active.avg cycle 316,608.81
  824. smsp__cycles_active.sum cycle 20,262,964
  825. ---------------------------------------------------------------------- --------------- ------------------------------
  826. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  827. Section: Command line profiler metrics
  828. ---------------------------------------------------------------------- --------------- ------------------------------
  829. gpu__time_duration.sum usecond 58.78
  830. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  831. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  832. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  833. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  834. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  835. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  836. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  837. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  838. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  839. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  840. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  841. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  842. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  843. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  844. smsp__inst_executed.avg inst 12,287.95
  845. smsp__inst_executed.max inst 12,856
  846. smsp__inst_executed.min inst 11,904
  847. smsp__inst_executed.sum inst 786,429
  848. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  849. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  850. smsp__cycles_active.avg cycle 71,331.70
  851. smsp__cycles_active.sum cycle 4,565,229
  852. ---------------------------------------------------------------------- --------------- ------------------------------
  853. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  854. Section: Command line profiler metrics
  855. ---------------------------------------------------------------------- --------------- ------------------------------
  856. gpu__time_duration.sum usecond 59.94
  857. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  858. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  859. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  860. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  861. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  862. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  863. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  864. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  865. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  866. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  867. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  868. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  869. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  870. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  871. smsp__inst_executed.avg inst 12,288.03
  872. smsp__inst_executed.max inst 12,488
  873. smsp__inst_executed.min inst 11,888
  874. smsp__inst_executed.sum inst 786,434
  875. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  876. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  877. smsp__cycles_active.avg cycle 73,232.05
  878. smsp__cycles_active.sum cycle 4,686,851
  879. ---------------------------------------------------------------------- --------------- ------------------------------
  880. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  881. Section: Command line profiler metrics
  882. ---------------------------------------------------------------------- --------------- ------------------------------
  883. gpu__time_duration.sum usecond 58.27
  884. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  885. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  886. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  887. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  888. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  889. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  890. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  891. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  892. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  893. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  894. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  895. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  896. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  897. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  898. smsp__inst_executed.avg inst 12,289.50
  899. smsp__inst_executed.max inst 12,488
  900. smsp__inst_executed.min inst 12,072
  901. smsp__inst_executed.sum inst 786,528
  902. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  903. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  904. smsp__cycles_active.avg cycle 70,846.25
  905. smsp__cycles_active.sum cycle 4,534,160
  906. ---------------------------------------------------------------------- --------------- ------------------------------
  907. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  908. Section: Command line profiler metrics
  909. ---------------------------------------------------------------------- --------------- ------------------------------
  910. gpu__time_duration.sum usecond 58.11
  911. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  912. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  913. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  914. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  915. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  916. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  917. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  918. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  919. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  920. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  921. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  922. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  923. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  924. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  925. smsp__inst_executed.avg inst 12,290.84
  926. smsp__inst_executed.max inst 12,564
  927. smsp__inst_executed.min inst 12,104
  928. smsp__inst_executed.sum inst 786,614
  929. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  930. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  931. smsp__cycles_active.avg cycle 70,881.05
  932. smsp__cycles_active.sum cycle 4,536,387
  933. ---------------------------------------------------------------------- --------------- ------------------------------
  934. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
  935. Section: Command line profiler metrics
  936. ---------------------------------------------------------------------- --------------- ------------------------------
  937. gpu__time_duration.sum usecond 58.40
  938. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  939. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  940. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  941. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  942. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  943. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  944. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  945. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  946. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  947. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  948. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  949. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  950. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  951. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  952. smsp__inst_executed.avg inst 12,293.73
  953. smsp__inst_executed.max inst 12,757
  954. smsp__inst_executed.min inst 11,970
  955. smsp__inst_executed.sum inst 786,799
  956. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  957. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  958. smsp__cycles_active.avg cycle 71,142.94
  959. smsp__cycles_active.sum cycle 4,553,148
  960. ---------------------------------------------------------------------- --------------- ------------------------------
  961. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  962. Section: Command line profiler metrics
  963. ---------------------------------------------------------------------- --------------- ------------------------------
  964. gpu__time_duration.sum usecond 57.95
  965. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  966. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  967. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  968. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  969. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  970. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  971. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  972. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  973. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  974. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  975. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  976. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  977. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  978. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  979. smsp__inst_executed.avg inst 12,298.62
  980. smsp__inst_executed.max inst 12,553
  981. smsp__inst_executed.min inst 12,119
  982. smsp__inst_executed.sum inst 787,112
  983. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  984. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  985. smsp__cycles_active.avg cycle 70,189.52
  986. smsp__cycles_active.sum cycle 4,492,129
  987. ---------------------------------------------------------------------- --------------- ------------------------------
  988. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  989. Section: Command line profiler metrics
  990. ---------------------------------------------------------------------- --------------- ------------------------------
  991. gpu__time_duration.sum usecond 59.71
  992. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  993. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  994. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  995. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  996. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  997. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  998. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  999. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1000. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1001. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1002. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1003. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1004. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1005. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1006. smsp__inst_executed.avg inst 12,309.52
  1007. smsp__inst_executed.max inst 12,538
  1008. smsp__inst_executed.min inst 12,074
  1009. smsp__inst_executed.sum inst 787,809
  1010. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1011. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1012. smsp__cycles_active.avg cycle 72,879.23
  1013. smsp__cycles_active.sum cycle 4,664,271
  1014. ---------------------------------------------------------------------- --------------- ------------------------------
  1015. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1016. Section: Command line profiler metrics
  1017. ---------------------------------------------------------------------- --------------- ------------------------------
  1018. gpu__time_duration.sum usecond 231.42
  1019. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1020. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1021. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1022. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1023. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1024. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1025. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1026. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1027. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673
  1028. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,007
  1029. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,299
  1030. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,768
  1031. smsp__average_warp_latency_issue_stalled_barrier.pct % 124,557.10
  1032. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.57
  1033. smsp__inst_executed.avg inst 189,303.22
  1034. smsp__inst_executed.max inst 192,317
  1035. smsp__inst_executed.min inst 186,277
  1036. smsp__inst_executed.sum inst 12,115,406
  1037. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96
  1038. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1039. smsp__cycles_active.avg cycle 315,741.19
  1040. smsp__cycles_active.sum cycle 20,207,436
  1041. ---------------------------------------------------------------------- --------------- ------------------------------
  1042. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1043. Section: Command line profiler metrics
  1044. ---------------------------------------------------------------------- --------------- ------------------------------
  1045. gpu__time_duration.sum usecond 58.40
  1046. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1047. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1048. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1049. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1050. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1051. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1052. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1053. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1054. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1055. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1056. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1057. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1058. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1059. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1060. smsp__inst_executed.avg inst 12,287.92
  1061. smsp__inst_executed.max inst 12,648
  1062. smsp__inst_executed.min inst 11,912
  1063. smsp__inst_executed.sum inst 786,427
  1064. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1065. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1066. smsp__cycles_active.avg cycle 71,978.88
  1067. smsp__cycles_active.sum cycle 4,606,648
  1068. ---------------------------------------------------------------------- --------------- ------------------------------
  1069. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1070. Section: Command line profiler metrics
  1071. ---------------------------------------------------------------------- --------------- ------------------------------
  1072. gpu__time_duration.sum usecond 58.62
  1073. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1074. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1075. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1076. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1077. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1078. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1079. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1080. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1081. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1082. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1083. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1084. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1085. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1086. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1087. smsp__inst_executed.avg inst 12,288.30
  1088. smsp__inst_executed.max inst 12,848
  1089. smsp__inst_executed.min inst 11,904
  1090. smsp__inst_executed.sum inst 786,451
  1091. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1092. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1093. smsp__cycles_active.avg cycle 71,708.22
  1094. smsp__cycles_active.sum cycle 4,589,326
  1095. ---------------------------------------------------------------------- --------------- ------------------------------
  1096. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1097. Section: Command line profiler metrics
  1098. ---------------------------------------------------------------------- --------------- ------------------------------
  1099. gpu__time_duration.sum usecond 60.19
  1100. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1101. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1102. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1103. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1104. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1105. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1106. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1107. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1108. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1109. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1110. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1111. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1112. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1113. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1114. smsp__inst_executed.avg inst 12,289.11
  1115. smsp__inst_executed.max inst 12,876
  1116. smsp__inst_executed.min inst 11,688
  1117. smsp__inst_executed.sum inst 786,503
  1118. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1119. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1120. smsp__cycles_active.avg cycle 73,332.14
  1121. smsp__cycles_active.sum cycle 4,693,257
  1122. ---------------------------------------------------------------------- --------------- ------------------------------
  1123. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1124. Section: Command line profiler metrics
  1125. ---------------------------------------------------------------------- --------------- ------------------------------
  1126. gpu__time_duration.sum usecond 58.50
  1127. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1128. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1129. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1130. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1131. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1132. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1133. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1134. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1135. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1136. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1137. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1138. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1139. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1140. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1141. smsp__inst_executed.avg inst 12,288.89
  1142. smsp__inst_executed.max inst 12,507
  1143. smsp__inst_executed.min inst 12,092
  1144. smsp__inst_executed.sum inst 786,489
  1145. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1146. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1147. smsp__cycles_active.avg cycle 70,441.14
  1148. smsp__cycles_active.sum cycle 4,508,233
  1149. ---------------------------------------------------------------------- --------------- ------------------------------
  1150. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1151. Section: Command line profiler metrics
  1152. ---------------------------------------------------------------------- --------------- ------------------------------
  1153. gpu__time_duration.sum usecond 58.30
  1154. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1155. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1156. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1157. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1158. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1159. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1160. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1161. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1162. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1163. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1164. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1165. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1166. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1167. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1168. smsp__inst_executed.avg inst 12,290.69
  1169. smsp__inst_executed.max inst 12,682
  1170. smsp__inst_executed.min inst 11,866
  1171. smsp__inst_executed.sum inst 786,604
  1172. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1173. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1174. smsp__cycles_active.avg cycle 70,768.55
  1175. smsp__cycles_active.sum cycle 4,529,187
  1176. ---------------------------------------------------------------------- --------------- ------------------------------
  1177. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1178. Section: Command line profiler metrics
  1179. ---------------------------------------------------------------------- --------------- ------------------------------
  1180. gpu__time_duration.sum usecond 58.62
  1181. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1182. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1183. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1184. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1185. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1186. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1187. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1188. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1189. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1190. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1191. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1192. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1193. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1194. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1195. smsp__inst_executed.avg inst 12,293.67
  1196. smsp__inst_executed.max inst 12,534
  1197. smsp__inst_executed.min inst 11,732
  1198. smsp__inst_executed.sum inst 786,795
  1199. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1200. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1201. smsp__cycles_active.avg cycle 71,007.56
  1202. smsp__cycles_active.sum cycle 4,544,484
  1203. ---------------------------------------------------------------------- --------------- ------------------------------
  1204. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1205. Section: Command line profiler metrics
  1206. ---------------------------------------------------------------------- --------------- ------------------------------
  1207. gpu__time_duration.sum usecond 58.05
  1208. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1209. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1210. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1211. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1212. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1213. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1214. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1215. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1216. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1217. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1218. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1219. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1220. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1221. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1222. smsp__inst_executed.avg inst 12,299.09
  1223. smsp__inst_executed.max inst 12,656
  1224. smsp__inst_executed.min inst 11,912
  1225. smsp__inst_executed.sum inst 787,142
  1226. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1227. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1228. smsp__cycles_active.avg cycle 70,781.25
  1229. smsp__cycles_active.sum cycle 4,530,000
  1230. ---------------------------------------------------------------------- --------------- ------------------------------
  1231. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
  1232. Section: Command line profiler metrics
  1233. ---------------------------------------------------------------------- --------------- ------------------------------
  1234. gpu__time_duration.sum usecond 59.14
  1235. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1236. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1237. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1238. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1239. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1240. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1241. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1242. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1243. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1244. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1245. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1246. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1247. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1248. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1249. smsp__inst_executed.avg inst 12,309.02
  1250. smsp__inst_executed.max inst 12,707
  1251. smsp__inst_executed.min inst 11,847
  1252. smsp__inst_executed.sum inst 787,777
  1253. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1254. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1255. smsp__cycles_active.avg cycle 72,505.88
  1256. smsp__cycles_active.sum cycle 4,640,376
  1257. ---------------------------------------------------------------------- --------------- ------------------------------
  1258. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1259. Section: Command line profiler metrics
  1260. ---------------------------------------------------------------------- --------------- ------------------------------
  1261. gpu__time_duration.sum usecond 231.14
  1262. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1263. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1264. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1265. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1266. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1267. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1268. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1269. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1270. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,666.06
  1271. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,013
  1272. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,348
  1273. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,657
  1274. smsp__average_warp_latency_issue_stalled_barrier.pct % 124,275.15
  1275. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,242.75
  1276. smsp__inst_executed.avg inst 189,315.86
  1277. smsp__inst_executed.max inst 192,371
  1278. smsp__inst_executed.min inst 186,294
  1279. smsp__inst_executed.sum inst 12,116,215
  1280. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.90
  1281. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1282. smsp__cycles_active.avg cycle 316,297.72
  1283. smsp__cycles_active.sum cycle 20,243,054
  1284. ---------------------------------------------------------------------- --------------- ------------------------------
  1285. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1286. Section: Command line profiler metrics
  1287. ---------------------------------------------------------------------- --------------- ------------------------------
  1288. gpu__time_duration.sum usecond 60.42
  1289. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1290. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1291. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1292. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1293. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1294. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1295. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1296. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1297. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1298. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1299. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1300. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1301. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1302. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1303. smsp__inst_executed.avg inst 12,288.20
  1304. smsp__inst_executed.max inst 12,484
  1305. smsp__inst_executed.min inst 12,092
  1306. smsp__inst_executed.sum inst 786,445
  1307. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1308. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1309. smsp__cycles_active.avg cycle 74,382.31
  1310. smsp__cycles_active.sum cycle 4,760,468
  1311. ---------------------------------------------------------------------- --------------- ------------------------------
  1312. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1313. Section: Command line profiler metrics
  1314. ---------------------------------------------------------------------- --------------- ------------------------------
  1315. gpu__time_duration.sum usecond 58.88
  1316. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1317. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1318. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1319. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1320. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1321. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1322. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1323. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1324. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1325. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1326. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1327. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1328. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1329. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1330. smsp__inst_executed.avg inst 12,288.11
  1331. smsp__inst_executed.max inst 12,484
  1332. smsp__inst_executed.min inst 11,716
  1333. smsp__inst_executed.sum inst 786,439
  1334. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1335. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1336. smsp__cycles_active.avg cycle 71,860.06
  1337. smsp__cycles_active.sum cycle 4,599,044
  1338. ---------------------------------------------------------------------- --------------- ------------------------------
  1339. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1340. Section: Command line profiler metrics
  1341. ---------------------------------------------------------------------- --------------- ------------------------------
  1342. gpu__time_duration.sum usecond 59.04
  1343. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1344. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1345. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1346. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1347. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1348. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1349. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1350. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1351. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1352. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1353. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1354. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1355. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1356. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1357. smsp__inst_executed.avg inst 12,288.05
  1358. smsp__inst_executed.max inst 12,664
  1359. smsp__inst_executed.min inst 11,700
  1360. smsp__inst_executed.sum inst 786,435
  1361. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1362. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1363. smsp__cycles_active.avg cycle 71,882.38
  1364. smsp__cycles_active.sum cycle 4,600,472
  1365. ---------------------------------------------------------------------- --------------- ------------------------------
  1366. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1367. Section: Command line profiler metrics
  1368. ---------------------------------------------------------------------- --------------- ------------------------------
  1369. gpu__time_duration.sum usecond 60.13
  1370. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1371. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1372. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1373. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1374. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1375. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1376. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1377. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1378. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1379. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1380. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1381. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1382. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1383. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1384. smsp__inst_executed.avg inst 12,288.81
  1385. smsp__inst_executed.max inst 12,870
  1386. smsp__inst_executed.min inst 11,908
  1387. smsp__inst_executed.sum inst 786,484
  1388. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1389. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1390. smsp__cycles_active.avg cycle 73,247.75
  1391. smsp__cycles_active.sum cycle 4,687,856
  1392. ---------------------------------------------------------------------- --------------- ------------------------------
  1393. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1394. Section: Command line profiler metrics
  1395. ---------------------------------------------------------------------- --------------- ------------------------------
  1396. gpu__time_duration.sum usecond 57.89
  1397. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1398. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1399. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1400. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1401. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1402. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1403. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1404. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1405. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1406. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1407. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1408. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1409. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1410. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1411. smsp__inst_executed.avg inst 12,289.59
  1412. smsp__inst_executed.max inst 12,494
  1413. smsp__inst_executed.min inst 11,898
  1414. smsp__inst_executed.sum inst 786,534
  1415. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1416. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1417. smsp__cycles_active.avg cycle 70,630.66
  1418. smsp__cycles_active.sum cycle 4,520,362
  1419. ---------------------------------------------------------------------- --------------- ------------------------------
  1420. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1421. Section: Command line profiler metrics
  1422. ---------------------------------------------------------------------- --------------- ------------------------------
  1423. gpu__time_duration.sum usecond 58.14
  1424. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1425. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1426. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1427. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1428. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1429. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1430. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1431. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1432. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1433. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1434. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1435. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1436. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1437. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1438. smsp__inst_executed.avg inst 12,291.27
  1439. smsp__inst_executed.max inst 12,510
  1440. smsp__inst_executed.min inst 12,082
  1441. smsp__inst_executed.sum inst 786,641
  1442. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1443. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1444. smsp__cycles_active.avg cycle 70,548.77
  1445. smsp__cycles_active.sum cycle 4,515,121
  1446. ---------------------------------------------------------------------- --------------- ------------------------------
  1447. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1448. Section: Command line profiler metrics
  1449. ---------------------------------------------------------------------- --------------- ------------------------------
  1450. gpu__time_duration.sum usecond 58.66
  1451. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1452. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1453. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1454. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1455. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1456. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1457. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1458. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1459. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1460. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1461. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1462. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1463. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1464. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1465. smsp__inst_executed.avg inst 12,294.64
  1466. smsp__inst_executed.max inst 12,656
  1467. smsp__inst_executed.min inst 11,924
  1468. smsp__inst_executed.sum inst 786,857
  1469. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1470. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1471. smsp__cycles_active.avg cycle 71,171.45
  1472. smsp__cycles_active.sum cycle 4,554,973
  1473. ---------------------------------------------------------------------- --------------- ------------------------------
  1474. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1475. Section: Command line profiler metrics
  1476. ---------------------------------------------------------------------- --------------- ------------------------------
  1477. gpu__time_duration.sum usecond 57.86
  1478. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1479. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1480. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1481. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1482. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1483. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1484. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1485. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1486. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1487. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1488. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1489. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1490. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1491. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1492. smsp__inst_executed.avg inst 12,301.05
  1493. smsp__inst_executed.max inst 12,725
  1494. smsp__inst_executed.min inst 11,871
  1495. smsp__inst_executed.sum inst 787,267
  1496. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1497. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1498. smsp__cycles_active.avg cycle 70,490.50
  1499. smsp__cycles_active.sum cycle 4,511,392
  1500. ---------------------------------------------------------------------- --------------- ------------------------------
  1501. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1502. Section: Command line profiler metrics
  1503. ---------------------------------------------------------------------- --------------- ------------------------------
  1504. gpu__time_duration.sum usecond 59.17
  1505. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1506. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1507. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1508. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1509. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1510. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1511. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1512. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1513. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1514. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1515. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1516. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1517. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1518. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1519. smsp__inst_executed.avg inst 12,316.05
  1520. smsp__inst_executed.max inst 12,594
  1521. smsp__inst_executed.min inst 11,865
  1522. smsp__inst_executed.sum inst 788,227
  1523. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1524. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1525. smsp__cycles_active.avg cycle 72,533.61
  1526. smsp__cycles_active.sum cycle 4,642,151
  1527. ---------------------------------------------------------------------- --------------- ------------------------------
  1528. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1529. Section: Command line profiler metrics
  1530. ---------------------------------------------------------------------- --------------- ------------------------------
  1531. gpu__time_duration.sum usecond 231.55
  1532. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1533. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1534. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1535. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1536. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1537. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1538. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1539. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1540. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,681.88
  1541. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,120
  1542. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,332
  1543. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,910
  1544. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,982.60
  1545. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.83
  1546. smsp__inst_executed.avg inst 189,283.48
  1547. smsp__inst_executed.max inst 192,309
  1548. smsp__inst_executed.min inst 186,242
  1549. smsp__inst_executed.sum inst 12,114,143
  1550. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.88
  1551. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1552. smsp__cycles_active.avg cycle 316,209.50
  1553. smsp__cycles_active.sum cycle 20,237,408
  1554. ---------------------------------------------------------------------- --------------- ------------------------------
  1555. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
  1556. Section: Command line profiler metrics
  1557. ---------------------------------------------------------------------- --------------- ------------------------------
  1558. gpu__time_duration.sum usecond 56.70
  1559. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1560. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1561. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1562. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1563. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1564. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1565. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1566. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1567. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1568. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1569. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1570. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1571. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1572. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1573. smsp__inst_executed.avg inst 12,287.97
  1574. smsp__inst_executed.max inst 12,492
  1575. smsp__inst_executed.min inst 11,896
  1576. smsp__inst_executed.sum inst 786,430
  1577. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1578. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1579. smsp__cycles_active.avg cycle 68,714
  1580. smsp__cycles_active.sum cycle 4,397,696
  1581. ---------------------------------------------------------------------- --------------- ------------------------------
  1582. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1583. Section: Command line profiler metrics
  1584. ---------------------------------------------------------------------- --------------- ------------------------------
  1585. gpu__time_duration.sum usecond 60.64
  1586. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1587. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1588. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1589. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1590. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1591. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1592. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1593. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1594. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1595. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1596. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1597. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1598. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1599. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1600. smsp__inst_executed.avg inst 12,288.14
  1601. smsp__inst_executed.max inst 12,844
  1602. smsp__inst_executed.min inst 11,528
  1603. smsp__inst_executed.sum inst 786,441
  1604. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1605. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1606. smsp__cycles_active.avg cycle 74,171.33
  1607. smsp__cycles_active.sum cycle 4,746,965
  1608. ---------------------------------------------------------------------- --------------- ------------------------------
  1609. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1610. Section: Command line profiler metrics
  1611. ---------------------------------------------------------------------- --------------- ------------------------------
  1612. gpu__time_duration.sum usecond 58.72
  1613. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1614. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1615. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1616. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1617. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1618. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1619. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1620. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1621. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1622. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1623. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1624. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1625. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1626. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1627. smsp__inst_executed.avg inst 12,288.55
  1628. smsp__inst_executed.max inst 12,684
  1629. smsp__inst_executed.min inst 11,884
  1630. smsp__inst_executed.sum inst 786,467
  1631. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1632. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1633. smsp__cycles_active.avg cycle 71,696.42
  1634. smsp__cycles_active.sum cycle 4,588,571
  1635. ---------------------------------------------------------------------- --------------- ------------------------------
  1636. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1637. Section: Command line profiler metrics
  1638. ---------------------------------------------------------------------- --------------- ------------------------------
  1639. gpu__time_duration.sum usecond 58.94
  1640. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1641. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1642. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1643. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1644. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1645. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1646. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1647. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1648. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1649. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1650. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1651. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1652. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1653. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1654. smsp__inst_executed.avg inst 12,288.08
  1655. smsp__inst_executed.max inst 12,660
  1656. smsp__inst_executed.min inst 11,724
  1657. smsp__inst_executed.sum inst 786,437
  1658. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1659. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1660. smsp__cycles_active.avg cycle 71,640.89
  1661. smsp__cycles_active.sum cycle 4,585,017
  1662. ---------------------------------------------------------------------- --------------- ------------------------------
  1663. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1664. Section: Command line profiler metrics
  1665. ---------------------------------------------------------------------- --------------- ------------------------------
  1666. gpu__time_duration.sum usecond 60.06
  1667. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1668. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1669. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1670. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1671. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1672. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1673. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1674. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1675. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1676. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1677. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1678. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1679. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1680. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1681. smsp__inst_executed.avg inst 12,288.06
  1682. smsp__inst_executed.max inst 12,524
  1683. smsp__inst_executed.min inst 11,900
  1684. smsp__inst_executed.sum inst 786,436
  1685. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1686. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1687. smsp__cycles_active.avg cycle 73,132.61
  1688. smsp__cycles_active.sum cycle 4,680,487
  1689. ---------------------------------------------------------------------- --------------- ------------------------------
  1690. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1691. Section: Command line profiler metrics
  1692. ---------------------------------------------------------------------- --------------- ------------------------------
  1693. gpu__time_duration.sum usecond 58.08
  1694. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1695. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1696. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1697. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1698. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1699. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1700. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1701. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1702. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1703. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1704. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1705. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1706. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1707. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1708. smsp__inst_executed.avg inst 12,289.61
  1709. smsp__inst_executed.max inst 12,634
  1710. smsp__inst_executed.min inst 11,884
  1711. smsp__inst_executed.sum inst 786,535
  1712. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1713. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1714. smsp__cycles_active.avg cycle 70,620.73
  1715. smsp__cycles_active.sum cycle 4,519,727
  1716. ---------------------------------------------------------------------- --------------- ------------------------------
  1717. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1718. Section: Command line profiler metrics
  1719. ---------------------------------------------------------------------- --------------- ------------------------------
  1720. gpu__time_duration.sum usecond 58.24
  1721. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1722. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1723. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1724. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1725. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1726. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1727. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1728. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1729. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1730. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1731. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1732. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1733. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1734. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1735. smsp__inst_executed.avg inst 12,291.28
  1736. smsp__inst_executed.max inst 12,704
  1737. smsp__inst_executed.min inst 11,892
  1738. smsp__inst_executed.sum inst 786,642
  1739. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1740. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1741. smsp__cycles_active.avg cycle 71,037.52
  1742. smsp__cycles_active.sum cycle 4,546,401
  1743. ---------------------------------------------------------------------- --------------- ------------------------------
  1744. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1745. Section: Command line profiler metrics
  1746. ---------------------------------------------------------------------- --------------- ------------------------------
  1747. gpu__time_duration.sum usecond 58.82
  1748. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1749. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1750. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1751. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1752. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1753. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1754. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1755. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1756. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1757. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1758. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1759. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1760. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1761. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1762. smsp__inst_executed.avg inst 12,293.23
  1763. smsp__inst_executed.max inst 12,931
  1764. smsp__inst_executed.min inst 11,840
  1765. smsp__inst_executed.sum inst 786,767
  1766. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1767. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1768. smsp__cycles_active.avg cycle 70,840.56
  1769. smsp__cycles_active.sum cycle 4,533,796
  1770. ---------------------------------------------------------------------- --------------- ------------------------------
  1771. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1772. Section: Command line profiler metrics
  1773. ---------------------------------------------------------------------- --------------- ------------------------------
  1774. gpu__time_duration.sum usecond 58.24
  1775. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1776. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1777. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1778. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1779. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1780. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1781. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1782. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1783. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1784. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1785. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1786. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1787. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1788. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1789. smsp__inst_executed.avg inst 12,298.42
  1790. smsp__inst_executed.max inst 12,587
  1791. smsp__inst_executed.min inst 11,966
  1792. smsp__inst_executed.sum inst 787,099
  1793. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1794. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1795. smsp__cycles_active.avg cycle 70,543.30
  1796. smsp__cycles_active.sum cycle 4,514,771
  1797. ---------------------------------------------------------------------- --------------- ------------------------------
  1798. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1799. Section: Command line profiler metrics
  1800. ---------------------------------------------------------------------- --------------- ------------------------------
  1801. gpu__time_duration.sum usecond 59.39
  1802. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1803. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1804. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1805. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1806. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1807. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1808. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1809. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1810. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1811. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1812. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1813. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1814. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1815. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1816. smsp__inst_executed.avg inst 12,309.44
  1817. smsp__inst_executed.max inst 12,751
  1818. smsp__inst_executed.min inst 11,714
  1819. smsp__inst_executed.sum inst 787,804
  1820. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1821. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1822. smsp__cycles_active.avg cycle 72,313.14
  1823. smsp__cycles_active.sum cycle 4,628,041
  1824. ---------------------------------------------------------------------- --------------- ------------------------------
  1825. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
  1826. Section: Command line profiler metrics
  1827. ---------------------------------------------------------------------- --------------- ------------------------------
  1828. gpu__time_duration.sum usecond 228.54
  1829. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1830. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1831. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1832. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1833. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
  1834. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
  1835. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
  1836. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
  1837. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,691.25
  1838. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,988
  1839. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,367
  1840. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,060
  1841. smsp__average_warp_latency_issue_stalled_barrier.pct % 123,962.42
  1842. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.62
  1843. smsp__inst_executed.avg inst 189,051.73
  1844. smsp__inst_executed.max inst 192,054
  1845. smsp__inst_executed.min inst 186,060
  1846. smsp__inst_executed.sum inst 12,099,311
  1847. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.83
  1848. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
  1849. smsp__cycles_active.avg cycle 317,268.88
  1850. smsp__cycles_active.sum cycle 20,305,208
  1851. ---------------------------------------------------------------------- --------------- ------------------------------