AUTH's THMMY "Parallel and distributed systems" course assignments.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

profReportv1.txt 228 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049
  1. ==PROF== Connected to process 38811 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA)
  2. ==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes
  3. ==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes
  4. ==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes
  5. ==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes
  6. ==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes
  7. ==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes
  8. ==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes
  9. ==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes
  10. ==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes
  11. ==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes
  12. ==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes
  13. ==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes
  14. ==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes
  15. ==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes
  16. ==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes
  17. ==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes
  18. ==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes
  19. ==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes
  20. ==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes
  21. ==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes
  22. ==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes
  23. ==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes
  24. ==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes
  25. ==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes
  26. ==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 6 passes
  27. ==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes
  28. ==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes
  29. ==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes
  30. ==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes
  31. ==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes
  32. ==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes
  33. ==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes
  34. ==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes
  35. ==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes
  36. ==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes
  37. ==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes
  38. ==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes
  39. ==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes
  40. ==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes
  41. ==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes
  42. ==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes
  43. ==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes
  44. ==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes
  45. ==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes
  46. ==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes
  47. ==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes
  48. ==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes
  49. ==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes
  50. ==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes
  51. ==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes
  52. ==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes
  53. ==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes
  54. ==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 6 passes
  55. ==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes
  56. ==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes
  57. ==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes
  58. ==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes
  59. ==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 passes
  60. ==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes
  61. ==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes
  62. ==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes
  63. ==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes
  64. ==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes
  65. ==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes
  66. ==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes
  67. ==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes
  68. ==PROF== Disconnected from process 38811
  69. [38811] bitonicCUDA@127.0.0.1
  70. void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  71. Section: Command line profiler metrics
  72. ---------------------------------------------------------------------- --------------- ------------------------------
  73. gpu__time_duration.sum msecond 1.06
  74. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  75. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  76. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.22
  77. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.91
  78. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 17.11
  79. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 5.48
  80. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  81. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  82. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  83. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  84. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  85. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  86. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  87. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  88. smsp__average_warp_latency_issue_stalled_barrier.pct % 1,052,474.71
  89. smsp__average_warp_latency_issue_stalled_barrier.ratio 10,524.75
  90. smsp__inst_executed.avg inst 770,268.77
  91. smsp__inst_executed.max inst 770,551
  92. smsp__inst_executed.min inst 770,034
  93. smsp__inst_executed.sum inst 49,297,201
  94. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.25
  95. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.23
  96. smsp__cycles_active.avg cycle 1,463,898.61
  97. smsp__cycles_active.sum cycle 93,689,511
  98. ---------------------------------------------------------------------- --------------- ------------------------------
  99. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  100. Section: Command line profiler metrics
  101. ---------------------------------------------------------------------- --------------- ------------------------------
  102. gpu__time_duration.sum usecond 58.78
  103. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  104. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  105. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  106. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  107. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
  108. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  109. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  110. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  111. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  112. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  113. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  114. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  115. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  116. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  117. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  118. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  119. smsp__inst_executed.avg inst 12,434.38
  120. smsp__inst_executed.max inst 13,034
  121. smsp__inst_executed.min inst 12,078
  122. smsp__inst_executed.sum inst 795,800
  123. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  124. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  125. smsp__cycles_active.avg cycle 72,392.50
  126. smsp__cycles_active.sum cycle 4,633,120
  127. ---------------------------------------------------------------------- --------------- ------------------------------
  128. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  129. Section: Command line profiler metrics
  130. ---------------------------------------------------------------------- --------------- ------------------------------
  131. gpu__time_duration.sum usecond 182.78
  132. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  133. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  134. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  135. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  136. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
  137. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  138. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  139. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  140. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  141. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  142. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  143. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  144. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  145. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  146. smsp__average_warp_latency_issue_stalled_barrier.pct % 158,915.20
  147. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,589.15
  148. smsp__inst_executed.avg inst 132,204.09
  149. smsp__inst_executed.max inst 134,291
  150. smsp__inst_executed.min inst 130,034
  151. smsp__inst_executed.sum inst 8,461,062
  152. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.76
  153. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  154. smsp__cycles_active.avg cycle 252,696.25
  155. smsp__cycles_active.sum cycle 16,172,560
  156. ---------------------------------------------------------------------- --------------- ------------------------------
  157. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  158. Section: Command line profiler metrics
  159. ---------------------------------------------------------------------- --------------- ------------------------------
  160. gpu__time_duration.sum usecond 57.86
  161. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  162. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  163. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  164. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  165. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
  166. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
  167. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  168. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  169. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  170. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  171. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  172. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  173. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  174. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  175. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  176. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  177. smsp__inst_executed.avg inst 12,300.02
  178. smsp__inst_executed.max inst 12,527
  179. smsp__inst_executed.min inst 11,906
  180. smsp__inst_executed.sum inst 787,201
  181. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  182. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  183. smsp__cycles_active.avg cycle 71,419.20
  184. smsp__cycles_active.sum cycle 4,570,829
  185. ---------------------------------------------------------------------- --------------- ------------------------------
  186. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  187. Section: Command line profiler metrics
  188. ---------------------------------------------------------------------- --------------- ------------------------------
  189. gpu__time_duration.sum usecond 58.27
  190. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  191. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  192. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  193. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  194. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
  195. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  196. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  197. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  198. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  199. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  200. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  201. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  202. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  203. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  204. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  205. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  206. smsp__inst_executed.avg inst 12,309.58
  207. smsp__inst_executed.max inst 12,592
  208. smsp__inst_executed.min inst 12,022
  209. smsp__inst_executed.sum inst 787,813
  210. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  211. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  212. smsp__cycles_active.avg cycle 71,582.45
  213. smsp__cycles_active.sum cycle 4,581,277
  214. ---------------------------------------------------------------------- --------------- ------------------------------
  215. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  216. Section: Command line profiler metrics
  217. ---------------------------------------------------------------------- --------------- ------------------------------
  218. gpu__time_duration.sum usecond 185.60
  219. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  220. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  221. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  222. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  223. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71
  224. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  225. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  226. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  227. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  228. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  229. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  230. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  231. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  232. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  233. smsp__average_warp_latency_issue_stalled_barrier.pct % 160,972.19
  234. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,609.72
  235. smsp__inst_executed.avg inst 132,207.95
  236. smsp__inst_executed.max inst 134,315
  237. smsp__inst_executed.min inst 130,093
  238. smsp__inst_executed.sum inst 8,461,309
  239. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.08
  240. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  241. smsp__cycles_active.avg cycle 251,877
  242. smsp__cycles_active.sum cycle 16,120,128
  243. ---------------------------------------------------------------------- --------------- ------------------------------
  244. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  245. Section: Command line profiler metrics
  246. ---------------------------------------------------------------------- --------------- ------------------------------
  247. gpu__time_duration.sum usecond 58.72
  248. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  249. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  250. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  251. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  252. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
  253. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
  254. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  255. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  256. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  257. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  258. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  259. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  260. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  261. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  262. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  263. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  264. smsp__inst_executed.avg inst 12,294.97
  265. smsp__inst_executed.max inst 12,632
  266. smsp__inst_executed.min inst 11,676
  267. smsp__inst_executed.sum inst 786,878
  268. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  269. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  270. smsp__cycles_active.avg cycle 72,658.42
  271. smsp__cycles_active.sum cycle 4,650,139
  272. ---------------------------------------------------------------------- --------------- ------------------------------
  273. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  274. Section: Command line profiler metrics
  275. ---------------------------------------------------------------------- --------------- ------------------------------
  276. gpu__time_duration.sum usecond 58.27
  277. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  278. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  279. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  280. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  281. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
  282. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  283. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  284. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  285. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  286. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  287. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  288. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  289. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  290. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  291. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  292. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  293. smsp__inst_executed.avg inst 12,298.64
  294. smsp__inst_executed.max inst 12,609
  295. smsp__inst_executed.min inst 11,896
  296. smsp__inst_executed.sum inst 787,113
  297. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  298. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  299. smsp__cycles_active.avg cycle 71,467.41
  300. smsp__cycles_active.sum cycle 4,573,914
  301. ---------------------------------------------------------------------- --------------- ------------------------------
  302. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  303. Section: Command line profiler metrics
  304. ---------------------------------------------------------------------- --------------- ------------------------------
  305. gpu__time_duration.sum usecond 58.46
  306. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  307. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  308. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  309. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  310. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
  311. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  312. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  313. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  314. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  315. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  316. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  317. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  318. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  319. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  320. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  321. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  322. smsp__inst_executed.avg inst 12,308.77
  323. smsp__inst_executed.max inst 12,684
  324. smsp__inst_executed.min inst 11,895
  325. smsp__inst_executed.sum inst 787,761
  326. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  327. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  328. smsp__cycles_active.avg cycle 72,717.34
  329. smsp__cycles_active.sum cycle 4,653,910
  330. ---------------------------------------------------------------------- --------------- ------------------------------
  331. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  332. Section: Command line profiler metrics
  333. ---------------------------------------------------------------------- --------------- ------------------------------
  334. gpu__time_duration.sum usecond 186.30
  335. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  336. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  337. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  338. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  339. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71
  340. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  341. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  342. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  343. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  344. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  345. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  346. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  347. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  348. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  349. smsp__average_warp_latency_issue_stalled_barrier.pct % 159,835.16
  350. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,598.35
  351. smsp__inst_executed.avg inst 132,204.20
  352. smsp__inst_executed.max inst 134,182
  353. smsp__inst_executed.min inst 130,077
  354. smsp__inst_executed.sum inst 8,461,069
  355. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.87
  356. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  357. smsp__cycles_active.avg cycle 252,623.66
  358. smsp__cycles_active.sum cycle 16,167,914
  359. ---------------------------------------------------------------------- --------------- ------------------------------
  360. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:08, Context 1, Stream 7
  361. Section: Command line profiler metrics
  362. ---------------------------------------------------------------------- --------------- ------------------------------
  363. gpu__time_duration.sum usecond 58.24
  364. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  365. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  366. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  367. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  368. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
  369. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  370. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  371. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  372. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  373. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  374. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  375. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  376. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  377. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  378. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  379. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  380. smsp__inst_executed.avg inst 12,291.53
  381. smsp__inst_executed.max inst 12,717
  382. smsp__inst_executed.min inst 11,826
  383. smsp__inst_executed.sum inst 786,658
  384. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  385. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  386. smsp__cycles_active.avg cycle 71,072.03
  387. smsp__cycles_active.sum cycle 4,548,610
  388. ---------------------------------------------------------------------- --------------- ------------------------------
  389. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  390. Section: Command line profiler metrics
  391. ---------------------------------------------------------------------- --------------- ------------------------------
  392. gpu__time_duration.sum usecond 58.50
  393. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  394. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  395. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  396. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  397. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
  398. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
  399. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  400. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  401. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  402. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  403. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  404. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  405. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  406. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  407. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  408. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  409. smsp__inst_executed.avg inst 12,293.39
  410. smsp__inst_executed.max inst 12,679
  411. smsp__inst_executed.min inst 11,917
  412. smsp__inst_executed.sum inst 786,777
  413. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  414. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  415. smsp__cycles_active.avg cycle 72,284.91
  416. smsp__cycles_active.sum cycle 4,626,234
  417. ---------------------------------------------------------------------- --------------- ------------------------------
  418. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  419. Section: Command line profiler metrics
  420. ---------------------------------------------------------------------- --------------- ------------------------------
  421. gpu__time_duration.sum usecond 57.95
  422. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  423. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  424. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  425. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  426. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
  427. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  428. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  429. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  430. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  431. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  432. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  433. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  434. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  435. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  436. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  437. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  438. smsp__inst_executed.avg inst 12,298.83
  439. smsp__inst_executed.max inst 12,502
  440. smsp__inst_executed.min inst 12,048
  441. smsp__inst_executed.sum inst 787,125
  442. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  443. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  444. smsp__cycles_active.avg cycle 71,602.17
  445. smsp__cycles_active.sum cycle 4,582,539
  446. ---------------------------------------------------------------------- --------------- ------------------------------
  447. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  448. Section: Command line profiler metrics
  449. ---------------------------------------------------------------------- --------------- ------------------------------
  450. gpu__time_duration.sum usecond 58.50
  451. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  452. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  453. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  454. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  455. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
  456. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  457. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  458. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  459. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  460. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  461. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  462. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  463. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  464. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  465. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  466. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  467. smsp__inst_executed.avg inst 12,309.36
  468. smsp__inst_executed.max inst 12,725
  469. smsp__inst_executed.min inst 11,813
  470. smsp__inst_executed.sum inst 787,799
  471. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  472. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  473. smsp__cycles_active.avg cycle 71,676.52
  474. smsp__cycles_active.sum cycle 4,587,297
  475. ---------------------------------------------------------------------- --------------- ------------------------------
  476. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  477. Section: Command line profiler metrics
  478. ---------------------------------------------------------------------- --------------- ------------------------------
  479. gpu__time_duration.sum usecond 185.50
  480. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  481. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  482. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  483. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  484. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
  485. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  486. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  487. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  488. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  489. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  490. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  491. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  492. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  493. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  494. smsp__average_warp_latency_issue_stalled_barrier.pct % 157,267.79
  495. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.68
  496. smsp__inst_executed.avg inst 132,200.03
  497. smsp__inst_executed.max inst 134,283
  498. smsp__inst_executed.min inst 130,070
  499. smsp__inst_executed.sum inst 8,460,802
  500. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.56
  501. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  502. smsp__cycles_active.avg cycle 252,497.38
  503. smsp__cycles_active.sum cycle 16,159,832
  504. ---------------------------------------------------------------------- --------------- ------------------------------
  505. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  506. Section: Command line profiler metrics
  507. ---------------------------------------------------------------------- --------------- ------------------------------
  508. gpu__time_duration.sum usecond 57.98
  509. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  510. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  511. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  512. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  513. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
  514. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  515. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  516. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  517. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  518. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  519. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  520. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  521. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  522. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  523. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  524. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  525. smsp__inst_executed.avg inst 12,290.03
  526. smsp__inst_executed.max inst 12,632
  527. smsp__inst_executed.min inst 12,096
  528. smsp__inst_executed.sum inst 786,562
  529. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  530. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  531. smsp__cycles_active.avg cycle 70,852.95
  532. smsp__cycles_active.sum cycle 4,534,589
  533. ---------------------------------------------------------------------- --------------- ------------------------------
  534. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  535. Section: Command line profiler metrics
  536. ---------------------------------------------------------------------- --------------- ------------------------------
  537. gpu__time_duration.sum usecond 58.02
  538. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  539. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  540. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  541. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  542. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
  543. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  544. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  545. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  546. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  547. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  548. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  549. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  550. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  551. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  552. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  553. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  554. smsp__inst_executed.avg inst 12,291.22
  555. smsp__inst_executed.max inst 12,768
  556. smsp__inst_executed.min inst 11,984
  557. smsp__inst_executed.sum inst 786,638
  558. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  559. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  560. smsp__cycles_active.avg cycle 70,387.03
  561. smsp__cycles_active.sum cycle 4,504,770
  562. ---------------------------------------------------------------------- --------------- ------------------------------
  563. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  564. Section: Command line profiler metrics
  565. ---------------------------------------------------------------------- --------------- ------------------------------
  566. gpu__time_duration.sum usecond 58.43
  567. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  568. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  569. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  570. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  571. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
  572. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
  573. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  574. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  575. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  576. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  577. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  578. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  579. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  580. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  581. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  582. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  583. smsp__inst_executed.avg inst 12,293.27
  584. smsp__inst_executed.max inst 12,615
  585. smsp__inst_executed.min inst 11,964
  586. smsp__inst_executed.sum inst 786,769
  587. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  588. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  589. smsp__cycles_active.avg cycle 72,163
  590. smsp__cycles_active.sum cycle 4,618,432
  591. ---------------------------------------------------------------------- --------------- ------------------------------
  592. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  593. Section: Command line profiler metrics
  594. ---------------------------------------------------------------------- --------------- ------------------------------
  595. gpu__time_duration.sum usecond 57.95
  596. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  597. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  598. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  599. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  600. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
  601. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  602. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  603. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  604. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  605. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  606. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  607. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  608. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  609. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  610. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  611. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  612. smsp__inst_executed.avg inst 12,298.78
  613. smsp__inst_executed.max inst 12,912
  614. smsp__inst_executed.min inst 11,741
  615. smsp__inst_executed.sum inst 787,122
  616. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  617. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  618. smsp__cycles_active.avg cycle 71,542.72
  619. smsp__cycles_active.sum cycle 4,578,734
  620. ---------------------------------------------------------------------- --------------- ------------------------------
  621. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  622. Section: Command line profiler metrics
  623. ---------------------------------------------------------------------- --------------- ------------------------------
  624. gpu__time_duration.sum usecond 58.50
  625. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  626. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  627. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  628. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  629. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
  630. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  631. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  632. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  633. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  634. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  635. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  636. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  637. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  638. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  639. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  640. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  641. smsp__inst_executed.avg inst 12,309.25
  642. smsp__inst_executed.max inst 12,746
  643. smsp__inst_executed.min inst 11,941
  644. smsp__inst_executed.sum inst 787,792
  645. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  646. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  647. smsp__cycles_active.avg cycle 71,521.23
  648. smsp__cycles_active.sum cycle 4,577,359
  649. ---------------------------------------------------------------------- --------------- ------------------------------
  650. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:09, Context 1, Stream 7
  651. Section: Command line profiler metrics
  652. ---------------------------------------------------------------------- --------------- ------------------------------
  653. gpu__time_duration.sum usecond 185.66
  654. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  655. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  656. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  657. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  658. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
  659. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  660. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  661. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  662. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  663. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  664. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  665. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  666. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  667. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  668. smsp__average_warp_latency_issue_stalled_barrier.pct % 160,534.14
  669. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,605.34
  670. smsp__inst_executed.avg inst 132,205.30
  671. smsp__inst_executed.max inst 134,375
  672. smsp__inst_executed.min inst 130,047
  673. smsp__inst_executed.sum inst 8,461,139
  674. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.03
  675. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  676. smsp__cycles_active.avg cycle 251,948.69
  677. smsp__cycles_active.sum cycle 16,124,716
  678. ---------------------------------------------------------------------- --------------- ------------------------------
  679. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  680. Section: Command line profiler metrics
  681. ---------------------------------------------------------------------- --------------- ------------------------------
  682. gpu__time_duration.sum usecond 58.14
  683. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  684. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  685. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  686. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  687. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  688. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  689. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  690. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  691. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  692. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  693. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  694. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  695. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  696. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  697. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  698. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  699. smsp__inst_executed.avg inst 12,289.38
  700. smsp__inst_executed.max inst 12,640
  701. smsp__inst_executed.min inst 11,952
  702. smsp__inst_executed.sum inst 786,520
  703. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  704. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  705. smsp__cycles_active.avg cycle 71,646.14
  706. smsp__cycles_active.sum cycle 4,585,353
  707. ---------------------------------------------------------------------- --------------- ------------------------------
  708. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  709. Section: Command line profiler metrics
  710. ---------------------------------------------------------------------- --------------- ------------------------------
  711. gpu__time_duration.sum usecond 58.05
  712. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  713. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  714. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  715. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  716. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  717. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  718. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  719. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  720. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  721. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  722. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  723. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  724. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  725. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  726. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  727. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  728. smsp__inst_executed.avg inst 12,289.72
  729. smsp__inst_executed.max inst 12,532
  730. smsp__inst_executed.min inst 11,872
  731. smsp__inst_executed.sum inst 786,542
  732. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  733. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  734. smsp__cycles_active.avg cycle 71,515.61
  735. smsp__cycles_active.sum cycle 4,576,999
  736. ---------------------------------------------------------------------- --------------- ------------------------------
  737. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  738. Section: Command line profiler metrics
  739. ---------------------------------------------------------------------- --------------- ------------------------------
  740. gpu__time_duration.sum usecond 57.82
  741. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  742. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  743. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  744. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  745. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
  746. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  747. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  748. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  749. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  750. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  751. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  752. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  753. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  754. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  755. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  756. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  757. smsp__inst_executed.avg inst 12,290.48
  758. smsp__inst_executed.max inst 12,775
  759. smsp__inst_executed.min inst 11,898
  760. smsp__inst_executed.sum inst 786,591
  761. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  762. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  763. smsp__cycles_active.avg cycle 71,662.92
  764. smsp__cycles_active.sum cycle 4,586,427
  765. ---------------------------------------------------------------------- --------------- ------------------------------
  766. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  767. Section: Command line profiler metrics
  768. ---------------------------------------------------------------------- --------------- ------------------------------
  769. gpu__time_duration.sum usecond 58.53
  770. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  771. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  772. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  773. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  774. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
  775. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
  776. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  777. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  778. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  779. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  780. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  781. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  782. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  783. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  784. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  785. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  786. smsp__inst_executed.avg inst 12,293.81
  787. smsp__inst_executed.max inst 12,753
  788. smsp__inst_executed.min inst 11,862
  789. smsp__inst_executed.sum inst 786,804
  790. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  791. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  792. smsp__cycles_active.avg cycle 72,455.17
  793. smsp__cycles_active.sum cycle 4,637,131
  794. ---------------------------------------------------------------------- --------------- ------------------------------
  795. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  796. Section: Command line profiler metrics
  797. ---------------------------------------------------------------------- --------------- ------------------------------
  798. gpu__time_duration.sum usecond 58.02
  799. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  800. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  801. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  802. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  803. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
  804. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
  805. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  806. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  807. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  808. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  809. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  810. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  811. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  812. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  813. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  814. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  815. smsp__inst_executed.avg inst 12,299.31
  816. smsp__inst_executed.max inst 12,673
  817. smsp__inst_executed.min inst 12,034
  818. smsp__inst_executed.sum inst 787,156
  819. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  820. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  821. smsp__cycles_active.avg cycle 71,010.81
  822. smsp__cycles_active.sum cycle 4,544,692
  823. ---------------------------------------------------------------------- --------------- ------------------------------
  824. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  825. Section: Command line profiler metrics
  826. ---------------------------------------------------------------------- --------------- ------------------------------
  827. gpu__time_duration.sum usecond 58.72
  828. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  829. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  830. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  831. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  832. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
  833. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  834. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  835. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  836. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  837. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  838. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  839. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  840. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  841. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  842. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  843. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  844. smsp__inst_executed.avg inst 12,307.36
  845. smsp__inst_executed.max inst 12,544
  846. smsp__inst_executed.min inst 11,923
  847. smsp__inst_executed.sum inst 787,671
  848. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  849. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  850. smsp__cycles_active.avg cycle 72,317.72
  851. smsp__cycles_active.sum cycle 4,628,334
  852. ---------------------------------------------------------------------- --------------- ------------------------------
  853. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  854. Section: Command line profiler metrics
  855. ---------------------------------------------------------------------- --------------- ------------------------------
  856. gpu__time_duration.sum usecond 183.36
  857. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  858. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  859. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  860. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  861. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.70
  862. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  863. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  864. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  865. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  866. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  867. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  868. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  869. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  870. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  871. smsp__average_warp_latency_issue_stalled_barrier.pct % 160,035.27
  872. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.35
  873. smsp__inst_executed.avg inst 132,214.05
  874. smsp__inst_executed.max inst 134,326
  875. smsp__inst_executed.min inst 130,109
  876. smsp__inst_executed.sum inst 8,461,699
  877. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.87
  878. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  879. smsp__cycles_active.avg cycle 252,974.50
  880. smsp__cycles_active.sum cycle 16,190,368
  881. ---------------------------------------------------------------------- --------------- ------------------------------
  882. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  883. Section: Command line profiler metrics
  884. ---------------------------------------------------------------------- --------------- ------------------------------
  885. gpu__time_duration.sum usecond 58.69
  886. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  887. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  888. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  889. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  890. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  891. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  892. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  893. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  894. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  895. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  896. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  897. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  898. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  899. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  900. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  901. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  902. smsp__inst_executed.avg inst 12,288.52
  903. smsp__inst_executed.max inst 12,488
  904. smsp__inst_executed.min inst 11,936
  905. smsp__inst_executed.sum inst 786,465
  906. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  907. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  908. smsp__cycles_active.avg cycle 72,501.95
  909. smsp__cycles_active.sum cycle 4,640,125
  910. ---------------------------------------------------------------------- --------------- ------------------------------
  911. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  912. Section: Command line profiler metrics
  913. ---------------------------------------------------------------------- --------------- ------------------------------
  914. gpu__time_duration.sum usecond 58.27
  915. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  916. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  917. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  918. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  919. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  920. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  921. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  922. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  923. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  924. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  925. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  926. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  927. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  928. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  929. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  930. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  931. smsp__inst_executed.avg inst 12,287.92
  932. smsp__inst_executed.max inst 12,484
  933. smsp__inst_executed.min inst 12,100
  934. smsp__inst_executed.sum inst 786,427
  935. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  936. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  937. smsp__cycles_active.avg cycle 71,503.14
  938. smsp__cycles_active.sum cycle 4,576,201
  939. ---------------------------------------------------------------------- --------------- ------------------------------
  940. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:10, Context 1, Stream 7
  941. Section: Command line profiler metrics
  942. ---------------------------------------------------------------------- --------------- ------------------------------
  943. gpu__time_duration.sum usecond 58.08
  944. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  945. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  946. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  947. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  948. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
  949. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  950. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  951. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  952. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  953. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  954. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  955. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  956. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  957. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  958. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  959. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  960. smsp__inst_executed.avg inst 12,289.14
  961. smsp__inst_executed.max inst 12,869
  962. smsp__inst_executed.min inst 11,892
  963. smsp__inst_executed.sum inst 786,505
  964. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  965. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  966. smsp__cycles_active.avg cycle 71,524.88
  967. smsp__cycles_active.sum cycle 4,577,592
  968. ---------------------------------------------------------------------- --------------- ------------------------------
  969. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  970. Section: Command line profiler metrics
  971. ---------------------------------------------------------------------- --------------- ------------------------------
  972. gpu__time_duration.sum usecond 58.05
  973. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  974. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  975. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  976. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  977. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
  978. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  979. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  980. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  981. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  982. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  983. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  984. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  985. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  986. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  987. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  988. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  989. smsp__inst_executed.avg inst 12,290.83
  990. smsp__inst_executed.max inst 12,572
  991. smsp__inst_executed.min inst 12,020
  992. smsp__inst_executed.sum inst 786,613
  993. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  994. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  995. smsp__cycles_active.avg cycle 70,350.59
  996. smsp__cycles_active.sum cycle 4,502,438
  997. ---------------------------------------------------------------------- --------------- ------------------------------
  998. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  999. Section: Command line profiler metrics
  1000. ---------------------------------------------------------------------- --------------- ------------------------------
  1001. gpu__time_duration.sum usecond 58.43
  1002. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1003. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1004. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1005. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1006. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
  1007. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
  1008. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1009. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1010. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1011. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1012. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1013. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1014. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1015. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1016. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1017. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1018. smsp__inst_executed.avg inst 12,292.59
  1019. smsp__inst_executed.max inst 12,681
  1020. smsp__inst_executed.min inst 11,988
  1021. smsp__inst_executed.sum inst 786,726
  1022. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1023. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1024. smsp__cycles_active.avg cycle 71,302.25
  1025. smsp__cycles_active.sum cycle 4,563,344
  1026. ---------------------------------------------------------------------- --------------- ------------------------------
  1027. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1028. Section: Command line profiler metrics
  1029. ---------------------------------------------------------------------- --------------- ------------------------------
  1030. gpu__time_duration.sum usecond 57.89
  1031. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1032. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1033. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1034. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1035. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
  1036. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  1037. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1038. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1039. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1040. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1041. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1042. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1043. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1044. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1045. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1046. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1047. smsp__inst_executed.avg inst 12,298.28
  1048. smsp__inst_executed.max inst 12,708
  1049. smsp__inst_executed.min inst 11,898
  1050. smsp__inst_executed.sum inst 787,090
  1051. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1052. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1053. smsp__cycles_active.avg cycle 71,771.03
  1054. smsp__cycles_active.sum cycle 4,593,346
  1055. ---------------------------------------------------------------------- --------------- ------------------------------
  1056. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1057. Section: Command line profiler metrics
  1058. ---------------------------------------------------------------------- --------------- ------------------------------
  1059. gpu__time_duration.sum usecond 58.53
  1060. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1061. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1062. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1063. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1064. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
  1065. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  1066. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1067. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1068. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1069. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1070. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1071. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1072. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1073. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1074. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1075. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1076. smsp__inst_executed.avg inst 12,310.67
  1077. smsp__inst_executed.max inst 12,575
  1078. smsp__inst_executed.min inst 12,060
  1079. smsp__inst_executed.sum inst 787,883
  1080. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1081. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1082. smsp__cycles_active.avg cycle 73,137
  1083. smsp__cycles_active.sum cycle 4,680,768
  1084. ---------------------------------------------------------------------- --------------- ------------------------------
  1085. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1086. Section: Command line profiler metrics
  1087. ---------------------------------------------------------------------- --------------- ------------------------------
  1088. gpu__time_duration.sum usecond 184.67
  1089. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1090. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1091. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  1092. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  1093. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
  1094. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  1095. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1096. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1097. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1098. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1099. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1100. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1101. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1102. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1103. smsp__average_warp_latency_issue_stalled_barrier.pct % 161,865.73
  1104. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,618.66
  1105. smsp__inst_executed.avg inst 132,202.36
  1106. smsp__inst_executed.max inst 134,344
  1107. smsp__inst_executed.min inst 130,057
  1108. smsp__inst_executed.sum inst 8,460,951
  1109. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.11
  1110. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  1111. smsp__cycles_active.avg cycle 252,870.69
  1112. smsp__cycles_active.sum cycle 16,183,724
  1113. ---------------------------------------------------------------------- --------------- ------------------------------
  1114. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1115. Section: Command line profiler metrics
  1116. ---------------------------------------------------------------------- --------------- ------------------------------
  1117. gpu__time_duration.sum usecond 59.23
  1118. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1119. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1120. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1121. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1122. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1123. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1124. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1125. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1126. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1127. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1128. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1129. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1130. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1131. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1132. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1133. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1134. smsp__inst_executed.avg inst 12,287.84
  1135. smsp__inst_executed.max inst 12,700
  1136. smsp__inst_executed.min inst 11,884
  1137. smsp__inst_executed.sum inst 786,422
  1138. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1139. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1140. smsp__cycles_active.avg cycle 71,958.33
  1141. smsp__cycles_active.sum cycle 4,605,333
  1142. ---------------------------------------------------------------------- --------------- ------------------------------
  1143. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1144. Section: Command line profiler metrics
  1145. ---------------------------------------------------------------------- --------------- ------------------------------
  1146. gpu__time_duration.sum usecond 58.91
  1147. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1148. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1149. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1150. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1151. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1152. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1153. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1154. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1155. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1156. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1157. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1158. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1159. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1160. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1161. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1162. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1163. smsp__inst_executed.avg inst 12,288.59
  1164. smsp__inst_executed.max inst 12,836
  1165. smsp__inst_executed.min inst 11,892
  1166. smsp__inst_executed.sum inst 786,470
  1167. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1168. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1169. smsp__cycles_active.avg cycle 72,352.05
  1170. smsp__cycles_active.sum cycle 4,630,531
  1171. ---------------------------------------------------------------------- --------------- ------------------------------
  1172. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1173. Section: Command line profiler metrics
  1174. ---------------------------------------------------------------------- --------------- ------------------------------
  1175. gpu__time_duration.sum usecond 58.43
  1176. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1177. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1178. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1179. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1180. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  1181. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1182. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1183. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1184. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1185. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1186. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1187. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1188. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1189. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1190. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1191. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1192. smsp__inst_executed.avg inst 12,288.62
  1193. smsp__inst_executed.max inst 12,636
  1194. smsp__inst_executed.min inst 11,942
  1195. smsp__inst_executed.sum inst 786,472
  1196. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1197. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1198. smsp__cycles_active.avg cycle 71,507.44
  1199. smsp__cycles_active.sum cycle 4,576,476
  1200. ---------------------------------------------------------------------- --------------- ------------------------------
  1201. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1202. Section: Command line profiler metrics
  1203. ---------------------------------------------------------------------- --------------- ------------------------------
  1204. gpu__time_duration.sum usecond 58.11
  1205. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1206. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1207. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1208. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1209. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
  1210. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  1211. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1212. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1213. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1214. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1215. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1216. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1217. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1218. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1219. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1220. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1221. smsp__inst_executed.avg inst 12,289.72
  1222. smsp__inst_executed.max inst 12,672
  1223. smsp__inst_executed.min inst 11,662
  1224. smsp__inst_executed.sum inst 786,542
  1225. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1226. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1227. smsp__cycles_active.avg cycle 71,165.02
  1228. smsp__cycles_active.sum cycle 4,554,561
  1229. ---------------------------------------------------------------------- --------------- ------------------------------
  1230. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1231. Section: Command line profiler metrics
  1232. ---------------------------------------------------------------------- --------------- ------------------------------
  1233. gpu__time_duration.sum usecond 58.02
  1234. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1235. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1236. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1237. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1238. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
  1239. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  1240. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1241. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1242. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1243. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1244. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1245. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1246. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1247. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1248. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1249. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1250. smsp__inst_executed.avg inst 12,291
  1251. smsp__inst_executed.max inst 12,677
  1252. smsp__inst_executed.min inst 11,882
  1253. smsp__inst_executed.sum inst 786,624
  1254. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1255. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1256. smsp__cycles_active.avg cycle 70,224.42
  1257. smsp__cycles_active.sum cycle 4,494,363
  1258. ---------------------------------------------------------------------- --------------- ------------------------------
  1259. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:11, Context 1, Stream 7
  1260. Section: Command line profiler metrics
  1261. ---------------------------------------------------------------------- --------------- ------------------------------
  1262. gpu__time_duration.sum usecond 58.72
  1263. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1264. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1265. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1266. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1267. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
  1268. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
  1269. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1270. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1271. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1272. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1273. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1274. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1275. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1276. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1277. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1278. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1279. smsp__inst_executed.avg inst 12,294.19
  1280. smsp__inst_executed.max inst 12,761
  1281. smsp__inst_executed.min inst 11,776
  1282. smsp__inst_executed.sum inst 786,828
  1283. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1284. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1285. smsp__cycles_active.avg cycle 72,352.88
  1286. smsp__cycles_active.sum cycle 4,630,584
  1287. ---------------------------------------------------------------------- --------------- ------------------------------
  1288. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1289. Section: Command line profiler metrics
  1290. ---------------------------------------------------------------------- --------------- ------------------------------
  1291. gpu__time_duration.sum usecond 58.08
  1292. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1293. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1294. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1295. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1296. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
  1297. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  1298. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1299. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1300. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1301. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1302. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1303. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1304. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1305. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1306. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1307. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1308. smsp__inst_executed.avg inst 12,300.17
  1309. smsp__inst_executed.max inst 12,699
  1310. smsp__inst_executed.min inst 11,741
  1311. smsp__inst_executed.sum inst 787,211
  1312. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1313. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1314. smsp__cycles_active.avg cycle 71,852.11
  1315. smsp__cycles_active.sum cycle 4,598,535
  1316. ---------------------------------------------------------------------- --------------- ------------------------------
  1317. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1318. Section: Command line profiler metrics
  1319. ---------------------------------------------------------------------- --------------- ------------------------------
  1320. gpu__time_duration.sum usecond 58.69
  1321. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1322. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1323. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1324. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1325. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
  1326. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  1327. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1328. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1329. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1330. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1331. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1332. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1333. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1334. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1335. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1336. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1337. smsp__inst_executed.avg inst 12,305.34
  1338. smsp__inst_executed.max inst 12,557
  1339. smsp__inst_executed.min inst 12,098
  1340. smsp__inst_executed.sum inst 787,542
  1341. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1342. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1343. smsp__cycles_active.avg cycle 72,448.42
  1344. smsp__cycles_active.sum cycle 4,636,699
  1345. ---------------------------------------------------------------------- --------------- ------------------------------
  1346. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1347. Section: Command line profiler metrics
  1348. ---------------------------------------------------------------------- --------------- ------------------------------
  1349. gpu__time_duration.sum usecond 183.71
  1350. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1351. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1352. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  1353. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  1354. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.72
  1355. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  1356. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1357. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1358. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1359. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1360. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1361. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1362. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1363. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1364. smsp__average_warp_latency_issue_stalled_barrier.pct % 159,410.38
  1365. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.10
  1366. smsp__inst_executed.avg inst 132,239.30
  1367. smsp__inst_executed.max inst 134,389
  1368. smsp__inst_executed.min inst 130,159
  1369. smsp__inst_executed.sum inst 8,463,315
  1370. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.78
  1371. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  1372. smsp__cycles_active.avg cycle 253,259.25
  1373. smsp__cycles_active.sum cycle 16,208,592
  1374. ---------------------------------------------------------------------- --------------- ------------------------------
  1375. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1376. Section: Command line profiler metrics
  1377. ---------------------------------------------------------------------- --------------- ------------------------------
  1378. gpu__time_duration.sum usecond 59.68
  1379. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1380. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1381. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1382. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1383. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1384. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1385. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1386. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1387. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1388. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1389. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1390. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1391. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1392. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1393. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1394. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1395. smsp__inst_executed.avg inst 12,287.97
  1396. smsp__inst_executed.max inst 12,696
  1397. smsp__inst_executed.min inst 11,904
  1398. smsp__inst_executed.sum inst 786,430
  1399. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1400. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1401. smsp__cycles_active.avg cycle 73,186.48
  1402. smsp__cycles_active.sum cycle 4,683,935
  1403. ---------------------------------------------------------------------- --------------- ------------------------------
  1404. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1405. Section: Command line profiler metrics
  1406. ---------------------------------------------------------------------- --------------- ------------------------------
  1407. gpu__time_duration.sum usecond 59.14
  1408. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1409. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1410. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1411. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1412. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1413. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1414. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1415. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1416. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1417. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1418. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1419. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1420. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1421. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1422. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1423. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1424. smsp__inst_executed.avg inst 12,288.12
  1425. smsp__inst_executed.max inst 12,680
  1426. smsp__inst_executed.min inst 12,068
  1427. smsp__inst_executed.sum inst 786,440
  1428. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1429. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1430. smsp__cycles_active.avg cycle 71,841.56
  1431. smsp__cycles_active.sum cycle 4,597,860
  1432. ---------------------------------------------------------------------- --------------- ------------------------------
  1433. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1434. Section: Command line profiler metrics
  1435. ---------------------------------------------------------------------- --------------- ------------------------------
  1436. gpu__time_duration.sum usecond 59.20
  1437. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1438. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1439. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1440. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1441. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  1442. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1443. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1444. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1445. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1446. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1447. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1448. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1449. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1450. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1451. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1452. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1453. smsp__inst_executed.avg inst 12,288.52
  1454. smsp__inst_executed.max inst 12,664
  1455. smsp__inst_executed.min inst 11,916
  1456. smsp__inst_executed.sum inst 786,465
  1457. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1458. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1459. smsp__cycles_active.avg cycle 71,395.02
  1460. smsp__cycles_active.sum cycle 4,569,281
  1461. ---------------------------------------------------------------------- --------------- ------------------------------
  1462. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1463. Section: Command line profiler metrics
  1464. ---------------------------------------------------------------------- --------------- ------------------------------
  1465. gpu__time_duration.sum usecond 58.37
  1466. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1467. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1468. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1469. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1470. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  1471. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1472. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1473. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1474. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1475. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1476. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1477. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1478. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1479. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1480. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1481. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1482. smsp__inst_executed.avg inst 12,288.83
  1483. smsp__inst_executed.max inst 12,652
  1484. smsp__inst_executed.min inst 12,092
  1485. smsp__inst_executed.sum inst 786,485
  1486. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1487. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1488. smsp__cycles_active.avg cycle 70,939.75
  1489. smsp__cycles_active.sum cycle 4,540,144
  1490. ---------------------------------------------------------------------- --------------- ------------------------------
  1491. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1492. Section: Command line profiler metrics
  1493. ---------------------------------------------------------------------- --------------- ------------------------------
  1494. gpu__time_duration.sum usecond 58.05
  1495. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1496. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1497. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1498. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1499. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
  1500. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1501. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1502. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1503. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1504. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1505. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1506. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1507. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1508. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1509. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1510. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1511. smsp__inst_executed.avg inst 12,289.27
  1512. smsp__inst_executed.max inst 12,671
  1513. smsp__inst_executed.min inst 11,948
  1514. smsp__inst_executed.sum inst 786,513
  1515. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1516. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1517. smsp__cycles_active.avg cycle 70,398.56
  1518. smsp__cycles_active.sum cycle 4,505,508
  1519. ---------------------------------------------------------------------- --------------- ------------------------------
  1520. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1521. Section: Command line profiler metrics
  1522. ---------------------------------------------------------------------- --------------- ------------------------------
  1523. gpu__time_duration.sum usecond 57.89
  1524. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1525. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1526. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1527. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1528. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
  1529. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  1530. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1531. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1532. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1533. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1534. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1535. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1536. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1537. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1538. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1539. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1540. smsp__inst_executed.avg inst 12,290.94
  1541. smsp__inst_executed.max inst 12,634
  1542. smsp__inst_executed.min inst 11,856
  1543. smsp__inst_executed.sum inst 786,620
  1544. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1545. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1546. smsp__cycles_active.avg cycle 71,430.73
  1547. smsp__cycles_active.sum cycle 4,571,567
  1548. ---------------------------------------------------------------------- --------------- ------------------------------
  1549. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:12, Context 1, Stream 7
  1550. Section: Command line profiler metrics
  1551. ---------------------------------------------------------------------- --------------- ------------------------------
  1552. gpu__time_duration.sum usecond 58.91
  1553. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1554. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1555. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1556. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1557. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
  1558. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.97
  1559. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1560. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1561. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1562. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1563. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1564. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1565. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1566. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1567. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1568. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1569. smsp__inst_executed.avg inst 12,294.67
  1570. smsp__inst_executed.max inst 12,714
  1571. smsp__inst_executed.min inst 11,973
  1572. smsp__inst_executed.sum inst 786,859
  1573. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1574. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1575. smsp__cycles_active.avg cycle 72,485.34
  1576. smsp__cycles_active.sum cycle 4,639,062
  1577. ---------------------------------------------------------------------- --------------- ------------------------------
  1578. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1579. Section: Command line profiler metrics
  1580. ---------------------------------------------------------------------- --------------- ------------------------------
  1581. gpu__time_duration.sum usecond 58.11
  1582. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1583. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1584. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1585. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1586. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
  1587. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  1588. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1589. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1590. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1591. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1592. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1593. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1594. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1595. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1596. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1597. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1598. smsp__inst_executed.avg inst 12,300.77
  1599. smsp__inst_executed.max inst 12,715
  1600. smsp__inst_executed.min inst 11,880
  1601. smsp__inst_executed.sum inst 787,249
  1602. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1603. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1604. smsp__cycles_active.avg cycle 71,482.33
  1605. smsp__cycles_active.sum cycle 4,574,869
  1606. ---------------------------------------------------------------------- --------------- ------------------------------
  1607. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1608. Section: Command line profiler metrics
  1609. ---------------------------------------------------------------------- --------------- ------------------------------
  1610. gpu__time_duration.sum usecond 58.40
  1611. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1612. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1613. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1614. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1615. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
  1616. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  1617. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1618. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1619. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1620. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1621. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1622. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1623. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1624. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1625. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1626. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1627. smsp__inst_executed.avg inst 12,305.27
  1628. smsp__inst_executed.max inst 12,723
  1629. smsp__inst_executed.min inst 11,888
  1630. smsp__inst_executed.sum inst 787,537
  1631. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1632. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1633. smsp__cycles_active.avg cycle 71,478.97
  1634. smsp__cycles_active.sum cycle 4,574,654
  1635. ---------------------------------------------------------------------- --------------- ------------------------------
  1636. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1637. Section: Command line profiler metrics
  1638. ---------------------------------------------------------------------- --------------- ------------------------------
  1639. gpu__time_duration.sum usecond 182.66
  1640. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1641. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1642. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  1643. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  1644. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.71
  1645. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.71
  1646. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1647. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1648. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1649. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1650. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1651. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1652. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1653. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1654. smsp__average_warp_latency_issue_stalled_barrier.pct % 159,442.50
  1655. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.42
  1656. smsp__inst_executed.avg inst 132,250.77
  1657. smsp__inst_executed.max inst 134,372
  1658. smsp__inst_executed.min inst 130,113
  1659. smsp__inst_executed.sum inst 8,464,049
  1660. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.82
  1661. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  1662. smsp__cycles_active.avg cycle 252,682.78
  1663. smsp__cycles_active.sum cycle 16,171,698
  1664. ---------------------------------------------------------------------- --------------- ------------------------------
  1665. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1666. Section: Command line profiler metrics
  1667. ---------------------------------------------------------------------- --------------- ------------------------------
  1668. gpu__time_duration.sum usecond 56.86
  1669. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1670. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1671. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1672. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1673. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1674. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1675. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1676. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1677. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1678. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1679. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1680. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1681. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1682. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1683. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1684. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1685. smsp__inst_executed.avg inst 12,288.31
  1686. smsp__inst_executed.max inst 12,684
  1687. smsp__inst_executed.min inst 12,072
  1688. smsp__inst_executed.sum inst 786,452
  1689. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1690. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1691. smsp__cycles_active.avg cycle 69,556.61
  1692. smsp__cycles_active.sum cycle 4,451,623
  1693. ---------------------------------------------------------------------- --------------- ------------------------------
  1694. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1695. Section: Command line profiler metrics
  1696. ---------------------------------------------------------------------- --------------- ------------------------------
  1697. gpu__time_duration.sum usecond 59.84
  1698. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1699. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1700. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1701. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1702. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1703. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1704. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1705. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1706. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1707. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1708. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1709. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1710. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1711. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1712. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1713. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1714. smsp__inst_executed.avg inst 12,287.48
  1715. smsp__inst_executed.max inst 12,868
  1716. smsp__inst_executed.min inst 11,704
  1717. smsp__inst_executed.sum inst 786,399
  1718. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1719. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1720. smsp__cycles_active.avg cycle 73,568.34
  1721. smsp__cycles_active.sum cycle 4,708,374
  1722. ---------------------------------------------------------------------- --------------- ------------------------------
  1723. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1724. Section: Command line profiler metrics
  1725. ---------------------------------------------------------------------- --------------- ------------------------------
  1726. gpu__time_duration.sum usecond 58.88
  1727. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1728. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1729. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1730. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1731. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
  1732. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1733. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1734. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1735. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1736. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1737. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1738. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1739. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1740. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1741. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1742. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1743. smsp__inst_executed.avg inst 12,288.06
  1744. smsp__inst_executed.max inst 12,672
  1745. smsp__inst_executed.min inst 11,712
  1746. smsp__inst_executed.sum inst 786,436
  1747. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1748. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1749. smsp__cycles_active.avg cycle 73,217.27
  1750. smsp__cycles_active.sum cycle 4,685,905
  1751. ---------------------------------------------------------------------- --------------- ------------------------------
  1752. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1753. Section: Command line profiler metrics
  1754. ---------------------------------------------------------------------- --------------- ------------------------------
  1755. gpu__time_duration.sum usecond 59.39
  1756. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1757. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1758. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1759. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1760. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  1761. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1762. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1763. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1764. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1765. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1766. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1767. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1768. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1769. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1770. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1771. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1772. smsp__inst_executed.avg inst 12,288.70
  1773. smsp__inst_executed.max inst 12,684
  1774. smsp__inst_executed.min inst 12,060
  1775. smsp__inst_executed.sum inst 786,477
  1776. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1777. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1778. smsp__cycles_active.avg cycle 72,586.44
  1779. smsp__cycles_active.sum cycle 4,645,532
  1780. ---------------------------------------------------------------------- --------------- ------------------------------
  1781. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1782. Section: Command line profiler metrics
  1783. ---------------------------------------------------------------------- --------------- ------------------------------
  1784. gpu__time_duration.sum usecond 58.24
  1785. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1786. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1787. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1788. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1789. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
  1790. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
  1791. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1792. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1793. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1794. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1795. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1796. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1797. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1798. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1799. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1800. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1801. smsp__inst_executed.avg inst 12,287.72
  1802. smsp__inst_executed.max inst 12,496
  1803. smsp__inst_executed.min inst 12,076
  1804. smsp__inst_executed.sum inst 786,414
  1805. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1806. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1807. smsp__cycles_active.avg cycle 71,872.52
  1808. smsp__cycles_active.sum cycle 4,599,841
  1809. ---------------------------------------------------------------------- --------------- ------------------------------
  1810. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1811. Section: Command line profiler metrics
  1812. ---------------------------------------------------------------------- --------------- ------------------------------
  1813. gpu__time_duration.sum usecond 58.14
  1814. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1815. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1816. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1817. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1818. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
  1819. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  1820. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1821. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1822. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1823. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1824. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1825. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1826. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1827. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1828. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1829. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1830. smsp__inst_executed.avg inst 12,289.17
  1831. smsp__inst_executed.max inst 12,692
  1832. smsp__inst_executed.min inst 11,950
  1833. smsp__inst_executed.sum inst 786,507
  1834. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1835. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1836. smsp__cycles_active.avg cycle 71,569.31
  1837. smsp__cycles_active.sum cycle 4,580,436
  1838. ---------------------------------------------------------------------- --------------- ------------------------------
  1839. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1840. Section: Command line profiler metrics
  1841. ---------------------------------------------------------------------- --------------- ------------------------------
  1842. gpu__time_duration.sum usecond 57.98
  1843. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1844. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1845. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1846. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1847. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
  1848. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
  1849. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1850. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1851. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1852. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1853. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1854. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1855. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1856. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1857. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1858. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1859. smsp__inst_executed.avg inst 12,290.83
  1860. smsp__inst_executed.max inst 12,676
  1861. smsp__inst_executed.min inst 12,066
  1862. smsp__inst_executed.sum inst 786,613
  1863. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1864. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1865. smsp__cycles_active.avg cycle 70,942.83
  1866. smsp__cycles_active.sum cycle 4,540,341
  1867. ---------------------------------------------------------------------- --------------- ------------------------------
  1868. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:13, Context 1, Stream 7
  1869. Section: Command line profiler metrics
  1870. ---------------------------------------------------------------------- --------------- ------------------------------
  1871. gpu__time_duration.sum usecond 58.78
  1872. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1873. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1874. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1875. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1876. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
  1877. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
  1878. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1879. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1880. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1881. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1882. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1883. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1884. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1885. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1886. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1887. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1888. smsp__inst_executed.avg inst 12,292.48
  1889. smsp__inst_executed.max inst 12,668
  1890. smsp__inst_executed.min inst 11,864
  1891. smsp__inst_executed.sum inst 786,719
  1892. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1893. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1894. smsp__cycles_active.avg cycle 71,652.72
  1895. smsp__cycles_active.sum cycle 4,585,774
  1896. ---------------------------------------------------------------------- --------------- ------------------------------
  1897. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7
  1898. Section: Command line profiler metrics
  1899. ---------------------------------------------------------------------- --------------- ------------------------------
  1900. gpu__time_duration.sum usecond 57.92
  1901. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1902. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1903. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1904. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1905. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
  1906. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
  1907. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1908. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1909. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1910. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1911. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1912. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1913. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1914. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1915. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1916. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1917. smsp__inst_executed.avg inst 12,297.83
  1918. smsp__inst_executed.max inst 12,507
  1919. smsp__inst_executed.min inst 12,080
  1920. smsp__inst_executed.sum inst 787,061
  1921. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1922. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1923. smsp__cycles_active.avg cycle 71,989.52
  1924. smsp__cycles_active.sum cycle 4,607,329
  1925. ---------------------------------------------------------------------- --------------- ------------------------------
  1926. void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7
  1927. Section: Command line profiler metrics
  1928. ---------------------------------------------------------------------- --------------- ------------------------------
  1929. gpu__time_duration.sum usecond 58.56
  1930. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1931. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1932. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
  1933. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
  1934. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
  1935. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
  1936. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1937. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1938. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1939. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1940. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1941. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1942. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1943. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1944. smsp__average_warp_latency_issue_stalled_barrier.pct % 0
  1945. smsp__average_warp_latency_issue_stalled_barrier.ratio 0
  1946. smsp__inst_executed.avg inst 12,309.70
  1947. smsp__inst_executed.max inst 12,667
  1948. smsp__inst_executed.min inst 12,088
  1949. smsp__inst_executed.sum inst 787,821
  1950. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
  1951. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
  1952. smsp__cycles_active.avg cycle 72,280.86
  1953. smsp__cycles_active.sum cycle 4,625,975
  1954. ---------------------------------------------------------------------- --------------- ------------------------------
  1955. void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 16:07:14, Context 1, Stream 7
  1956. Section: Command line profiler metrics
  1957. ---------------------------------------------------------------------- --------------- ------------------------------
  1958. gpu__time_duration.sum usecond 183.58
  1959. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
  1960. l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
  1961. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
  1962. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
  1963. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 14.70
  1964. l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.70
  1965. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
  1966. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
  1967. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
  1968. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
  1969. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
  1970. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
  1971. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
  1972. l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
  1973. smsp__average_warp_latency_issue_stalled_barrier.pct % 159,496.29
  1974. smsp__average_warp_latency_issue_stalled_barrier.ratio 1,594.96
  1975. smsp__inst_executed.avg inst 131,966.41
  1976. smsp__inst_executed.max inst 134,088
  1977. smsp__inst_executed.min inst 129,856
  1978. smsp__inst_executed.sum inst 8,445,850
  1979. smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.79
  1980. smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
  1981. smsp__cycles_active.avg cycle 253,055.50
  1982. smsp__cycles_active.sum cycle 16,195,552
  1983. ---------------------------------------------------------------------- --------------- ------------------------------