|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917 |
- ==PROF== Connected to process 20279 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
- ==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes
- ==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes
- ==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes
- ==PROF== Disconnected from process 20279
- [20279] bitonicCUDA@127.0.0.1
- void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum msecond 1.20
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,946.88
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,116
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,795
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,150
- smsp__average_warp_latency_issue_stalled_barrier.pct % 644,345.26
- smsp__average_warp_latency_issue_stalled_barrier.ratio 6,443.45
- smsp__inst_executed.avg inst 1,030,868.94
- smsp__inst_executed.max inst 1,031,062
- smsp__inst_executed.min inst 1,030,675
- smsp__inst_executed.sum inst 65,975,612
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
- smsp__cycles_active.avg cycle 1,666,829.12
- smsp__cycles_active.sum cycle 106,677,064
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.84
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,308.59
- smsp__inst_executed.max inst 12,538
- smsp__inst_executed.min inst 11,945
- smsp__inst_executed.sum inst 787,750
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 73,268.67
- smsp__cycles_active.sum cycle 4,689,195
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.30
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,642.38
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,963
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,322
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,278
- smsp__average_warp_latency_issue_stalled_barrier.pct % 123,392.55
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,233.93
- smsp__inst_executed.avg inst 189,292.45
- smsp__inst_executed.max inst 192,372
- smsp__inst_executed.min inst 186,246
- smsp__inst_executed.sum inst 12,114,717
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,267.31
- smsp__cycles_active.sum cycle 20,241,108
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.34
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,298.58
- smsp__inst_executed.max inst 12,667
- smsp__inst_executed.min inst 11,936
- smsp__inst_executed.sum inst 787,109
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,505.30
- smsp__cycles_active.sum cycle 4,512,339
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.55
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,309.17
- smsp__inst_executed.max inst 12,702
- smsp__inst_executed.min inst 11,606
- smsp__inst_executed.sum inst 787,787
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,897.17
- smsp__cycles_active.sum cycle 4,665,419
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 230.91
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,680
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,009
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,334
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,880
- smsp__average_warp_latency_issue_stalled_barrier.pct % 123,674.16
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,236.74
- smsp__inst_executed.avg inst 189,294.36
- smsp__inst_executed.max inst 192,238
- smsp__inst_executed.min inst 186,252
- smsp__inst_executed.sum inst 12,114,839
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,040.81
- smsp__cycles_active.sum cycle 20,226,612
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.72
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,293.78
- smsp__inst_executed.max inst 12,542
- smsp__inst_executed.min inst 11,960
- smsp__inst_executed.sum inst 786,802
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,235.28
- smsp__cycles_active.sum cycle 4,559,058
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.56
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,298.95
- smsp__inst_executed.max inst 12,560
- smsp__inst_executed.min inst 12,096
- smsp__inst_executed.sum inst 787,133
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,575.53
- smsp__cycles_active.sum cycle 4,516,834
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.42
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,308.61
- smsp__inst_executed.max inst 12,640
- smsp__inst_executed.min inst 12,096
- smsp__inst_executed.sum inst 787,751
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,641.39
- smsp__cycles_active.sum cycle 4,649,049
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.87
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,674.75
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,354
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,796
- smsp__average_warp_latency_issue_stalled_barrier.pct % 123,483.94
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,234.84
- smsp__inst_executed.avg inst 189,288.14
- smsp__inst_executed.max inst 192,081
- smsp__inst_executed.min inst 186,477
- smsp__inst_executed.sum inst 12,114,441
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 315,433.75
- smsp__cycles_active.sum cycle 20,187,760
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.14
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,290.34
- smsp__inst_executed.max inst 12,724
- smsp__inst_executed.min inst 12,076
- smsp__inst_executed.sum inst 786,582
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,402.61
- smsp__cycles_active.sum cycle 4,505,767
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.56
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,294.27
- smsp__inst_executed.max inst 12,717
- smsp__inst_executed.min inst 11,988
- smsp__inst_executed.sum inst 786,833
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,681.59
- smsp__cycles_active.sum cycle 4,523,622
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.05
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,298.42
- smsp__inst_executed.max inst 12,663
- smsp__inst_executed.min inst 11,882
- smsp__inst_executed.sum inst 787,099
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,688.28
- smsp__cycles_active.sum cycle 4,524,050
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.49
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,309.03
- smsp__inst_executed.max inst 12,686
- smsp__inst_executed.min inst 11,852
- smsp__inst_executed.sum inst 787,778
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,892.83
- smsp__cycles_active.sum cycle 4,665,141
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.33
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,976
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,331
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,832
- smsp__average_warp_latency_issue_stalled_barrier.pct % 123,882.24
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,238.82
- smsp__inst_executed.avg inst 189,292.19
- smsp__inst_executed.max inst 192,340
- smsp__inst_executed.min inst 186,215
- smsp__inst_executed.sum inst 12,114,700
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,203.25
- smsp__cycles_active.sum cycle 20,237,008
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.08
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,289.06
- smsp__inst_executed.max inst 12,694
- smsp__inst_executed.min inst 11,900
- smsp__inst_executed.sum inst 786,500
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,488.72
- smsp__cycles_active.sum cycle 4,511,278
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.27
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,291.25
- smsp__inst_executed.max inst 12,681
- smsp__inst_executed.min inst 12,008
- smsp__inst_executed.sum inst 786,640
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,605.89
- smsp__cycles_active.sum cycle 4,518,777
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.34
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,292.84
- smsp__inst_executed.max inst 12,543
- smsp__inst_executed.min inst 11,998
- smsp__inst_executed.sum inst 786,742
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,795.58
- smsp__cycles_active.sum cycle 4,530,917
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.02
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,299.95
- smsp__inst_executed.max inst 12,683
- smsp__inst_executed.min inst 11,720
- smsp__inst_executed.sum inst 787,197
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,136.48
- smsp__cycles_active.sum cycle 4,488,735
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.52
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,309.09
- smsp__inst_executed.max inst 12,613
- smsp__inst_executed.min inst 11,865
- smsp__inst_executed.sum inst 787,782
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,887.53
- smsp__cycles_active.sum cycle 4,664,802
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.30
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.56
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,315
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,921
- smsp__average_warp_latency_issue_stalled_barrier.pct % 124,910.64
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.11
- smsp__inst_executed.avg inst 189,291.42
- smsp__inst_executed.max inst 192,361
- smsp__inst_executed.min inst 186,192
- smsp__inst_executed.sum inst 12,114,651
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,146.12
- smsp__cycles_active.sum cycle 20,233,352
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 60.03
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.48
- smsp__inst_executed.max inst 12,672
- smsp__inst_executed.min inst 11,868
- smsp__inst_executed.sum inst 786,463
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 73,004.22
- smsp__cycles_active.sum cycle 4,672,270
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.08
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,289.81
- smsp__inst_executed.max inst 12,480
- smsp__inst_executed.min inst 12,068
- smsp__inst_executed.sum inst 786,548
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,790.83
- smsp__cycles_active.sum cycle 4,530,613
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.46
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,290.59
- smsp__inst_executed.max inst 12,701
- smsp__inst_executed.min inst 12,068
- smsp__inst_executed.sum inst 786,598
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,847.19
- smsp__cycles_active.sum cycle 4,534,220
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.27
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,293.72
- smsp__inst_executed.max inst 12,656
- smsp__inst_executed.min inst 12,038
- smsp__inst_executed.sum inst 786,798
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,747
- smsp__cycles_active.sum cycle 4,527,808
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 57.95
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,298.14
- smsp__inst_executed.max inst 12,645
- smsp__inst_executed.min inst 12,029
- smsp__inst_executed.sum inst 787,081
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,059.03
- smsp__cycles_active.sum cycle 4,483,778
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.58
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,308.86
- smsp__inst_executed.max inst 12,724
- smsp__inst_executed.min inst 11,654
- smsp__inst_executed.sum inst 787,767
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,813.80
- smsp__cycles_active.sum cycle 4,660,083
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.90
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.44
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,942
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,386
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,711
- smsp__average_warp_latency_issue_stalled_barrier.pct % 125,049.38
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.49
- smsp__inst_executed.avg inst 189,291.03
- smsp__inst_executed.max inst 192,313
- smsp__inst_executed.min inst 186,310
- smsp__inst_executed.sum inst 12,114,626
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,608.81
- smsp__cycles_active.sum cycle 20,262,964
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.78
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,287.95
- smsp__inst_executed.max inst 12,856
- smsp__inst_executed.min inst 11,904
- smsp__inst_executed.sum inst 786,429
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,331.70
- smsp__cycles_active.sum cycle 4,565,229
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.94
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.03
- smsp__inst_executed.max inst 12,488
- smsp__inst_executed.min inst 11,888
- smsp__inst_executed.sum inst 786,434
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 73,232.05
- smsp__cycles_active.sum cycle 4,686,851
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.27
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,289.50
- smsp__inst_executed.max inst 12,488
- smsp__inst_executed.min inst 12,072
- smsp__inst_executed.sum inst 786,528
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,846.25
- smsp__cycles_active.sum cycle 4,534,160
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.11
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,290.84
- smsp__inst_executed.max inst 12,564
- smsp__inst_executed.min inst 12,104
- smsp__inst_executed.sum inst 786,614
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,881.05
- smsp__cycles_active.sum cycle 4,536,387
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.40
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,293.73
- smsp__inst_executed.max inst 12,757
- smsp__inst_executed.min inst 11,970
- smsp__inst_executed.sum inst 786,799
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,142.94
- smsp__cycles_active.sum cycle 4,553,148
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 57.95
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,298.62
- smsp__inst_executed.max inst 12,553
- smsp__inst_executed.min inst 12,119
- smsp__inst_executed.sum inst 787,112
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,189.52
- smsp__cycles_active.sum cycle 4,492,129
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.71
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,309.52
- smsp__inst_executed.max inst 12,538
- smsp__inst_executed.min inst 12,074
- smsp__inst_executed.sum inst 787,809
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,879.23
- smsp__cycles_active.sum cycle 4,664,271
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.42
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,007
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,299
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,768
- smsp__average_warp_latency_issue_stalled_barrier.pct % 124,557.10
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.57
- smsp__inst_executed.avg inst 189,303.22
- smsp__inst_executed.max inst 192,317
- smsp__inst_executed.min inst 186,277
- smsp__inst_executed.sum inst 12,115,406
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 315,741.19
- smsp__cycles_active.sum cycle 20,207,436
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.40
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,287.92
- smsp__inst_executed.max inst 12,648
- smsp__inst_executed.min inst 11,912
- smsp__inst_executed.sum inst 786,427
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,978.88
- smsp__cycles_active.sum cycle 4,606,648
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.62
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.30
- smsp__inst_executed.max inst 12,848
- smsp__inst_executed.min inst 11,904
- smsp__inst_executed.sum inst 786,451
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,708.22
- smsp__cycles_active.sum cycle 4,589,326
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 60.19
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,289.11
- smsp__inst_executed.max inst 12,876
- smsp__inst_executed.min inst 11,688
- smsp__inst_executed.sum inst 786,503
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 73,332.14
- smsp__cycles_active.sum cycle 4,693,257
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.50
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.89
- smsp__inst_executed.max inst 12,507
- smsp__inst_executed.min inst 12,092
- smsp__inst_executed.sum inst 786,489
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,441.14
- smsp__cycles_active.sum cycle 4,508,233
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.30
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,290.69
- smsp__inst_executed.max inst 12,682
- smsp__inst_executed.min inst 11,866
- smsp__inst_executed.sum inst 786,604
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,768.55
- smsp__cycles_active.sum cycle 4,529,187
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.62
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,293.67
- smsp__inst_executed.max inst 12,534
- smsp__inst_executed.min inst 11,732
- smsp__inst_executed.sum inst 786,795
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,007.56
- smsp__cycles_active.sum cycle 4,544,484
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.05
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,299.09
- smsp__inst_executed.max inst 12,656
- smsp__inst_executed.min inst 11,912
- smsp__inst_executed.sum inst 787,142
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,781.25
- smsp__cycles_active.sum cycle 4,530,000
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.14
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,309.02
- smsp__inst_executed.max inst 12,707
- smsp__inst_executed.min inst 11,847
- smsp__inst_executed.sum inst 787,777
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,505.88
- smsp__cycles_active.sum cycle 4,640,376
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.14
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,666.06
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,013
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,348
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,657
- smsp__average_warp_latency_issue_stalled_barrier.pct % 124,275.15
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,242.75
- smsp__inst_executed.avg inst 189,315.86
- smsp__inst_executed.max inst 192,371
- smsp__inst_executed.min inst 186,294
- smsp__inst_executed.sum inst 12,116,215
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.90
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,297.72
- smsp__cycles_active.sum cycle 20,243,054
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 60.42
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.20
- smsp__inst_executed.max inst 12,484
- smsp__inst_executed.min inst 12,092
- smsp__inst_executed.sum inst 786,445
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 74,382.31
- smsp__cycles_active.sum cycle 4,760,468
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.88
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.11
- smsp__inst_executed.max inst 12,484
- smsp__inst_executed.min inst 11,716
- smsp__inst_executed.sum inst 786,439
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,860.06
- smsp__cycles_active.sum cycle 4,599,044
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.04
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.05
- smsp__inst_executed.max inst 12,664
- smsp__inst_executed.min inst 11,700
- smsp__inst_executed.sum inst 786,435
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,882.38
- smsp__cycles_active.sum cycle 4,600,472
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 60.13
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.81
- smsp__inst_executed.max inst 12,870
- smsp__inst_executed.min inst 11,908
- smsp__inst_executed.sum inst 786,484
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 73,247.75
- smsp__cycles_active.sum cycle 4,687,856
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 57.89
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,289.59
- smsp__inst_executed.max inst 12,494
- smsp__inst_executed.min inst 11,898
- smsp__inst_executed.sum inst 786,534
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,630.66
- smsp__cycles_active.sum cycle 4,520,362
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.14
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,291.27
- smsp__inst_executed.max inst 12,510
- smsp__inst_executed.min inst 12,082
- smsp__inst_executed.sum inst 786,641
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,548.77
- smsp__cycles_active.sum cycle 4,515,121
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.66
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,294.64
- smsp__inst_executed.max inst 12,656
- smsp__inst_executed.min inst 11,924
- smsp__inst_executed.sum inst 786,857
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,171.45
- smsp__cycles_active.sum cycle 4,554,973
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 57.86
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,301.05
- smsp__inst_executed.max inst 12,725
- smsp__inst_executed.min inst 11,871
- smsp__inst_executed.sum inst 787,267
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,490.50
- smsp__cycles_active.sum cycle 4,511,392
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.17
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,316.05
- smsp__inst_executed.max inst 12,594
- smsp__inst_executed.min inst 11,865
- smsp__inst_executed.sum inst 788,227
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,533.61
- smsp__cycles_active.sum cycle 4,642,151
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 231.55
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,681.88
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,120
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,332
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,910
- smsp__average_warp_latency_issue_stalled_barrier.pct % 123,982.60
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.83
- smsp__inst_executed.avg inst 189,283.48
- smsp__inst_executed.max inst 192,309
- smsp__inst_executed.min inst 186,242
- smsp__inst_executed.sum inst 12,114,143
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.88
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 316,209.50
- smsp__cycles_active.sum cycle 20,237,408
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 56.70
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,287.97
- smsp__inst_executed.max inst 12,492
- smsp__inst_executed.min inst 11,896
- smsp__inst_executed.sum inst 786,430
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 68,714
- smsp__cycles_active.sum cycle 4,397,696
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 60.64
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.14
- smsp__inst_executed.max inst 12,844
- smsp__inst_executed.min inst 11,528
- smsp__inst_executed.sum inst 786,441
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 74,171.33
- smsp__cycles_active.sum cycle 4,746,965
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.72
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.55
- smsp__inst_executed.max inst 12,684
- smsp__inst_executed.min inst 11,884
- smsp__inst_executed.sum inst 786,467
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,696.42
- smsp__cycles_active.sum cycle 4,588,571
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.94
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.08
- smsp__inst_executed.max inst 12,660
- smsp__inst_executed.min inst 11,724
- smsp__inst_executed.sum inst 786,437
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,640.89
- smsp__cycles_active.sum cycle 4,585,017
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 60.06
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,288.06
- smsp__inst_executed.max inst 12,524
- smsp__inst_executed.min inst 11,900
- smsp__inst_executed.sum inst 786,436
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 73,132.61
- smsp__cycles_active.sum cycle 4,680,487
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.08
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,289.61
- smsp__inst_executed.max inst 12,634
- smsp__inst_executed.min inst 11,884
- smsp__inst_executed.sum inst 786,535
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,620.73
- smsp__cycles_active.sum cycle 4,519,727
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.24
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,291.28
- smsp__inst_executed.max inst 12,704
- smsp__inst_executed.min inst 11,892
- smsp__inst_executed.sum inst 786,642
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 71,037.52
- smsp__cycles_active.sum cycle 4,546,401
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.82
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,293.23
- smsp__inst_executed.max inst 12,931
- smsp__inst_executed.min inst 11,840
- smsp__inst_executed.sum inst 786,767
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,840.56
- smsp__cycles_active.sum cycle 4,533,796
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 58.24
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,298.42
- smsp__inst_executed.max inst 12,587
- smsp__inst_executed.min inst 11,966
- smsp__inst_executed.sum inst 787,099
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 70,543.30
- smsp__cycles_active.sum cycle 4,514,771
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 59.39
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
- smsp__average_warp_latency_issue_stalled_barrier.pct % 0
- smsp__average_warp_latency_issue_stalled_barrier.ratio 0
- smsp__inst_executed.avg inst 12,309.44
- smsp__inst_executed.max inst 12,751
- smsp__inst_executed.min inst 11,714
- smsp__inst_executed.sum inst 787,804
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
- smsp__cycles_active.avg cycle 72,313.14
- smsp__cycles_active.sum cycle 4,628,041
- ---------------------------------------------------------------------- --------------- ------------------------------
-
- void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
- Section: Command line profiler metrics
- ---------------------------------------------------------------------- --------------- ------------------------------
- gpu__time_duration.sum usecond 228.54
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
- l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
- l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,691.25
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,988
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,367
- l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,060
- smsp__average_warp_latency_issue_stalled_barrier.pct % 123,962.42
- smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.62
- smsp__inst_executed.avg inst 189,051.73
- smsp__inst_executed.max inst 192,054
- smsp__inst_executed.min inst 186,060
- smsp__inst_executed.sum inst 12,099,311
- smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.83
- smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
- smsp__cycles_active.avg cycle 317,268.88
- smsp__cycles_active.sum cycle 20,305,208
- ---------------------------------------------------------------------- --------------- ------------------------------
-
|