|
- #!/usr/bin/env bash
-
- #
- # prof.sh <exec> <report.file>
- #
- # Runs <exec> under the ncu profiler from the CUDA 11.4 install (via sudo),
- # collecting the metrics listed below for all target processes, and writes
- # ncu's standard output to <report.file>.
- #
- # Arguments:
- #   $1  executable to profile; it is followed on the command line by the
- #       fixed arguments "-q 20 -b 512"
- #   $2  file that receives ncu's standard output (overwritten if it exists)
- #
- # Exits with a usage message if either argument is missing or empty.
- #
-
- # Fail early with a usage hint instead of handing ncu an empty executable
- # name or redirecting to an empty path.
- : "${1:?usage: prof.sh <exec> <report.file>}"
- : "${2:?usage: prof.sh <exec> <report.file>}"
-
- # Comma-separated value for --metrics. It is assembled by plain string
- # appends so that no stray whitespace can end up inside the list (the
- # previous echo-based construction left a trailing space after the last
- # metric name).
- metrics="smsp__inst_executed"
- metrics+=",smsp__cycles_active.avg"
- metrics+=",smsp__cycles_active.sum"
- metrics+=",gpu__time_duration.sum"
- metrics+=",smsp__average_warp_latency_issue_stalled_barrier"
- metrics+=",smsp__warp_issue_stalled_barrier_per_warp_active"
- metrics+=",l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld"
- metrics+=",l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st"
- metrics+=",l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read"
- metrics+=",l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write"
- metrics+=",l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum"
- metrics+=",l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum"
-
- # The output redirection is set up by the calling shell, not by sudo, so the
- # report file is created with the invoking user's permissions.
- sudo /usr/local/cuda-11.4/bin/ncu \
- --target-processes all \
- --metrics "$metrics" \
- "$1" -q 20 -b 512 > "$2"
|