24 lines
779 B
Bash
Executable File

#!/usr/bin/env bash
#
# prof.sh <exec> <report.file>
#
# Run <exec> under NVIDIA Nsight Compute (ncu) via sudo and save everything
# the profiling command prints on stdout to <report.file>.
#
# Arguments:
#   <exec>         program handed to ncu; "-q 20 -b 512" is always placed
#                  after it on the ncu command line
#   <report.file>  output file; it is opened (created/truncated) by this
#                  shell through redirection, not by the sudo'ed process
#   Any further arguments are ignored.
#
# Environment:
#   NCU_BIN        path of the ncu binary
#                  (default: /usr/local/cuda-11.4/bin/ncu)
#
# Exit status:
#   2 on a usage error, otherwise the status of the profiling command.
#
set -euo pipefail

readonly NCU_BIN="${NCU_BIN:-/usr/local/cuda-11.4/bin/ncu}"

# Metrics requested from ncu, one per entry. Kept as an array so that adding
# or removing a metric cannot break the comma-separated list by accident.
readonly METRICS=(
  "smsp__inst_executed"
  "smsp__cycles_active.avg"
  "smsp__cycles_active.sum"
  "gpu__time_duration.sum"
  "smsp__average_warp_latency_issue_stalled_barrier"
  "smsp__warp_issue_stalled_barrier_per_warp_active"
  "l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld"
  "l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st"
  "l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read"
  "l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write"
  "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum"
  "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum"
)

# Print a usage message on stderr and exit with status 2.
usage() {
  printf 'Usage: %s <exec> <report.file>\n' "${0##*/}" >&2
  exit 2
}

# Validate the arguments, build the metric list and run the profiler.
main() {
  # Fail early with a clear message instead of letting sudo/ncu or the
  # redirection fail on an empty target or an empty file name.
  if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
    usage
  fi

  local target=$1
  local report=$2

  # Join the metrics with commas: emit "name," for every entry, then drop
  # the final comma. Unlike the previous echo-based list, no trailing
  # space ends up in the last metric name.
  local metric_list
  printf -v metric_list '%s,' "${METRICS[@]}"
  metric_list=${metric_list%,}

  sudo "${NCU_BIN}" \
    --target-processes all \
    --metrics "${metric_list}" \
    "${target}" -q 20 -b 512 > "${report}"
}

main "$@"