#!/usr/bin/env bash
#
# prof.sh
#
# Run a program under NVIDIA Nsight Compute (ncu) with a fixed set of
# metrics and save ncu's standard output to a file.
#
# Usage: prof.sh <target> <output-file>
#   <target>       Program handed to ncu. It is followed on the command
#                  line by the fixed arguments "-q 20 -b 512".
#   <output-file>  File that receives standard output. The redirection is
#                  performed by this shell, not by sudo, so the file is
#                  created/truncated with the invoking user's privileges.
#
# Environment:
#   NCU_BIN  Path to the ncu executable.
#            Default: /usr/local/cuda-11.4/bin/ncu
#
# Exit status: 2 on a usage error, otherwise the status of the sudo/ncu
# command.
#

set -euo pipefail

readonly ncu_bin="${NCU_BIN:-/usr/local/cuda-11.4/bin/ncu}"

# Metrics requested from ncu, one per element. Keeping them in an array
# (instead of gluing quoted fragments together with line continuations)
# makes the list independent of indentation and easy to edit.
readonly -a metrics=(
  "smsp__inst_executed"
  "smsp__cycles_active.avg"
  "smsp__cycles_active.sum"
  "gpu__time_duration.sum"
  "smsp__average_warp_latency_issue_stalled_barrier"
  "smsp__warp_issue_stalled_barrier_per_warp_active"
  "l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld"
  "l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st"
  "l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read"
  "l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write"
  "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum"
  "l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum"
)

# Print a usage message on stderr and exit with status 2.
usage() {
  printf 'Usage: %s <target> <output-file>\n' "${0##*/}" >&2
  exit 2
}

main() {
  # Both positional parameters are required: $1 is profiled, $2 is the
  # redirection target.
  (( $# >= 2 )) || usage

  local -r target=$1
  local -r output_file=$2

  # Join the metric names with commas and no surrounding whitespace.
  # The IFS change is confined to the command-substitution subshell.
  local metric_list
  metric_list=$(IFS=,; printf '%s' "${metrics[*]}")

  sudo "${ncu_bin}" \
    --target-processes all \
    --metrics "${metric_list}" \
    "${target}" -q 20 -b 512 > "${output_file}"
}

main "$@"