diff options
author | Besar Wicaksono <bwicaksono@nvidia.com> | 2024-10-31 14:21:17 +0000 |
---|---|---|
committer | Will Deacon <will@kernel.org> | 2024-12-09 15:07:49 +0000 |
commit | ca26df4b1036bcad326170a6ddb5245f6d6e8d82 (patch) | |
tree | 5652ca0db13ee2e5f33c2869705447d2299fd957 | |
parent | 5f7cd0dc98a658d6470bc738499e01172bc6007f (diff) |
perf: arm_cspmu: nvidia: enable NVLINK-C2C port filtering
Enable NVLINK-C2C port filtering to distinguish traffic from
different GPUs connected to NVLINK-C2C.
Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Link: https://lore.kernel.org/r/20241031142118.1865965-4-bwicaksono@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
-rw-r--r-- | Documentation/admin-guide/perf/nvidia-pmu.rst | 30 | ||||
-rw-r--r-- | drivers/perf/arm_cspmu/nvidia_cspmu.c | 5 |
2 files changed, 33 insertions, 2 deletions
diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst index 6e8ee0fcf471..4cfc806070d7 100644 --- a/Documentation/admin-guide/perf/nvidia-pmu.rst +++ b/Documentation/admin-guide/perf/nvidia-pmu.rst @@ -86,6 +86,21 @@ Example usage: perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/ +The NVLink-C2C has two ports that can be connected to one GPU (occupying both +ports) or to two GPUs (one GPU per port). The user can use "port" bitmap +parameter to select the port(s) to monitor. Each bit represents the port number, +e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. + +Example for port filtering: + +* Count event id 0x0 from the GPU connected with socket 0 on port 0:: + + perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/ + +* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1:: + + perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/ + NVLink-C2C1 PMU ------------------- @@ -116,6 +131,21 @@ Example usage: perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/ +The NVLink-C2C has two ports that can be connected to one GPU (occupying both +ports) or to two GPUs (one GPU per port). The user can use "port" bitmap +parameter to select the port(s) to monitor. Each bit represents the port number, +e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. + +Example for port filtering: + +* Count event id 0x0 from the GPU connected with socket 0 on port 0:: + + perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/ + +* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1:: + + perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/ + CNVLink PMU --------------- diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index ea2d44adfa7c..7ab7d76e4ca1 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -130,6 +130,7 @@ static struct attribute *pcie_pmu_format_attrs[] = { static struct attribute *nvlink_c2c_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"), NULL, }; @@ -210,7 +211,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { { .prodid = 0x104, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", .name_fmt = NAME_FMT_SOCKET, @@ -220,7 +221,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { { .prodid = 0x105, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", .name_fmt = NAME_FMT_SOCKET, |