From 5cfb1c7cc61ccc661a549f9b27049b21c4f4ae82 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 7 Nov 2025 11:56:31 +0800 Subject: [PATCH 01/12] net/smc: bpf: Introduce generic hook for handshake flow ANBZ: #28739 commit 15f295f55656658e65bdbc9b901d6b2e49d68d72 upstream. The introduction of IPPROTO_SMC enables eBPF programs to determine whether to use SMC based on the context of socket creation, such as network namespaces, PID and comm name, etc. As a subsequent enhancement, this patch introduces a new generic hook that allows runtime decisions on whether to use SMC, based on information including but not limited to local/remote IP addresses or ports. Users can now write their own implementation via bpf_struct_ops to choose whether to use SMC before the third step of the TCP handshake completes. Signed-off-by: D. Wythe Signed-off-by: Martin KaFai Lau Reviewed-by: Dust Li Link: https://patch.msgid.link/20251107035632.115950-3-alibuda@linux.alibaba.com [Fixes conflicts] Signed-off-by: Xiao Long Signed-off-by: D. Wythe --- .../default/CONFIG_SMC_HS_CTRL_BPF | 1 + include/net/netns/smc.h | 3 + include/net/smc.h | 53 +++++++ net/ipv4/tcp_output.c | 31 ++-- net/smc/Kconfig | 10 ++ net/smc/Makefile | 1 + net/smc/af_smc.c | 9 ++ net/smc/smc_hs_bpf.c | 137 ++++++++++++++++++ net/smc/smc_hs_bpf.h | 31 ++++ net/smc/smc_sysctl.c | 91 ++++++++++++ 10 files changed, 353 insertions(+), 14 deletions(-) create mode 100644 anolis/configs/L1-RECOMMEND/default/CONFIG_SMC_HS_CTRL_BPF create mode 100644 net/smc/smc_hs_bpf.c create mode 100644 net/smc/smc_hs_bpf.h diff --git a/anolis/configs/L1-RECOMMEND/default/CONFIG_SMC_HS_CTRL_BPF b/anolis/configs/L1-RECOMMEND/default/CONFIG_SMC_HS_CTRL_BPF new file mode 100644 index 000000000000..d8f700361360 --- /dev/null +++ b/anolis/configs/L1-RECOMMEND/default/CONFIG_SMC_HS_CTRL_BPF @@ -0,0 +1 @@ +CONFIG_SMC_HS_CTRL_BPF=y diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 90862720247b..5b99e5ca9ea0 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -20,6 +20,9 @@ struct netns_smc { #ifdef CONFIG_SYSCTL struct ctl_table_header *smc_hdr; #endif +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl __rcu *hs_ctrl; +#endif /* CONFIG_SMC_HS_CTRL_BPF */ unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; unsigned int sysctl_vendor_exp_options; diff --git a/include/net/smc.h b/include/net/smc.h index a3224cc0c8e2..7af3beaf1cf9 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -17,6 +17,8 @@ #include #include +struct tcp_sock; +struct inet_request_sock; struct sock; #define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ @@ -53,4 +55,55 @@ struct smcd_dev { u8 going_away : 1; }; +#define SMC_HS_CTRL_NAME_MAX 16 + +enum { + /* ops can be inherited from init_net */ + SMC_HS_CTRL_FLAG_INHERITABLE = 0x1, + + SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE, +}; + +struct smc_hs_ctrl { + /* private */ + + struct list_head list; + struct module *owner; + + /* public */ + + /* unique name */ + char name[SMC_HS_CTRL_NAME_MAX]; + int flags; + + /* Invoked before computing SMC options for SYN packets. + * We can control whether to set SMC options via the return value. + * Return 0 to disable SMC, or return any other value to enable it. + */ + int (*syn_option)(struct tcp_sock *tp); + + /* Invoked before setting up SMC options for SYN-ACK packets. + * We can control whether to respond with SMC options via the return + * value. Return 0 to disable SMC, or return any other value to enable + * it.
+ */ + int (*synack_option)(const struct tcp_sock *tp, + struct inet_request_sock *ireq); +}; + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +#define smc_call_hsbpf(init_val, tp, func, ...) ({ \ + typeof(init_val) __ret = (init_val); \ + struct smc_hs_ctrl *ctrl; \ + rcu_read_lock(); \ + ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \ + if (ctrl && ctrl->func) \ + __ret = ctrl->func(tp, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + __ret; \ +}) +#else +#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); }) +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + #endif /* _SMC_H */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cd2e6a04c42d..1797c541d61a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -718,34 +719,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, mptcp_options_write(th, ptr, tp, opts); } -static void smc_set_option(const struct tcp_sock *tp, +static void smc_set_option(struct tcp_sock *tp, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) - if (static_branch_unlikely(&tcp_have_smc)) { - if (tp->syn_smc) { - if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { - opts->options |= OPTION_SMC; - *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; - } + if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) { + tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option); + /* re-check syn_smc */ + if (tp->syn_smc && + *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } #endif } static void smc_set_option_cond(const struct tcp_sock *tp, - const struct inet_request_sock *ireq, + struct inet_request_sock *ireq, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) - if (static_branch_unlikely(&tcp_have_smc)) { - if (tp->syn_smc && ireq->smc_ok) { - if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { - opts->options |= OPTION_SMC; - *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; - } + if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) { + ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq); + /* re-check smc_ok */ + if (ireq->smc_ok && + *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } #endif diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 99ecd59d1f4b..325addf83cc6 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -19,3 +19,13 @@ config SMC_DIAG smcss. if unsure, say Y. + +config SMC_HS_CTRL_BPF + bool "Generic eBPF hook for SMC handshake flow" + depends on SMC && BPF_SYSCALL + default y + help + SMC_HS_CTRL_BPF enables support to register a generic eBPF hook for SMC + handshake flow, which offers much greater flexibility in modifying the behavior + of the SMC protocol stack compared to a complete kernel-based approach. Select + this option if you want to filter the handshake process via eBPF programs.
\ No newline at end of file diff --git a/net/smc/Makefile b/net/smc/Makefile index a4260e78f03e..fed5a330b163 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o smc-y += smc_tracepoint.o smc_inet.o smc_dim.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o +smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o \ No newline at end of file diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 667f6d312ea8..7ee836c08af2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -58,6 +58,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_inet.h" +#include "smc_hs_bpf.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3825,8 +3826,16 @@ static int __init smc_init(void) pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); goto out_ulp; } + rc = bpf_smc_hs_ctrl_init(); + if (rc) { + pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__, + rc); + goto out_inet; + } static_branch_enable(&tcp_have_smc); return 0; +out_inet: + smc_inet_exit(); out_ulp: tcp_unregister_ulp(&smc_ulp_ops); out_ib: diff --git a/net/smc/smc_hs_bpf.c b/net/smc/smc_hs_bpf.c new file mode 100644 index 000000000000..4d3230a3ade2 --- /dev/null +++ b/net/smc/smc_hs_bpf.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Generic hook for SMC handshake flow. + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2025, Alibaba Inc. + * + * Author: D. Wythe + */ + +#include +#include +#include +#include + +#include "smc_hs_bpf.h" + +static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock); +static LIST_HEAD(smc_hs_ctrl_list); + +static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl) +{ + int ret = 0; + + spin_lock(&smc_hs_ctrl_list_lock); + /* already exists or duplicate name */ + if (smc_hs_ctrl_find_by_name(ctrl->name)) + ret = -EEXIST; + else + list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list); + spin_unlock(&smc_hs_ctrl_list_lock); + return ret; +} + +static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl) +{ + spin_lock(&smc_hs_ctrl_list_lock); + list_del_rcu(&ctrl->list); + spin_unlock(&smc_hs_ctrl_list_lock); + + /* Ensure that all readers have completed */ + synchronize_rcu(); +} + +struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name) +{ + struct smc_hs_ctrl *ctrl; + + list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) { + if (strcmp(ctrl->name, name) == 0) + return ctrl; + } + return NULL; +} + +static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; } +static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return 1; +} + +static struct smc_hs_ctrl __smc_bpf_hs_ctrl = { + .syn_option = __smc_bpf_stub_set_tcp_option, + .synack_option = __smc_bpf_stub_set_tcp_option_cond, +}; + +static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; } + +static int smc_bpf_hs_ctrl_reg(void *kdata) +{ + return smc_hs_ctrl_reg(kdata); +} + +static void smc_bpf_hs_ctrl_unreg(void *kdata) +{ + smc_hs_ctrl_unreg(kdata); +} + +static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct smc_hs_ctrl *u_ctrl; + struct smc_hs_ctrl *k_ctrl; + u32 moff; + + u_ctrl = (const struct smc_hs_ctrl *)udata; + k_ctrl = (struct smc_hs_ctrl *)kdata; + + moff = __btf_member_bit_offset(t,
member) / 8; + switch (moff) { + case offsetof(struct smc_hs_ctrl, name): + if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name, + sizeof(u_ctrl->name)) <= 0) + return -EINVAL; + return 1; + case offsetof(struct smc_hs_ctrl, flags): + if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS) + return -EINVAL; + k_ctrl->flags = u_ctrl->flags; + return 1; + default: + break; + } + + return 0; +} + +static const struct bpf_func_proto * +bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return bpf_base_func_proto(func_id); +} + +static const struct bpf_verifier_ops smc_bpf_verifier_ops = { + .get_func_proto = bpf_smc_hs_func_proto, + .is_valid_access = bpf_tracing_btf_ctx_access, +}; + +static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = { + .name = "smc_hs_ctrl", + .init = smc_bpf_hs_ctrl_init, + .reg = smc_bpf_hs_ctrl_reg, + .unreg = smc_bpf_hs_ctrl_unreg, + .cfi_stubs = &__smc_bpf_hs_ctrl, + .verifier_ops = &smc_bpf_verifier_ops, + .init_member = smc_bpf_hs_ctrl_init_member, + .owner = THIS_MODULE, +}; + +int bpf_smc_hs_ctrl_init(void) +{ + return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl); +} diff --git a/net/smc/smc_hs_bpf.h b/net/smc/smc_hs_bpf.h new file mode 100644 index 000000000000..f5f1807c079e --- /dev/null +++ b/net/smc/smc_hs_bpf.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Generic hook for SMC handshake flow. + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2025, Alibaba Inc. + * + * Author: D. Wythe + */ + +#ifndef __SMC_HS_CTRL +#define __SMC_HS_CTRL + +#include + +/* Find hs_ctrl by the target name, which is required to be a C string. + * Return NULL if no such ctrl was found; otherwise, return a valid ctrl. + * + * Note: Caller MUST ensure it is invoked under rcu_read_lock.
+ */ +struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name); + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +int bpf_smc_hs_ctrl_init(void); +#else +static inline int bpf_smc_hs_ctrl_init(void) { return 0; } +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + +#endif /* __SMC_HS_CTRL */ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index efb672e3f101..b12916065f27 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -12,12 +12,14 @@ #include #include +#include #include #include "smc.h" #include "smc_core.h" #include "smc_llc.h" #include "smc_sysctl.h" +#include "smc_hs_bpf.h" static int min_sndbuf = SMC_BUF_MIN_SIZE; static int min_rcvbuf = SMC_BUF_MIN_SIZE; @@ -40,6 +42,69 @@ static int proc_global_mem(struct ctl_table *ctl, return proc_doulongvec_minmax(&tbl, write, buffer, lenp, ppos); } +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name) +{ + struct smc_hs_ctrl *ctrl = NULL; + + rcu_read_lock(); + /* a null or empty name asks to clear the current ctrl */ + if (name && name[0]) { + ctrl = smc_hs_ctrl_find_by_name(name); + if (!ctrl) { + rcu_read_unlock(); + return -EINVAL; + } + /* no change, just return */ + if (ctrl == rcu_dereference(net->smc.hs_ctrl)) { + rcu_read_unlock(); + return 0; + } + if (!bpf_try_module_get(ctrl, ctrl->owner)) { + rcu_read_unlock(); + return -EBUSY; + } + } + /* xchg old ctrl with the new one atomically */ + ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl))); + /* release old ctrl */ + if (ctrl) + bpf_module_put(ctrl, ctrl->owner); + + rcu_read_unlock(); + return 0; +} + +static int proc_smc_hs_ctrl(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl); + char val[SMC_HS_CTRL_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = SMC_HS_CTRL_NAME_MAX, + }; + struct smc_hs_ctrl *ctrl; + int ret; + + rcu_read_lock(); + ctrl = rcu_dereference(net->smc.hs_ctrl); + if (ctrl) + memcpy(val, ctrl->name, sizeof(ctrl->name)); + else + val[0] = '\0'; + rcu_read_unlock(); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (write) + ret = smc_net_replace_smc_hs_ctrl(net, val); + return ret; +} +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -129,6 +194,15 @@ static struct ctl_table smc_table[] = { .mode = 0644, .proc_handler = proc_global_mem, }, +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + { + .procname = "hs_ctrl", + .data = &init_net.smc.hs_ctrl, + .mode = 0644, + .maxlen = SMC_HS_CTRL_NAME_MAX, + .proc_handler = proc_smc_hs_ctrl, + }, +#endif /* CONFIG_SMC_HS_CTRL_BPF */ { } }; @@ -139,6 +213,16 @@ int __net_init smc_sysctl_net_init(struct net *net) table = smc_table; if (!net_eq(net, &init_net)) { int i; +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl *ctrl; + + rcu_read_lock(); + ctrl = rcu_dereference(init_net.smc.hs_ctrl); + if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE && + bpf_try_module_get(ctrl, ctrl->owner)) + rcu_assign_pointer(net->smc.hs_ctrl, ctrl); + rcu_read_unlock(); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); if (!table) @@ -179,6 +263,9 @@ int __net_init smc_sysctl_net_init(struct net *net) if (!net_eq(net, &init_net)) kfree(table); err_alloc: +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + smc_net_replace_smc_hs_ctrl(net, NULL); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ return -ENOMEM; } @@ -188,6
+275,10 @@ void __net_exit smc_sysctl_net_exit(struct net *net) table = net->smc.smc_hdr->ctl_table_arg; unregister_net_sysctl_table(net->smc.smc_hdr); +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + smc_net_replace_smc_hs_ctrl(net, NULL); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + if (!net_eq(net, &init_net)) kfree(table); } -- Gitee From 5061959d6c2061f0db90a44b8c7c7a1680e822f1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 4 Dec 2025 18:29:16 +0800 Subject: [PATCH 02/12] net: smc: SMC_HS_CTRL_BPF should depend on BPF_JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #28739 commit 861111b69896145a928c889d9344797ea3711028 upstream. If CONFIG_BPF_SYSCALL=y, but CONFIG_BPF_JIT=n: net/smc/smc_hs_bpf.c: In function ‘bpf_smc_hs_ctrl_init’: include/linux/bpf.h:2068:50: error: statement with no effect [-Werror=unused-value] 2068 | #define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) | ^~~~~~~~~~~~~~~~ net/smc/smc_hs_bpf.c:139:16: note: in expansion of macro ‘register_bpf_struct_ops’ 139 | return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl); | ^~~~~~~~~~~~~~~~~~~~~~~ While this compile error is caused by a bug in <linux/bpf.h>, none of the code in net/smc/smc_hs_bpf.c becomes effective if CONFIG_BPF_JIT is not enabled. Hence add a dependency on BPF_JIT. While at it, add the missing newline at the end of the file. Fixes: 15f295f55656658e ("net/smc: bpf: Introduce generic hook for handshake flow") Signed-off-by: Geert Uytterhoeven Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/988c61e5fea280872d81b3640f1f34d0619cfbbf.1764843951.git.geert@linux-m68k.org [Fixes conflicts] Signed-off-by: Xiao Long Signed-off-by: D. Wythe --- net/smc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 325addf83cc6..277ef504bc26 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -22,10 +22,10 @@ config SMC_DIAG config SMC_HS_CTRL_BPF bool "Generic eBPF hook for SMC handshake flow" - depends on SMC && BPF_SYSCALL + depends on SMC && BPF_JIT && BPF_SYSCALL default y help SMC_HS_CTRL_BPF enables support to register a generic eBPF hook for SMC handshake flow, which offers much greater flexibility in modifying the behavior of the SMC protocol stack compared to a complete kernel-based approach. Select - this option if you want to filter the handshake process via eBPF programs. \ No newline at end of file + this option if you want to filter the handshake process via eBPF programs. -- Gitee From 3ba056d5d7a2900df7d6767e6102e1d17afe64ab Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:11 +0530 Subject: [PATCH 03/12] perf/x86/amd/uncore: Refactor uncore management ANBZ: #28936 commit d6389d3ccc136a4229a8d497899c64f80fd3c5b3 upstream Since struct amd_uncore is used to manage per-cpu contexts, rename it to amd_uncore_ctx in order to better reflect its purpose. Add a new struct amd_uncore_pmu to encapsulate all attributes which are shared by per-cpu contexts for a corresponding PMU. These include the number of counters, active mask, MSR and RDPMC base addresses, etc. Since the struct pmu is now embedded, the corresponding amd_uncore_pmu for a given event can be found by simply using container_of(). Finally, move all PMU-specific code to separate functions. While the original event management functions continue to provide the base functionality, all PMU-specific quirks and customizations are applied in separate functions.
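To illustrate the embedded struct pmu described above, here is a minimal sketch of the container_of() lookup (mirroring the code added in this patch):

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
	/* event->pmu points at the embedded member; recover its container */
	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

This replaces the old is_nb_event()/is_llc_event() helpers that compared event->pmu->type against each registered PMU.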
The motivation is to simplify the management of uncore PMUs. [Backport Changes] 1. In arch/x86/events/amd/uncore.c, the boot_cpu_has(X86_FEATURE_PERFCTR_LLC) check and the associated L3 PMU attribute setup were moved from amd_uncore_init() into amd_uncore_l3_init() as part of the upstream refactoring. The Anolis kernel includes additional Hygon-specific handling introduced by commit ff742afb3a6f0, and this logic is preserved during the migration. Accordingly, both the AMD and Hygon code paths originally guarded by boot_cpu_has(X86_FEATURE_PERFCTR_LLC) are migrated together to amd_uncore_l3_init(), maintaining functional parity with the previous behavior while aligning with the upstream structure. 2. In arch/x86/events/amd/uncore.c, the boot_cpu_has(X86_FEATURE_PERFCTR_NB) check and the associated DF PMU initialization were moved from amd_uncore_init() into amd_uncore_df_init() as part of the upstream refactoring. The Anolis kernel contains additional Hygon-specific handling introduced by commits 23205386a05d6 and a87fcdea90228, and this logic is preserved during the migration. Accordingly, both the AMD and Hygon code paths originally guarded by boot_cpu_has(X86_FEATURE_PERFCTR_NB) are migrated together to amd_uncore_df_init(), maintaining functional parity with the previous behavior while aligning with the upstream structure. 3. The current upstream patch moves NB (Northbridge) / DF (Data Fabric) specific event validation out of amd_uncore_event_init() into the newly introduced amd_uncore_df_event_init(). However, the upstream refactoring does not include support for Hygon processors introduced by commits 23205386a05d6 and a87fcdea90228. This backport preserves the existing Hygon Family 18h Northbridge raw event mask handling and migrates it unchanged into the newly separated amd_uncore_df_event_init() path. This ensures the upstream refactoring structure is respected while maintaining correct Hygon-specific behavior in Anolis. 4. In arch/x86/events/amd/uncore.c, the upstream change restructures uncore event handling by removing the helper function l3_thread_slice_mask() and integrating the mask calculation logic directly into the function amd_uncore_l3_event_init(). The Anolis source tree, however, includes additional Hygon-specific mask calculation logic previously implemented inside l3_thread_slice_mask(). To align with the upstream restructuring while preserving existing behavior, this Hygon-specific logic is conditionally integrated into the main function as part of the backport, following the same structure used upstream for AMD CPUs. This ensures that the upstream refactoring is respected while maintaining correct Hygon-specific behavior in Anolis.
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/24b38c49a5dae65d8c96e5d75a2b96ae97aaa651.1696425185.git.sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 831 ++++++++++++++++++----------------- 1 file changed, 426 insertions(+), 405 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index cb0aa2e93d06..ff5ecab8f39c 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -27,56 +27,41 @@ #define COUNTER_SHIFT 16 +#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */ +#define UNCORE_NAME_LEN 16 + #undef pr_fmt #define pr_fmt(fmt) "amd_uncore: " fmt static int pmu_version; -static int num_counters_llc; -static int num_counters_nb; -static bool l3_mask; static HLIST_HEAD(uncore_unused_list); -struct amd_uncore { +struct amd_uncore_ctx { int id; int refcnt; int cpu; - int num_counters; - int rdpmc_base; - u32 msr_base; - cpumask_t *active_mask; - struct pmu *pmu; struct perf_event **events; struct hlist_node node; }; -static struct amd_uncore * __percpu *amd_uncore_nb; -static struct amd_uncore * __percpu *amd_uncore_llc; - -static struct pmu amd_nb_pmu; -static struct pmu amd_llc_pmu; - -static cpumask_t amd_nb_active_mask; -static cpumask_t amd_llc_active_mask; - -static bool is_nb_event(struct perf_event *event) -{ - return event->pmu->type == amd_nb_pmu.type; -} +struct amd_uncore_pmu { + char name[UNCORE_NAME_LEN]; + int num_counters; + int rdpmc_base; + u32 msr_base; + cpumask_t active_mask; + struct pmu pmu; + struct amd_uncore_ctx * __percpu *ctx; + int (*id)(unsigned int cpu); +}; -static bool is_llc_event(struct perf_event *event) -{ - return event->pmu->type == amd_llc_pmu.type; -} +static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX]; +static int num_pmus __read_mostly; -static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) +static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { - if (is_nb_event(event) && amd_uncore_nb) - return *per_cpu_ptr(amd_uncore_nb, event->cpu); - else if (is_llc_event(event) && amd_uncore_llc) - return *per_cpu_ptr(amd_uncore_llc, event->cpu); - - return NULL; + return container_of(event->pmu, struct amd_uncore_pmu, pmu); } static void amd_uncore_read(struct perf_event *event) @@ -118,7 +103,7 @@ static void amd_uncore_stop(struct perf_event *event, int flags) hwc->state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - amd_uncore_read(event); + event->pmu->read(event); hwc->state |= PERF_HES_UPTODATE; } } @@ -126,15 +111,16 @@ static void amd_uncore_stop(struct perf_event *event, int flags) static int amd_uncore_add(struct perf_event *event, int flags) { int i; - struct amd_uncore *uncore = event_to_amd_uncore(event); + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; /* are we already assigned? 
*/ - if (hwc->idx != -1 && uncore->events[hwc->idx] == event) + if (hwc->idx != -1 && ctx->events[hwc->idx] == event) goto out; - for (i = 0; i < uncore->num_counters; i++) { - if (uncore->events[i] == event) { + for (i = 0; i < pmu->num_counters; i++) { + if (ctx->events[i] == event) { hwc->idx = i; goto out; } @@ -142,8 +128,8 @@ static int amd_uncore_add(struct perf_event *event, int flags) /* if not, take the first available counter */ hwc->idx = -1; - for (i = 0; i < uncore->num_counters; i++) { - if (cmpxchg(&uncore->events[i], NULL, event) == NULL) { + for (i = 0; i < pmu->num_counters; i++) { + if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { hwc->idx = i; break; } @@ -153,23 +139,13 @@ static int amd_uncore_add(struct perf_event *event, int flags) if (hwc->idx == -1) return -EBUSY; - hwc->config_base = uncore->msr_base + (2 * hwc->idx); - hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx); - hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx; + hwc->config_base = pmu->msr_base + (2 * hwc->idx); + hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx); + hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - /* - * The first four DF counters are accessible via RDPMC index 6 to 9 - * followed by the L3 counters from index 10 to 15. For processors - * with more than four DF counters, the DF RDPMC assignments become - * discontiguous as the additional counters are accessible starting - * from index 16. - */ - if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB) - hwc->event_base_rdpmc += NUM_COUNTERS_L3; - if (flags & PERF_EF_START) - amd_uncore_start(event, PERF_EF_RELOAD); + event->pmu->start(event, PERF_EF_RELOAD); return 0; } @@ -177,79 +153,36 @@ static int amd_uncore_add(struct perf_event *event, int flags) static void amd_uncore_del(struct perf_event *event, int flags) { int i; - struct amd_uncore *uncore = event_to_amd_uncore(event); + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; - amd_uncore_stop(event, PERF_EF_UPDATE); + event->pmu->stop(event, PERF_EF_UPDATE); - for (i = 0; i < uncore->num_counters; i++) { - if (cmpxchg(&uncore->events[i], event, NULL) == event) + for (i = 0; i < pmu->num_counters; i++) { + if (cmpxchg(&ctx->events[i], event, NULL) == event) break; } hwc->idx = -1; } -/* - * Return a full thread and slice mask unless user - * has provided them - */ -static u64 l3_thread_slice_mask(u64 config) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 <= 0x18) - return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | - ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); - - if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && - boot_cpu_data.x86 == 0x18) { - if (boot_cpu_data.x86_model >= 0x6 && boot_cpu_data.x86_model <= 0xf) - return ((config & HYGON_L3_SLICE_MASK) ? : HYGON_L3_SLICE_MASK) | - ((config & HYGON_L3_THREAD_MASK) ? : HYGON_L3_THREAD_MASK); - else - return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | - ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); - } - - /* - * If the user doesn't specify a threadmask, they're not trying to - * count core 0, so we enable all cores & threads. - * We'll also assume that they want to count slice 0 if they specify - * a threadmask and leave sliceid and enallslices unpopulated. 
- */ - if (!(config & AMD64_L3_F19H_THREAD_MASK)) - return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | - AMD64_L3_EN_ALL_CORES; - - return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | - AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | - AMD64_L3_COREID_MASK); -} - static int amd_uncore_event_init(struct perf_event *event) { - struct amd_uncore *uncore; + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; struct hw_perf_event *hwc = &event->hw; - u64 event_mask = AMD64_RAW_EVENT_MASK_NB; if (event->attr.type != event->pmu->type) return -ENOENT; - if (pmu_version >= 2 && is_nb_event(event)) { - event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && - boot_cpu_data.x86 == 0x18 && - is_nb_event(event)) { - event_mask = HYGON_F18H_RAW_EVENT_MASK_NB; - if (boot_cpu_data.x86_model == 0x4 || - boot_cpu_data.x86_model == 0x5) - event_mask = HYGON_F18H_M4H_RAW_EVENT_MASK_NB; - if (boot_cpu_data.x86_model == 0x6 || - boot_cpu_data.x86_model == 0x7 || - boot_cpu_data.x86_model == 0x8 || - boot_cpu_data.x86_model == 0x10) - event_mask = HYGON_F18H_M6H_RAW_EVENT_MASK_NB; - } + if (event->cpu < 0) + return -EINVAL; + + pmu = event_to_amd_uncore_pmu(event); + ctx = *per_cpu_ptr(pmu->ctx, event->cpu); + if (!ctx) + return -ENODEV; /* * NB and Last level cache counters (MSRs) are shared across all cores @@ -259,28 +192,14 @@ static int amd_uncore_event_init(struct perf_event *event) * out. So we do not support sampling and per-thread events via * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - hwc->config = event->attr.config & event_mask; + hwc->config = event->attr.config; hwc->idx = -1; - if (event->cpu < 0) - return -EINVAL; - - /* - * SliceMask and ThreadMask need to be set for certain L3 events. - * For other events, the two fields do not affect the count. - */ - if (l3_mask && is_llc_event(event)) - hwc->config |= l3_thread_slice_mask(event->attr.config); - - uncore = event_to_amd_uncore(event); - if (!uncore) - return -ENODEV; - /* * since request can come in to any of the shared cores, we will remap * to a single common cpu. 
*/ - event->cpu = uncore->cpu; + event->cpu = ctx->cpu; return 0; } @@ -310,17 +229,10 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - cpumask_t *active_mask; - struct pmu *pmu = dev_get_drvdata(dev); - - if (pmu->type == amd_nb_pmu.type) - active_mask = &amd_nb_active_mask; - else if (pmu->type == amd_llc_pmu.type) - active_mask = &amd_llc_active_mask; - else - return 0; + struct pmu *ptr = dev_get_drvdata(dev); + struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu); - return cpumap_print_to_pagebuf(true, buf, active_mask); + return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask); } static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); @@ -450,113 +362,57 @@ static const struct attribute_group *hygon_uncore_l3_attr_update[] = { NULL, }; -static struct pmu amd_nb_pmu = { - .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_df_attr_groups, - .name = "amd_nb", - .event_init = amd_uncore_event_init, - .add = amd_uncore_add, - .del = amd_uncore_del, - .start = amd_uncore_start, - .stop = amd_uncore_stop, - .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, - .module = THIS_MODULE, -}; - -static struct pmu amd_llc_pmu = { - .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_l3_attr_groups, - .attr_update = amd_uncore_l3_attr_update, - .name = "amd_l2", - .event_init = amd_uncore_event_init, - .add = amd_uncore_add, - .del = amd_uncore_del, - .start = amd_uncore_start, - .stop = amd_uncore_stop, - .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, - .module = THIS_MODULE, -}; - -static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) -{ - return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, - cpu_to_node(cpu)); -} - -static inline struct perf_event ** -amd_uncore_events_alloc(unsigned int num, unsigned int cpu) -{ - return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL, - cpu_to_node(cpu)); -} - static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL; - - if (amd_uncore_nb) { - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; - uncore_nb = amd_uncore_alloc(cpu); - if (!uncore_nb) - goto fail; - uncore_nb->cpu = cpu; - uncore_nb->num_counters = num_counters_nb; - uncore_nb->rdpmc_base = RDPMC_BASE_NB; - uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; - uncore_nb->active_mask = &amd_nb_active_mask; - uncore_nb->pmu = &amd_nb_pmu; - uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu); - if (!uncore_nb->events) + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int node = cpu_to_node(cpu), i; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL, + node); + if (!ctx) goto fail; - uncore_nb->id = -1; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; - } - if (amd_uncore_llc) { - *per_cpu_ptr(amd_uncore_llc, cpu) = NULL; - uncore_llc = amd_uncore_alloc(cpu); - if (!uncore_llc) - goto fail; - uncore_llc->cpu = cpu; - uncore_llc->num_counters = num_counters_llc; - uncore_llc->rdpmc_base = RDPMC_BASE_LLC; - uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore_llc->active_mask = &amd_llc_active_mask; - uncore_llc->pmu = &amd_llc_pmu; - uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu); - if (!uncore_llc->events) + ctx->cpu = cpu; + ctx->events = 
kzalloc_node(sizeof(struct perf_event *) * + pmu->num_counters, GFP_KERNEL, + node); + if (!ctx->events) goto fail; - uncore_llc->id = -1; - *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; + + ctx->id = -1; + *per_cpu_ptr(pmu->ctx, cpu) = ctx; } return 0; fail: - if (uncore_nb) { - kfree(uncore_nb->events); - kfree(uncore_nb); - } + /* Rollback */ + for (; i >= 0; i--) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (!ctx) + continue; - if (uncore_llc) { - kfree(uncore_llc->events); - kfree(uncore_llc); + kfree(ctx->events); + kfree(ctx); } return -ENOMEM; } -static struct amd_uncore * -amd_uncore_find_online_sibling(struct amd_uncore *this, - struct amd_uncore * __percpu *uncores) +static struct amd_uncore_ctx * +amd_uncore_find_online_sibling(struct amd_uncore_ctx *this, + struct amd_uncore_pmu *pmu) { unsigned int cpu; - struct amd_uncore *that; + struct amd_uncore_ctx *that; for_each_online_cpu(cpu) { - that = *per_cpu_ptr(uncores, cpu); + that = *per_cpu_ptr(pmu->ctx, cpu); if (!that) continue; @@ -577,24 +433,16 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, static int amd_uncore_cpu_starting(unsigned int cpu) { - unsigned int eax, ebx, ecx, edx; - struct amd_uncore *uncore; - - if (amd_uncore_nb) { - uncore = *per_cpu_ptr(amd_uncore_nb, cpu); - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - uncore->id = ecx & 0xff; - - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb); - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; - } - - if (amd_uncore_llc) { - uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - uncore->id = per_cpu_llc_id(cpu); + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); - *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + ctx->id = pmu->id(cpu); + ctx = amd_uncore_find_online_sibling(ctx, pmu); + *per_cpu_ptr(pmu->ctx, cpu) = ctx; } return 0; @@ -602,210 +450,398 @@ static int amd_uncore_cpu_starting(unsigned int cpu) static void uncore_clean_online(void) { - struct amd_uncore *uncore; + struct amd_uncore_ctx *ctx; struct hlist_node *n; - hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { - hlist_del(&uncore->node); - kfree(uncore->events); - kfree(uncore); + hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) { + hlist_del(&ctx->node); + kfree(ctx->events); + kfree(ctx); } } -static void uncore_online(unsigned int cpu, - struct amd_uncore * __percpu *uncores) +static int amd_uncore_cpu_online(unsigned int cpu) { - struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; uncore_clean_online(); - if (cpu == uncore->cpu) - cpumask_set_cpu(cpu, uncore->active_mask); + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (cpu == ctx->cpu) + cpumask_set_cpu(cpu, &pmu->active_mask); + } + + return 0; } -static int amd_uncore_cpu_online(unsigned int cpu) +static int amd_uncore_cpu_down_prepare(unsigned int cpu) +{ + struct amd_uncore_ctx *this, *that; + struct amd_uncore_pmu *pmu; + int i, j; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + this = *per_cpu_ptr(pmu->ctx, cpu); + + /* this cpu is going down, migrate to a shared sibling if possible */ + for_each_online_cpu(j) { + that = *per_cpu_ptr(pmu->ctx, j); + + if (cpu == j) + continue; + + if (this == that) { + perf_pmu_migrate_context(&pmu->pmu, cpu, j); + cpumask_clear_cpu(cpu, 
&pmu->active_mask); + cpumask_set_cpu(j, &pmu->active_mask); + that->cpu = j; + break; + } + } + } + + return 0; +} + +static int amd_uncore_cpu_dead(unsigned int cpu) { - if (amd_uncore_nb) - uncore_online(cpu, amd_uncore_nb); + struct amd_uncore_ctx *ctx; + struct amd_uncore_pmu *pmu; + int i; - if (amd_uncore_llc) - uncore_online(cpu, amd_uncore_llc); + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (cpu == ctx->cpu) + cpumask_clear_cpu(cpu, &pmu->active_mask); + + if (!--ctx->refcnt) { + kfree(ctx->events); + kfree(ctx); + } + + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + } return 0; } -static void uncore_down_prepare(unsigned int cpu, - struct amd_uncore * __percpu *uncores) +static int amd_uncore_df_id(unsigned int cpu) { - unsigned int i; - struct amd_uncore *this = *per_cpu_ptr(uncores, cpu); + unsigned int eax, ebx, ecx, edx; - if (this->cpu != cpu) - return; + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - /* this cpu is going down, migrate to a shared sibling if possible */ - for_each_online_cpu(i) { - struct amd_uncore *that = *per_cpu_ptr(uncores, i); + return ecx & 0xff; +} - if (cpu == i) - continue; +static int amd_uncore_df_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int ret = amd_uncore_event_init(event); + u64 event_mask = AMD64_RAW_EVENT_MASK_NB; - if (this == that) { - perf_pmu_migrate_context(this->pmu, cpu, i); - cpumask_clear_cpu(cpu, that->active_mask); - cpumask_set_cpu(i, that->active_mask); - that->cpu = i; - break; - } + if (ret) + return ret; + if (pmu_version >= 2) { + event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18) { + event_mask = HYGON_F18H_RAW_EVENT_MASK_NB; + if (boot_cpu_data.x86_model == 0x4 || + boot_cpu_data.x86_model == 0x5) + event_mask = HYGON_F18H_M4H_RAW_EVENT_MASK_NB; + if (boot_cpu_data.x86_model == 0x6 || + boot_cpu_data.x86_model == 0x7 || + boot_cpu_data.x86_model == 0x8 || + boot_cpu_data.x86_model == 0x10) + event_mask = HYGON_F18H_M6H_RAW_EVENT_MASK_NB; } + + hwc->config = event->attr.config & event_mask; + + return 0; } -static int amd_uncore_cpu_down_prepare(unsigned int cpu) +static int amd_uncore_df_add(struct perf_event *event, int flags) { - if (amd_uncore_nb) - uncore_down_prepare(cpu, amd_uncore_nb); + int ret = amd_uncore_add(event, flags & ~PERF_EF_START); + struct hw_perf_event *hwc = &event->hw; - if (amd_uncore_llc) - uncore_down_prepare(cpu, amd_uncore_llc); + if (ret) + return ret; + + /* + * The first four DF counters are accessible via RDPMC index 6 to 9 + * followed by the L3 counters from index 10 to 15. For processors + * with more than four DF counters, the DF RDPMC assignments become + * discontiguous as the additional counters are accessible starting + * from index 16. 
+ */ + if (hwc->idx >= NUM_COUNTERS_NB) + hwc->event_base_rdpmc += NUM_COUNTERS_L3; + + /* Delayed start after rdpmc base update */ + if (flags & PERF_EF_START) + amd_uncore_start(event, PERF_EF_RELOAD); return 0; } -static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) +static int amd_uncore_df_init(void) { - struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + struct attribute **df_attr = amd_uncore_df_format_attr; + struct amd_uncore_pmu *pmu = &pmus[num_pmus]; + union cpuid_0x80000022_ebx ebx; + int ret; + + if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) + return 0; - if (cpu == uncore->cpu) - cpumask_clear_cpu(cpu, uncore->active_mask); + /* + * For Family 17h and above, the Northbridge counters are repurposed + * as Data Fabric counters. The PMUs are exported based on family as + * either NB or DF. + */ + strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", + sizeof(pmu->name)); + + pmu->num_counters = NUM_COUNTERS_NB; + pmu->msr_base = MSR_F15H_NB_PERF_CTL; + pmu->rdpmc_base = RDPMC_BASE_NB; + pmu->id = amd_uncore_df_id; + + if (pmu_version >= 2) { + *df_attr++ = &format_attr_event14v2.attr; + *df_attr++ = &format_attr_umask12.attr; + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + pmu->num_counters = ebx.split.num_df_pmc; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0x17) { + *df_attr = &format_attr_event14.attr; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18) { + *df_attr++ = &format_attr_event14f18h.attr; + if (boot_cpu_data.x86_model == 0x4 || + boot_cpu_data.x86_model == 0x5) + *df_attr++ = &format_attr_umask10f18h.attr; + else if (boot_cpu_data.x86_model == 0x6 || + boot_cpu_data.x86_model == 0x7 || + boot_cpu_data.x86_model == 0x8 || + boot_cpu_data.x86_model == 0x10) + *df_attr++ = &format_attr_umask12f18h.attr; + } - if (!--uncore->refcnt) { - kfree(uncore->events); - kfree(uncore); + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + return -ENOMEM; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_df_attr_groups, + .name = pmu->name, + .event_init = amd_uncore_df_event_init, + .add = amd_uncore_df_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); + if (ret) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + return ret; } - *per_cpu_ptr(uncores, cpu) = NULL; + pr_info("%d %s %s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pmu->pmu.name); + + num_pmus++; + + return 0; } -static int amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_l3_id(unsigned int cpu) +{ + return per_cpu_llc_id(cpu); +} + +static int amd_uncore_l3_event_init(struct perf_event *event) { - if (amd_uncore_nb) - uncore_dead(cpu, amd_uncore_nb); + int ret = amd_uncore_event_init(event); + struct hw_perf_event *hwc = &event->hw; + u64 config = event->attr.config; + u64 mask; + + hwc->config = config & AMD64_RAW_EVENT_MASK_NB; + + /* + * SliceMask and ThreadMask need to be set for certain L3 events. + * For other events, the two fields do not affect the count. 
+ */ + if (ret || boot_cpu_data.x86 < 0x17) + return ret; + + mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | + AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_COREID_MASK); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 <= 0x18) + mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | + ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18) { + if (boot_cpu_data.x86_model >= 0x6 && boot_cpu_data.x86_model <= 0xf) + mask = ((config & HYGON_L3_SLICE_MASK) ? : HYGON_L3_SLICE_MASK) | + ((config & HYGON_L3_THREAD_MASK) ? : HYGON_L3_THREAD_MASK); + else + mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | + ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); + } + + /* + * If the user doesn't specify a ThreadMask, they're not trying to + * count core 0, so we enable all cores & threads. + * We'll also assume that they want to count slice 0 if they specify + * a ThreadMask and leave SliceId and EnAllSlices unpopulated. + */ + else if (!(config & AMD64_L3_F19H_THREAD_MASK)) + mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_EN_ALL_CORES; - if (amd_uncore_llc) - uncore_dead(cpu, amd_uncore_llc); + hwc->config |= mask; return 0; } -static int __init amd_uncore_init(void) +static int amd_uncore_l3_init(void) { - struct attribute **df_attr = amd_uncore_df_format_attr; struct attribute **l3_attr = amd_uncore_l3_format_attr; - union cpuid_0x80000022_ebx ebx; - int ret = -ENODEV; + struct amd_uncore_pmu *pmu = &pmus[num_pmus]; + int ret; - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return -ENODEV; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) + return 0; - if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) - return -ENODEV; + /* + * For Family 17h and above, L3 cache counters are available instead + * of L2 cache counters. The PMUs are exported based on family as + * either L2 or L3. + */ + strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", + sizeof(pmu->name)); - if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) - pmu_version = 2; + pmu->num_counters = NUM_COUNTERS_L2; + pmu->msr_base = MSR_F16H_L2I_PERF_CTL; + pmu->rdpmc_base = RDPMC_BASE_LLC; + pmu->id = amd_uncore_l3_id; - num_counters_nb = NUM_COUNTERS_NB; - num_counters_llc = NUM_COUNTERS_L2; - if (boot_cpu_data.x86 >= 0x17) { - /* - * For F17h and above, the Northbridge counters are - * repurposed as Data Fabric counters. Also, L3 - * counters are supported too. The PMUs are exported - * based on family as either L2 or L3 and NB or DF. - */ - num_counters_llc = NUM_COUNTERS_L3; - amd_nb_pmu.name = "amd_df"; - amd_llc_pmu.name = "amd_l3"; - l3_mask = true; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0x17) { + *l3_attr++ = &format_attr_event8.attr; + *l3_attr++ = &format_attr_umask8.attr; + *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 
+ &format_attr_threadmask2.attr : + &format_attr_threadmask8.attr; + pmu->num_counters = NUM_COUNTERS_L3; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18) { + *l3_attr++ = &format_attr_event8.attr; + *l3_attr++ = &format_attr_umask8.attr; + if (boot_cpu_data.x86_model >= 0x6 && + boot_cpu_data.x86_model <= 0xf) { + *l3_attr++ = &format_attr_threadmask32.attr; + pmu->pmu.attr_update = hygon_uncore_l3_attr_update; + } else { + *l3_attr++ = &format_attr_threadmask8.attr; + } } - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { - if (pmu_version >= 2) { - *df_attr++ = &format_attr_event14v2.attr; - *df_attr++ = &format_attr_umask12.attr; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0x17) { - *df_attr = &format_attr_event14.attr; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && - boot_cpu_data.x86 == 0x18) { - *df_attr++ = &format_attr_event14f18h.attr; - if (boot_cpu_data.x86_model == 0x4 || - boot_cpu_data.x86_model == 0x5) - *df_attr++ = &format_attr_umask10f18h.attr; - else if (boot_cpu_data.x86_model == 0x6 || - boot_cpu_data.x86_model == 0x7 || - boot_cpu_data.x86_model == 0x8 || - boot_cpu_data.x86_model == 0x10) - *df_attr++ = &format_attr_umask12f18h.attr; - } + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + return -ENOMEM; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_l3_attr_groups, + .attr_update = amd_uncore_l3_attr_update, + .name = pmu->name, + .event_init = amd_uncore_l3_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); + if (ret) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + return ret; + } - amd_uncore_nb = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_nb) { - ret = -ENOMEM; - goto fail_nb; - } - ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); - if (ret) - goto fail_nb; + pr_info("%d %s %s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pmu->pmu.name); - if (pmu_version >= 2) { - ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); - num_counters_nb = ebx.split.num_df_pmc; - } + num_pmus++; + + return 0; +} + +static void uncore_free(void) +{ + struct amd_uncore_pmu *pmu; + int i; - pr_info("%d %s %s counters detected\n", num_counters_nb, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? 
"HYGON" : "", - amd_nb_pmu.name); + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + if (!pmu->ctx) + continue; - ret = 0; + perf_pmu_unregister(&pmu->pmu); + free_percpu(pmu->ctx); + pmu->ctx = NULL; } - if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { - if (boot_cpu_data.x86 >= 0x19) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - *l3_attr++ = &format_attr_threadmask2.attr; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0x17) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - *l3_attr++ = &format_attr_threadmask8.attr; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && - boot_cpu_data.x86 == 0x18) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - if (boot_cpu_data.x86_model >= 0x6 && boot_cpu_data.x86_model <= 0xf) { - *l3_attr++ = &format_attr_threadmask32.attr; - amd_llc_pmu.attr_update = hygon_uncore_l3_attr_update; - } else { - *l3_attr++ = &format_attr_threadmask8.attr; - } - } + num_pmus = 0; +} - amd_uncore_llc = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_llc) { - ret = -ENOMEM; - goto fail_llc; - } - ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); - if (ret) - goto fail_llc; - - pr_info("%d %s %s counters detected\n", num_counters_llc, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", - amd_llc_pmu.name); - ret = 0; - } +static int __init amd_uncore_init(void) +{ + int ret; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return -ENODEV; + + if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) + pmu_version = 2; + + ret = amd_uncore_df_init(); + if (ret) + goto fail; + + ret = amd_uncore_l3_init(); + if (ret) + goto fail; /* * Install callbacks. Core will call them for each online cpu. @@ -813,7 +849,7 @@ static int __init amd_uncore_init(void) if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) - goto fail_llc; + goto fail; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, "perf/x86/amd/uncore:starting", @@ -830,12 +866,8 @@ static int __init amd_uncore_init(void) cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); -fail_llc: - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) - perf_pmu_unregister(&amd_nb_pmu); - free_percpu(amd_uncore_llc); -fail_nb: - free_percpu(amd_uncore_nb); +fail: + uncore_free(); return ret; } @@ -845,18 +877,7 @@ static void __exit amd_uncore_exit(void) cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); - - if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { - perf_pmu_unregister(&amd_llc_pmu); - free_percpu(amd_uncore_llc); - amd_uncore_llc = NULL; - } - - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { - perf_pmu_unregister(&amd_nb_pmu); - free_percpu(amd_uncore_nb); - amd_uncore_nb = NULL; - } + uncore_free(); } module_init(amd_uncore_init); -- Gitee From 1441b06a036c70a866f1048ba9e831a1c5bcc873 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:12 +0530 Subject: [PATCH 04/12] perf/x86/amd/uncore: Move discovery and registration ANBZ: #28936 commit 07888daa056e809de0b6b234116b575c11f9f99d upstream Uncore PMUs have traditionally been registered in the module init path. 
This is fine for the existing DF and L3 PMUs since the CPUID information does not vary across CPUs, but not for the memory controller (UMC) PMUs, since information like active memory channels can vary for each socket depending on how the DIMMs have been physically populated. To overcome this, the discovery of PMU information using CPUID is moved to the startup of UNCORE_STARTING. This cannot be done in the startup of UNCORE_PREP since the hotplug callback does not run on the CPU that is being brought online. Previously, the startup of UNCORE_PREP was used for allocating uncore contexts, following which the startup of UNCORE_STARTING was used to find and reuse an existing sibling context, if possible. Any unused contexts were added to a list for reclamation later during the startup of UNCORE_ONLINE. Since all required CPUID info is now available only after the startup of UNCORE_STARTING has completed, context allocation has been moved to the startup of UNCORE_ONLINE. Before allocating contexts, the first CPU that comes online has to take up the additional responsibility of registering the PMUs. This is a one-time process though. Since sibling discovery now happens prior to deciding whether a new context is required, there is no longer a need to track and free up unused contexts. The teardown of UNCORE_ONLINE and UNCORE_PREP functionally remain the same. Overall, the flow of control described above is achieved using the following handlers for managing uncore PMUs. It is mandatory to define them for each type of uncore PMU; a sketch of the dispatch follows the list. * scan() runs during startup of UNCORE_STARTING and collects PMU info using CPUID. * init() runs during startup of UNCORE_ONLINE, registers PMUs and sets up uncore contexts. * move() runs during teardown of UNCORE_ONLINE and migrates uncore contexts to a shared sibling, if possible. * free() runs during teardown of UNCORE_PREP and frees up uncore contexts.
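As an illustration, the hotplug callbacks reduce to thin loops that dispatch to these handlers; a minimal sketch distilled from the code below:

static int amd_uncore_cpu_starting(unsigned int cpu)
{
	int i;

	/* runs on the CPU coming online, so CPUID describes that CPU */
	for (i = 0; i < UNCORE_TYPE_MAX; i++)
		uncores[i].scan(&uncores[i], cpu);

	return 0;
}

The online, down_prepare and dead callbacks follow the same shape, invoking init(), move() and free() respectively.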
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/e6c447e48872fcab8452e0dd81b1c9cb09f39eb4.1696425185.git.sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 494 +++++++++++++++++++++-------------- 1 file changed, 305 insertions(+), 189 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ff5ecab8f39c..ae8b96c51bcf 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -26,8 +26,6 @@ #define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 - -#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */ #define UNCORE_NAME_LEN 16 #undef pr_fmt @@ -35,10 +33,7 @@ static int pmu_version; -static HLIST_HEAD(uncore_unused_list); - struct amd_uncore_ctx { - int id; int refcnt; int cpu; struct perf_event **events; @@ -53,11 +48,36 @@ struct amd_uncore_pmu { cpumask_t active_mask; struct pmu pmu; struct amd_uncore_ctx * __percpu *ctx; - int (*id)(unsigned int cpu); }; -static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX]; -static int num_pmus __read_mostly; +enum { + UNCORE_TYPE_DF, + UNCORE_TYPE_L3, + + UNCORE_TYPE_MAX +}; + +union amd_uncore_info { + struct { + u64 aux_data:32; /* auxiliary data */ + u64 num_pmcs:8; /* number of counters */ + u64 cid:8; /* context id */ + } split; + u64 full; +}; + +struct amd_uncore { + union amd_uncore_info * __percpu info; + struct amd_uncore_pmu *pmus; + unsigned int num_pmus; + bool init_done; + void (*scan)(struct amd_uncore *uncore, unsigned int cpu); + int (*init)(struct amd_uncore *uncore, unsigned int cpu); + void (*move)(struct amd_uncore *uncore, unsigned int cpu); + void (*free)(struct amd_uncore *uncore, unsigned int cpu); +}; + +static struct amd_uncore uncores[UNCORE_TYPE_MAX]; static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { @@ -362,182 +382,192 @@ static const struct attribute_group *hygon_uncore_l3_attr_update[] = { NULL, }; -static int amd_uncore_cpu_up_prepare(unsigned int cpu) +static __always_inline +int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) { - struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int node = cpu_to_node(cpu), i; - - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - *per_cpu_ptr(pmu->ctx, cpu) = NULL; - ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL, - node); - if (!ctx) - goto fail; - - ctx->cpu = cpu; - ctx->events = kzalloc_node(sizeof(struct perf_event *) * - pmu->num_counters, GFP_KERNEL, - node); - if (!ctx->events) - goto fail; - - ctx->id = -1; - *per_cpu_ptr(pmu->ctx, cpu) = ctx; - } - - return 0; - -fail: - /* Rollback */ - for (; i >= 0; i--) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (!ctx) - continue; - - kfree(ctx->events); - kfree(ctx); - } + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.cid; +} - return -ENOMEM; +static __always_inline +int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) +{ + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.num_pmcs; } -static struct amd_uncore_ctx * -amd_uncore_find_online_sibling(struct amd_uncore_ctx *this, - struct amd_uncore_pmu *pmu) +static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu) { - unsigned int cpu; - struct amd_uncore_ctx *that; + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; - for_each_online_cpu(cpu) { - that = *per_cpu_ptr(pmu->ctx, cpu); + if 
(!uncore->init_done) + return; - if (!that) + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (!ctx) continue; - if (this == that) - continue; + if (cpu == ctx->cpu) + cpumask_clear_cpu(cpu, &pmu->active_mask); - if (this->id == that->id) { - hlist_add_head(&this->node, &uncore_unused_list); - this = that; - break; + if (!--ctx->refcnt) { + kfree(ctx->events); + kfree(ctx); } - } - this->refcnt++; - return this; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + } } -static int amd_uncore_cpu_starting(unsigned int cpu) +static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { + struct amd_uncore_ctx *curr, *prev; struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int i; + int node, cid, i, j; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - ctx->id = pmu->id(cpu); - ctx = amd_uncore_find_online_sibling(ctx, pmu); - *per_cpu_ptr(pmu->ctx, cpu) = ctx; - } + if (!uncore->init_done || !uncore->num_pmus) + return 0; - return 0; -} + cid = amd_uncore_ctx_cid(uncore, cpu); -static void uncore_clean_online(void) -{ - struct amd_uncore_ctx *ctx; - struct hlist_node *n; + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + curr = NULL; - hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) { - hlist_del(&ctx->node); - kfree(ctx->events); - kfree(ctx); - } -} + /* Find a sibling context */ + for_each_online_cpu(j) { + if (cpu == j) + continue; -static int amd_uncore_cpu_online(unsigned int cpu) -{ - struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int i; + prev = *per_cpu_ptr(pmu->ctx, j); + if (!prev) + continue; - uncore_clean_online(); + if (cid == amd_uncore_ctx_cid(uncore, j)) { + curr = prev; + break; + } + } + + /* Allocate context if sibling does not exist */ + if (!curr) { + node = cpu_to_node(cpu); + curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node); + if (!curr) + goto fail; + + curr->cpu = cpu; + curr->events = kzalloc_node(sizeof(*curr->events) * + pmu->num_counters, + GFP_KERNEL, node); + if (!curr->events) { + kfree(curr); + goto fail; + } - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (cpu == ctx->cpu) cpumask_set_cpu(cpu, &pmu->active_mask); + } + + curr->refcnt++; + *per_cpu_ptr(pmu->ctx, cpu) = curr; } return 0; + +fail: + amd_uncore_ctx_free(uncore, cpu); + + return -ENOMEM; } -static int amd_uncore_cpu_down_prepare(unsigned int cpu) +static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu) { - struct amd_uncore_ctx *this, *that; + struct amd_uncore_ctx *curr, *next; struct amd_uncore_pmu *pmu; int i, j; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - this = *per_cpu_ptr(pmu->ctx, cpu); + if (!uncore->init_done) + return; - /* this cpu is going down, migrate to a shared sibling if possible */ - for_each_online_cpu(j) { - that = *per_cpu_ptr(pmu->ctx, j); + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + curr = *per_cpu_ptr(pmu->ctx, cpu); + if (!curr) + continue; - if (cpu == j) + /* Migrate to a shared sibling if possible */ + for_each_online_cpu(j) { + next = *per_cpu_ptr(pmu->ctx, j); + if (!next || cpu == j) continue; - if (this == that) { + if (curr == next) { perf_pmu_migrate_context(&pmu->pmu, cpu, j); cpumask_clear_cpu(cpu, &pmu->active_mask); cpumask_set_cpu(j, &pmu->active_mask); - that->cpu = j; + next->cpu = j; break; } } } +} + +static int amd_uncore_cpu_starting(unsigned int 
cpu) +{ + struct amd_uncore *uncore; + int i; + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->scan(uncore, cpu); + } return 0; } -static int amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_cpu_online(unsigned int cpu) { - struct amd_uncore_ctx *ctx; - struct amd_uncore_pmu *pmu; + struct amd_uncore *uncore; int i; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (cpu == ctx->cpu) - cpumask_clear_cpu(cpu, &pmu->active_mask); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (uncore->init(uncore, cpu)) + break; + } - if (!--ctx->refcnt) { - kfree(ctx->events); - kfree(ctx); - } + return 0; +} - *per_cpu_ptr(pmu->ctx, cpu) = NULL; +static int amd_uncore_cpu_down_prepare(unsigned int cpu) +{ + struct amd_uncore *uncore; + int i; + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->move(uncore, cpu); } return 0; } -static int amd_uncore_df_id(unsigned int cpu) +static int amd_uncore_cpu_dead(unsigned int cpu) { - unsigned int eax, ebx, ecx, edx; + struct amd_uncore *uncore; + int i; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->free(uncore, cpu); + } - return ecx & 0xff; + return 0; } static int amd_uncore_df_event_init(struct perf_event *event) @@ -593,34 +623,59 @@ static int amd_uncore_df_add(struct perf_event *event, int flags) return 0; } -static int amd_uncore_df_init(void) +static +void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { - struct attribute **df_attr = amd_uncore_df_format_attr; - struct amd_uncore_pmu *pmu = &pmus[num_pmus]; union cpuid_0x80000022_ebx ebx; - int ret; + union amd_uncore_info info; if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) - return 0; + return; + + info.split.aux_data = 0; + info.split.num_pmcs = NUM_COUNTERS_NB; + info.split.cid = topology_die_id(cpu); + + if (pmu_version >= 2) { + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + info.split.num_pmcs = ebx.split.num_df_pmc; + } + + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + struct attribute **df_attr = amd_uncore_df_format_attr; + struct amd_uncore_pmu *pmu; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* No grouping, single instance for a system */ + uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } /* * For Family 17h and above, the Northbridge counters are repurposed * as Data Fabric counters. The PMUs are exported based on family as * either NB or DF. */ + pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? 
"amd_df" : "amd_nb", sizeof(pmu->name)); - - pmu->num_counters = NUM_COUNTERS_NB; + pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; - pmu->id = amd_uncore_df_id; if (pmu_version >= 2) { *df_attr++ = &format_attr_event14v2.attr; *df_attr++ = &format_attr_umask12.attr; - ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); - pmu->num_counters = ebx.split.num_df_pmc; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 0x17) { *df_attr = &format_attr_event14.attr; @@ -639,7 +694,7 @@ static int amd_uncore_df_init(void) pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); if (!pmu->ctx) - return -ENOMEM; + goto done; pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, @@ -655,25 +710,22 @@ static int amd_uncore_df_init(void) .module = THIS_MODULE, }; - ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); - if (ret) { + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { free_percpu(pmu->ctx); pmu->ctx = NULL; - return ret; + goto done; } - pr_info("%d %s %s counters detected\n", pmu->num_counters, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pr_info("%d %s%s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", pmu->pmu.name); - num_pmus++; + uncore->num_pmus = 1; - return 0; -} +done: + uncore->init_done = true; -static int amd_uncore_l3_id(unsigned int cpu) -{ - return per_cpu_llc_id(cpu); + return amd_uncore_ctx_init(uncore, cpu); } static int amd_uncore_l3_event_init(struct perf_event *event) @@ -725,27 +777,52 @@ static int amd_uncore_l3_event_init(struct perf_event *event) return 0; } -static int amd_uncore_l3_init(void) +static +void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { - struct attribute **l3_attr = amd_uncore_l3_format_attr; - struct amd_uncore_pmu *pmu = &pmus[num_pmus]; - int ret; + union amd_uncore_info info; if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) - return 0; + return; + + info.split.aux_data = 0; + info.split.num_pmcs = NUM_COUNTERS_L2; + info.split.cid = per_cpu_llc_id(cpu); + + if (boot_cpu_data.x86 >= 0x17) + info.split.num_pmcs = NUM_COUNTERS_L3; + + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + struct attribute **l3_attr = amd_uncore_l3_format_attr; + struct amd_uncore_pmu *pmu; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* No grouping, single instance for a system */ + uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } /* * For Family 17h and above, L3 cache counters are available instead * of L2 cache counters. The PMUs are exported based on family as * either L2 or L3. */ + pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", sizeof(pmu->name)); - - pmu->num_counters = NUM_COUNTERS_L2; + pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; - pmu->id = amd_uncore_l3_id; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 0x17) { @@ -754,7 +831,6 @@ static int amd_uncore_l3_init(void) *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 
&format_attr_threadmask2.attr : &format_attr_threadmask8.attr; - pmu->num_counters = NUM_COUNTERS_L3; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 == 0x18) { *l3_attr++ = &format_attr_event8.attr; @@ -770,7 +846,7 @@ static int amd_uncore_l3_init(void) pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); if (!pmu->ctx) - return -ENOMEM; + goto done; pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, @@ -787,43 +863,45 @@ static int amd_uncore_l3_init(void) .module = THIS_MODULE, }; - ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); - if (ret) { + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { free_percpu(pmu->ctx); pmu->ctx = NULL; - return ret; + goto done; } - pr_info("%d %s %s counters detected\n", pmu->num_counters, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pr_info("%d %s%s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", pmu->pmu.name); - num_pmus++; - - return 0; -} - -static void uncore_free(void) -{ - struct amd_uncore_pmu *pmu; - int i; + uncore->num_pmus = 1; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - if (!pmu->ctx) - continue; - - perf_pmu_unregister(&pmu->pmu); - free_percpu(pmu->ctx); - pmu->ctx = NULL; - } +done: + uncore->init_done = true; - num_pmus = 0; + return amd_uncore_ctx_init(uncore, cpu); } +static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { + /* UNCORE_TYPE_DF */ + { + .scan = amd_uncore_df_ctx_scan, + .init = amd_uncore_df_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, + /* UNCORE_TYPE_L3 */ + { + .scan = amd_uncore_l3_ctx_scan, + .init = amd_uncore_l3_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, +}; + static int __init amd_uncore_init(void) { - int ret; + struct amd_uncore *uncore; + int ret, i; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -835,20 +913,27 @@ static int __init amd_uncore_init(void) if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) pmu_version = 2; - ret = amd_uncore_df_init(); - if (ret) - goto fail; + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; - ret = amd_uncore_l3_init(); - if (ret) - goto fail; + BUG_ON(!uncore->scan); + BUG_ON(!uncore->init); + BUG_ON(!uncore->move); + BUG_ON(!uncore->free); + + uncore->info = alloc_percpu(union amd_uncore_info); + if (!uncore->info) { + ret = -ENOMEM; + goto fail; + } + }; /* * Install callbacks. Core will call them for each online cpu. 
*/ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", - amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) + NULL, amd_uncore_cpu_dead)) goto fail; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, @@ -867,17 +952,48 @@ static int __init amd_uncore_init(void) fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); fail: - uncore_free(); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (uncore->info) { + free_percpu(uncore->info); + uncore->info = NULL; + } + } return ret; } static void __exit amd_uncore_exit(void) { + struct amd_uncore *uncore; + struct amd_uncore_pmu *pmu; + int i, j; + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); - uncore_free(); + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (!uncore->info) + continue; + + free_percpu(uncore->info); + uncore->info = NULL; + + for (j = 0; j < uncore->num_pmus; j++) { + pmu = &uncore->pmus[j]; + if (!pmu->ctx) + continue; + + perf_pmu_unregister(&pmu->pmu); + free_percpu(pmu->ctx); + pmu->ctx = NULL; + } + + kfree(uncore->pmus); + uncore->pmus = NULL; + } } module_init(amd_uncore_init); -- Gitee From 2b98612c2b9298f1c1685b1e687fb0d279fc8faa Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:13 +0530 Subject: [PATCH 05/12] perf/x86/amd/uncore: Use rdmsr if rdpmc is unavailable ANBZ: #28936 commit 7ef0343855dc23a979a53b3143540f93f3e5bef8 upstream Not all uncore PMUs may support the use of the RDPMC instruction for reading counters. In such cases, read the count from the corresponding PERF_CTR register using the RDMSR instruction. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/e9d994e32a3fcb39fa59fcf43ab4260d11aba097.1696425185.git.sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ae8b96c51bcf..1caed0ee9f32 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -96,7 +96,16 @@ static void amd_uncore_read(struct perf_event *event) */ prev = local64_read(&hwc->prev_count); - rdpmcl(hwc->event_base_rdpmc, new); + + /* + * Some uncore PMUs do not have RDPMC assignments. In such cases, + * read counts directly from the corresponding PERF_CTR. + */ + if (hwc->event_base_rdpmc < 0) + rdmsrl(hwc->event_base, new); + else + rdpmcl(hwc->event_base_rdpmc, new); + local64_set(&hwc->prev_count, new); delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); delta >>= COUNTER_SHIFT; @@ -164,6 +173,9 @@ static int amd_uncore_add(struct perf_event *event, int flags) hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (pmu->rdpmc_base < 0) + hwc->event_base_rdpmc = -1; + if (flags & PERF_EF_START) event->pmu->start(event, PERF_EF_RELOAD); -- Gitee From aa2c30f4d3fcd8d882627c76cd6a101666df9b3c Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:14 +0530 Subject: [PATCH 06/12] perf/x86/amd/uncore: Add group exclusivity ANBZ: #28936 commit 83a43c622123e714b0317a57176b336187f5deb3 upstream In some cases, it may be necessary to restrict opening PMU events to a subset of CPUs. E.g. 
Unified Memory Controller (UMC) PMUs are specific to each active memory channel and the MSR address space for the PERF_CTL and PERF_CTR registers is reused on each socket. Thus, opening events for a specific UMC PMU should be restricted to CPUs belonging to the same socket as that of the UMC. The "cpumask" of the PMU should also reflect this accordingly. Uncore PMUs which require this can use the new group attribute in struct amd_uncore_pmu to set a valid group ID during the scan() phase. Later, during init(), an uncore context for a CPU will be unavailable if the group ID does not match. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/937d6d71010a48ea4e069f4904b3116a5f99ecdf.1696425185.git.sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 1caed0ee9f32..655b8f6b0f51 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -27,6 +27,7 @@ #define COUNTER_SHIFT 16 #define UNCORE_NAME_LEN 16 +#define UNCORE_GROUP_MAX 256 #undef pr_fmt #define pr_fmt(fmt) "amd_uncore: " fmt @@ -45,6 +46,7 @@ struct amd_uncore_pmu { int num_counters; int rdpmc_base; u32 msr_base; + int group; cpumask_t active_mask; struct pmu pmu; struct amd_uncore_ctx * __percpu *ctx; @@ -61,6 +63,7 @@ union amd_uncore_info { struct { u64 aux_data:32; /* auxiliary data */ u64 num_pmcs:8; /* number of counters */ + u64 gid:8; /* group id */ u64 cid:8; /* context id */ } split; u64 full; @@ -401,6 +404,13 @@ int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) return info->split.cid; } +static __always_inline +int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu) +{ + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.gid; +} + static __always_inline int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) { @@ -439,18 +449,23 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct amd_uncore_ctx *curr, *prev; struct amd_uncore_pmu *pmu; - int node, cid, i, j; + int node, cid, gid, i, j; if (!uncore->init_done || !uncore->num_pmus) return 0; cid = amd_uncore_ctx_cid(uncore, cpu); + gid = amd_uncore_ctx_gid(uncore, cpu); for (i = 0; i < uncore->num_pmus; i++) { pmu = &uncore->pmus[i]; *per_cpu_ptr(pmu->ctx, cpu) = NULL; curr = NULL; + /* Check for group exclusivity */ + if (gid != pmu->group) + continue; + /* Find a sibling context */ for_each_online_cpu(j) { if (cpu == j) @@ -646,6 +661,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_NB; + info.split.gid = 0; info.split.cid = topology_die_id(cpu); if (pmu_version >= 2) { @@ -684,6 +700,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; + pmu->group = amd_uncore_ctx_gid(uncore, cpu); if (pmu_version >= 2) { *df_attr++ = &format_attr_event14v2.attr; @@ -799,6 +816,7 @@ void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_L2; + info.split.gid = 0; info.split.cid = per_cpu_llc_id(cpu); if (boot_cpu_data.x86 >= 0x17) @@ -835,6 +853,7 @@ int amd_uncore_l3_ctx_init(struct 
amd_uncore *uncore, unsigned int cpu) pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; + pmu->group = amd_uncore_ctx_gid(uncore, cpu); if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 0x17) { -- Gitee From 3cd556c6f15399c46cbdaf2e7aa8df3b54b546d4 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:15 +0530 Subject: [PATCH 07/12] perf/x86/amd/uncore: Add memory controller support ANBZ: #28936 commit 25e56847821f7375bdee7dae1027c7917d07ce4b upstream

Unified Memory Controller (UMC) events were introduced with Zen 4 as a part of the Performance Monitoring Version 2 (PerfMonV2) enhancements. An event is specified using the EventSelect bits and the RdWrMask bits can be used for additional filtering of read and write requests.

As of now, a maximum of 12 channels of DDR5 are available on each socket and each channel is controlled by a dedicated UMC. Each UMC, in turn, has its own set of performance monitoring counters.

Since the MSR address space for the UMC PERF_CTL and PERF_CTR registers is reused across sockets, uncore groups are created on the basis of socket IDs. Hence, group exclusivity is mandatory while opening events so that events for a UMC can only be opened on CPUs which are on the same socket as the corresponding memory channel.

For each socket, the total number of available UMC counters and active memory channels are determined from CPUID leaf 0x80000022 EBX and ECX respectively. Usually, on Zen 4, each UMC has four counters. MSR assignments are determined on the basis of active UMCs. E.g. if UMCs 1, 4 and 9 are active for a given socket, then

* UMC 1 gets MSRs 0xc0010800 to 0xc0010807 as PERF_CTLs and PERF_CTRs
* UMC 4 gets MSRs 0xc0010808 to 0xc001080f as PERF_CTLs and PERF_CTRs
* UMC 9 gets MSRs 0xc0010810 to 0xc0010817 as PERF_CTLs and PERF_CTRs

If there are sockets without any online CPUs when the amd_uncore driver is loaded, UMCs for such sockets will not be discoverable since the mechanism relies on executing the CPUID instruction on an online CPU from the socket.
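The MSR assignment above is plain contiguous block allocation over the set of active UMCs: the i-th active UMC gets the i-th block of num_counters CTL/CTR pairs starting at 0xc0010800. A minimal C sketch of that arithmetic, assuming the usual four counters per UMC on Zen 4 (the helper name is made up for illustration):

  #include <stdio.h>

  #define UMC_PERF_CTL_BASE 0xc0010800ULL /* MSR_F19H_UMC_PERF_CTL */
  #define UMC_NUM_COUNTERS  4             /* typical for Zen 4 */

  /* MSR blocks are handed out in order of *active* UMCs, not by
   * absolute UMC number: the i-th active UMC gets the i-th block of
   * UMC_NUM_COUNTERS CTL/CTR pairs. */
  static unsigned long long umc_msr_base(unsigned int nth_active)
  {
          return UMC_PERF_CTL_BASE + nth_active * UMC_NUM_COUNTERS * 2;
  }

  int main(void)
  {
          unsigned int i;

          /* Active UMCs 1, 4 and 9 are the 0th, 1st and 2nd active
           * ones, reproducing the ranges quoted above. */
          for (i = 0; i < 3; i++)
                  printf("active UMC #%u: MSRs 0x%llx to 0x%llx\n", i,
                         umc_msr_base(i),
                         umc_msr_base(i) + UMC_NUM_COUNTERS * 2 - 1);
          return 0;
  }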
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/b25f391205c22733493abec1ed850b71784edc5f.1696425185.git.sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 156 +++++++++++++++++++++++++++++- arch/x86/include/asm/msr-index.h | 4 + arch/x86/include/asm/perf_event.h | 9 ++ 3 files changed, 168 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 655b8f6b0f51..bd2ebccf0bed 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -55,6 +55,7 @@ struct amd_uncore_pmu { enum { UNCORE_TYPE_DF, UNCORE_TYPE_L3, + UNCORE_TYPE_UMC, UNCORE_TYPE_MAX }; @@ -295,7 +296,7 @@ DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35"); DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */ DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */ DEFINE_UNCORE_FORMAT_ATTR(event14f18h, event, "config:0-7,32-35,61-62"); /* F18h DF */ -DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */ +DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */ DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask10f18h, umask, "config:8-17"); /* F18h M4h DF */ DEFINE_UNCORE_FORMAT_ATTR(umask12f18h, umask, "config:8-19"); /* F18h M6h DF */ @@ -309,6 +310,7 @@ DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 * DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */ DEFINE_UNCORE_FORMAT_ATTR(slicemask4, slicemask, "config:28-31"); /* F18h L3 */ DEFINE_UNCORE_FORMAT_ATTR(threadmask32, threadmask, "config:32-63"); /* F18h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */ /* Common DF and NB attributes */ static struct attribute *amd_uncore_df_format_attr[] = { @@ -325,6 +327,13 @@ static struct attribute *amd_uncore_l3_format_attr[] = { NULL, }; +/* Common UMC attributes */ +static struct attribute *amd_uncore_umc_format_attr[] = { + &format_attr_event8.attr, /* event */ + &format_attr_rdwrmask.attr, /* rdwrmask */ + NULL, +}; + /* F17h unique L3 attributes */ static struct attribute *amd_f17h_uncore_l3_format_attr[] = { &format_attr_slicemask.attr, /* slicemask */ @@ -374,6 +383,11 @@ static struct attribute_group amd_f19h_uncore_l3_format_group = { .is_visible = amd_f19h_uncore_is_visible, }; +static struct attribute_group amd_uncore_umc_format_group = { + .name = "format", + .attrs = amd_uncore_umc_format_attr, +}; + static const struct attribute_group *amd_uncore_df_attr_groups[] = { &amd_uncore_attr_group, &amd_uncore_df_format_group, @@ -397,6 +411,12 @@ static const struct attribute_group *hygon_uncore_l3_attr_update[] = { NULL, }; +static const struct attribute_group *amd_uncore_umc_attr_groups[] = { + &amd_uncore_attr_group, + &amd_uncore_umc_format_group, + NULL, +}; + static __always_inline int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) { @@ -912,6 +932,133 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) return amd_uncore_ctx_init(uncore, cpu); } +static int amd_uncore_umc_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int ret = amd_uncore_event_init(event); + + if (ret) + return ret; + + hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC; + + return 0; +} + +static void amd_uncore_umc_start(struct perf_event 
*event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + + hwc->state = 0; + wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); + perf_event_update_userpage(event); +} + +static +void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) +{ + union cpuid_0x80000022_ebx ebx; + union amd_uncore_info info; + unsigned int eax, ecx, edx; + + if (pmu_version < 2) + return; + + cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx); + info.split.aux_data = ecx; /* stash active mask */ + info.split.num_pmcs = ebx.split.num_umc_pmc; + info.split.gid = topology_die_id(cpu); + info.split.cid = topology_die_id(cpu); + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 }; + u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 }; + u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 }; + union amd_uncore_info info; + struct amd_uncore_pmu *pmu; + int index = 0, gid, i; + + if (pmu_version < 2) + return 0; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* Find unique groups */ + for_each_online_cpu(i) { + info = *per_cpu_ptr(uncore->info, i); + gid = info.split.gid; + if (test_bit(gid, gmask)) + continue; + + __set_bit(gid, gmask); + group_num_pmus[gid] = hweight32(info.split.aux_data); + group_num_pmcs[gid] = info.split.num_pmcs; + uncore->num_pmus += group_num_pmus[gid]; + } + + uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus, + GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } + + for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) { + for (i = 0; i < group_num_pmus[gid]; i++) { + pmu = &uncore->pmus[index]; + snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index); + pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid]; + pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2; + pmu->rdpmc_base = -1; + pmu->group = gid; + + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + goto done; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_umc_attr_groups, + .name = pmu->name, + .event_init = amd_uncore_umc_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_umc_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + goto done; + } + + pr_info("%d %s counters detected\n", pmu->num_counters, + pmu->pmu.name); + + index++; + } + } + +done: + uncore->num_pmus = index; + uncore->init_done = true; + + return amd_uncore_ctx_init(uncore, cpu); +} + static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { /* UNCORE_TYPE_DF */ { @@ -927,6 +1074,13 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { .move = amd_uncore_ctx_move, .free = amd_uncore_ctx_free, }, + /* UNCORE_TYPE_UMC */ + { + .scan = amd_uncore_umc_ctx_scan, + .init = amd_uncore_umc_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, }; static int __init amd_uncore_init(void) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 70d9da2c8500..628fa27920fb 100644 --- a/arch/x86/include/asm/msr-index.h +++ 
b/arch/x86/include/asm/msr-index.h @@ -703,6 +703,10 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +/* Fam 19h MSRs */ +#define MSR_F19H_UMC_PERF_CTL 0xc0010800 +#define MSR_F19H_UMC_PERF_CTR 0xc0010801 + /* Zen 2 */ #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index e671cce1b417..2185ae1c459c 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -140,6 +140,13 @@ (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \ AMD64_PERFMON_V2_EVENTSEL_UMASK_NB) +#define AMD64_PERFMON_V2_ENABLE_UMC BIT_ULL(31) +#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC GENMASK_ULL(7, 0) +#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC GENMASK_ULL(9, 8) +#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC \ + (AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC | \ + AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC) + #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 #define AMD64_NUM_COUNTERS_NB 4 @@ -294,6 +301,8 @@ union cpuid_0x80000022_ebx { unsigned int lbr_v2_stack_sz:6; /* Number of Data Fabric Counters */ unsigned int num_df_pmc:6; + /* Number of Unified Memory Controller Counters */ + unsigned int num_umc_pmc:6; } split; unsigned int full; }; -- Gitee From 72a3db95b071fa6008cebf93a2076b17f25f0902 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Oct 2023 10:18:12 +0300 Subject: [PATCH 08/12] perf/x86/amd/uncore: Fix uninitialized return value in amd_uncore_init() ANBZ: #28936 commit 7543365739a4ff61d40ad53ab68c17d2e7dfb0c9 upstream Some of the error paths in this function don't initialize the error code. Return -ENODEV by default. Fixes: d6389d3ccc13 ("perf/x86/amd/uncore: Refactor uncore management") Signed-off-by: Dan Carpenter Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/cec62eba-c4b8-4cb7-9671-58894dd4b974@moroto.mountain Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index bd2ebccf0bed..b82e1847dfc7 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -1086,7 +1086,8 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { static int __init amd_uncore_init(void) { struct amd_uncore *uncore; - int ret, i; + int ret = -ENODEV; + int i; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) -- Gitee From bc6f899e5c5d98c232b47e0bc9560bd5d3eff164 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 16 Oct 2023 11:37:43 +0530 Subject: [PATCH 09/12] perf/x86/amd/uncore: Pass through error code for initialization failures, instead of -ENODEV ANBZ: #28936 commit 744940f1921c8feb90e3c4bcc1e153fdd6e10fe2 upstream Pass through the appropriate error code when the registration of hotplug callbacks fails during initialization, instead of returning a blanket -ENODEV. [ mingo: Updated the changelog.
] Reported-by: Dan Carpenter Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231016060743.332051-1-sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index b82e1847dfc7..8b966e2a167c 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -1117,20 +1117,25 @@ static int __init amd_uncore_init(void) /* * Install callbacks. Core will call them for each online cpu. */ - if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, - "perf/x86/amd/uncore:prepare", - NULL, amd_uncore_cpu_dead)) + ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, + "perf/x86/amd/uncore:prepare", + NULL, amd_uncore_cpu_dead); + if (ret) goto fail; - if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, - "perf/x86/amd/uncore:starting", - amd_uncore_cpu_starting, NULL)) + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, + "perf/x86/amd/uncore:starting", + amd_uncore_cpu_starting, NULL); + if (ret) goto fail_prep; - if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, - "perf/x86/amd/uncore:online", - amd_uncore_cpu_online, - amd_uncore_cpu_down_prepare)) + + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, + "perf/x86/amd/uncore:online", + amd_uncore_cpu_online, + amd_uncore_cpu_down_prepare); + if (ret) goto fail_start; + return 0; fail_start: -- Gitee From 027579de76d6e5bde0e63e54dc547ba0d9263edf Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Wed, 29 Nov 2023 11:55:04 +0530 Subject: [PATCH 10/12] perf vendor events amd: Add Zen 4 memory controller events ANBZ: #28936 commit 346878dacc81f53667381c8f4bb5018195ca10be upstream Make the jevents parser aware of the Unified Memory Controller (UMC) PMU and add events taken from Section 8.2.1 "UMC Performance Monitor Events" of the Processor Programming Reference (PPR) for AMD Family 19h Model 11h processors. The events capture UMC command activity such as CAS, ACTIVATE, PRECHARGE etc. while the metrics derive data bus utilization and memory bandwidth out of these events. 
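The bandwidth metrics added here reduce to simple arithmetic: each CAS command transfers one 64-byte cache line, so bandwidth is the CAS count times 64 bytes over the sampling interval, exactly as encoded in the MetricExpr fields below. A small self-contained C sketch of that calculation, using made-up sample counts:

  #include <stdio.h>

  /* Mirrors the umc_mem_*_bandwidth metric expressions:
   * (cas_commands * 64 bytes) / 1e6 / seconds => MB/s. */
  static double umc_bandwidth_mbps(unsigned long long cas_cmds, double secs)
  {
          return (double)cas_cmds * 64.0 / 1e6 / secs;
  }

  int main(void)
  {
          /* Made-up sample counts over a one second interval. */
          unsigned long long rd = 250000000ULL, wr = 125000000ULL;
          double interval = 1.0;

          printf("read:     %.1f MB/s\n", umc_bandwidth_mbps(rd, interval));
          printf("write:    %.1f MB/s\n", umc_bandwidth_mbps(wr, interval));
          printf("combined: %.1f MB/s\n",
                 umc_bandwidth_mbps(rd + wr, interval));
          return 0;
  }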
Signed-off-by: Sandipan Das Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Link: https://lore.kernel.org/r/e0d8a7e8ca8ee3e378d8029e80b456ac327d6419.1701238314.git.sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- .../arch/x86/amdzen4/memory-controller.json | 101 ++++++++++++++++++ .../arch/x86/amdzen4/recommended.json | 84 +++++++++++++++ tools/perf/pmu-events/jevents.py | 2 + 3 files changed, 187 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json new file mode 100644 index 000000000000..55263e5e4f69 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json @@ -0,0 +1,101 @@ +[ + { + "EventName": "umc_mem_clk", + "PublicDescription": "Number of memory clock cycles.", + "EventCode": "0x00", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.all", + "PublicDescription": "Number of ACTIVATE commands sent.", + "EventCode": "0x05", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.rd", + "PublicDescription": "Number of ACTIVATE commands sent for reads.", + "EventCode": "0x05", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.wr", + "PublicDescription": "Number of ACTIVATE commands sent for writes.", + "EventCode": "0x05", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.all", + "PublicDescription": "Number of PRECHARGE commands sent.", + "EventCode": "0x06", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.rd", + "PublicDescription": "Number of PRECHARGE commands sent for reads.", + "EventCode": "0x06", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.wr", + "PublicDescription": "Number of PRECHARGE commands sent for writes.", + "EventCode": "0x06", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.all", + "PublicDescription": "Number of CAS commands sent.", + "EventCode": "0x0a", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.rd", + "PublicDescription": "Number of CAS commands sent for reads.", + "EventCode": "0x0a", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.wr", + "PublicDescription": "Number of CAS commands sent for writes.", + "EventCode": "0x0a", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.all", + "PublicDescription": "Number of clocks used by the data bus.", + "EventCode": "0x14", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.rd", + "PublicDescription": "Number of clocks used by the data bus for reads.", + "EventCode": "0x14", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.wr", + "PublicDescription": "Number of clocks used by the data bus for writes.", + "EventCode": "0x14", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json index 5e6a793acf7b..96e06401c6cb 
100644 --- a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json @@ -330,5 +330,89 @@ "MetricGroup": "data_fabric", "PerPkg": "1", "ScaleUnit": "6.103515625e-5MiB" + }, + { + "MetricName": "umc_data_bus_utilization", + "BriefDescription": "Memory controller data bus utilization.", + "MetricExpr": "d_ratio(umc_data_slot_clks.all / 2, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_rate", + "BriefDescription": "Memory controller CAS command rate.", + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_cas_cmd_read_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for reads.", + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_write_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for writes.", + "MetricExpr": "d_ratio(umc_cas_cmd.wr, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_mem_read_bandwidth", + "BriefDescription": "Estimated memory read bandwidth.", + "MetricExpr": "(umc_cas_cmd.rd * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_mem_write_bandwidth", + "BriefDescription": "Estimated memory write bandwidth.", + "MetricExpr": "(umc_cas_cmd.wr * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_mem_bandwidth", + "BriefDescription": "Estimated combined memory bandwidth.", + "MetricExpr": "(umc_cas_cmd.all * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_cas_cmd_read_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for reads.", + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_rate", + "BriefDescription": "Memory controller CAS command rate.", + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_activate_cmd_rate", + "BriefDescription": "Memory controller ACTIVATE command rate.", + "MetricExpr": "d_ratio(umc_act_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_precharge_cmd_rate", + "BriefDescription": "Memory controller PRECHARGE command rate.", + "MetricExpr": "d_ratio(umc_pchg_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" } ] diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 3c091ab75305..3952d68ae748 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -286,6 +286,7 @@ class JsonEvent: 'imx8_ddr': 'imx8_ddr', 'L3PMC': 'amd_l3', 'DFPMC': 'amd_df', + 'UMCPMC': 'amd_umc', 'cpu_core': 'cpu_core', 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', @@ -354,6 +355,7 @@ class JsonEvent: ('SampleAfterValue', 'period='), ('UMask', 'umask='), ('NodeType', 'type='), + ('RdWrMask', 'rdwrmask='), ] for key, value in event_fields: if key in jd and jd[key] 
!= '0': -- Gitee From b8eed476cc81a310979015f3cd0e1e1614adf723 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Wed, 26 Jun 2024 13:14:04 +0530 Subject: [PATCH 11/12] perf/x86/amd/uncore: Avoid PMU registration if counters are unavailable ANBZ: #28936 commit f997e208b6c96858a2f6c0855debfbdb9b52f131 upstream X86_FEATURE_PERFCTR_NB and X86_FEATURE_PERFCTR_LLC are derived from CPUID leaf 0x80000001 ECX bits 24 and 28 respectively and denote the availability of DF and L3 counters. When these bits are not set, the corresponding PMUs have no counters and hence, should not be registered. Fixes: 07888daa056e ("perf/x86/amd/uncore: Move discovery and registration") Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240626074404.1044230-1-sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 8b966e2a167c..933bd4618d83 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -697,17 +697,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct attribute **df_attr = amd_uncore_df_format_attr; struct amd_uncore_pmu *pmu; + int num_counters; /* Run just once */ if (uncore->init_done) return amd_uncore_ctx_init(uncore, cpu); + num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + if (!num_counters) + goto done; + /* No grouping, single instance for a system */ uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); - if (!uncore->pmus) { - uncore->num_pmus = 0; + if (!uncore->pmus) goto done; - } /* * For Family 17h and above, the Northbridge counters are repurposed @@ -717,7 +720,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", sizeof(pmu->name)); - pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + pmu->num_counters = num_counters; pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; pmu->group = amd_uncore_ctx_gid(uncore, cpu); @@ -850,17 +853,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct attribute **l3_attr = amd_uncore_l3_format_attr; struct amd_uncore_pmu *pmu; + int num_counters; /* Run just once */ if (uncore->init_done) return amd_uncore_ctx_init(uncore, cpu); + num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + if (!num_counters) + goto done; + /* No grouping, single instance for a system */ uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); - if (!uncore->pmus) { - uncore->num_pmus = 0; + if (!uncore->pmus) goto done; - } /* * For Family 17h and above, L3 cache counters are available instead @@ -870,7 +876,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? 
"amd_l3" : "amd_l2", sizeof(pmu->name)); - pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + pmu->num_counters = num_counters; pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; pmu->group = amd_uncore_ctx_gid(uncore, cpu); -- Gitee From a09024f58e89efd19d91c3dfeb117198baa95cd3 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Wed, 26 Jun 2024 13:19:42 +0530 Subject: [PATCH 12/12] perf/x86/amd/uncore: Fix DF and UMC domain identification ANBZ: #28936 commit 57e11990f45f89bc29d0f84dd7b13a4e4263eeb2 upstream For uncore PMUs, a single context is shared across all CPUs in a domain. The domain can be a CCX, like in the case of the L3 PMU, or a socket, like in the case of DF and UMC PMUs. This information is available via the PMU's cpumask. For contexts shared across a socket, the domain is currently determined from topology_die_id() which is incorrect after the introduction of commit 63edbaa48a57 ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf") as it now returns a CCX identifier on Zen 4 and later systems which support CPUID leaf 0x80000026. Use topology_logical_package_id() instead as it always returns a socket identifier irrespective of the availability of CPUID leaf 0x80000026. Fixes: 63edbaa48a57 ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf") Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240626074942.1044818-1-sandipan.das@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: PvsNarasimha --- arch/x86/events/amd/uncore.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 933bd4618d83..e275c9df1a5e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -682,7 +682,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_NB; info.split.gid = 0; - info.split.cid = topology_die_id(cpu); + info.split.cid = topology_logical_package_id(cpu); if (pmu_version >= 2) { ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); @@ -976,8 +976,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx); info.split.aux_data = ecx; /* stash active mask */ info.split.num_pmcs = ebx.split.num_umc_pmc; - info.split.gid = topology_die_id(cpu); - info.split.cid = topology_die_id(cpu); + info.split.gid = topology_logical_package_id(cpu); + info.split.cid = topology_logical_package_id(cpu); *per_cpu_ptr(uncore->info, cpu) = info; } -- Gitee