From a434b8bd80f2bb83db4cd154c6b2f8fd04641f54 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:23 -0700 Subject: [PATCH 01/21] ftrace: Add modify_ftrace_direct_multi_nolock ANBZ: #7841 commit f96f644ab97abeed3f7007c953836a574ce928cc upstream. This is similar to modify_ftrace_direct_multi, but does not acquire direct_mutex. This is useful when direct_mutex is already locked by the user. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/bpf/20220720002126.803253-2-song@kernel.org Signed-off-by: Tianchen Ding --- include/linux/ftrace.h | 5 +++ kernel/trace/ftrace.c | 86 ++++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e501d7ba9468..de2d1679e7b6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -332,6 +332,7 @@ unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr); #else struct ftrace_ops; @@ -376,6 +377,10 @@ static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned lo { return -ENODEV; } +static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b941bcd844e3..48810ea9e135 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5602,22 +5602,8 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); -/** - * modify_ftrace_direct_multi - Modify an existing direct 'multi' call - * to call something else - * @ops: The address of the struct ftrace_ops object - * @addr: The address of the new trampoline to call at @ops functions - * - * This is used to unregister currently registered direct caller and - * register new one @addr on functions registered in @ops object. - * - * Note there's window between ftrace_shutdown and ftrace_startup calls - * where there will be no callbacks called. - * - * Returns: zero on success. Non zero on error, which includes: - * -EINVAL - The @ops object was not properly registered. - */ -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +static int +__modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash; struct ftrace_func_entry *entry, *iter; @@ -5628,12 +5614,7 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) int i, size; int err; - if (check_direct_multi(ops)) - return -EINVAL; - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return -EINVAL; - - mutex_lock(&direct_mutex); + lockdep_assert_held_once(&direct_mutex); /* Enable the tmp_ops to have the same functions as the direct ops */ ftrace_ops_init(&tmp_ops); @@ -5641,7 +5622,7 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) err = register_ftrace_function(&tmp_ops); if (err) - goto out_direct; + return err; /* * Now the ftrace_ops_list_func() is called to do the direct callers. @@ -5665,7 +5646,64 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) /* Removing the tmp_ops will add the updated direct callers to the functions */ unregister_ftrace_function(&tmp_ops); - out_direct: + return err; +} + +/** + * modify_ftrace_direct_multi_nolock - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Caller should already have direct_mutex locked, so we don't lock + * direct_mutex here. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + return __modify_ftrace_direct_multi(ops, addr); +} +EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); + +/** + * modify_ftrace_direct_multi - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + int err; + + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + mutex_lock(&direct_mutex); + err = __modify_ftrace_direct_multi(ops, addr); mutex_unlock(&direct_mutex); return err; } -- Gitee From ac5eeb81b0e1ada9ffccf07f1afbdba94a7c6015 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:24 -0700 Subject: [PATCH 02/21] ftrace: Allow IPMODIFY and DIRECT ops on the same function ANBZ: #7841 commit 53cd885bc5c3ea283cc9c00ca6446c778f00bfba upstream. IPMODIFY (livepatch) and DIRECT (bpf trampoline) ops are both important users of ftrace. It is necessary to allow them work on the same function at the same time. First, DIRECT ops no longer specify IPMODIFY flag. Instead, DIRECT flag is handled together with IPMODIFY flag in __ftrace_hash_update_ipmodify(). Then, a callback function, ops_func, is added to ftrace_ops. This is used by ftrace core code to understand whether the DIRECT ops can share with an IPMODIFY ops. To share with IPMODIFY ops, the DIRECT ops need to implement the callback function and adjust the direct trampoline accordingly. If DIRECT ops is attached before the IPMODIFY ops, ftrace core code calls ENABLE_SHARE_IPMODIFY_PEER on the DIRECT ops before registering the IPMODIFY ops. If IPMODIFY ops is attached before the DIRECT ops, ftrace core code calls ENABLE_SHARE_IPMODIFY_SELF in __ftrace_hash_update_ipmodify. Owner of the DIRECT ops may return 0 if the DIRECT trampoline can share with IPMODIFY, so error code otherwise. The error code is propagated to register_ftrace_direct_multi so that onwer of the DIRECT trampoline can handle it properly. For more details, please refer to comment before enum ftrace_ops_cmd. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/ Link: https://lore.kernel.org/all/20220718055449.3960512-1-song@kernel.org/ Link: https://lore.kernel.org/bpf/20220720002126.803253-3-song@kernel.org Signed-off-by: Tianchen Ding --- include/linux/ftrace.h | 38 +++++++ kernel/trace/ftrace.c | 242 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 254 insertions(+), 26 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index de2d1679e7b6..312c722c9791 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -207,6 +207,43 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), }; +/* + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes + * to a ftrace_ops. Note, the requests may fail. + * + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the DIRECT ops is being registered. + * This is called with both direct_mutex and + * ftrace_lock are locked. + * + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being registered. + * This is called with direct_mutex locked. + * + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being unregistered. + * This is called with direct_mutex locked. + */ +enum ftrace_ops_cmd { + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF, + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER, + FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER, +}; + +/* + * For most ftrace_ops_cmd, + * Returns: + * 0 - Success. + * Negative on failure. The return value is dependent on the + * callback. + */ +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd); + #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { @@ -246,6 +283,7 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; + ftrace_ops_func_t ops_func; #endif CK_KABI_RESERVE(1) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 48810ea9e135..51b82770d63f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1872,6 +1872,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ftrace_hash_rec_update_modify(ops, filter_hash, 1); } +static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip); + /* * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK * or no-needed to update, -EBUSY if it detects a conflict of the flag @@ -1880,6 +1882,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) * - If the hash is EMPTY_HASH, it hits nothing * - Anything else hits the recs which match the hash entries. + * + * DIRECT ops does not have IPMODIFY flag, but we still need to check it + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate + * the return value to the caller and eventually to the owner of the DIRECT + * ops. */ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_hash *old_hash, @@ -1888,17 +1897,26 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_page *pg; struct dyn_ftrace *rec, *end = NULL; int in_old, in_new; + bool is_ipmodify, is_direct; /* Only update if the ops has been registered */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY; + is_direct = ops->flags & FTRACE_OPS_FL_DIRECT; + + /* neither IPMODIFY nor DIRECT, skip */ + if (!is_ipmodify && !is_direct) + return 0; + + if (WARN_ON_ONCE(is_ipmodify && is_direct)) return 0; /* - * Since the IPMODIFY is a very address sensitive action, we do not - * allow ftrace_ops to set all functions to new hash. + * Since the IPMODIFY and DIRECT are very address sensitive + * actions, we do not allow ftrace_ops to set all functions to new + * hash. */ if (!new_hash || !old_hash) return -EINVAL; @@ -1916,12 +1934,32 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, continue; if (in_new) { - /* New entries must ensure no others are using it */ - if (rec->flags & FTRACE_FL_IPMODIFY) - goto rollback; - rec->flags |= FTRACE_FL_IPMODIFY; - } else /* Removed entry */ + if (rec->flags & FTRACE_FL_IPMODIFY) { + int ret; + + /* Cannot have two ipmodify on same rec */ + if (is_ipmodify) + goto rollback; + + FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT); + + /* + * Another ops with IPMODIFY is already + * attached. We are now attaching a direct + * ops. Run SHARE_IPMODIFY_SELF, to check + * whether sharing is supported. + */ + if (!ops->ops_func) + return -EBUSY; + ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF); + if (ret) + return ret; + } else if (is_ipmodify) { + rec->flags |= FTRACE_FL_IPMODIFY; + } + } else if (is_ipmodify) { rec->flags &= ~FTRACE_FL_IPMODIFY; + } } while_for_each_ftrace_rec(); return 0; @@ -2465,8 +2503,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops direct_ops = { .func = call_direct_funcs, - .flags = FTRACE_OPS_FL_IPMODIFY - | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS + .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT, /* * By declaring the main trampoline as this trampoline @@ -3125,14 +3162,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) } /* - * Check if the current ops references the record. + * Check if the current ops references the given ip. * * If the ops traces all functions, then it was already accounted for. * If the ops does not trace the current record function, skip it. * If the ops ignores the function via notrace filter, skip it. */ -static inline bool -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +static bool +ops_references_ip(struct ftrace_ops *ops, unsigned long ip) { /* If ops isn't enabled, ignore it */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) @@ -3144,16 +3181,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) + !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) return false; /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) return false; return true; } +/* + * Check if the current ops references the record. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static bool +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + return ops_references_ip(ops, rec->ip); +} + static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; @@ -5123,6 +5173,8 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) return direct; } +static int register_ftrace_function_nolock(struct ftrace_ops *ops); + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5194,7 +5246,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { - ret = register_ftrace_function(&direct_ops); + ret = register_ftrace_function_nolock(&direct_ops); if (ret) ftrace_set_filter_ip(&direct_ops, ip, 1, 0); } @@ -5456,8 +5508,7 @@ int modify_ftrace_direct(unsigned long ip, } EXPORT_SYMBOL_GPL(modify_ftrace_direct); -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \ - FTRACE_OPS_FL_SAVE_REGS) +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) static int check_direct_multi(struct ftrace_ops *ops) { @@ -5550,7 +5601,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; - err = register_ftrace_function(ops); + err = register_ftrace_function_nolock(ops); out_remove: if (err) @@ -5620,7 +5671,7 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) ftrace_ops_init(&tmp_ops); tmp_ops.func_hash = ops->func_hash; - err = register_ftrace_function(&tmp_ops); + err = register_ftrace_function_nolock(&tmp_ops); if (err) return err; @@ -7863,6 +7914,143 @@ int ftrace_is_dead(void) return ftrace_disabled; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure + * it doesn't conflict with any direct ftrace_ops. If there is existing + * direct ftrace_ops on a kernel function being patched, call + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing. + * + * @ops: ftrace_ops being registered. + * + * Returns: + * 0 on success; + * Negative on failure. + */ +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i, ret; + + lockdep_assert_held_once(&direct_mutex); + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return 0; + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + if (found_op) { + if (!op->ops_func) + return -EBUSY; + + ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER); + if (ret) + return ret; + } + } + } + + return 0; +} + +/* + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT + * ops. + */ +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i; + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return; + + mutex_lock(&direct_mutex); + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + /* The cleanup is optional, ignore any errors */ + if (found_op && op->ops_func) + op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER); + } + } + mutex_unlock(&direct_mutex); +} + +#define lock_direct_mutex() mutex_lock(&direct_mutex) +#define unlock_direct_mutex() mutex_unlock(&direct_mutex) + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + return 0; +} + +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ +} + +#define lock_direct_mutex() do { } while (0) +#define unlock_direct_mutex() do { } while (0) + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +/* + * Similar to register_ftrace_function, except we don't lock direct_mutex. + */ +static int register_ftrace_function_nolock(struct ftrace_ops *ops) +{ + int ret; + + ftrace_ops_init(ops); + + mutex_lock(&ftrace_lock); + + ret = ftrace_startup(ops, 0); + + mutex_unlock(&ftrace_lock); + + return ret; +} + /** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. @@ -7878,14 +8066,15 @@ int register_ftrace_function(struct ftrace_ops *ops) { int ret = -1; - ftrace_ops_init(ops); - - mutex_lock(&ftrace_lock); - - ret = ftrace_startup(ops, 0); + lock_direct_mutex(); + ret = prepare_direct_functions_for_ipmodify(ops); + if (ret < 0) + goto out_unlock; - mutex_unlock(&ftrace_lock); + ret = register_ftrace_function_nolock(ops); +out_unlock: + unlock_direct_mutex(); return ret; } EXPORT_SYMBOL_GPL(register_ftrace_function); @@ -7904,6 +8093,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) ret = ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); + cleanup_direct_functions_after_ipmodify(ops); return ret; } EXPORT_SYMBOL_GPL(unregister_ftrace_function); -- Gitee From e6d09f5bf750215714db4384132ed87940e4b259 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Jul 2022 17:21:25 -0700 Subject: [PATCH 03/21] bpf, x64: Allow to use caller address from stack ANBZ: #7841 commit 316cba62dfb7878b7353177e6a7da9cc0c979cde upstream. Currently we call the original function by using the absolute address given at the JIT generation. That's not usable when having trampoline attached to multiple functions, or the target address changes dynamically (in case of live patch). In such cases we need to take the return address from the stack. Adding support to retrieve the original function address from the stack by adding new BPF_TRAMP_F_ORIG_STACK flag for arch_prepare_bpf_trampoline function. Basically we take the return address of the 'fentry' call: function + 0: call fentry # stores 'function + 5' address on stack function + 5: ... The 'function + 5' address will be used as the address for the original function to call. Signed-off-by: Jiri Olsa Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220720002126.803253-4-song@kernel.org Signed-off-by: Tianchen Ding --- arch/x86/net/bpf_jit_comp.c | 13 +++++++++---- include/linux/bpf.h | 5 +++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1b820307dfd0..e081faaa746d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1815,10 +1815,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) { restore_regs(m, &prog, nr_args, stack_size); - /* call original function */ - if (emit_call(&prog, orig_call, prog)) { - ret = -EINVAL; - goto cleanup; + if (flags & BPF_TRAMP_F_ORIG_STACK) { + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); + EMIT2(0xff, 0xd0); /* call *rax */ + } else { + /* call original function */ + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e23873a07bc..3167c8896458 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -630,6 +630,11 @@ struct btf_func_model { /* Return the return value of fentry prog. Only used by bpf_struct_ops. */ #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) +/* Get original function from stack instead of from provided direct address. + * Makes sense for trampolines with fexit or fmod_ret programs. + */ +#define BPF_TRAMP_F_ORIG_STACK BIT(5) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ -- Gitee From 325754caa789a02c8c21829eb6417598a8ab0ce3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:26 -0700 Subject: [PATCH 04/21] bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch) ANBZ: #7841 commit 00963a2e75a872e5fce4d0115ac2786ec86b57a6 upstream. When tracing a function with IPMODIFY ftrace_ops (livepatch), the bpf trampoline must follow the instruction pointer saved on stack. This needs extra handling for bpf trampolines with BPF_TRAMP_F_CALL_ORIG flag. Implement bpf_tramp_ftrace_ops_func and use it for the ftrace_ops used by BPF trampoline. This enables tracing functions with livepatch. This also requires moving bpf trampoline to *_ftrace_direct_mult APIs. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/ Link: https://lore.kernel.org/bpf/20220720002126.803253-5-song@kernel.org [dtcccc: keep tprogs in bpf_trampoline_update() because we've not picked f7e0beaf39d3 ("bpf, x86: Generate trampolines from bpf_tramp_links").] Signed-off-by: Tianchen Ding --- include/linux/bpf.h | 10 ++- kernel/bpf/trampoline.c | 157 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 149 insertions(+), 18 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3167c8896458..43b997d2ed26 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,7 @@ struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; struct bpf_func_state; +struct ftrace_ops; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -635,6 +636,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_ORIG_STACK BIT(5) +/* This trampoline is on a function with another ftrace_ops with IPMODIFY, + * e.g., a live patch. This flag is set and cleared by ftrace call backs, + */ +#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ @@ -736,8 +742,8 @@ struct bpf_trampoline { struct bpf_tramp_image *cur_image; u64 selector; - CK_KABI_RESERVE(1) - CK_KABI_RESERVE(2) + CK_KABI_USE(1, struct ftrace_ops *fops) + CK_KABI_USE_SPLIT(2, u32 flags) }; struct bpf_attach_target_info { diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 1c08d86a7df2..4fb050c74a0f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -9,6 +9,7 @@ #include #include #include +#include /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -25,6 +26,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; /* serializes access to trampoline_table */ static DEFINE_MUTEX(trampoline_mutex); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex); + +static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd) +{ + struct bpf_trampoline *tr = ops->private; + int ret = 0; + + if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) { + /* This is called inside register_ftrace_direct_multi(), so + * tr->mutex is already locked. + */ + lockdep_assert_held_once(&tr->mutex); + + /* Instead of updating the trampoline here, we propagate + * -EAGAIN to register_ftrace_direct_multi(). Then we can + * retry register_ftrace_direct_multi() after updating the + * trampoline. + */ + if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && + !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) { + if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY)) + return -EBUSY; + + tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY; + return -EAGAIN; + } + + return 0; + } + + /* The normal locking order is + * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c) + * + * The following two commands are called from + * + * prepare_direct_functions_for_ipmodify + * cleanup_direct_functions_after_ipmodify + * + * In both cases, direct_mutex is already locked. Use + * mutex_trylock(&tr->mutex) to avoid deadlock in race condition + * (something else is making changes to this same trampoline). + */ + if (!mutex_trylock(&tr->mutex)) { + /* sleep 1 ms to make sure whatever holding tr->mutex makes + * some progress. + */ + msleep(1); + return -EAGAIN; + } + + switch (cmd) { + case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER: + tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY; + + if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && + !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) + ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */); + break; + case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER: + tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY; + + if (tr->flags & BPF_TRAMP_F_ORIG_STACK) + ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */); + break; + default: + ret = -EINVAL; + break; + }; + + mutex_unlock(&tr->mutex); + return ret; +} +#endif + void *bpf_jit_alloc_exec_page(void) { void *image; @@ -74,6 +150,16 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) tr = kzalloc(sizeof(*tr), GFP_KERNEL); if (!tr) goto out; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL); + if (!tr->fops) { + kfree(tr); + tr = NULL; + goto out; + } + tr->fops->private = tr; + tr->fops->ops_func = bpf_tramp_ftrace_ops_func; +#endif tr->key = key; INIT_HLIST_NODE(&tr->hlist); @@ -93,21 +179,26 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) int ret; if (tr->func.ftrace_managed) - ret = unregister_ftrace_direct((long)ip, (long)old_addr); + ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr); else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); return ret; } -static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr) +static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr, + bool lock_direct_mutex) { void *ip = tr->func.addr; int ret; - if (tr->func.ftrace_managed) - ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr); - else + if (tr->func.ftrace_managed) { + if (lock_direct_mutex) + ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr); + else + ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr); + } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr); + } return ret; } @@ -122,10 +213,13 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) if (faddr) tr->func.ftrace_managed = true; - if (tr->func.ftrace_managed) - ret = register_ftrace_direct((long)ip, (long)new_addr); - else + if (tr->func.ftrace_managed) { + ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0); + ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); + } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); + } + return ret; } @@ -286,11 +380,11 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) return ERR_PTR(err); } -static int bpf_trampoline_update(struct bpf_trampoline *tr) +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex) { struct bpf_tramp_image *im; struct bpf_tramp_progs *tprogs; - u32 flags = BPF_TRAMP_F_RESTORE_REGS; + u32 orig_flags = tr->flags; int err, total; tprogs = bpf_trampoline_get_progs(tr, &total); @@ -311,15 +405,28 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } + /* clear all bits except SHARE_IPMODIFY */ + tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY; + if (tprogs[BPF_TRAMP_FEXIT].nr_progs || - tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) + tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) { /* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME * should not be set together. */ - flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + } else { + tr->flags |= BPF_TRAMP_F_RESTORE_REGS; + } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +again: + if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) && + (tr->flags & BPF_TRAMP_F_CALL_ORIG)) + tr->flags |= BPF_TRAMP_F_ORIG_STACK; +#endif err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, - &tr->func.model, flags, tprogs, + &tr->func.model, tr->flags, tprogs, tr->func.addr); if (err < 0) goto out; @@ -328,17 +435,34 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) WARN_ON(!tr->cur_image && tr->selector); if (tr->cur_image) /* progs already running at this address */ - err = modify_fentry(tr, tr->cur_image->image, im->image); + err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex); else /* first time registering */ err = register_fentry(tr, im->image); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + if (err == -EAGAIN) { + /* -EAGAIN from bpf_tramp_ftrace_ops_func. Now + * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the + * trampoline again, and retry register. + */ + /* reset fops->func and fops->trampoline for re-register */ + tr->fops->func = NULL; + tr->fops->trampoline = 0; + goto again; + } +#endif if (err) goto out; + if (tr->cur_image) bpf_tramp_image_put(tr->cur_image); tr->cur_image = im; tr->selector++; out: + /* If any error happens, restore previous flags */ + if (err) + tr->flags = orig_flags; kfree(tprogs); return err; } @@ -406,7 +530,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) } hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; - err = bpf_trampoline_update(tr); + err = bpf_trampoline_update(tr, true /* lock_direct_mutex */); if (err) { hlist_del_init(&prog->aux->tramp_hlist); tr->progs_cnt[kind]--; @@ -433,7 +557,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) } hlist_del_init(&prog->aux->tramp_hlist); tr->progs_cnt[kind]--; - err = bpf_trampoline_update(tr); + err = bpf_trampoline_update(tr, true /* lock_direct_mutex */); out: mutex_unlock(&tr->mutex); return err; @@ -481,6 +605,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) * multiple rcu callbacks. */ hlist_del(&tr->hlist); + kfree(tr->fops); kfree(tr); out: mutex_unlock(&trampoline_mutex); -- Gitee From 6258dae3d870e1c7419766f2df153f06e5d0a61e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 29 Jul 2022 12:41:06 -0700 Subject: [PATCH 05/21] bpf: Fix test_progs -j error with fentry/fexit tests ANBZ: #7841 commit dc81f8d1e8ea3f5dfa88919cb834a135a6a536b8 upstream. When multiple threads are attaching/detaching fentry/fexit programs to the same trampoline, we may call register_fentry on the same trampoline twice: register_fentry(), unregister_fentry(), then register_fentry again. This causes ftrace_set_filter_ip() for the same ip on tr->fops twice, which leaves duplicated ip in tr->fops. The extra ip is not cleaned up properly on unregister and thus causes failures with further register in register_ftrace_direct_multi(): register_ftrace_direct_multi() { ... for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { if (ftrace_find_rec_direct(entry->ip)) goto out_unlock; } } ... } This can be triggered with parallel fentry/fexit tests with test_progs: ./test_progs -t fentry,fexit -j Fix this by resetting tr->fops in ftrace_set_filter_ip(), so that there will never be duplicated entries in tr->fops. Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Reported-by: Andrii Nakryiko Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220729194106.1207472-1-song@kernel.org Signed-off-by: Tianchen Ding --- kernel/bpf/trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 4fb050c74a0f..ce71af0b03a4 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -214,7 +214,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) tr->func.ftrace_managed = true; if (tr->func.ftrace_managed) { - ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0); + ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); -- Gitee From 9478130f3b7a0798c1ac202b05dddbfc8ef2c909 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Thu, 28 Jul 2022 07:40:48 -0400 Subject: [PATCH 06/21] bpf: Fix NULL pointer dereference when registering bpf trampoline ANBZ: #7841 commit 3b317abc71598bda8ff9a9c483ad8ae167b18382 upstream. A panic was reported on arm64: [ 44.517109] audit: type=1334 audit(1658859870.268:59): prog-id=19 op=LOAD [ 44.622031] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 [ 44.624321] Mem abort info: [ 44.625049] ESR = 0x0000000096000004 [ 44.625935] EC = 0x25: DABT (current EL), IL = 32 bits [ 44.627182] SET = 0, FnV = 0 [ 44.627930] EA = 0, S1PTW = 0 [ 44.628684] FSC = 0x04: level 0 translation fault [ 44.629788] Data abort info: [ 44.630474] ISV = 0, ISS = 0x00000004 [ 44.631362] CM = 0, WnR = 0 [ 44.632041] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000100ab5000 [ 44.633494] [0000000000000010] pgd=0000000000000000, p4d=0000000000000000 [ 44.635202] Internal error: Oops: 96000004 [#1] SMP [ 44.636452] Modules linked in: xfs crct10dif_ce ghash_ce virtio_blk virtio_console virtio_mmio qemu_fw_cfg [ 44.638713] CPU: 2 PID: 1 Comm: systemd Not tainted 5.19.0-rc7 #1 [ 44.640164] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 44.641799] pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 44.643404] pc : ftrace_set_filter_ip+0x24/0xa0 [ 44.644659] lr : bpf_trampoline_update.constprop.0+0x428/0x4a0 [ 44.646118] sp : ffff80000803b9f0 [ 44.646950] x29: ffff80000803b9f0 x28: ffff0b5d80364400 x27: ffff80000803bb48 [ 44.648721] x26: ffff8000085ad000 x25: ffff0b5d809d2400 x24: 0000000000000000 [ 44.650493] x23: 00000000ffffffed x22: ffff0b5dd7ea0900 x21: 0000000000000000 [ 44.652279] x20: 0000000000000000 x19: 0000000000000000 x18: ffffffffffffffff [ 44.654067] x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff [ 44.655787] x14: ffff0b5d809d2498 x13: ffff0b5d809d2432 x12: 0000000005f5e100 [ 44.657535] x11: abcc77118461cefd x10: 000000000000005f x9 : ffffa7219cb5b190 [ 44.659254] x8 : ffffa7219c8e0000 x7 : 0000000000000000 x6 : ffffa7219db075e0 [ 44.661066] x5 : ffffa7219d3130e0 x4 : ffffa7219cab9da0 x3 : 0000000000000000 [ 44.662837] x2 : 0000000000000000 x1 : ffffa7219cb7a5c0 x0 : 0000000000000000 [ 44.664675] Call trace: [ 44.665274] ftrace_set_filter_ip+0x24/0xa0 [ 44.666327] bpf_trampoline_update.constprop.0+0x428/0x4a0 [ 44.667696] __bpf_trampoline_link_prog+0xcc/0x1c0 [ 44.668834] bpf_trampoline_link_prog+0x40/0x64 [ 44.669919] bpf_tracing_prog_attach+0x120/0x490 [ 44.671011] link_create+0xe0/0x2b0 [ 44.671869] __sys_bpf+0x484/0xd30 [ 44.672706] __arm64_sys_bpf+0x30/0x40 [ 44.673678] invoke_syscall+0x78/0x100 [ 44.674623] el0_svc_common.constprop.0+0x4c/0xf4 [ 44.675783] do_el0_svc+0x38/0x4c [ 44.676624] el0_svc+0x34/0x100 [ 44.677429] el0t_64_sync_handler+0x11c/0x150 [ 44.678532] el0t_64_sync+0x190/0x194 [ 44.679439] Code: 2a0203f4 f90013f5 2a0303f5 f9001fe1 (f9400800) [ 44.680959] ---[ end trace 0000000000000000 ]--- [ 44.682111] Kernel panic - not syncing: Oops: Fatal exception [ 44.683488] SMP: stopping secondary CPUs [ 44.684551] Kernel Offset: 0x2721948e0000 from 0xffff800008000000 [ 44.686095] PHYS_OFFSET: 0xfffff4a380000000 [ 44.687144] CPU features: 0x010,00022811,19001080 [ 44.688308] Memory Limit: none [ 44.689082] ---[ end Kernel panic - not syncing: Oops: Fatal exception ]--- It's caused by a NULL tr->fops passed to ftrace_set_filter_ip(). tr->fops is initialized to NULL and is assigned to an allocated memory address if CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS is enabled. Since there is no direct call on arm64 yet, the config can't be enabled. To fix it, call ftrace_set_filter_ip() only if tr->fops is not NULL. Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Reported-by: Bruno Goncalves Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Tested-by: Bruno Goncalves Acked-by: Song Liu Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220728114048.3540461-1-xukuohai@huaweicloud.com Signed-off-by: Tianchen Ding --- kernel/bpf/trampoline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ce71af0b03a4..01435c569517 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -210,8 +210,11 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) int ret; faddr = ftrace_location((unsigned long)ip); - if (faddr) + if (faddr) { + if (!tr->fops) + return -ENOTSUPP; tr->func.ftrace_managed = true; + } if (tr->func.ftrace_managed) { ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); -- Gitee From cdbf773b118ea6bd2c890481af28d4e10feaccd1 Mon Sep 17 00:00:00 2001 From: Wang Jingjin Date: Mon, 1 Aug 2022 16:47:45 +0800 Subject: [PATCH 07/21] ftrace: Fix build warning for ops_references_rec() not used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #7841 commit 123d6455771ec577ce65f8d1bda548fb0eb7ef21 upstream. The change that made IPMODIFY and DIRECT ops work together needed access to the ops_references_ip() function, which it pulled out of the module only code. But now if both CONFIG_MODULES and CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS is not set, we get the below warning: ‘ops_references_rec’ defined but not used. Since ops_references_rec() only calls ops_references_ip() replace the usage of ops_references_rec() with ops_references_ip() and encompass the function with an #ifdef of DIRECT_CALLS || MODULES being defined. Link: https://lkml.kernel.org/r/20220801084745.1187987-1-wangjingjin1@huawei.com Fixes: 53cd885bc5c3 ("ftrace: Allow IPMODIFY and DIRECT ops on the same function") Signed-off-by: Wang Jingjin Signed-off-by: Steven Rostedt (Google) Signed-off-by: Tianchen Ding --- kernel/trace/ftrace.c | 79 ++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 51b82770d63f..72da9e4e61e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1872,8 +1872,6 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ftrace_hash_rec_update_modify(ops, filter_hash, 1); } -static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip); - /* * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK * or no-needed to update, -EBUSY if it detects a conflict of the flag @@ -3161,49 +3159,6 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) ftrace_hash_empty(ops->func_hash->notrace_hash); } -/* - * Check if the current ops references the given ip. - * - * If the ops traces all functions, then it was already accounted for. - * If the ops does not trace the current record function, skip it. - * If the ops ignores the function via notrace filter, skip it. - */ -static bool -ops_references_ip(struct ftrace_ops *ops, unsigned long ip) -{ - /* If ops isn't enabled, ignore it */ - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return false; - - /* If ops traces all then it includes this function */ - if (ops_traces_mod(ops)) - return true; - - /* The function must be in the filter */ - if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) - return false; - - /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) - return false; - - return true; -} - -/* - * Check if the current ops references the record. - * - * If the ops traces all functions, then it was already accounted for. - * If the ops does not trace the current record function, skip it. - * If the ops ignores the function via notrace filter, skip it. - */ -static bool -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) -{ - return ops_references_ip(ops, rec->ip); -} - static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; @@ -6690,6 +6645,38 @@ static int ftrace_get_trampoline_kallsym(unsigned int symnum, return -ERANGE; } +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES) +/* + * Check if the current ops references the given ip. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static bool +ops_references_ip(struct ftrace_ops *ops, unsigned long ip) +{ + /* If ops isn't enabled, ignore it */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return false; + + /* If ops traces all then it includes this function */ + if (ops_traces_mod(ops)) + return true; + + /* The function must be in the filter */ + if (!ftrace_hash_empty(ops->func_hash->filter_hash) && + !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) + return false; + + /* If in notrace hash, we ignore it too */ + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) + return false; + + return true; +} +#endif + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) @@ -6702,7 +6689,7 @@ static int referenced_filters(struct dyn_ftrace *rec) int cnt = 0; for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) { + if (ops_references_ip(ops, rec->ip)) { if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT)) continue; if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY)) -- Gitee From 124dbc600aa3a99569f1967f4ab63efe76d18c8f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 2 Aug 2022 15:56:51 +0200 Subject: [PATCH 08/21] bpf: Cleanup ftrace hash in bpf_trampoline_put ANBZ: #7841 commit 62d468e5e10012e8b67d066ba7bac0a8afdc3cee upstream. We need to release possible hash from trampoline fops object before removing it, otherwise we leak it. Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220802135651.1794015-1-jolsa@kernel.org Signed-off-by: Tianchen Ding --- kernel/bpf/trampoline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 01435c569517..41299a75c5a6 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -608,7 +608,10 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) * multiple rcu callbacks. */ hlist_del(&tr->hlist); - kfree(tr->fops); + if (tr->fops) { + ftrace_free_filter(tr->fops); + kfree(tr->fops); + } kfree(tr); out: mutex_unlock(&trampoline_mutex); -- Gitee From 0a64994e423faab8127552b39034c8205ac74618 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Sep 2022 13:10:47 +0200 Subject: [PATCH 09/21] arch: Introduce CONFIG_FUNCTION_ALIGNMENT ANBZ: #7841 commit d49a0626216b95cd4bf696f6acf55f39a16ab0bb upstream. Generic function-alignment infrastructure. Architectures can select FUNCTION_ALIGNMENT_xxB symbols; the FUNCTION_ALIGNMENT symbol is then set to the largest such selected size, 0 otherwise. From this the -falign-functions compiler argument and __ALIGN macro are set. This incorporates the DEBUG_FORCE_FUNCTION_ALIGN_64B knob and future alignment requirements for x86_64 (later in this series) into a single place. NOTE: also removes the 0x90 filler byte from the generic __ALIGN primitive, that value makes no sense outside of x86. NOTE: .balign 0 reverts to a no-op. Requested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220915111143.719248727@infradead.org Signed-off-by: Tianchen Ding --- Makefile | 4 ++-- arch/Kconfig | 24 ++++++++++++++++++++++++ arch/ia64/Kconfig | 1 + arch/ia64/Makefile | 2 +- arch/x86/Kconfig | 2 ++ arch/x86/boot/compressed/head_64.S | 8 ++++++++ arch/x86/include/asm/linkage.h | 4 +--- include/asm-generic/vmlinux.lds.h | 4 ++-- include/linux/linkage.h | 4 ++-- lib/Kconfig.debug | 1 + 10 files changed, 44 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 4abf83cea70f..f9b8c3ffb629 100644 --- a/Makefile +++ b/Makefile @@ -913,8 +913,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif -ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B -KBUILD_CFLAGS += -falign-functions=64 +ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) +KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT) endif # arch Makefile may override CC so keep this after arch Makefile is included diff --git a/arch/Kconfig b/arch/Kconfig index d8edced60679..d3db926edabb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1100,4 +1100,28 @@ source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" +config FUNCTION_ALIGNMENT_4B + bool + +config FUNCTION_ALIGNMENT_8B + bool + +config FUNCTION_ALIGNMENT_16B + bool + +config FUNCTION_ALIGNMENT_32B + bool + +config FUNCTION_ALIGNMENT_64B + bool + +config FUNCTION_ALIGNMENT + int + default 64 if FUNCTION_ALIGNMENT_64B + default 32 if FUNCTION_ALIGNMENT_32B + default 16 if FUNCTION_ALIGNMENT_16B + default 8 if FUNCTION_ALIGNMENT_8B + default 4 if FUNCTION_ALIGNMENT_4B + default 0 + endmenu diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 39b25a5a591b..570c80f7adeb 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -57,6 +57,7 @@ config IA64 select NUMA if !FLATMEM select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select SET_FS + select FUNCTION_ALIGNMENT_32B default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 703b1c4f6d12..102ae015970c 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -24,7 +24,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp EXTRA := cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ - -falign-functions=32 -frename-registers -fno-optimize-sibling-calls + -frename-registers -fno-optimize-sibling-calls KBUILD_CFLAGS_KERNEL := -mconstant-gp GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8a111e930430..76af0a89e387 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -249,6 +249,8 @@ config X86 select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX + select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 + select FUNCTION_ALIGNMENT_4B imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI config INSTRUCTION_DECODER diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4abe5aedad64..6b1bff3d14cb 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -37,6 +37,14 @@ #include #include "pgtable.h" +/* + * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result + * in assembly errors due to trying to move .org backward due to the excessive + * alignment. + */ +#undef __ALIGN +#define __ALIGN .balign 16, 0x90 + /* * Locally defined symbols should be marked hidden: */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5000cf59bdf5..3d7293d52950 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -13,10 +13,8 @@ #ifdef __ASSEMBLY__ -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) -#define __ALIGN .p2align 4, 0x90 +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) -#endif #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a774361f28d4..9b16a007340d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -81,8 +81,8 @@ #define RO_EXCEPTION_TABLE #endif -/* Align . to a 8 byte boundary equals to maximum function alignment. */ -#define ALIGN_FUNCTION() . = ALIGN(8) +/* Align . function alignment. */ +#define ALIGN_FUNCTION() . = ALIGN(CONFIG_FUNCTION_ALIGNMENT) /* * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which diff --git a/include/linux/linkage.h b/include/linux/linkage.h index e574a84d8b11..1e2f978683de 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -69,8 +69,8 @@ #endif #ifndef __ALIGN -#define __ALIGN .align 4,0x90 -#define __ALIGN_STR ".align 4,0x90" +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT +#define __ALIGN_STR __stringify(__ALIGN) #endif #ifdef __ASSEMBLY__ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c270de524117..536c0c5c7bdf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -365,6 +365,7 @@ config SECTION_MISMATCH_WARN_ONLY config DEBUG_FORCE_FUNCTION_ALIGN_64B bool "Force all function address 64B aligned" if EXPERT + select FUNCTION_ALIGNMENT_64B help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance -- Gitee From 636e1897cbfff75668cddb515d8e7fe86208b1a8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 26 Sep 2022 17:41:46 -0700 Subject: [PATCH 10/21] ftrace: Fix recursive locking direct_mutex in ftrace_modify_direct_caller ANBZ: #7841 commit 9d2ce78ddcee159eb6a97449e9c68b6d60b9cec4 upstream. Naveen reported recursive locking of direct_mutex with sample ftrace-direct-modify.ko: [ 74.762406] WARNING: possible recursive locking detected [ 74.762887] 6.0.0-rc6+ #33 Not tainted [ 74.763216] -------------------------------------------- [ 74.763672] event-sample-fn/1084 is trying to acquire lock: [ 74.764152] ffffffff86c9d6b0 (direct_mutex){+.+.}-{3:3}, at: \ register_ftrace_function+0x1f/0x180 [ 74.764922] [ 74.764922] but task is already holding lock: [ 74.765421] ffffffff86c9d6b0 (direct_mutex){+.+.}-{3:3}, at: \ modify_ftrace_direct+0x34/0x1f0 [ 74.766142] [ 74.766142] other info that might help us debug this: [ 74.766701] Possible unsafe locking scenario: [ 74.766701] [ 74.767216] CPU0 [ 74.767437] ---- [ 74.767656] lock(direct_mutex); [ 74.767952] lock(direct_mutex); [ 74.768245] [ 74.768245] *** DEADLOCK *** [ 74.768245] [ 74.768750] May be due to missing lock nesting notation [ 74.768750] [ 74.769332] 1 lock held by event-sample-fn/1084: [ 74.769731] #0: ffffffff86c9d6b0 (direct_mutex){+.+.}-{3:3}, at: \ modify_ftrace_direct+0x34/0x1f0 [ 74.770496] [ 74.770496] stack backtrace: [ 74.770884] CPU: 4 PID: 1084 Comm: event-sample-fn Not tainted ... [ 74.771498] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 74.772474] Call Trace: [ 74.772696] [ 74.772896] dump_stack_lvl+0x44/0x5b [ 74.773223] __lock_acquire.cold.74+0xac/0x2b7 [ 74.773616] lock_acquire+0xd2/0x310 [ 74.773936] ? register_ftrace_function+0x1f/0x180 [ 74.774357] ? lock_is_held_type+0xd8/0x130 [ 74.774744] ? my_tramp2+0x11/0x11 [ftrace_direct_modify] [ 74.775213] __mutex_lock+0x99/0x1010 [ 74.775536] ? register_ftrace_function+0x1f/0x180 [ 74.775954] ? slab_free_freelist_hook.isra.43+0x115/0x160 [ 74.776424] ? ftrace_set_hash+0x195/0x220 [ 74.776779] ? register_ftrace_function+0x1f/0x180 [ 74.777194] ? kfree+0x3e1/0x440 [ 74.777482] ? my_tramp2+0x11/0x11 [ftrace_direct_modify] [ 74.777941] ? __schedule+0xb40/0xb40 [ 74.778258] ? register_ftrace_function+0x1f/0x180 [ 74.778672] ? my_tramp1+0xf/0xf [ftrace_direct_modify] [ 74.779128] register_ftrace_function+0x1f/0x180 [ 74.779527] ? ftrace_set_filter_ip+0x33/0x70 [ 74.779910] ? __schedule+0xb40/0xb40 [ 74.780231] ? my_tramp1+0xf/0xf [ftrace_direct_modify] [ 74.780678] ? my_tramp2+0x11/0x11 [ftrace_direct_modify] [ 74.781147] ftrace_modify_direct_caller+0x5b/0x90 [ 74.781563] ? 0xffffffffa0201000 [ 74.781859] ? my_tramp1+0xf/0xf [ftrace_direct_modify] [ 74.782309] modify_ftrace_direct+0x1b2/0x1f0 [ 74.782690] ? __schedule+0xb40/0xb40 [ 74.783014] ? simple_thread+0x2a/0xb0 [ftrace_direct_modify] [ 74.783508] ? __schedule+0xb40/0xb40 [ 74.783832] ? my_tramp2+0x11/0x11 [ftrace_direct_modify] [ 74.784294] simple_thread+0x76/0xb0 [ftrace_direct_modify] [ 74.784766] kthread+0xf5/0x120 [ 74.785052] ? kthread_complete_and_exit+0x20/0x20 [ 74.785464] ret_from_fork+0x22/0x30 [ 74.785781] Fix this by using register_ftrace_function_nolock in ftrace_modify_direct_caller. Link: https://lkml.kernel.org/r/20220927004146.1215303-1-song@kernel.org Fixes: 53cd885bc5c3 ("ftrace: Allow IPMODIFY and DIRECT ops on the same function") Reported-and-tested-by: Naveen N. Rao Signed-off-by: Song Liu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Tianchen Ding --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 72da9e4e61e0..751e9b5167d5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5325,6 +5325,8 @@ static struct ftrace_ops stub_ops = { * it is safe to modify the ftrace record, where it should be * currently calling @old_addr directly, to call @new_addr. * + * This is called with direct_mutex locked. + * * Safety checks should be made to make sure that the code at * @rec->ip is currently calling @old_addr. And this must * also update entry->direct to @new_addr. @@ -5337,6 +5339,8 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry, unsigned long ip = rec->ip; int ret; + lockdep_assert_held(&direct_mutex); + /* * The ftrace_lock was used to determine if the record * had more than one registered user to it. If it did, @@ -5359,7 +5363,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry, if (ret) goto out_lock; - ret = register_ftrace_function(&stub_ops); + ret = register_ftrace_function_nolock(&stub_ops); if (ret) { ftrace_set_filter_ip(&stub_ops, ip, 1, 0); goto out_lock; -- Gitee From 3c9aca2c85cf57e0c9b30f693f4c5da92187b2a1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 29 Sep 2022 14:45:25 +0100 Subject: [PATCH 11/21] arm64: ftrace: fix module PLTs with mcount ANBZ: #7841 commit 8cfb08575c6d4585f1ce0deeb189e5c824776b04 upstream. Li Huafei reports that mcount-based ftrace with module PLTs was broken by commit: a6253579977e4c6f ("arm64: ftrace: consistently handle PLTs.") When a module PLTs are used and a module is loaded sufficiently far away from the kernel, we'll create PLTs for any branches which are out-of-range. These are separate from the special ftrace trampoline PLTs, which the module PLT code doesn't directly manipulate. When mcount is in use this is a problem, as each mcount callsite in a module will be initialized to point to a module PLT, but since commit a6253579977e4c6f ftrace_make_nop() will assume that the callsite has been initialized to point to the special ftrace trampoline PLT, and ftrace_find_callable_addr() rejects other cases. This means that when ftrace tries to initialize a callsite via ftrace_make_nop(), the call to ftrace_find_callable_addr() will find that the `_mcount` stub is out-of-range and is not handled by the ftrace PLT, resulting in a splat: | ftrace_test: loading out-of-tree module taints kernel. | ftrace: no module PLT for _mcount | ------------[ ftrace bug ]------------ | ftrace failed to modify | [] 0xffff800029180014 | actual: 44:00:00:94 | Initializing ftrace call sites | ftrace record flags: 2000000 | (0) | expected tramp: ffff80000802eb3c | ------------[ cut here ]------------ | WARNING: CPU: 3 PID: 157 at kernel/trace/ftrace.c:2120 ftrace_bug+0x94/0x270 | Modules linked in: | CPU: 3 PID: 157 Comm: insmod Tainted: G O 6.0.0-rc6-00151-gcd722513a189-dirty #22 | Hardware name: linux,dummy-virt (DT) | pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : ftrace_bug+0x94/0x270 | lr : ftrace_bug+0x21c/0x270 | sp : ffff80000b2bbaf0 | x29: ffff80000b2bbaf0 x28: 0000000000000000 x27: ffff0000c4d38000 | x26: 0000000000000001 x25: ffff800009d7e000 x24: ffff0000c4d86e00 | x23: 0000000002000000 x22: ffff80000a62b000 x21: ffff8000098ebea8 | x20: ffff0000c4d38000 x19: ffff80000aa24158 x18: ffffffffffffffff | x17: 0000000000000000 x16: 0a0d2d2d2d2d2d2d x15: ffff800009aa9118 | x14: 0000000000000000 x13: 6333626532303830 x12: 3030303866666666 | x11: 203a706d61727420 x10: 6465746365707865 x9 : 3362653230383030 | x8 : c0000000ffffefff x7 : 0000000000017fe8 x6 : 000000000000bff4 | x5 : 0000000000057fa8 x4 : 0000000000000000 x3 : 0000000000000001 | x2 : ad2cb14bb5438900 x1 : 0000000000000000 x0 : 0000000000000022 | Call trace: | ftrace_bug+0x94/0x270 | ftrace_process_locs+0x308/0x430 | ftrace_module_init+0x44/0x60 | load_module+0x15b4/0x1ce8 | __do_sys_init_module+0x1ec/0x238 | __arm64_sys_init_module+0x24/0x30 | invoke_syscall+0x54/0x118 | el0_svc_common.constprop.4+0x84/0x100 | do_el0_svc+0x3c/0xd0 | el0_svc+0x1c/0x50 | el0t_64_sync_handler+0x90/0xb8 | el0t_64_sync+0x15c/0x160 | ---[ end trace 0000000000000000 ]--- | ---------test_init----------- Fix this by reverting to the old behaviour of ignoring the old instruction when initialising an mcount callsite in a module, which was the behaviour prior to commit a6253579977e4c6f. Signed-off-by: Mark Rutland Fixes: a6253579977e ("arm64: ftrace: consistently handle PLTs.") Reported-by: Li Huafei Link: https://lore.kernel.org/linux-arm-kernel/20220929094134.99512-1-lihuafei1@huawei.com Cc: Ard Biesheuvel Cc: Will Deacon Link: https://lore.kernel.org/r/20220929134525.798593-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Tianchen Ding --- arch/arm64/kernel/ftrace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 443480e53212..18675ff011d1 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -216,11 +216,26 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; u32 old = 0, new; + new = aarch64_insn_gen_nop(); + + /* + * When using mcount, callsites in modules may have been initalized to + * call an arbitrary module PLT (which redirects to the _mcount stub) + * rather than the ftrace PLT we'll use at runtime (which redirects to + * the ftrace trampoline). We can ignore the old PLT when initializing + * the callsite. + * + * Note: 'mod' is only set at module load time. + */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + return aarch64_insn_patch_text_nosync((void *)pc, new); + } + if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); - new = aarch64_insn_gen_nop(); return ftrace_modify_code(pc, old, new, true); } -- Gitee From b484598a0b096054270aae24effcfc4f78161079 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2022 17:05:17 +0000 Subject: [PATCH 12/21] ftrace: pass fregs to arch_ftrace_set_direct_caller() ANBZ: #7841 commit 9705bc70960459ae09f4a5e77283973bb3a40f57 upstream. In subsequent patches we'll arrange for architectures to have an ftrace_regs which is entirely distinct from pt_regs. In preparation for this, we need to minimize the use of pt_regs to where strictly necessary in the core ftrace code. This patch changes the prototype of arch_ftrace_set_direct_caller() to take ftrace_regs rather than pt_regs, and moves the extraction of the pt_regs into arch_ftrace_set_direct_caller(). On x86, arch_ftrace_set_direct_caller() can be used even when CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=n, and defines struct ftrace_regs. Due to this, it's necessary to define arch_ftrace_set_direct_caller() as a macro to avoid using an incomplete type. I've also moved the body of arch_ftrace_set_direct_caller() after the CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y defineidion of struct ftrace_regs. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Florent Revest Cc: Masami Hiramatsu Cc: Steven Rostedt Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20221103170520.931305-2-mark.rutland@arm.com Signed-off-by: Will Deacon [dtcccc: drop s390 changes.] Signed-off-by: Tianchen Ding --- arch/x86/include/asm/ftrace.h | 31 ++++++++++++++++++------------- include/linux/ftrace.h | 6 ++---- kernel/trace/ftrace.c | 3 +-- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 024d9797646e..b61fb440be23 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,19 +28,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -/* - * When a ftrace registered caller is tracing a function that is - * also set by a register_ftrace_direct() call, it needs to be - * differentiated in the ftrace_caller trampoline. To do this, we - * place the direct caller in the ORIG_AX part of pt_regs. This - * tells the ftrace_caller that there's a direct caller. - */ -static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) -{ - /* Emulate a call */ - regs->orig_ax = addr; -} - #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_regs { struct pt_regs regs; @@ -66,6 +53,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When a ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, we + * place the direct caller in the ORIG_AX part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. + */ +static inline void +__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + /* Emulate a call */ + regs->orig_ax = addr; +} +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 312c722c9791..14bc3edca257 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -419,9 +419,7 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi { return -ENODEV; } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * This must be implemented by the architecture. * It is the way the ftrace direct_ops helper, when called @@ -435,9 +433,9 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi * the return from the trampoline jump to the direct caller * instead of going back to the function it just traced. */ -static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { } -#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifdef CONFIG_STACK_TRACER diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 751e9b5167d5..3dfbfe2b3345 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2489,14 +2489,13 @@ ftrace_add_rec_direct(unsigned long ip, unsigned long addr, static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { - struct pt_regs *regs = ftrace_get_regs(fregs); unsigned long addr; addr = ftrace_find_rec_direct(ip); if (!addr) return; - arch_ftrace_set_direct_caller(regs, addr); + arch_ftrace_set_direct_caller(fregs, addr); } struct ftrace_ops direct_ops = { -- Gitee From bd871b68a43f3051aad55dd2c38764156032c48d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2022 17:05:18 +0000 Subject: [PATCH 13/21] ftrace: rename ftrace_instruction_pointer_set() -> ftrace_regs_set_instruction_pointer() ANBZ: #7841 commit 0ef86097f127d0d73e19aa4dcf86106105e7db09 upstream. In subsequent patches we'll add a sew of ftrace_regs_{get,set}_*() helpers. In preparation, this patch renames ftrace_instruction_pointer_set() to ftrace_regs_set_instruction_pointer(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Florent Revest Cc: Masami Hiramatsu Cc: Steven Rostedt Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20221103170520.931305-3-mark.rutland@arm.com Signed-off-by: Will Deacon [dtcccc: drop powerpc and s390 changes.] Signed-off-by: Tianchen Ding --- arch/x86/include/asm/ftrace.h | 2 +- include/linux/ftrace.h | 9 ++++----- kernel/livepatch/patch.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b61fb440be23..9eac7b5cf0ab 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -42,7 +42,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return &fregs->regs; } -#define ftrace_instruction_pointer_set(fregs, _ip) \ +#define ftrace_regs_set_instruction_pointer(fregs, _ip) \ do { (fregs)->regs.ip = (_ip); } while (0) struct ftrace_ops; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 14bc3edca257..b33c8b4cbd30 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -105,12 +105,11 @@ struct ftrace_regs { #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) /* - * ftrace_instruction_pointer_set() is to be defined by the architecture - * if to allow setting of the instruction pointer from the ftrace_regs - * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports - * live kernel patching. + * ftrace_regs_set_instruction_pointer() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs when + * HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports live kernel patching. */ -#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) +#define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 2b62ad267ada..42d5348e4409 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -132,7 +132,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, */ klp_arch_set_pc(fregs, (unsigned long)func->new_func); #else - ftrace_instruction_pointer_set(fregs, (unsigned long)func->new_func); + ftrace_regs_set_instruction_pointer(fregs, (unsigned long)func->new_func); #endif unlock: -- Gitee From 8b10b3c68f2444125e655916374963163aafcf90 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2022 17:05:19 +0000 Subject: [PATCH 14/21] ftrace: abstract DYNAMIC_FTRACE_WITH_ARGS accesses ANBZ: #7841 commit 94d095ffa0e16bb7f161a2b73bbe5c2795d499a8 upstream. In subsequent patches we'll arrange for architectures to have an ftrace_regs which is entirely distinct from pt_regs. In preparation for this, we need to minimize the use of pt_regs to where strictly necessary in the core ftrace code. This patch adds new ftrace_regs_{get,set}_*() helpers which can be used to manipulate ftrace_regs. When CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y, these can always be used on any ftrace_regs, and when CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=n these can be used when regs are available. A new ftrace_regs_has_args(fregs) helper is added which code can use to check when these are usable. Co-developed-by: Florent Revest Signed-off-by: Florent Revest Signed-off-by: Mark Rutland Cc: Masami Hiramatsu Cc: Steven Rostedt Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20221103170520.931305-4-mark.rutland@arm.com Signed-off-by: Will Deacon [dtcccc: drop powerpc and s390 changes.] Signed-off-by: Tianchen Ding --- arch/x86/include/asm/ftrace.h | 16 ++++++++++++++++ include/linux/ftrace.h | 29 +++++++++++++++++++++++++++++ kernel/trace/Kconfig | 6 +++--- 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9eac7b5cf0ab..478d7e153135 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -45,6 +45,22 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_regs_set_instruction_pointer(fregs, _ip) \ do { (fregs)->regs.ip = (_ip); } while (0) +#define ftrace_regs_get_instruction_pointer(fregs) \ + ((fregs)->regs.ip) + +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(&(fregs)->regs, n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(&(fregs)->regs) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(&(fregs)->regs) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(&(fregs)->regs, ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(&(fregs)->regs) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b33c8b4cbd30..37a607d0c5e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,6 +120,35 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs return arch_ftrace_get_regs(fregs); } +/* + * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs. + * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs. + */ +static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) +{ + if (IS_ENABLED(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)) + return true; + + return ftrace_get_regs(fregs) != NULL; +} + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(ftrace_get_regs(fregs)) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(ftrace_get_regs(fregs), n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(ftrace_get_regs(fregs)) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(ftrace_get_regs(fregs)) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(ftrace_get_regs(fregs), ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(ftrace_get_regs(fregs)) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) +#endif + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, #ifndef __GENKSYMS__ struct ftrace_ops *op, struct ftrace_regs *fregs); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8b278a9765c4..2076982b7dbf 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -35,10 +35,10 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS bool help If this is set, then arguments and stack can be found from - the pt_regs passed into the function callback regs parameter + the ftrace_regs passed into the function callback regs parameter by default, even without setting the REGS flag in the ftrace_ops. - This allows for use of regs_get_kernel_argument() and - kernel_stack_pointer(). + This allows for use of ftrace_regs_get_argument() and + ftrace_regs_get_stack_pointer(). config HAVE_FTRACE_MCOUNT_RECORD bool -- Gitee From 4f68a27c964f415a4d9e88731e18ab3ee2574b8d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2022 17:05:20 +0000 Subject: [PATCH 15/21] ftrace: arm64: move from REGS to ARGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #7841 commit 26299b3f6ba26bfc234b73126d14bdf4dec5275a upstream. This commit replaces arm64's support for FTRACE_WITH_REGS with support for FTRACE_WITH_ARGS. This removes some overhead and complexity, and removes some latent issues with inconsistent presentation of struct pt_regs (which can only be reliably saved/restored at exception boundaries). FTRACE_WITH_REGS has been supported on arm64 since commit: 3b23e4991fb66f6d ("arm64: implement ftrace with regs") As noted in the commit message, the major reasons for implementing FTRACE_WITH_REGS were: (1) To make it possible to use the ftrace graph tracer with pointer authentication, where it's necessary to snapshot/manipulate the LR before it is signed by the instrumented function. (2) To make it possible to implement LIVEPATCH in future, where we need to hook function entry before an instrumented function manipulates the stack or argument registers. Practically speaking, we need to preserve the argument/return registers, PC, LR, and SP. Neither of these need a struct pt_regs, and only require the set of registers which are live at function call/return boundaries. Our calling convention is defined by "Procedure Call Standard for the Arm® 64-bit Architecture (AArch64)" (AKA "AAPCS64"), which can currently be found at: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst Per AAPCS64, all function call argument and return values are held in the following GPRs: * X0 - X7 : parameter / result registers * X8 : indirect result location register * SP : stack pointer (AKA SP) Additionally, ad function call boundaries, the following GPRs hold context/return information: * X29 : frame pointer (AKA FP) * X30 : link register (AKA LR) ... and for ftrace we need to capture the instrumented address: * PC : program counter No other GPRs are relevant, as none of the other arguments hold parameters or return values: * X9 - X17 : temporaries, may be clobbered * X18 : shadow call stack pointer (or temorary) * X19 - X28 : callee saved This patch implements FTRACE_WITH_ARGS for arm64, only saving/restoring the minimal set of registers necessary. This is always sufficient to manipulate control flow (e.g. for live-patching) or to manipulate function arguments and return values. This reduces the necessary stack usage from 336 bytes for pt_regs down to 112 bytes for ftrace_regs + 32 bytes for two frame records, freeing up 188 bytes. This could be reduced further with changes to the unwinder. As there is no longer a need to save different sets of registers for different features, we no longer need distinct `ftrace_caller` and `ftrace_regs_caller` trampolines. This allows the trampoline assembly to be simpler, and simplifies code which previously had to handle the two trampolines. I've tested this with the ftrace selftests, where there are no unexpected failures. Co-developed-by: Florent Revest Signed-off-by: Mark Rutland Signed-off-by: Florent Revest Cc: Catalin Marinas Cc: Masami Hiramatsu Cc: Steven Rostedt Cc: Will Deacon Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20221103170520.931305-5-mark.rutland@arm.com Signed-off-by: Will Deacon [dtcccc: Remove arch/arm64/include/asm/livepatch.h and related code. We do not use FTRACE_WITH_REGS anymore. The removed klp_arch_set_pc() has different behavior with ftrace_regs_set_instruction_pointer(), but it should be ok since the first 8B is for ftrace and should be run, too. I've run the test cases with CONFIG_SAMPLE_LIVEPATCH and it works well.] Signed-off-by: Tianchen Ding Tested-by: ydjohn --- arch/arm64/Kconfig | 16 ++-- arch/arm64/Makefile | 2 +- arch/arm64/include/asm/ftrace.h | 75 ++++++++++++++++-- arch/arm64/include/asm/livepatch.h | 38 --------- arch/arm64/kernel/asm-offsets.c | 13 +++ arch/arm64/kernel/entry-ftrace.S | 123 +++++++++++------------------ arch/arm64/kernel/ftrace.c | 84 ++++++++++++-------- arch/arm64/kernel/module.c | 3 - include/linux/livepatch.h | 5 -- kernel/livepatch/patch.c | 10 --- 10 files changed, 190 insertions(+), 179 deletions(-) delete mode 100644 arch/arm64/include/asm/livepatch.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e1072182e1d3..e92695796570 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -159,6 +159,8 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS \ + if $(cc-option,-fpatchable-function-entry=2) select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD @@ -205,16 +207,16 @@ config ARM64 help ARM 64-bit (AArch64) Linux support. -config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS +config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS def_bool CC_IS_CLANG # https://github.com/ClangBuiltLinux/linux/issues/1507 depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600)) - select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_DYNAMIC_FTRACE_WITH_ARGS -config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS +config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=2) - select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_DYNAMIC_FTRACE_WITH_ARGS config 64BIT def_bool y @@ -1573,7 +1575,7 @@ config ARM64_PTR_AUTH # which is only understood by binutils starting with version 2.33.1. depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100) depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE - depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) + depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_ARGS) help Pointer authentication (part of the ARMv8.3 Extensions) provides instructions for signing and authenticating pointers against secret @@ -1603,7 +1605,7 @@ config ARM64_PTR_AUTH not be selected. This feature works with FUNCTION_GRAPH_TRACER option only if - DYNAMIC_FTRACE_WITH_REGS is enabled. + DYNAMIC_FTRACE_WITH_ARGS is enabled. config CC_HAS_BRANCH_PROT_PAC_RET # GCC 9 or later, clang 8 or later @@ -1715,7 +1717,7 @@ config ARM64_BTI_KERNEL # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 depends on !CC_IS_GCC || GCC_VERSION >= 100100 depends on !(CC_IS_CLANG && GCOV_KERNEL) - depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) + depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_ARGS) help Build the kernel with Branch Target Identification annotations and enable enforcement of this for kernel code. When this option diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 485b7dbd4f9e..a72cb5a6318e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -126,7 +126,7 @@ endif CHECKFLAGS += -D__aarch64__ -ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) +ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y) KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY CC_FLAGS_FTRACE := -fpatchable-function-entry=2 endif diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 5229a26dd4bf..ce2bd853d769 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -12,7 +12,7 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else #define MCOUNT_ADDR ((unsigned long)_mcount) @@ -22,8 +22,7 @@ #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE #define FTRACE_PLT_IDX 0 -#define FTRACE_REGS_PLT_IDX 1 -#define NR_FTRACE_PLTS 2 +#define NR_FTRACE_PLTS 1 /* * Currently, gcc tends to save the link register after the local variables @@ -58,7 +57,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) * Adjust addr to point at the BL in the callsite. * See ftrace_init_nop() for the callsite sequence. */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) return addr + AARCH64_INSN_SIZE; /* * addr is the address of the mcount call instruction. @@ -67,10 +66,74 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS struct dyn_ftrace; struct ftrace_ops; -struct ftrace_regs; + +#define arch_ftrace_get_regs(regs) NULL + +struct ftrace_regs { + CK_KABI_REPLACE_SPLIT( + struct pt_regs regs, + /* x0 - x8 */ + unsigned long regs[9], + unsigned long __unused, + + unsigned long fp, + unsigned long lr, + + unsigned long sp, + unsigned long pc, + ) +}; + +static __always_inline unsigned long +ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) +{ + return fregs->pc; +} + +static __always_inline void +ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, + unsigned long pc) +{ + fregs->pc = pc; +} + +static __always_inline unsigned long +ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) +{ + return fregs->sp; +} + +static __always_inline unsigned long +ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) +{ + if (n < 8) + return fregs->regs[n]; + return 0; +} + +static __always_inline unsigned long +ftrace_regs_get_return_value(const struct ftrace_regs *fregs) +{ + return fregs->regs[0]; +} + +static __always_inline void +ftrace_regs_set_return_value(struct ftrace_regs *fregs, + unsigned long ret) +{ + fregs->regs[0] = ret; +} + +static __always_inline void +ftrace_override_function_with_return(struct ftrace_regs *fregs) +{ + fregs->pc = fregs->lr; +} + +int ftrace_regs_query_register_offset(const char *name); int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop diff --git a/arch/arm64/include/asm/livepatch.h b/arch/arm64/include/asm/livepatch.h deleted file mode 100644 index d192292f53c1..000000000000 --- a/arch/arm64/include/asm/livepatch.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM64_LIVEPATCH_H -#define _ASM_ARM64_LIVEPATCH_H - -#include -#include - -#ifdef CONFIG_LIVEPATCH - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -static inline int klp_check_compiler_support(void) -{ - return 0; -} - -static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long pc) -{ - struct pt_regs *regs = ftrace_get_regs(fregs); - - regs->pc = pc + 2 * AARCH64_INSN_SIZE; -} - -#else -static inline int klp_check_compiler_support(void) -{ - return 1; -} - -static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long pc) -{ -} -#endif - -#else -#error Live patching support is disabled; check CONFIG_LIVEPATCH -#endif - -#endif /* _ASM_ARM64_LIVEPATCH_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 56ce123d1357..c0ab244b08e6 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -74,6 +74,19 @@ int main(void) DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS + DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0])); + DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2])); + DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4])); + DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6])); + DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8])); + DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp)); + DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr)); + DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); + DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc)); + DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); + BLANK(); +#endif #ifdef CONFIG_COMPAT DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 6beafa108126..2e4051799829 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -12,87 +12,60 @@ #include #include -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS /* * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before * the regular function prologue. For an enabled callsite, ftrace_init_nop() and * ftrace_make_call() have patched those NOPs to: * * MOV X9, LR - * BL - * - * ... where is either ftrace_caller or ftrace_regs_caller. + * BL ftrace_caller * * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to * clobber. * - * We save the callsite's context into a pt_regs before invoking any ftrace - * callbacks. So that we can get a sensible backtrace, we create a stack record - * for the callsite and the ftrace entry assembly. This is not sufficient for - * reliable stacktrace: until we create the callsite stack record, its caller - * is missing from the LR and existing chain of frame records. + * We save the callsite's context into a struct ftrace_regs before invoking any + * ftrace callbacks. So that we can get a sensible backtrace, we create frame + * records for the callsite and the ftrace entry assembly. This is not + * sufficient for reliable stacktrace: until we create the callsite stack + * record, its caller is missing from the LR and existing chain of frame + * records. */ - .macro ftrace_regs_entry, allregs=0 - /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(PT_REGS_SIZE + 16) - - /* Save function arguments (and x9 for simplicity) */ - stp x0, x1, [sp, #S_X0] - stp x2, x3, [sp, #S_X2] - stp x4, x5, [sp, #S_X4] - stp x6, x7, [sp, #S_X6] - stp x8, x9, [sp, #S_X8] - - /* Optionally save the callee-saved registers, always save the FP */ - .if \allregs == 1 - stp x10, x11, [sp, #S_X10] - stp x12, x13, [sp, #S_X12] - stp x14, x15, [sp, #S_X14] - stp x16, x17, [sp, #S_X16] - stp x18, x19, [sp, #S_X18] - stp x20, x21, [sp, #S_X20] - stp x22, x23, [sp, #S_X22] - stp x24, x25, [sp, #S_X24] - stp x26, x27, [sp, #S_X26] - stp x28, x29, [sp, #S_X28] - .else - str x29, [sp, #S_FP] - .endif - - /* Save the callsite's SP and LR */ - add x10, sp, #(PT_REGS_SIZE + 16) - stp x9, x10, [sp, #S_LR] +SYM_CODE_START(ftrace_caller) +#ifdef BTI_C + BTI_C +#endif - /* Save the PC after the ftrace callsite */ - str x30, [sp, #S_PC] + /* Save original SP */ + mov x10, sp - /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #PT_REGS_SIZE] - add x29, sp, #PT_REGS_SIZE + /* Make room for ftrace regs, plus two frame records */ + sub sp, sp, #(FREGS_SIZE + 32) - /* Create our frame record within pt_regs. */ - stp x29, x30, [sp, #S_STACKFRAME] - add x29, sp, #S_STACKFRAME - .endm + /* Save function arguments */ + stp x0, x1, [sp, #FREGS_X0] + stp x2, x3, [sp, #FREGS_X2] + stp x4, x5, [sp, #FREGS_X4] + stp x6, x7, [sp, #FREGS_X6] + str x8, [sp, #FREGS_X8] -SYM_CODE_START(ftrace_regs_caller) -#ifdef BTI_C - BTI_C -#endif - ftrace_regs_entry 1 - b ftrace_common -SYM_CODE_END(ftrace_regs_caller) + /* Save the callsite's FP, LR, SP */ + str x29, [sp, #FREGS_FP] + str x9, [sp, #FREGS_LR] + str x10, [sp, #FREGS_SP] -SYM_CODE_START(ftrace_caller) -#ifdef BTI_C - BTI_C -#endif - ftrace_regs_entry 0 - b ftrace_common -SYM_CODE_END(ftrace_caller) + /* Save the PC after the ftrace callsite */ + str x30, [sp, #FREGS_PC] + + /* Create a frame record for the callsite above the ftrace regs */ + stp x29, x9, [sp, #FREGS_SIZE + 16] + add x29, sp, #FREGS_SIZE + 16 + + /* Create our frame record above the ftrace regs */ + stp x29, x30, [sp, #FREGS_SIZE] + add x29, sp, #FREGS_SIZE -SYM_CODE_START(ftrace_common) sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) mov x1, x9 // parent_ip (callsite's LR) ldr_l x2, function_trace_op // op @@ -107,24 +80,24 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * to restore x0-x8, x29, and x30. */ /* Restore function arguments */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldr x8, [sp, #S_X8] + ldp x0, x1, [sp, #FREGS_X0] + ldp x2, x3, [sp, #FREGS_X2] + ldp x4, x5, [sp, #FREGS_X4] + ldp x6, x7, [sp, #FREGS_X6] + ldr x8, [sp, #FREGS_X8] /* Restore the callsite's FP, LR, PC */ - ldr x29, [sp, #S_FP] - ldr x30, [sp, #S_LR] - ldr x9, [sp, #S_PC] + ldr x29, [sp, #FREGS_FP] + ldr x30, [sp, #FREGS_LR] + ldr x9, [sp, #FREGS_PC] /* Restore the callsite's SP */ - add sp, sp, #PT_REGS_SIZE + 16 + add sp, sp, #FREGS_SIZE + 32 ret x9 -SYM_CODE_END(ftrace_common) +SYM_CODE_END(ftrace_caller) -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ /* * Gcc with -pg will put the following code in the beginning of each function: @@ -296,7 +269,7 @@ SYM_FUNC_START(ftrace_graph_caller) mcount_exit SYM_FUNC_END(ftrace_graph_caller) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ SYM_FUNC_START(ftrace_stub) ret diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 18675ff011d1..5aeefb3d85ae 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -16,6 +16,51 @@ #include #include +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +struct fregs_offset { + const char *name; + int offset; +}; + +#define FREGS_OFFSET(n, field) \ +{ \ + .name = n, \ + .offset = offsetof(struct ftrace_regs, field), \ +} + +static const struct fregs_offset fregs_offsets[] = { + FREGS_OFFSET("x0", regs[0]), + FREGS_OFFSET("x1", regs[1]), + FREGS_OFFSET("x2", regs[2]), + FREGS_OFFSET("x3", regs[3]), + FREGS_OFFSET("x4", regs[4]), + FREGS_OFFSET("x5", regs[5]), + FREGS_OFFSET("x6", regs[6]), + FREGS_OFFSET("x7", regs[7]), + FREGS_OFFSET("x8", regs[8]), + + FREGS_OFFSET("x29", fp), + FREGS_OFFSET("x30", lr), + FREGS_OFFSET("lr", lr), + + FREGS_OFFSET("sp", sp), + FREGS_OFFSET("pc", pc), +}; + +int ftrace_regs_query_register_offset(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(fregs_offsets); i++) { + const struct fregs_offset *roff = &fregs_offsets[i]; + if (!strcmp(roff->name, name)) + return roff->offset; + } + + return -EINVAL; +} +#endif + #ifdef CONFIG_DYNAMIC_FTRACE /* * Replace a single instruction, which may be a branch or NOP. @@ -69,9 +114,6 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) if (addr == FTRACE_ADDR) return &plt[FTRACE_PLT_IDX]; - if (addr == FTRACE_REGS_ADDR && - IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - return &plt[FTRACE_REGS_PLT_IDX]; #endif return NULL; } @@ -153,25 +195,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(pc, old, new, true); } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) -{ - unsigned long pc = rec->ip; - u32 old, new; - - if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) - return -EINVAL; - if (!ftrace_find_callable_addr(rec, NULL, &addr)) - return -EINVAL; - - old = aarch64_insn_gen_branch_imm(pc, old_addr, - AARCH64_INSN_BRANCH_LINK); - new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); - - return ftrace_modify_code(pc, old, new, true); -} - +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS /* * The compiler has inserted two NOPs before the regular function prologue. * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live, @@ -227,7 +251,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * * Note: 'mod' is only set at module load time. */ - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { return aarch64_insn_patch_text_nosync((void *)pc, new); } @@ -283,19 +307,11 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, #ifdef CONFIG_DYNAMIC_FTRACE -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - /* - * When DYNAMIC_FTRACE_WITH_REGS is selected, `fregs` can never be NULL - * and arch_ftrace_get_regs(fregs) will always give a non-NULL pt_regs - * in which we can safely modify the LR. - */ - struct pt_regs *regs = arch_ftrace_get_regs(fregs); - unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs); - - prepare_ftrace_return(ip, parent, frame_pointer(regs)); + prepare_ftrace_return(ip, &fregs->lr, fregs->fp); } #else /* @@ -327,7 +343,7 @@ int ftrace_disable_ftrace_graph_caller(void) { return ftrace_modify_graph_caller(false); } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 90193ed5005b..b027db4c699f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -507,9 +507,6 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr, __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR); - mod->arch.ftrace_trampolines = plts; #endif return 0; diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 3bb447aab270..8607641dc8e4 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -16,11 +16,6 @@ #if IS_ENABLED(CONFIG_LIVEPATCH) -#ifdef CONFIG_ARM64 -/* workaround for arm64, see comments in klp_ftrace_handler(). */ -#include -#endif - /* task patch states */ #define KLP_UNDEFINED -1 #define KLP_UNPATCHED 0 diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 42d5348e4409..525cfc209cff 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -123,17 +123,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, if (func->nop) goto unlock; -#if defined(CONFIG_ARM64) && defined(CONFIG_LIVEPATCH) - /* - * This is a workaround for our own commit d96aeabad9327 - * ("ck: arm64: add livepatch support"). - * It will be removed after we support DYNAMIC_FTRACE_WITH_ARGS - * on arm64. - */ - klp_arch_set_pc(fregs, (unsigned long)func->new_func); -#else ftrace_regs_set_instruction_pointer(fregs, (unsigned long)func->new_func); -#endif unlock: preempt_enable_notrace(); -- Gitee From acd0febba41f3b20fb107a9481f5d7a7ba82af12 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 22 Nov 2022 16:36:24 +0000 Subject: [PATCH 16/21] ftrace: arm64: remove static ftrace ANBZ: #7841 commit cfce092dae95ed81391e49f273353a96cc6dec64 upstream. The build test robot pointer out that there's a build failure when: CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_DYNAMIC_FTRACE_WITH_ARGS=n ... due to some mismatched ifdeffery, some of which checks CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and some of which checks CONFIG_DYNAMIC_FTRACE_WITH_ARGS, leading to some missing definitions expected by the core code when CONFIG_DYNAMIC_FTRACE=n and consequently CONFIG_DYNAMIC_FTRACE_WITH_ARGS=n. There's really not much point in supporting CONFIG_DYNAMIC_FTRACE=n (AKA static ftrace). All supported toolchains allow us to implement DYNAMIC_FTRACE, distributions all prefer DYNAMIC_FTRACE, and both powerpc and s390 removed support for static ftrace in commits: 0c0c52306f4792a4 ("powerpc: Only support DYNAMIC_FTRACE not static") 5d6a0163494c78ad ("s390/ftrace: enforce DYNAMIC_FTRACE if FUNCTION_TRACER is selected") ... and according to Steven, static ftrace is only supported on x86 to allow testing that the core code still functions in this configuration. Given that, let's simplify matters by removing arm64's support for static ftrace. This avoids the problem originally reported, and leaves us with less code to maintain. Fixes: 26299b3f6ba2 ("ftrace: arm64: move from REGS to ARGS") Link: https://lore.kernel.org/r/202211212249.livTPi3Y-lkp@intel.com Suggested-by: Steven Rostedt Signed-off-by: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20221122163624.1225912-1-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Tianchen Ding --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/entry-ftrace.S | 39 -------------------------------- arch/arm64/kernel/ftrace.c | 5 ---- 3 files changed, 1 insertion(+), 44 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e92695796570..92b02a2abbba 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -98,6 +98,7 @@ config ARM64 select CPU_PM if (SUSPEND || CPU_IDLE) select CRC32 select DCACHE_WORD_ACCESS + select DYNAMIC_FTRACE if FUNCTION_TRACER select DMA_DIRECT_REMAP select EDAC_SUPPORT select FRAME_POINTER diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 2e4051799829..464cc31970e3 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -171,44 +171,6 @@ SYM_CODE_END(ftrace_caller) add \reg, \reg, #8 .endm -#ifndef CONFIG_DYNAMIC_FTRACE -/* - * void _mcount(unsigned long return_address) - * @return_address: return address to instrumented function - * - * This function makes calls, if enabled, to: - * - tracer function to probe instrumented function's entry, - * - ftrace_graph_caller to set up an exit hook - */ -SYM_FUNC_START(_mcount) - mcount_enter - - ldr_l x2, ftrace_trace_function - adr x0, ftrace_stub - cmp x0, x2 // if (ftrace_trace_function - b.eq skip_ftrace_call // != ftrace_stub) { - - mcount_get_pc x0 // function's pc - mcount_get_lr x1 // function's lr (= parent's pc) - blr x2 // (*ftrace_trace_function)(pc, lr); - -skip_ftrace_call: // } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr_l x2, ftrace_graph_return - cmp x0, x2 // if ((ftrace_graph_return - b.ne ftrace_graph_caller // != ftrace_stub) - - ldr_l x2, ftrace_graph_entry // || (ftrace_graph_entry - adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) - cmp x0, x2 - b.ne ftrace_graph_caller // ftrace_graph_caller(); -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - mcount_exit -SYM_FUNC_END(_mcount) -EXPORT_SYMBOL(_mcount) -NOKPROBE(_mcount) - -#else /* CONFIG_DYNAMIC_FTRACE */ /* * _mcount() is used to build the kernel with -pg option, but all the branch * instructions to _mcount() are replaced to NOP initially at kernel start up, @@ -248,7 +210,6 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller(); mcount_exit SYM_FUNC_END(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 5aeefb3d85ae..51f4bc268fc5 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -61,7 +61,6 @@ int ftrace_regs_query_register_offset(const char *name) } #endif -#ifdef CONFIG_DYNAMIC_FTRACE /* * Replace a single instruction, which may be a branch or NOP. * If @validate == true, a replaced instruction is checked against 'old'. @@ -274,7 +273,6 @@ int __init ftrace_dyn_arch_init(void) { return 0; } -#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* @@ -305,8 +303,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, *parent = return_hooker; } -#ifdef CONFIG_DYNAMIC_FTRACE - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) @@ -344,7 +340,6 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_modify_graph_caller(false); } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ -#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ bool arch_foreign_patched(unsigned long addr) -- Gitee From 2c52596176a4f431b36be9be40f71d3fcc3279ca Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Sat, 24 Dec 2022 21:31:46 +0800 Subject: [PATCH 17/21] bpf: Fix panic due to wrong pageattr of im->image ANBZ: #7841 commit 9ed1d9aeef5842ecacb660fce933613b58af1e00 upstream. In the scenario where livepatch and kretfunc coexist, the pageattr of im->image is rox after arch_prepare_bpf_trampoline in bpf_trampoline_update, and then modify_fentry or register_fentry returns -EAGAIN from bpf_tramp_ftrace_ops_func, the BPF_TRAMP_F_ORIG_STACK flag will be configured, and arch_prepare_bpf_trampoline will be re-executed. At this time, because the pageattr of im->image is rox, arch_prepare_bpf_trampoline will read and write im->image, which causes a fault. as follows: insmod livepatch-sample.ko # samples/livepatch/livepatch-sample.c bpftrace -e 'kretfunc:cmdline_proc_show {}' BUG: unable to handle page fault for address: ffffffffa0206000 PGD 322d067 P4D 322d067 PUD 322e063 PMD 1297e067 PTE d428061 Oops: 0003 [#1] PREEMPT SMP PTI CPU: 2 PID: 270 Comm: bpftrace Tainted: G E K 6.1.0 #5 RIP: 0010:arch_prepare_bpf_trampoline+0xed/0x8c0 RSP: 0018:ffffc90001083ad8 EFLAGS: 00010202 RAX: ffffffffa0206000 RBX: 0000000000000020 RCX: 0000000000000000 RDX: ffffffffa0206001 RSI: ffffffffa0206000 RDI: 0000000000000030 RBP: ffffc90001083b70 R08: 0000000000000066 R09: ffff88800f51b400 R10: 000000002e72c6e5 R11: 00000000d0a15080 R12: ffff8880110a68c8 R13: 0000000000000000 R14: ffff88800f51b400 R15: ffffffff814fec10 FS: 00007f87bc0dc780(0000) GS:ffff88803e600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0206000 CR3: 0000000010b70000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bpf_trampoline_update+0x25a/0x6b0 __bpf_trampoline_link_prog+0x101/0x240 bpf_trampoline_link_prog+0x2d/0x50 bpf_tracing_prog_attach+0x24c/0x530 bpf_raw_tp_link_attach+0x73/0x1d0 __sys_bpf+0x100e/0x2570 __x64_sys_bpf+0x1c/0x30 do_syscall_64+0x5b/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd With this patch, when modify_fentry or register_fentry returns -EAGAIN from bpf_tramp_ftrace_ops_func, the pageattr of im->image will be reset to nx+rw. Cc: stable@vger.kernel.org Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Signed-off-by: Chuang Wang Acked-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20221224133146.780578-1-nashuiliang@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Tianchen Ding --- kernel/bpf/trampoline.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 41299a75c5a6..8366febecdf0 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -452,6 +452,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut /* reset fops->func and fops->trampoline for re-register */ tr->fops->func = NULL; tr->fops->trampoline = 0; + + /* reset im->image memory attr for arch_prepare_bpf_trampoline */ + set_memory_nx((long)im->image, 1); + set_memory_rw((long)im->image, 1); goto again; } #endif -- Gitee From de791e639dce5e03739e91be500e64f3cf902168 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 9 Jan 2023 12:27:43 +0000 Subject: [PATCH 18/21] arm64: Fix build with CC=clang, CONFIG_FTRACE=y and CONFIG_STACK_TRACER=y ANBZ: #7841 commit 68a63a412d18bd2e2577c8928139f92541afa7a6 upstream. commit 45bd8951806e ("arm64: Improve HAVE_DYNAMIC_FTRACE_WITH_REGS selection for clang") fixed the build with the above combination by splitting HAVE_DYNAMIC_FTRACE_WITH_REGS into separate checks for Clang and GCC. commit 26299b3f6ba2 ("ftrace: arm64: move from REGS to ARGS") added the GCC only check "-fpatchable-function-entry=2" back in unconditionally which breaks the build. Remove the unconditional check, because the conditional ones were also updated to _ARGS in the above commit, so they work correctly on their own. Fixes: 26299b3f6ba2 ("ftrace: arm64: move from REGS to ARGS") Signed-off-by: James Clark Link: https://lore.kernel.org/r/20230109122744.1904852-1-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: Tianchen Ding --- arch/arm64/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 92b02a2abbba..45a25c5a1868 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -160,8 +160,6 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_ARGS \ - if $(cc-option,-fpatchable-function-entry=2) select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD -- Gitee From dcc2a920463d90111a66897311191f8fc683ba7e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 3 Jan 2023 12:49:12 +0000 Subject: [PATCH 19/21] ftrace: Add sample with custom ops ANBZ: #7841 commit b56c68f705cad0cff61fbe132c66ced2c737c65c upstream. When reworking core ftrace code or architectural ftrace code, it's often necessary to test/analyse/benchmark a number of ftrace_ops configurations. This patch adds a module which can be used to explore some of those configurations. I'm using this to benchmark various options for changing the way trampolines and handling of ftrace_ops work on arm64, and ensuring other architectures aren't adversely affected. For example, in a QEMU+KVM VM running on a 2GHz Xeon E5-2660 workstation, loading the module in various configurations produces: | # insmod ftrace-ops.ko | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 1681558ns (16ns / call) | # insmod ftrace-ops.ko nr_ops_irrelevant=5 | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 5 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 1693042ns (16ns / call) | # insmod ftrace-ops.ko nr_ops_relevant=2 | ftrace_ops: registering: | relevant ops: 2 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 11965582ns (119ns / call) | # insmod ftrace-ops.ko save_regs=true | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: YES | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 4459624ns (44ns / call) Link: https://lkml.kernel.org/r/20230103124912.2948963-4-mark.rutland@arm.com Cc: Florent Revest Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) [dtcccc: Include slab.h to declare kcalloc(). Move declaration "unsigned int i" to top of the function to avoid compiler error.] Signed-off-by: Tianchen Ding --- samples/Kconfig | 7 + samples/Makefile | 1 + samples/ftrace/Makefile | 1 + samples/ftrace/ftrace-ops.c | 259 ++++++++++++++++++++++++++++++++++++ 4 files changed, 268 insertions(+) create mode 100644 samples/ftrace/ftrace-ops.c diff --git a/samples/Kconfig b/samples/Kconfig index dcf1cffcf5ff..fc7746939edc 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -40,6 +40,13 @@ config SAMPLE_FTRACE_DIRECT_MULTI that hooks to wake_up_process and schedule, and prints the function addresses. +config SAMPLE_FTRACE_OPS + tristate "Build custom ftrace ops example" + depends on FUNCTION_TRACER + help + This builds an ftrace ops example that hooks two functions and + measures the time taken to invoke one function a number of times. + config SAMPLE_TRACE_ARRAY tristate "Build sample module for kernel access to Ftrace instancess" depends on EVENT_TRACING && m diff --git a/samples/Makefile b/samples/Makefile index e25f8e4c98a0..5d65eddaee19 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace_events/ obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/ +obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace/ obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/ subdir-$(CONFIG_SAMPLE_UHID) += uhid obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/ diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile index faf8cdb79c5f..589baf2ec4e3 100644 --- a/samples/ftrace/Makefile +++ b/samples/ftrace/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o +obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace-ops.o CFLAGS_sample-trace-array.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c new file mode 100644 index 000000000000..4c10b2a85838 --- /dev/null +++ b/samples/ftrace/ftrace-ops.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include + +/* + * Arbitrary large value chosen to be sufficiently large to minimize noise but + * sufficiently small to complete quickly. + */ +unsigned int nr_function_calls = 100000; +module_param(nr_function_calls, uint, 0); +MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee"); + +/* + * The number of ops associated with a call site affects whether a tracer can + * be called directly or whether it's necessary to go via the list func, which + * can be significantly more expensive. + */ +unsigned int nr_ops_relevant = 1; +module_param(nr_ops_relevant, uint, 0); +MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the relevant tracee"); + +/* + * On architectures where all call sites share the same trampoline, having + * tracers enabled for distinct functions can force the use of the list func + * and incur overhead for all call sites. + */ +unsigned int nr_ops_irrelevant = 0; +module_param(nr_ops_irrelevant, uint, 0); +MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the irrelevant tracee"); + +/* + * On architectures with DYNAMIC_FTRACE_WITH_REGS, saving the full pt_regs can + * be more expensive than only saving the minimal necessary regs. + */ +bool save_regs = false; +module_param(save_regs, bool, 0); +MODULE_PARM_DESC(save_regs, "Register ops with FTRACE_OPS_FL_SAVE_REGS (save all registers in the trampoline)"); + +bool assist_recursion = false; +module_param(assist_recursion, bool, 0); +MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RECURSION"); + +bool assist_rcu = false; +module_param(assist_rcu, bool, 0); +MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RCU"); + +/* + * By default, a trivial tracer is used which immediately returns to mimimize + * overhead. Sometimes a consistency check using a more expensive tracer is + * desireable. + */ +bool check_count = false; +module_param(check_count, bool, 0); +MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number of times\n"); + +/* + * Usually it's not interesting to leave the ops registered after the test + * runs, but sometimes it can be useful to leave them registered so that they + * can be inspected through the tracefs 'enabled_functions' file. + */ +bool persist = false; +module_param(persist, bool, 0); +MODULE_PARM_DESC(persist, "Successfully load module and leave ftrace ops registered after test completes\n"); + +/* + * Marked as noinline to ensure that an out-of-line traceable copy is + * generated by the compiler. + * + * The barrier() ensures the compiler won't elide calls by determining there + * are no side-effects. + */ +static noinline void tracee_relevant(void) +{ + barrier(); +} + +/* + * Marked as noinline to ensure that an out-of-line traceable copy is + * generated by the compiler. + * + * The barrier() ensures the compiler won't elide calls by determining there + * are no side-effects. + */ +static noinline void tracee_irrelevant(void) +{ + barrier(); +} + +struct sample_ops { + struct ftrace_ops ops; + unsigned int count; +}; + +static void ops_func_nop(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + /* do nothing */ +} + +static void ops_func_count(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + struct sample_ops *self; + + self = container_of(op, struct sample_ops, ops); + self->count++; +} + +struct sample_ops *ops_relevant; +struct sample_ops *ops_irrelevant; + +static struct sample_ops *ops_alloc_init(void *tracee, ftrace_func_t func, + unsigned long flags, int nr) +{ + struct sample_ops *ops; + unsigned int i; + + ops = kcalloc(nr, sizeof(*ops), GFP_KERNEL); + if (WARN_ON_ONCE(!ops)) + return NULL; + + for (i = 0; i < nr; i++) { + ops[i].ops.func = func; + ops[i].ops.flags = flags; + WARN_ON_ONCE(ftrace_set_filter_ip(&ops[i].ops, (unsigned long)tracee, 0, 0)); + WARN_ON_ONCE(register_ftrace_function(&ops[i].ops)); + } + + return ops; +} + +static void ops_destroy(struct sample_ops *ops, int nr) +{ + unsigned int i; + + if (!ops) + return; + + for (i = 0; i < nr; i++) { + WARN_ON_ONCE(unregister_ftrace_function(&ops[i].ops)); + ftrace_free_filter(&ops[i].ops); + } + + kfree(ops); +} + +static void ops_check(struct sample_ops *ops, int nr, + unsigned int expected_count) +{ + unsigned int i; + + if (!ops || !check_count) + return; + + for (i = 0; i < nr; i++) { + if (ops->count == expected_count) + continue; + pr_warn("Counter called %u times (expected %u)\n", + ops->count, expected_count); + } +} + +ftrace_func_t tracer_relevant = ops_func_nop; +ftrace_func_t tracer_irrelevant = ops_func_nop; + +static int __init ftrace_ops_sample_init(void) +{ + unsigned long flags = 0; + ktime_t start, end; + unsigned int i; + u64 period; + + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && save_regs) { + pr_info("this kernel does not support saving registers\n"); + save_regs = false; + } else if (save_regs) { + flags |= FTRACE_OPS_FL_SAVE_REGS; + } + + if (assist_recursion) + flags |= FTRACE_OPS_FL_RECURSION; + + if (assist_rcu) + flags |= FTRACE_OPS_FL_RCU; + + if (check_count) { + tracer_relevant = ops_func_count; + tracer_irrelevant = ops_func_count; + } + + pr_info("registering:\n" + " relevant ops: %u\n" + " tracee: %ps\n" + " tracer: %ps\n" + " irrelevant ops: %u\n" + " tracee: %ps\n" + " tracer: %ps\n" + " saving registers: %s\n" + " assist recursion: %s\n" + " assist RCU: %s\n", + nr_ops_relevant, tracee_relevant, tracer_relevant, + nr_ops_irrelevant, tracee_irrelevant, tracer_irrelevant, + save_regs ? "YES" : "NO", + assist_recursion ? "YES" : "NO", + assist_rcu ? "YES" : "NO"); + + ops_relevant = ops_alloc_init(tracee_relevant, tracer_relevant, + flags, nr_ops_relevant); + ops_irrelevant = ops_alloc_init(tracee_irrelevant, tracer_irrelevant, + flags, nr_ops_irrelevant); + + start = ktime_get(); + for (i = 0; i < nr_function_calls; i++) + tracee_relevant(); + end = ktime_get(); + + ops_check(ops_relevant, nr_ops_relevant, nr_function_calls); + ops_check(ops_irrelevant, nr_ops_irrelevant, 0); + + period = ktime_to_ns(ktime_sub(end, start)); + + pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n", + nr_function_calls, tracee_relevant, + period, period / nr_function_calls); + + if (persist) + return 0; + + ops_destroy(ops_relevant, nr_ops_relevant); + ops_destroy(ops_irrelevant, nr_ops_irrelevant); + + /* + * The benchmark completed sucessfully, but there's no reason to keep + * the module around. Return an error do the user doesn't have to + * manually unload the module. + */ + return -EINVAL; +} +module_init(ftrace_ops_sample_init); + +static void __exit ftrace_ops_sample_exit(void) +{ + ops_destroy(ops_relevant, nr_ops_relevant); + ops_destroy(ops_irrelevant, nr_ops_irrelevant); +} +module_exit(ftrace_ops_sample_exit); + +MODULE_AUTHOR("Mark Rutland"); +MODULE_DESCRIPTION("Example of using custom ftrace_ops"); +MODULE_LICENSE("GPL"); -- Gitee From d067f3fd4ac4e322cccefcfcef656d871cc93a27 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 Jan 2023 14:02:37 +0100 Subject: [PATCH 20/21] ftrace: sample: avoid open-coded 64-bit division ANBZ: #7841 commit f94fe7048a352ff8914232a18e2e2f18f8a5ac81 upstream. Calculating the average period requires a 64-bit division that leads to a link failure on 32-bit architectures: x86_64-linux-ld: samples/ftrace/ftrace-ops.o: in function `ftrace_ops_sample_init': ftrace-ops.c:(.init.text+0x23b): undefined reference to `__udivdi3' Use the div_u64() helper to do this instead. Since this is an init function that is not called frequently, the runtime overhead is going to be acceptable. Link: https://lore.kernel.org/linux-trace-kernel/20230130130246.247537-1-arnd@kernel.org Cc: Masami Hiramatsu Fixes: b56c68f705ca ("ftrace: Add sample with custom ops") Signed-off-by: Arnd Bergmann Acked-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) Signed-off-by: Tianchen Ding --- samples/ftrace/ftrace-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c index 4c10b2a85838..cc8194b0fd22 100644 --- a/samples/ftrace/ftrace-ops.c +++ b/samples/ftrace/ftrace-ops.c @@ -230,7 +230,7 @@ static int __init ftrace_ops_sample_init(void) pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n", nr_function_calls, tracee_relevant, - period, period / nr_function_calls); + period, div_u64(period, nr_function_calls)); if (persist) return 0; -- Gitee From 6b4a7a92389e54ec13ef90b0d03d9fe0222f921d Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 21 Feb 2024 15:24:05 +0800 Subject: [PATCH 21/21] anolis: ftrace: workaround for ops_func ANBZ: #7841 For Anolis KABI reason, we cannot add ops_func field directly (there is only one reserved field in struct ftrace_ops and we need it for direct_call in future). Since now (and the latest upstream) ops_func is only assigned with bpf_tramp_ftrace_ops_func(), we add a flag FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED to ops->flags instead, and call the function directly. Signed-off-by: Tianchen Ding --- include/linux/ftrace.h | 13 ++++++++++++- kernel/bpf/trampoline.c | 4 ++-- kernel/trace/ftrace.c | 17 ++++++++++------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 37a607d0c5e6..c506797c303c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -213,6 +213,8 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ftrace_enabled. * DIRECT - Used by the direct ftrace_ops helper for direct functions * (internal ftrace only, should not be used by others) + * OPS_FUNC_CK_RESERVED - A workaround instead of ops->ops_func. If set, we need to + * call bpf_tramp_ftrace_ops_func(). */ enum { FTRACE_OPS_FL_ENABLED = BIT(0), @@ -233,6 +235,7 @@ enum { FTRACE_OPS_FL_TRACE_ARRAY = BIT(15), FTRACE_OPS_FL_PERMANENT = BIT(16), FTRACE_OPS_FL_DIRECT = BIT(17), + FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED = BIT(63), }; /* @@ -272,6 +275,15 @@ enum ftrace_ops_cmd { */ typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd); +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) +int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd); +#else +static inline int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd) +{ + return -EBUSY; +} +#endif + #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { @@ -311,7 +323,6 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; - ftrace_ops_func_t ops_func; #endif CK_KABI_RESERVE(1) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 8366febecdf0..894d1239193f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -29,7 +29,7 @@ static DEFINE_MUTEX(trampoline_mutex); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex); -static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd) +int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd) { struct bpf_trampoline *tr = ops->private; int ret = 0; @@ -158,7 +158,7 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) goto out; } tr->fops->private = tr; - tr->fops->ops_func = bpf_tramp_ftrace_ops_func; + tr->fops->flags |= FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED; #endif tr->key = key; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3dfbfe2b3345..7cdc9925a734 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1947,9 +1947,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, * ops. Run SHARE_IPMODIFY_SELF, to check * whether sharing is supported. */ - if (!ops->ops_func) + if (!(ops->flags & FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED)) return -EBUSY; - ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF); + ret = bpf_tramp_ftrace_ops_func(ops, + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF); if (ret) return ret; } else if (is_ipmodify) { @@ -5556,7 +5557,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } ops->func = call_direct_funcs; - ops->flags = MULTI_FLAGS; + ops->flags = MULTI_FLAGS | (ops->flags & FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED); ops->trampoline = FTRACE_REGS_ADDR; err = register_ftrace_function_nolock(ops); @@ -7948,10 +7949,11 @@ static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) mutex_unlock(&ftrace_lock); if (found_op) { - if (!op->ops_func) + if (!(op->flags & FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED)) return -EBUSY; - ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER); + ret = bpf_tramp_ftrace_ops_func(op, + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER); if (ret) return ret; } @@ -7997,8 +7999,9 @@ static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) mutex_unlock(&ftrace_lock); /* The cleanup is optional, ignore any errors */ - if (found_op && op->ops_func) - op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER); + if (found_op && (op->flags & FTRACE_OPS_FL_OPS_FUNC_CK_RESERVED)) + bpf_tramp_ftrace_ops_func(op, + FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER); } } mutex_unlock(&direct_mutex); -- Gitee