From 063472694e7810db970f368113b409ec68736421 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:36 -0500 Subject: [PATCH 01/22] ftrace: Move the recursion testing into global headers ANBZ: #7841 commit 0264c8c9e1b53e9dbb41fae5e54756e84644bc60 upstream. Currently, if a callback is registered to a ftrace function and its ftrace_ops does not have the RECURSION flag set, it is encapsulated in a helper function that does the recursion for it. Really, all the callbacks should have their own recursion protection for performance reasons. But they should not all implement their own. Move the recursion helpers to global headers, so that all callbacks can use them. Link: https://lkml.kernel.org/r/20201028115612.460535535@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.166456258@goodmis.org Signed-off-by: Steven Rostedt (VMware) [dtcccc: Due to 021b6d11e590 ("tracing: Have all levels of checks prevent recursion") on stable branch, we simply move the current code from kernel/trace/trace.h to include/linux/trace_recursion.h.] Signed-off-by: Tianchen Ding --- include/linux/ftrace.h | 1 + include/linux/trace_recursion.h | 166 ++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 156 ------------------------------ 3 files changed, 167 insertions(+), 156 deletions(-) create mode 100644 include/linux/trace_recursion.h diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4e545c8efcbb..01a0ea9e1d58 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,6 +8,7 @@ #define _LINUX_FTRACE_H #include +#include #include #include #include diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h new file mode 100644 index 000000000000..74bbc0f39810 --- /dev/null +++ b/include/linux/trace_recursion.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TRACE_RECURSION_H +#define _LINUX_TRACE_RECURSION_H + +#include +#include + +#ifdef CONFIG_TRACING + +/* Only current can touch trace_recursion */ + +/* + * For function tracing recursion: + * The order of these bits are important. + * + * When function tracing occurs, the following steps are made: + * If arch does not support a ftrace feature: + * call internal function (uses INTERNAL bits) which calls... + * If callback is registered to the "global" list, the list + * function is called and recursion checks the GLOBAL bits. + * then this function calls... + * The function callback, which can use the FTRACE bits to + * check for recursion. + */ +enum { + /* Function recursion bits */ + TRACE_FTRACE_BIT, + TRACE_FTRACE_NMI_BIT, + TRACE_FTRACE_IRQ_BIT, + TRACE_FTRACE_SIRQ_BIT, + TRACE_FTRACE_TRANSITION_BIT, + + /* Internal use recursion bits */ + TRACE_INTERNAL_BIT, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, + TRACE_INTERNAL_TRANSITION_BIT, + + TRACE_BRANCH_BIT, +/* + * Abuse of the trace_recursion. + * As we need a way to maintain state if we are tracing the function + * graph in irq because we want to trace a particular function that + * was called in irq context but we have irq tracing off. Since this + * can only be modified by current, we can reuse trace_recursion. + */ + TRACE_IRQ_BIT, + + /* Set if the function is in the set_graph_function file */ + TRACE_GRAPH_BIT, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_BIT + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, + + /* + * To implement set_graph_notrace, if this bit is set, we ignore + * function graph tracing of called functions, until the return + * function is called to clear it. + */ + TRACE_GRAPH_NOTRACE_BIT, +}; + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) + +#define trace_recursion_depth() \ + (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) +#define trace_recursion_set_depth(depth) \ + do { \ + current->trace_recursion &= \ + ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ + current->trace_recursion |= \ + ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ + } while (0) + +#define TRACE_CONTEXT_BITS 4 + +#define TRACE_FTRACE_START TRACE_FTRACE_BIT + +#define TRACE_LIST_START TRACE_INTERNAL_BIT + +#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +enum { + TRACE_CTX_NMI, + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, + TRACE_CTX_TRANSITION, +}; + +static __always_inline int trace_get_context_bit(void) +{ + int bit; + + if (in_interrupt()) { + if (in_nmi()) + bit = TRACE_CTX_NMI; + + else if (in_irq()) + bit = TRACE_CTX_IRQ; + else + bit = TRACE_CTX_SOFTIRQ; + } else + bit = TRACE_CTX_NORMAL; + + return bit; +} + +static __always_inline int trace_test_and_set_recursion(int start) +{ + unsigned int val = current->trace_recursion; + int bit; + + bit = trace_get_context_bit() + start; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ + bit = start + TRACE_CTX_TRANSITION; + if (trace_recursion_test(bit)) + return -1; + trace_recursion_set(bit); + barrier(); + return bit; + } + + val |= 1 << bit; + current->trace_recursion = val; + barrier(); + + return bit; +} + +static __always_inline void trace_clear_recursion(int bit) +{ + unsigned int val = current->trace_recursion; + + bit = 1 << bit; + val &= ~bit; + + barrier(); + current->trace_recursion = val; +} + +#endif /* CONFIG_TRACING */ +#endif /* _LINUX_TRACE_RECURSION_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5b76381e95b..c906fdb82400 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -566,162 +566,6 @@ struct tracer { bool noboot; }; - -/* Only current can touch trace_recursion */ - -/* - * For function tracing recursion: - * The order of these bits are important. - * - * When function tracing occurs, the following steps are made: - * If arch does not support a ftrace feature: - * call internal function (uses INTERNAL bits) which calls... - * If callback is registered to the "global" list, the list - * function is called and recursion checks the GLOBAL bits. - * then this function calls... - * The function callback, which can use the FTRACE bits to - * check for recursion. - */ -enum { - /* Function recursion bits */ - TRACE_FTRACE_BIT, - TRACE_FTRACE_NMI_BIT, - TRACE_FTRACE_IRQ_BIT, - TRACE_FTRACE_SIRQ_BIT, - TRACE_FTRACE_TRANSITION_BIT, - - /* Internal use recursion bits */ - TRACE_INTERNAL_BIT, - TRACE_INTERNAL_NMI_BIT, - TRACE_INTERNAL_IRQ_BIT, - TRACE_INTERNAL_SIRQ_BIT, - TRACE_INTERNAL_TRANSITION_BIT, - - TRACE_BRANCH_BIT, -/* - * Abuse of the trace_recursion. - * As we need a way to maintain state if we are tracing the function - * graph in irq because we want to trace a particular function that - * was called in irq context but we have irq tracing off. Since this - * can only be modified by current, we can reuse trace_recursion. - */ - TRACE_IRQ_BIT, - - /* Set if the function is in the set_graph_function file */ - TRACE_GRAPH_BIT, - - /* - * In the very unlikely case that an interrupt came in - * at a start of graph tracing, and we want to trace - * the function in that interrupt, the depth can be greater - * than zero, because of the preempted start of a previous - * trace. In an even more unlikely case, depth could be 2 - * if a softirq interrupted the start of graph tracing, - * followed by an interrupt preempting a start of graph - * tracing in the softirq, and depth can even be 3 - * if an NMI came in at the start of an interrupt function - * that preempted a softirq start of a function that - * preempted normal context!!!! Luckily, it can't be - * greater than 3, so the next two bits are a mask - * of what the depth is when we set TRACE_GRAPH_BIT - */ - - TRACE_GRAPH_DEPTH_START_BIT, - TRACE_GRAPH_DEPTH_END_BIT, - - /* - * To implement set_graph_notrace, if this bit is set, we ignore - * function graph tracing of called functions, until the return - * function is called to clear it. - */ - TRACE_GRAPH_NOTRACE_BIT, -}; - -#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) -#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) -#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) - -#define trace_recursion_depth() \ - (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) -#define trace_recursion_set_depth(depth) \ - do { \ - current->trace_recursion &= \ - ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ - current->trace_recursion |= \ - ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ - } while (0) - -#define TRACE_CONTEXT_BITS 4 - -#define TRACE_FTRACE_START TRACE_FTRACE_BIT - -#define TRACE_LIST_START TRACE_INTERNAL_BIT - -#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) - -enum { - TRACE_CTX_NMI, - TRACE_CTX_IRQ, - TRACE_CTX_SOFTIRQ, - TRACE_CTX_NORMAL, - TRACE_CTX_TRANSITION, -}; - -static __always_inline int trace_get_context_bit(void) -{ - int bit; - - if (in_interrupt()) { - if (in_nmi()) - bit = TRACE_CTX_NMI; - - else if (in_irq()) - bit = TRACE_CTX_IRQ; - else - bit = TRACE_CTX_SOFTIRQ; - } else - bit = TRACE_CTX_NORMAL; - - return bit; -} - -static __always_inline int trace_test_and_set_recursion(int start) -{ - unsigned int val = current->trace_recursion; - int bit; - - bit = trace_get_context_bit() + start; - if (unlikely(val & (1 << bit))) { - /* - * It could be that preempt_count has not been updated during - * a switch between contexts. Allow for a single recursion. - */ - bit = start + TRACE_CTX_TRANSITION; - if (trace_recursion_test(bit)) - return -1; - trace_recursion_set(bit); - barrier(); - return bit; - } - - val |= 1 << bit; - current->trace_recursion = val; - barrier(); - - return bit; -} - -static __always_inline void trace_clear_recursion(int bit) -{ - unsigned int val = current->trace_recursion; - - bit = 1 << bit; - val &= ~bit; - - barrier(); - current->trace_recursion = val; -} - static inline struct ring_buffer_iter * trace_buffer_iter(struct trace_iterator *iter, int cpu) { -- Gitee From a647cf2ffa591e10ffa43d9c63b4595ac479cf40 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:37 -0500 Subject: [PATCH 02/22] ftrace: Add ftrace_test_recursion_trylock() helper function ANBZ: #7841 commit 6e4eb9cb22fc8a893cb708ed42644de5ee7c3827 upstream. To make it easier for ftrace callbacks to have recursion protection, provide a ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() helper that tests for recursion. Link: https://lkml.kernel.org/r/20201028115612.634927593@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.378584067@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- include/linux/trace_recursion.h | 25 +++++++++++++++++++++++++ kernel/trace/trace_functions.c | 12 +++++------- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 74bbc0f39810..7bead54d8ba8 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -162,5 +162,30 @@ static __always_inline void trace_clear_recursion(int bit) current->trace_recursion = val; } +/** + * ftrace_test_recursion_trylock - tests for recursion in same context + * + * Use this for ftrace callbacks. This will detect if the function + * tracing recursed in the same context (normal vs interrupt), + * + * Returns: -1 if a recursion happened. + * >= 0 if no recursion + */ +static __always_inline int ftrace_test_recursion_trylock(void) +{ + return trace_test_and_set_recursion(TRACE_FTRACE_START); +} + +/** + * ftrace_test_recursion_unlock - called when function callback is complete + * @bit: The return of a successful ftrace_test_recursion_trylock() + * + * This is used at the end of a ftrace callback. + */ +static __always_inline void ftrace_test_recursion_unlock(int bit) +{ + trace_clear_recursion(bit); +} + #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_RECURSION_H */ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b9dccd401fc2..5107534cfb5b 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -140,21 +140,19 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (unlikely(!tr->function_enabled)) return; + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); - bit = trace_test_and_set_recursion(TRACE_FTRACE_START); - if (bit < 0) - goto out; - cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); if (!atomic_read(&data->disabled)) { trace_function(tr, ip, parent_ip, trace_ctx); } - trace_clear_recursion(bit); - - out: + ftrace_test_recursion_unlock(bit); preempt_enable_notrace(); } -- Gitee From 159672faf0ed62f74984b1e6b77432432c018864 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:38 -0500 Subject: [PATCH 03/22] ftrace: Optimize testing what context current is in ANBZ: #7841 commit da5afbeb1724609996ca7bb4fbce2cd104c95914 upstream. The preempt_count() is not a simple location in memory, it could be part of per_cpu code or more. Each access to preempt_count(), or one of its accessor functions (like in_interrupt()) takes several cycles. By reading preempt_count() once, and then doing tests to find the context against the value return is slightly faster than using in_nmi() and in_interrupt(). Link: https://lkml.kernel.org/r/20201028115612.780796355@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.558881845@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- include/linux/trace_recursion.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 7bead54d8ba8..1fae022acd53 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -99,6 +99,13 @@ enum { #define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) +/* + * Used for setting context + * NMI = 0 + * IRQ = 1 + * SOFTIRQ = 2 + * NORMAL = 3 + */ enum { TRACE_CTX_NMI, TRACE_CTX_IRQ, @@ -109,20 +116,13 @@ enum { static __always_inline int trace_get_context_bit(void) { - int bit; + unsigned long pc = preempt_count(); - if (in_interrupt()) { - if (in_nmi()) - bit = TRACE_CTX_NMI; - - else if (in_irq()) - bit = TRACE_CTX_IRQ; - else - bit = TRACE_CTX_SOFTIRQ; - } else - bit = TRACE_CTX_NORMAL; - - return bit; + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + return TRACE_CTX_NORMAL; + else + return pc & NMI_MASK ? TRACE_CTX_NMI : + pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } static __always_inline int trace_test_and_set_recursion(int start) -- Gitee From 8c3b97591242893b23539a8b80d64c25be02bd81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:39 -0500 Subject: [PATCH 04/22] pstore/ftrace: Add recursion protection to the ftrace callback ANBZ: #7841 commit 6cdf941871ec9cb1cf03227038a45a73afd8dc9a upstream. If a ftrace callback does not supply its own recursion protection and does not set the RECURSION_SAFE flag in its ftrace_ops, then ftrace will make a helper trampoline to do so before calling the callback instead of just calling the callback directly. The default for ftrace_ops is going to change. It will expect that handlers provide their own recursion protection, unless its ftrace_ops states otherwise. Link: https://lkml.kernel.org/r/20201028115612.990886844@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.720372267@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Thomas Meyer Reviewed-by: Kees Cook Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- fs/pstore/ftrace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 5aad3f5e5ee7..6f076e3de5b9 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -29,6 +29,7 @@ static void notrace pstore_do_ftrace(unsigned long ip, struct pt_regs *regs, struct pstore_info *psinfo) { + int bit; unsigned long flags; struct pstore_ftrace_record rec = {}; struct pstore_record record = { @@ -41,6 +42,10 @@ static void notrace pstore_do_ftrace(unsigned long ip, if (unlikely(oops_in_progress)) return; + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + local_irq_save(flags); rec.ip = ip; @@ -50,6 +55,7 @@ static void notrace pstore_do_ftrace(unsigned long ip, psinfo->write(&record); local_irq_restore(flags); + ftrace_test_recursion_unlock(bit); } static void notrace pstore_ftrace_call(unsigned long ip, -- Gitee From 9cf139991b38a4b20649b8272572770cbcfd1c41 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:40 -0500 Subject: [PATCH 05/22] kprobes/ftrace: Add recursion protection to the ftrace callback ANBZ: #7841 commit c536aa1c5b17fac1ee395932031ff7d82827f2c5 upstream. If a ftrace callback does not supply its own recursion protection and does not set the RECURSION_SAFE flag in its ftrace_ops, then ftrace will make a helper trampoline to do so before calling the callback instead of just calling the callback directly. The default for ftrace_ops is going to change. It will expect that handlers provide their own recursion protection, unless its ftrace_ops states otherwise. Link: https://lkml.kernel.org/r/20201028115613.140212174@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.944907560@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Andrew Morton Cc: Guo Ren Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: "Naveen N. Rao" Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- arch/csky/kernel/probes/ftrace.c | 12 ++++++++++-- arch/parisc/kernel/ftrace.c | 16 +++++++++++++--- arch/powerpc/kernel/kprobes-ftrace.c | 11 ++++++++++- arch/s390/kernel/ftrace.c | 16 +++++++++++++--- arch/x86/kernel/kprobes/ftrace.c | 12 ++++++++++-- 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index 5264763d05be..5eb2604fdf71 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -13,16 +13,21 @@ int arch_check_ftrace_location(struct kprobe *p) void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { + int bit; bool lr_saver = false; struct kprobe *p; struct kprobe_ctlblk *kcb; - /* Preempt is disabled by ftrace */ + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + + preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); if (unlikely(!p) || kprobe_disabled(p)) - return; + goto out; lr_saver = true; } @@ -56,6 +61,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, */ __this_cpu_write(current_kprobe, NULL); } +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 63e3ecb9da81..13d85042810a 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -207,14 +207,21 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe_ctlblk *kcb; - struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip); + struct kprobe *p; + int bit; - if (unlikely(!p) || kprobe_disabled(p)) + bit = ftrace_test_recursion_trylock(); + if (bit < 0) return; + preempt_disable_notrace(); + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto out; + if (kprobe_running()) { kprobes_inc_nmissed_count(p); - return; + goto out; } __this_cpu_write(current_kprobe, p); @@ -235,6 +242,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } } __this_cpu_write(current_kprobe, NULL); +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 972cb28174b2..5df8d50c65ae 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -18,10 +18,16 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, { struct kprobe *p; struct kprobe_ctlblk *kcb; + int bit; + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + + preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) - return; + goto out; kcb = get_kprobe_ctlblk(); if (kprobe_running()) { @@ -52,6 +58,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, */ __this_cpu_write(current_kprobe, NULL); } +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 923ecccae306..8b05efe4ce96 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,14 +203,21 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe_ctlblk *kcb; - struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip); + struct kprobe *p; + int bit; - if (unlikely(!p) || kprobe_disabled(p)) + bit = ftrace_test_recursion_trylock(); + if (bit < 0) return; + preempt_disable_notrace(); + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto out; + if (kprobe_running()) { kprobes_inc_nmissed_count(p); - return; + goto out; } __this_cpu_write(current_kprobe, p); @@ -230,6 +237,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } } __this_cpu_write(current_kprobe, NULL); +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 681a4b36e9bb..a40a6cdfcca3 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -18,11 +18,16 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, { struct kprobe *p; struct kprobe_ctlblk *kcb; + int bit; - /* Preempt is disabled by ftrace */ + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + + preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - return; + goto out; kcb = get_kprobe_ctlblk(); if (kprobe_running()) { @@ -52,6 +57,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, */ __this_cpu_write(current_kprobe, NULL); } +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); -- Gitee From 801c45bd65b97254326d7a8ec4874cf96d0cd629 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:41 -0500 Subject: [PATCH 06/22] livepatch/ftrace: Add recursion protection to the ftrace callback ANBZ: #7841 commit 13f3ea9a2c829f28610bb8772a8b9c328412930e upstream. If a ftrace callback does not supply its own recursion protection and does not set the RECURSION_SAFE flag in its ftrace_ops, then ftrace will make a helper trampoline to do so before calling the callback instead of just calling the callback directly. The default for ftrace_ops is going to change. It will expect that handlers provide their own recursion protection, unless its ftrace_ops states otherwise. Link: https://lkml.kernel.org/r/20201028115613.291169246@goodmis.org Link: https://lkml.kernel.org/r/20201106023547.122802424@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Joe Lawrence Cc: live-patching@vger.kernel.org Reviewed-by: Petr Mladek Acked-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/livepatch/patch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 10ee4c908f3e..1da27d06057c 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -44,9 +44,13 @@ static void notrace klp_ftrace_handler(unsigned long ip, { struct klp_ops *ops; struct klp_func *func; + int bit; ops = container_of(fops, struct klp_ops, fops); + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; /* * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). @@ -123,6 +127,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, unlock: preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } /* -- Gitee From 66edd69b39c754584ba6b86983db1d23154ed1fc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:42 -0500 Subject: [PATCH 07/22] livepatch: Trigger WARNING if livepatch function fails due to recursion ANBZ: #7841 commit 4b750b573c5b3ee10e33c1573eaa94a9dad62f19 upstream. If for some reason a function is called that triggers the recursion detection of live patching, trigger a warning. By not executing the live patch code, it is possible that the old unpatched function will be called placing the system into an unknown state. Link: https://lore.kernel.org/r/20201029145709.GD16774@alley Link: https://lkml.kernel.org/r/20201106023547.312639435@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Joe Lawrence Cc: live-patching@vger.kernel.org Suggested-by: Miroslav Benes Reviewed-by: Petr Mladek Acked-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/livepatch/patch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 1da27d06057c..f34da27e3261 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -49,7 +49,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); bit = ftrace_test_recursion_trylock(); - if (bit < 0) + if (WARN_ON_ONCE(bit < 0)) return; /* * A variant of synchronize_rcu() is used to allow patching functions -- Gitee From a59a25572eb9ba9edf31acc613b27d50c7700450 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:43 -0500 Subject: [PATCH 08/22] perf/ftrace: Add recursion protection to the ftrace callback ANBZ: #7841 commit 5d15a624c34b11c8d1c04c8cc004782e7ac2888d upstream. If a ftrace callback does not supply its own recursion protection and does not set the RECURSION_SAFE flag in its ftrace_ops, then ftrace will make a helper trampoline to do so before calling the callback instead of just calling the callback directly. The default for ftrace_ops is going to change. It will expect that handlers provide their own recursion protection, unless its ftrace_ops states otherwise. Link: https://lkml.kernel.org/r/20201028115613.444477858@goodmis.org Link: https://lkml.kernel.org/r/20201106023547.466892083@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/trace/trace_event_perf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0443dd61667b..2a361fcab821 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -436,10 +436,15 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct hlist_head head; struct pt_regs regs; int rctx; + int bit; if ((unsigned long)ops->private != smp_processor_id()) return; + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + event = container_of(ops, struct perf_event, ftrace_ops); /* @@ -460,13 +465,15 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx); if (!entry) - return; + goto out; entry->ip = ip; entry->parent_ip = parent_ip; perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, 1, ®s, &head, NULL); +out: + ftrace_test_recursion_unlock(bit); #undef ENTRY_SIZE } -- Gitee From d29b4dbb350500caafb35317a0b879a9c92901d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:44 -0500 Subject: [PATCH 09/22] perf/ftrace: Check for rcu_is_watching() in callback function ANBZ: #7841 commit 5d029b035bf112466541b844ee1b86197936db65 upstream. If a ftrace callback requires "rcu_is_watching", then it adds the FTRACE_OPS_FL_RCU flag and it will not be called if RCU is not "watching". But this means that it will use a trampoline when called, and this slows down the function tracing a tad. By checking rcu_is_watching() from within the callback, it no longer needs the RCU flag set in the ftrace_ops and it can be safely called directly. Link: https://lkml.kernel.org/r/20201028115613.591878956@goodmis.org Link: https://lkml.kernel.org/r/20201106023547.711035826@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/trace/trace_event_perf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 2a361fcab821..e70f19c8acfe 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -438,6 +438,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, int rctx; int bit; + if (!rcu_is_watching()) + return; + if ((unsigned long)ops->private != smp_processor_id()) return; @@ -481,7 +484,6 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags = FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; ops->private = (void *)(unsigned long)nr_cpu_ids; -- Gitee From 978360a87ec726ae4107a9b250cc50cc8daee53c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:45 -0500 Subject: [PATCH 10/22] ftrace: Reverse what the RECURSION flag means in the ftrace_ops ANBZ: #7841 commit a25d036d939a30623ff73ecad9c8b9116b02e823 upstream. Now that all callbacks are recursion safe, reverse the meaning of the RECURSION flag and rename it from RECURSION_SAFE to simply RECURSION. Now only callbacks that request to have recursion protecting it will have the added trampoline to do so. Also remove the outdated comment about "PER_CPU" when determining to use the ftrace_ops_assist_func. Link: https://lkml.kernel.org/r/20201028115613.742454631@goodmis.org Link: https://lkml.kernel.org/r/20201106023547.904270143@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Jonathan Corbet Cc: Sebastian Andrzej Siewior Cc: Miroslav Benes Cc: Kamalesh Babulal Cc: Petr Mladek Cc: linux-doc@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- Documentation/trace/ftrace-uses.rst | 82 +++++++++++++++++++++-------- include/linux/ftrace.h | 12 ++--- kernel/trace/fgraph.c | 3 +- kernel/trace/ftrace.c | 20 ++++--- kernel/trace/trace_events.c | 1 - kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_selftest.c | 7 +-- kernel/trace/trace_stack.c | 1 - 8 files changed, 79 insertions(+), 49 deletions(-) diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index a4955f7e3d19..86cd14b8e126 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -30,8 +30,8 @@ The ftrace context This requires extra care to what can be done inside a callback. A callback can be called outside the protective scope of RCU. -The ftrace infrastructure has some protections against recursions and RCU -but one must still be very careful how they use the callbacks. +There are helper functions to help against recursion, and making sure +RCU is watching. These are explained below. The ftrace_ops structure @@ -108,6 +108,50 @@ The prototype of the callback function is as follows (as of v4.14): at the start of the function where ftrace was tracing. Otherwise it either contains garbage, or NULL. +Protect your callback +===================== + +As functions can be called from anywhere, and it is possible that a function +called by a callback may also be traced, and call that same callback, +recursion protection must be used. There are two helper functions that +can help in this regard. If you start your code with: + + int bit; + + bit = ftrace_test_recursion_trylock(); + if (bit < 0) + return; + +and end it with: + + ftrace_test_recursion_unlock(bit); + +The code in between will be safe to use, even if it ends up calling a +function that the callback is tracing. Note, on success, +ftrace_test_recursion_trylock() will disable preemption, and the +ftrace_test_recursion_unlock() will enable it again (if it was previously +enabled). + +Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops +(as explained below), then a helper trampoline will be used to test +for recursion for the callback and no recursion test needs to be done. +But this is at the expense of a slightly more overhead from an extra +function call. + +If your callback accesses any data or critical section that requires RCU +protection, it is best to make sure that RCU is "watching", otherwise +that data or critical section will not be protected as expected. In this +case add: + + if (!rcu_is_watching()) + return; + +Alternatively, if the FTRACE_OPS_FL_RCU flag is set on the ftrace_ops +(as explained below), then a helper trampoline will be used to test +for rcu_is_watching for the callback and no other test needs to be done. +But this is at the expense of a slightly more overhead from an extra +function call. + The ftrace FLAGS ================ @@ -128,26 +172,20 @@ FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED will not fail with this flag set. But the callback must check if regs is NULL or not to determine if the architecture supports it. -FTRACE_OPS_FL_RECURSION_SAFE - By default, a wrapper is added around the callback to - make sure that recursion of the function does not occur. That is, - if a function that is called as a result of the callback's execution - is also traced, ftrace will prevent the callback from being called - again. But this wrapper adds some overhead, and if the callback is - safe from recursion, it can set this flag to disable the ftrace - protection. - - Note, if this flag is set, and recursion does occur, it could cause - the system to crash, and possibly reboot via a triple fault. - - It is OK if another callback traces a function that is called by a - callback that is marked recursion safe. Recursion safe callbacks - must never trace any function that are called by the callback - itself or any nested functions that those functions call. - - If this flag is set, it is possible that the callback will also - be called with preemption enabled (when CONFIG_PREEMPTION is set), - but this is not guaranteed. +FTRACE_OPS_FL_RECURSION + By default, it is expected that the callback can handle recursion. + But if the callback is not that worried about overehead, then + setting this bit will add the recursion protection around the + callback by calling a helper function that will do the recursion + protection and only call the callback if it did not recurse. + + Note, if this flag is not set, and recursion does occur, it could + cause the system to crash, and possibly reboot via a triple fault. + + Not, if this flag is set, then the callback will always be called + with preemption disabled. If it is not set, then it is possible + (but not guaranteed) that the callback will be called in + preemptable context. FTRACE_OPS_FL_IPMODIFY Requires FTRACE_OPS_FL_SAVE_REGS set. If the callback is to "hijack" diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 01a0ea9e1d58..46e3a7c7ee83 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -105,7 +105,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. - * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and + * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION, STUB and * IPMODIFY are a kind of attribute flags which can be set only before * registering the ftrace_ops, and can not be modified while registered. * Changing those attribute flags after registering ftrace_ops will @@ -128,10 +128,10 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * passing regs to the handler. * Note, if this flag is set, the SAVE_REGS flag will automatically * get set upon registering the ftrace_ops, if the arch supports it. - * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure - * that the call back has its own recursion protection. If it does - * not set this, then the ftrace infrastructure will add recursion - * protection for the caller. + * RECURSION - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back needs recursion protection. If it does + * not set this, then the ftrace infrastructure will assume + * that the callback can handle recursion on its own. * STUB - The ftrace_ops is just a place holder. * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) @@ -163,7 +163,7 @@ enum { FTRACE_OPS_FL_DYNAMIC = BIT(1), FTRACE_OPS_FL_SAVE_REGS = BIT(2), FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = BIT(3), - FTRACE_OPS_FL_RECURSION_SAFE = BIT(4), + FTRACE_OPS_FL_RECURSION = BIT(4), FTRACE_OPS_FL_STUB = BIT(5), FTRACE_OPS_FL_INITIALIZED = BIT(6), FTRACE_OPS_FL_DELETED = BIT(7), diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index a58da91eadb5..29a6ebeebc9e 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -334,8 +334,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, static struct ftrace_ops graph_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 80d30d581ad7..6da12bb6f801 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -80,7 +80,7 @@ enum { struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, + .flags = FTRACE_OPS_FL_STUB, INIT_OPS_HASH(ftrace_list_end) }; @@ -866,7 +866,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_INITIALIZED, INIT_OPS_HASH(ftrace_profile_ops) }; @@ -1041,8 +1041,7 @@ struct ftrace_ops global_ops = { .local_hash.notrace_hash = EMPTY_HASH, .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID, }; @@ -2409,7 +2408,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops direct_ops = { .func = call_direct_funcs, - .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE + .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT, /* @@ -6939,8 +6938,7 @@ void ftrace_init_trace_array(struct trace_array *tr) struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID, }; @@ -7098,11 +7096,11 @@ NOKPROBE_SYMBOL(ftrace_ops_assist_func); ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) { /* - * If the function does not handle recursion, needs to be RCU safe, - * or does per cpu logic, then we need to call the assist handler. + * If the function does not handle recursion or needs to be RCU safe, + * then we need to call the assist handler. */ - if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || - ops->flags & FTRACE_OPS_FL_RCU) + if (ops->flags & (FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_RCU)) return ftrace_ops_assist_func; return ops->func; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 065ba69bde5e..fa2b953c2dfb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3723,7 +3723,6 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static __init void event_trace_self_test_with_function(void) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5107534cfb5b..de4ce056f295 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -48,7 +48,7 @@ int ftrace_allocate_ftrace_ops(struct trace_array *tr) /* Currently only the non stack version is supported */ ops->func = function_trace_call; - ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID; + ops->flags = FTRACE_OPS_FL_PID; tr->ops = ops; ops->private = tr; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 6f28b8b11ead..ea66abb12725 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -150,17 +150,14 @@ static void trace_selftest_test_dyn_func(unsigned long ip, static struct ftrace_ops test_probe1 = { .func = trace_selftest_test_probe1_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe2 = { .func = trace_selftest_test_probe2_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe3 = { .func = trace_selftest_test_probe3_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static void print_counts(void) @@ -448,11 +445,11 @@ static void trace_selftest_test_recursion_safe_func(unsigned long ip, static struct ftrace_ops test_rec_probe = { .func = trace_selftest_test_recursion_func, + .flags = FTRACE_OPS_FL_RECURSION, }; static struct ftrace_ops test_recsafe_probe = { .func = trace_selftest_test_recursion_safe_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int @@ -561,7 +558,7 @@ static void trace_selftest_test_regs_func(unsigned long ip, static struct ftrace_ops test_regs_probe = { .func = trace_selftest_test_regs_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS, + .flags = FTRACE_OPS_FL_SAVE_REGS, }; static int diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 464fe0a7a032..dcb5c4d36494 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -318,7 +318,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static ssize_t -- Gitee From e4729266ebf343583d420fdf2bc5f29375d0e066 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:46 -0500 Subject: [PATCH 11/22] ftrace: Add recording of functions that caused recursion ANBZ: #7841 commit 773c16705058e9be7b0f4ce124e89cd231c120a2 upstream. This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file "recursed_functions" all the functions that caused recursion while a callback to the function tracer was running. Link: https://lkml.kernel.org/r/20201106023548.102375687@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Guo Ren Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Joe Lawrence Cc: Kamalesh Babulal Cc: Mauro Carvalho Chehab Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: live-patching@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) [dtcccc: Since we've already had 021b6d11e590 ("tracing: Have all levels of checks prevent recursion") on stable branch, drop the code about TRACE_FTRACE_MAX and TRACE_LIST_MAX. kernel/trace/trace_recursion_record.c has already been created by 2cad40f10a86 ("tracing: Disable "other" permission bits in the tracefs files" on stable branch, so just ignore it.] Signed-off-by: Tianchen Ding --- Documentation/trace/ftrace-uses.rst | 6 ++++-- arch/csky/kernel/probes/ftrace.c | 2 +- arch/parisc/kernel/ftrace.c | 2 +- arch/powerpc/kernel/kprobes-ftrace.c | 2 +- arch/s390/kernel/ftrace.c | 2 +- arch/x86/kernel/kprobes/ftrace.c | 2 +- fs/pstore/ftrace.c | 2 +- include/linux/trace_recursion.h | 29 ++++++++++++++++++++++++---- kernel/livepatch/patch.c | 2 +- kernel/trace/Kconfig | 25 ++++++++++++++++++++++++ kernel/trace/Makefile | 1 + kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_output.c | 6 +++--- kernel/trace/trace_output.h | 1 + 16 files changed, 70 insertions(+), 20 deletions(-) diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index 86cd14b8e126..5981d5691745 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -118,7 +118,7 @@ can help in this regard. If you start your code with: int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; @@ -130,7 +130,9 @@ The code in between will be safe to use, even if it ends up calling a function that the callback is tracing. Note, on success, ftrace_test_recursion_trylock() will disable preemption, and the ftrace_test_recursion_unlock() will enable it again (if it was previously -enabled). +enabled). The instruction pointer (ip) and its parent (parent_ip) is passed to +ftrace_test_recursion_trylock() to record where the recursion happened +(if CONFIG_FTRACE_RECORD_RECURSION is set). Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops (as explained below), then a helper trampoline will be used to test diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index 5eb2604fdf71..f30b179924ef 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -18,7 +18,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; struct kprobe_ctlblk *kcb; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 13d85042810a..1c5d3732bda2 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -210,7 +210,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 5df8d50c65ae..fdfee39938ea 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct kprobe_ctlblk *kcb; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 8b05efe4ce96..93ab49213642 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -206,7 +206,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index a40a6cdfcca3..954d930a7127 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe_ctlblk *kcb; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 6f076e3de5b9..1ff6846d5955 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -42,7 +42,7 @@ static void notrace pstore_do_ftrace(unsigned long ip, if (unlikely(oops_in_progress)) return; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 1fae022acd53..ccd362de94d9 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -75,6 +75,9 @@ enum { * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, + + /* Used to prevent recursion recording from recursing. */ + TRACE_RECORD_RECURSION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -125,7 +128,22 @@ static __always_inline int trace_get_context_bit(void) pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } -static __always_inline int trace_test_and_set_recursion(int start) +#ifdef CONFIG_FTRACE_RECORD_RECURSION +extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); +# define do_ftrace_record_recursion(ip, pip) \ + do { \ + if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \ + trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \ + ftrace_record_recursion(ip, pip); \ + trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \ + } \ + } while (0) +#else +# define do_ftrace_record_recursion(ip, pip) do { } while (0) +#endif + +static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, + int start) { unsigned int val = current->trace_recursion; int bit; @@ -137,8 +155,10 @@ static __always_inline int trace_test_and_set_recursion(int start) * a switch between contexts. Allow for a single recursion. */ bit = start + TRACE_CTX_TRANSITION; - if (trace_recursion_test(bit)) + if (trace_recursion_test(bit)) { + do_ftrace_record_recursion(ip, pip); return -1; + } trace_recursion_set(bit); barrier(); return bit; @@ -171,9 +191,10 @@ static __always_inline void trace_clear_recursion(int bit) * Returns: -1 if a recursion happened. * >= 0 if no recursion */ -static __always_inline int ftrace_test_recursion_trylock(void) +static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, + unsigned long parent_ip) { - return trace_test_and_set_recursion(TRACE_FTRACE_START); + return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START); } /** diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index f34da27e3261..1eafde5be55b 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -48,7 +48,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (WARN_ON_ONCE(bit < 0)) return; /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index dcec62d1d844..12b55ca7cf87 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -802,6 +802,31 @@ config TRACE_EVAL_MAP_FILE If unsure, say N. +config FTRACE_RECORD_RECURSION + bool "Record functions that recurse in function tracing" + depends on FUNCTION_TRACER + help + All callbacks that attach to the function tracing have some sort + of protection against recursion. Even though the protection exists, + it adds overhead. This option will create a file in the tracefs + file system called "recursed_functions" that will list the functions + that triggered a recursion. + + This will add more overhead to cases that have recursion. + + If unsure, say N + +config FTRACE_RECORD_RECURSION_SIZE + int "Max number of recursed functions to record" + default 128 + depends on FTRACE_RECORD_RECURSION + help + This defines the limit of number of functions that can be + listed in the "recursed_functions" file, that lists all + the functions that caused a recursion to happen. + This file can be reset, but the limit can not change in + size at runtime. + config GCOV_PROFILE_FTRACE bool "Enable GCOV profiling on ftrace subsystem" depends on GCOV_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 1226a7c80532..967ac0b6558a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o +obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6da12bb6f801..b9e7d3608a86 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6993,7 +6993,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); if (bit < 0) return; @@ -7068,7 +7068,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); if (bit < 0) return; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index e70f19c8acfe..5471adf04326 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -444,7 +444,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, if ((unsigned long)ops->private != smp_processor_id()) return; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index de4ce056f295..991608aa9421 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -140,7 +140,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (unlikely(!tr->function_enabled)) return; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 7042544c5bde..8f0f78a80d70 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name) } #endif /* CONFIG_KRETPROBES */ -static void -seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) +void +trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; @@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) goto out; } - seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET); + trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET); if (sym_flags & TRACE_ITER_SYM_ADDR) trace_seq_printf(s, " <" IP_FMT ">", ip); diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 2f742b74e7e6..4c954636caf0 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -16,6 +16,7 @@ extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); +extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset); extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); -- Gitee From 52307ea9cec8c54c1239d0b16ccd3cc9d5cc5e1a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 08:19:24 -0400 Subject: [PATCH 12/22] fgraph: Make overruns 4 bytes in graph stack structure ANBZ: #7841 commit 60602cb549f1965a7edbc96026760dfb93911fab upstream. Inspecting the data structures of the function graph tracer, I found that the overrun value is unsigned long, which is 8 bytes on a 64 bit machine, and not only that, the depth is an int (4 bytes). The overrun can be simply an unsigned int (4 bytes) and pack the ftrace_graph_ret structure better. The depth is moved up next to the func, as it is used more often with func, and improves cache locality. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- include/linux/ftrace.h | 4 ++-- kernel/trace/trace_entries.h | 4 ++-- kernel/trace/trace_functions_graph.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 46e3a7c7ee83..ac94a0eaf88b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -873,11 +873,11 @@ struct ftrace_graph_ent { */ struct ftrace_graph_ret { unsigned long func; /* Current function */ + int depth; /* Number of functions that overran the depth limit for current task */ - unsigned long overrun; + unsigned int overrun; unsigned long long calltime; unsigned long long rettime; - int depth; } __packed; /* Type of the callback handlers for tracing function graph*/ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 89bc02efe058..9e4155b751a1 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -93,10 +93,10 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, F_STRUCT( __field_struct( struct ftrace_graph_ret, ret ) __field_packed( unsigned long, ret, func ) - __field_packed( unsigned long, ret, overrun ) + __field_packed( int, ret, depth ) + __field_packed( unsigned int, ret, overrun ) __field_packed( unsigned long long, ret, calltime) __field_packed( unsigned long long, ret, rettime ) - __field_packed( int, ret, depth ) ), F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d", diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 677538c3b9ec..30f04ebe3879 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -955,7 +955,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, /* Overrun */ if (flags & TRACE_GRAPH_PRINT_OVERRUN) - trace_seq_printf(s, " (Overruns: %lu)\n", + trace_seq_printf(s, " (Overruns: %u)\n", trace->overrun); print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, -- Gitee From adc46228651cd183bd5bd2dbf71852f511b87a0a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 30 Oct 2020 17:21:00 -0400 Subject: [PATCH 13/22] ftrace: Clean up the recursion code a bit ANBZ: #7841 commit 7b68621f8d16689cbb4203aceaca86ffb165f1d0 upstream. In trace_test_and_set_recursion(), current->trace_recursion is placed into a variable, and that variable should be used for the processing, as there's no reason to dereference current multiple times. On trace_clear_recursion(), current->trace_recursion is modified and there's no reason to copy it over to a variable. Signed-off-by: Steven Rostedt (VMware) [dtcccc: Refer to the fix by commit 021b6d11e590 ("tracing: Have all levels of checks prevent recursion") on stable branch.] Signed-off-by: Tianchen Ding --- include/linux/trace_recursion.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index ccd362de94d9..69f146be898a 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -145,7 +145,7 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start) { - unsigned int val = current->trace_recursion; + unsigned int val = READ_ONCE(current->trace_recursion); int bit; bit = trace_get_context_bit() + start; @@ -155,7 +155,7 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign * a switch between contexts. Allow for a single recursion. */ bit = start + TRACE_CTX_TRANSITION; - if (trace_recursion_test(bit)) { + if (val & (1 << bit)) { do_ftrace_record_recursion(ip, pip); return -1; } @@ -173,13 +173,8 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign static __always_inline void trace_clear_recursion(int bit) { - unsigned int val = current->trace_recursion; - - bit = 1 << bit; - val &= ~bit; - barrier(); - current->trace_recursion = val; + trace_recursion_clear(bit); } /** -- Gitee From ab71d0b042db9e2d8a71c6a705df78478539aeb7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 2 Nov 2020 14:43:10 -0500 Subject: [PATCH 14/22] ring-buffer: Add recording of ring buffer recursion into recursed_functions ANBZ: #7841 commit 28575c61ea602537a3d86fe301a53554e59452ae upstream. Add a new config RING_BUFFER_RECORD_RECURSION that will place functions that recurse from the ring buffer into the ftrace recused_functions file. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/trace/Kconfig | 14 ++++++++++++++ kernel/trace/ring_buffer.c | 12 +++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 12b55ca7cf87..dc3b1b634b6c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -827,6 +827,20 @@ config FTRACE_RECORD_RECURSION_SIZE This file can be reset, but the limit can not change in size at runtime. +config RING_BUFFER_RECORD_RECURSION + bool "Record functions that recurse in the ring buffer" + depends on FTRACE_RECORD_RECURSION + # default y, because it is coupled with FTRACE_RECORD_RECURSION + default y + help + The ring buffer has its own internal recursion. Although when + recursion happens it wont cause harm because of the protection, + but it does cause an unwanted overhead. Enabling this option will + place where recursion was detected into the ftrace "recursed_functions" + file. + + This will add more overhead to cases that have recursion. + config GCOV_PROFILE_FTRACE bool "Enable GCOV profiling on ftrace subsystem" depends on GCOV_KERNEL diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1978ebf69ecc..f2e466dbe8e1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4,6 +4,7 @@ * * Copyright (C) 2008 Steven Rostedt */ +#include #include #include #include @@ -3026,6 +3027,13 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) irq_work_queue(&cpu_buffer->irq_work.work); } +#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION +# define do_ring_buffer_record_recursion() \ + do_ftrace_record_recursion(_THIS_IP_, _RET_IP_) +#else +# define do_ring_buffer_record_recursion() do { } while (0) +#endif + /* * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified @@ -3108,8 +3116,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) * been updated yet. In this case, use the TRANSITION bit. */ bit = RB_CTX_TRANSITION; - if (val & (1 << (bit + cpu_buffer->nest))) + if (val & (1 << (bit + cpu_buffer->nest))) { + do_ring_buffer_record_recursion(); return 1; + } } val |= (1 << (bit + cpu_buffer->nest)); -- Gitee From ebbcedfd1c7f96d79258b11f278b14cabed2ae51 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 6 Nov 2020 22:54:46 +0800 Subject: [PATCH 15/22] ftrace: Remove unused varible 'ret' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #7841 commit 045e269c1eb2db5b5df9e034af617af8f4c1b35c upstream. 'ret' in 2 functions are not used. and one of them is a void function. So remove them to avoid gcc warning: kernel/trace/ftrace.c:4166:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] kernel/trace/ftrace.c:5571:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] Link: https://lkml.kernel.org/r/1604674486-52350-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/trace/ftrace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b9e7d3608a86..c1074993a2ca 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4190,7 +4190,6 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, struct ftrace_hash **orig_hash, *new_hash; LIST_HEAD(process_mods); char *func; - int ret; mutex_lock(&ops->func_hash->regex_lock); @@ -4243,7 +4242,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, mutex_lock(&ftrace_lock); - ret = ftrace_hash_move_and_update_ops(ops, orig_hash, + ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); mutex_unlock(&ftrace_lock); @@ -5630,7 +5629,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) struct ftrace_hash **orig_hash; struct trace_parser *parser; int filter_hash; - int ret; if (file->f_mode & FMODE_READ) { iter = m->private; @@ -5661,7 +5659,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); - ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, + ftrace_hash_move_and_update_ops(iter->ops, orig_hash, iter->hash, filter_hash); mutex_unlock(&ftrace_lock); } else { -- Gitee From 894f8844b97a23576251403212b223254841df45 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:42:17 -0400 Subject: [PATCH 16/22] ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs ANBZ: #7841 commit d19ad0775dcd64b49eecf4fa79c17959ebfbd26b upstream. In preparation to have arguments of a function passed to callbacks attached to functions as default, change the default callback prototype to receive a struct ftrace_regs as the forth parameter instead of a pt_regs. For callbacks that set the FL_SAVE_REGS flag in their ftrace_ops flags, they will now need to get the pt_regs via a ftrace_get_regs() helper call. If this is called by a callback that their ftrace_ops did not have a FL_SAVE_REGS flag set, it that helper function will return NULL. This will allow the ftrace_regs to hold enough just to get the parameters and stack pointer, but without the worry that callbacks may have a pt_regs that is not completely filled. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) [dtcccc: Fix conflicts due to 89dc5218d30bd ("anolis: pstore: add multibackends support"). To workaround with KABI, modify the typedef about ftrace_func_t. This will not break KABI.] Signed-off-by: Tianchen Ding --- arch/csky/kernel/probes/ftrace.c | 4 +++- arch/nds32/kernel/ftrace.c | 4 ++-- arch/parisc/kernel/ftrace.c | 8 +++++--- arch/powerpc/kernel/kprobes-ftrace.c | 4 +++- arch/s390/kernel/ftrace.c | 4 +++- arch/x86/kernel/kprobes/ftrace.c | 3 ++- fs/pstore/ftrace.c | 6 +++--- include/linux/ftrace.h | 18 +++++++++++++++++- include/linux/kprobes.h | 2 +- kernel/livepatch/patch.c | 3 ++- kernel/trace/ftrace.c | 27 +++++++++++++++------------ kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_functions.c | 9 ++++----- kernel/trace/trace_irqsoff.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 20 +++++++++++--------- kernel/trace/trace_stack.c | 2 +- 18 files changed, 76 insertions(+), 46 deletions(-) diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index f30b179924ef..ae2b1c7b3b5c 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -11,17 +11,19 @@ int arch_check_ftrace_location(struct kprobe *p) /* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { int bit; bool lr_saver = false; struct kprobe *p; struct kprobe_ctlblk *kcb; + struct pt_regs *regs; bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 3763b3f8c3db..414f8a780cc3 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -10,7 +10,7 @@ extern void (*ftrace_trace_function)(unsigned long, unsigned long, extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { __asm__ (""); /* avoid to optimize as pure function */ } @@ -38,7 +38,7 @@ EXPORT_SYMBOL(_mcount); #else /* CONFIG_DYNAMIC_FTRACE */ noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { __asm__ (""); /* avoid to optimize as pure function */ } diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 1c5d3732bda2..0a1e75af5382 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -51,7 +51,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent, void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3, - struct pt_regs *regs) + struct ftrace_regs *fregs) { #ifndef CONFIG_DYNAMIC_FTRACE extern ftrace_func_t ftrace_trace_function; @@ -61,7 +61,7 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, if (function_trace_op->flags & FTRACE_OPS_FL_ENABLED && ftrace_trace_function != ftrace_stub) ftrace_trace_function(self_addr, parent, - function_trace_op, regs); + function_trace_op, fregs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (dereference_function_descriptor(ftrace_graph_return) != @@ -204,9 +204,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, #ifdef CONFIG_KPROBES_ON_FTRACE void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe_ctlblk *kcb; + struct pt_regs *regs; struct kprobe *p; int bit; @@ -214,6 +215,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index fdfee39938ea..660138f6c4b2 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -14,16 +14,18 @@ /* Ftrace callback handler for kprobes */ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe *p; struct kprobe_ctlblk *kcb; + struct pt_regs *regs; int bit; bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 93ab49213642..979274373915 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -200,9 +200,10 @@ int ftrace_disable_ftrace_graph_caller(void) #ifdef CONFIG_KPROBES_ON_FTRACE void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe_ctlblk *kcb; + struct pt_regs *regs; struct kprobe *p; int bit; @@ -210,6 +211,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 954d930a7127..373e5fa3ce1f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -14,8 +14,9 @@ /* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct kprobe *p; struct kprobe_ctlblk *kcb; int bit; diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 1ff6846d5955..564d1f4309f8 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -26,7 +26,7 @@ static u64 pstore_ftrace_stamp; static void notrace pstore_do_ftrace(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, - struct pt_regs *regs, + struct ftrace_regs *fregs, struct pstore_info *psinfo) { int bit; @@ -61,14 +61,14 @@ static void notrace pstore_do_ftrace(unsigned long ip, static void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, - struct pt_regs *regs) + struct ftrace_regs *fregs) { struct pstore_info_list *entry; rcu_read_lock(); list_for_each_entry_rcu(entry, &psback->list_entry, list) if (entry->psi->flags & PSTORE_FLAGS_FTRACE) - pstore_do_ftrace(ip, parent_ip, op, regs, entry->psi); + pstore_do_ftrace(ip, parent_ip, op, fregs, entry->psi); rcu_read_unlock(); } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ac94a0eaf88b..88beb8c5f410 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -97,8 +97,24 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) +{ + if (!fregs) + return NULL; + + return &fregs->regs; +} + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, +#ifndef __GENKSYMS__ + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else struct ftrace_ops *op, struct pt_regs *regs); +#endif ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); @@ -268,7 +284,7 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index f05dbc2cdd06..5fcffff7b193 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -350,7 +350,7 @@ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs); + struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 1eafde5be55b..ffde488e70a7 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -40,8 +40,9 @@ struct klp_ops *klp_find_ops(void *old_func) static void notrace klp_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *fops, - struct pt_regs *regs) + struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct klp_ops *ops; struct klp_func *func; int bit; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c1074993a2ca..e43e6c8cc7dd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -121,7 +121,7 @@ struct ftrace_ops global_ops; #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* See comment below, where ftrace_ops_list_func is defined */ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); @@ -140,7 +140,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; int pid; @@ -154,7 +154,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, return; } - op->saved_func(ip, parent_ip, op, regs); + op->saved_func(ip, parent_ip, op, fregs); } static void ftrace_sync_ipi(void *data) @@ -754,7 +754,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) static void function_profile_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -2152,6 +2152,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) else rec->flags &= ~FTRACE_FL_TRAMP_EN; } + if (flag & FTRACE_FL_DIRECT) { /* * If there's only one user (direct_ops helper) @@ -2395,8 +2396,9 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) } static void call_direct_funcs(unsigned long ip, unsigned long pip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); unsigned long addr; addr = ftrace_find_rec_direct(ip); @@ -4320,7 +4322,7 @@ static int __init ftrace_mod_cmd_init(void) core_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct ftrace_probe_ops *probe_ops; struct ftrace_func_probe *probe; @@ -6986,8 +6988,9 @@ void ftrace_reset_array_ops(struct trace_array *tr) static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ignored, struct pt_regs *regs) + struct ftrace_ops *ignored, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct ftrace_ops *op; int bit; @@ -7020,7 +7023,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, pr_warn("op=%p %pS\n", op, op); goto out; } - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); } } while_for_each_ftrace_op(op); out: @@ -7043,9 +7046,9 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { - __ftrace_ops_list_func(ip, parent_ip, NULL, regs); + __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } NOKPROBE_SYMBOL(ftrace_ops_list_func); #else @@ -7062,7 +7065,7 @@ NOKPROBE_SYMBOL(ftrace_ops_no_ops); * this function will be called by the mcount trampoline. */ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { int bit; @@ -7073,7 +7076,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); preempt_enable_notrace(); trace_clear_recursion(bit); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 5471adf04326..288ad2c274fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -429,7 +429,7 @@ NOKPROBE_SYMBOL(perf_trace_buf_update); #ifdef CONFIG_FUNCTION_TRACER static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *pt_regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct ftrace_entry *entry; struct perf_event *event; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fa2b953c2dfb..cb5b18e475ca 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3687,7 +3687,7 @@ static struct trace_event_file event_trace_file __initdata; static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *regs) { struct trace_buffer *buffer; struct ring_buffer_event *event; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 991608aa9421..9f8ce31ac95e 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -23,10 +23,10 @@ static void tracing_start_function_trace(struct trace_array *tr); static void tracing_stop_function_trace(struct trace_array *tr); static void function_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); static struct tracer_flags func_flags; /* Our option */ @@ -89,7 +89,6 @@ void ftrace_destroy_function_files(struct trace_array *tr) static int function_trace_init(struct trace_array *tr) { ftrace_func_t func; - /* * Instance trace_arrays get their ops allocated * at instance creation. Unless it failed @@ -129,7 +128,7 @@ static void function_trace_start(struct trace_array *tr) static void function_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; struct trace_array_cpu *data; @@ -176,7 +175,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index f11add83c108..590b3d51afae 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -138,7 +138,7 @@ static int func_prolog_dec(struct trace_array *tr, */ static void irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index f1c603358ff3..e5778d1d7a5b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -209,7 +209,7 @@ static void wakeup_print_header(struct seq_file *s) */ static void wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index ea66abb12725..73ef12092250 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -107,7 +107,7 @@ static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_probe1_cnt++; } @@ -116,7 +116,7 @@ static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_probe2_cnt++; } @@ -125,7 +125,7 @@ static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_probe3_cnt++; } @@ -134,7 +134,7 @@ static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_global_cnt++; } @@ -143,7 +143,7 @@ static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_dyn_cnt++; } @@ -414,7 +414,7 @@ static int trace_selftest_recursion_cnt; static void trace_selftest_test_recursion_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { /* * This function is registered without the recursion safe flag. @@ -429,7 +429,7 @@ static void trace_selftest_test_recursion_func(unsigned long ip, static void trace_selftest_test_recursion_safe_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { /* * We said we would provide our own recursion. By calling @@ -548,9 +548,11 @@ static enum { static void trace_selftest_test_regs_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { - if (pt_regs) + struct pt_regs *regs = ftrace_get_regs(fregs); + + if (regs) trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND; else trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index dcb5c4d36494..5a48dba912ea 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -290,7 +290,7 @@ static void check_stack(unsigned long ip, unsigned long *stack) static void stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { unsigned long stack; -- Gitee From a3dfe7569b826b6e4361457da914da0fa722c0c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 27 Oct 2020 10:55:55 -0400 Subject: [PATCH 17/22] ftrace/x86: Allow for arguments to be passed in to ftrace_regs by default ANBZ: #7841 commit 02a474ca266a47ea8f4d5a11f4ffa120f83730ad upstream. Currently, the only way to get access to the registers of a function via a ftrace callback is to set the "FL_SAVE_REGS" bit in the ftrace_ops. But as this saves all regs as if a breakpoint were to trigger (for use with kprobes), it is expensive. The regs are already saved on the stack for the default ftrace callbacks, as that is required otherwise a function being traced will get the wrong arguments and possibly crash. And on x86, the arguments are already stored where they would be on a pt_regs structure to use that code for both the regs version of a callback, it makes sense to pass that information always to all functions. If an architecture does this (as x86_64 now does), it is to set HAVE_DYNAMIC_FTRACE_WITH_ARGS, and this will let the generic code that it could have access to arguments without having to set the flags. This also includes having the stack pointer being saved, which could be used for accessing arguments on the stack, as well as having the function graph tracer not require its own trampoline! Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- arch/x86/Kconfig | 1 + arch/x86/include/asm/ftrace.h | 15 +++++++++++++++ arch/x86/kernel/ftrace_64.S | 11 +++++++++-- include/linux/ftrace.h | 7 ++++++- kernel/trace/Kconfig | 9 +++++++++ 5 files changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2ee0e7a26b8f..2ec79e01fc8b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -174,6 +174,7 @@ config X86 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64 select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 84b9449be080..e00fe88146e0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -41,6 +41,21 @@ static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned regs->orig_ax = addr; } +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs * +arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + /* Only when FL_SAVE_REGS is set, cs will be non zero */ + if (!fregs->regs.cs) + return NULL; + return &fregs->regs; +} +#endif + #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index e3a375185a1b..214931977cec 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -140,12 +140,19 @@ SYM_FUNC_START(ftrace_caller) /* save_mcount_regs fills in first two parameters */ save_mcount_regs + /* Stack - skipping return address of ftrace_caller */ + leaq MCOUNT_REG_SIZE+8(%rsp), %rcx + movq %rcx, RSP(%rsp) + SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx - /* regs go into 4th parameter (but make it NULL) */ - movq $0, %rcx + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + + /* Only ops with REGS flag set should have CS register set */ + movq $0, CS(%rsp) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 88beb8c5f410..f2f4d637d7a4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -97,16 +97,21 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + struct ftrace_regs { struct pt_regs regs; }; +#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) { if (!fregs) return NULL; - return &fregs->regs; + return arch_ftrace_get_regs(fregs); } typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index dc3b1b634b6c..76467534db06 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -31,6 +31,15 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS bool +config HAVE_DYNAMIC_FTRACE_WITH_ARGS + bool + help + If this is set, then arguments and stack can be found from + the pt_regs passed into the function callback regs parameter + by default, even without setting the REGS flag in the ftrace_ops. + This allows for use of regs_get_kernel_argument() and + kernel_stack_pointer(). + config HAVE_FTRACE_MCOUNT_RECORD bool help -- Gitee From 3d79abad6760a8575b9e65b475f8d5297349d673 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:15:27 -0400 Subject: [PATCH 18/22] livepatch: Use the default ftrace_ops instead of REGS when ARGS is available ANBZ: #7841 commit 2860cd8a235375df3c8ec8039d9fe5eb2f658b86 upstream. When CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS is available, the ftrace call will be able to set the ip of the calling function. This will improve the performance of live kernel patching where it does not need all the regs to be stored just to change the instruction pointer. If all archs that support live kernel patching also support HAVE_DYNAMIC_FTRACE_WITH_ARGS, then the architecture specific function klp_arch_set_pc() could be made generic. It is possible that an arch can support HAVE_DYNAMIC_FTRACE_WITH_ARGS but not HAVE_DYNAMIC_FTRACE_WITH_REGS and then have access to live patching. Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: live-patching@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Acked-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) [dtcccc: We've introduced klp_arch_set_pc in commit d96aeabad932 ("ck: arm64: add livepatch support"), so apply this patch to arm64.] Signed-off-by: Tianchen Ding --- arch/arm64/include/asm/livepatch.h | 6 ++++-- arch/powerpc/include/asm/livepatch.h | 4 +++- arch/s390/include/asm/livepatch.h | 5 ++++- arch/x86/include/asm/ftrace.h | 3 +++ arch/x86/include/asm/livepatch.h | 4 ++-- arch/x86/kernel/ftrace_64.S | 4 ++++ include/linux/ftrace.h | 7 +++++++ kernel/livepatch/Kconfig | 2 +- kernel/livepatch/patch.c | 9 +++++---- 9 files changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/livepatch.h b/arch/arm64/include/asm/livepatch.h index 399c250f93b7..de0be09618b5 100644 --- a/arch/arm64/include/asm/livepatch.h +++ b/arch/arm64/include/asm/livepatch.h @@ -13,8 +13,10 @@ static inline int klp_check_compiler_support(void) return 0; } -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long pc) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long pc) { + struct pt_regs *regs = ftrace_get_regs(fregs); + regs->pc = pc + 2 * AARCH64_INSN_SIZE; } @@ -30,7 +32,7 @@ static inline int klp_check_compiler_support(void) return 1; } -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long pc) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long pc) { } diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4a3d5d25fed5..ae25e6e72997 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -12,8 +12,10 @@ #include #ifdef CONFIG_LIVEPATCH -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { + struct pt_regs *regs = ftrace_get_regs(fregs); + regs->nip = ip; } diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 818612b784cd..d578a8c76676 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -11,10 +11,13 @@ #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H +#include #include -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { + struct pt_regs *regs = ftrace_get_regs(fregs); + regs->psw.addr = ip; } diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index e00fe88146e0..9f3130f40807 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -54,6 +54,9 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return NULL; return &fregs->regs; } + +#define ftrace_instruction_pointer_set(fregs, _ip) \ + do { (fregs)->regs.ip = (_ip); } while (0) #endif #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 1fde1ab6559e..7c5cc6660e4b 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -12,9 +12,9 @@ #include #include -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - regs->ip = ip; + ftrace_instruction_pointer_set(fregs, ip); } #endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 214931977cec..31711b196279 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -157,6 +157,10 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, MCOUNT_REG_SIZE(%rsp) + restore_mcount_regs /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f2f4d637d7a4..81c123a6fc92 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -104,6 +104,13 @@ struct ftrace_regs { }; #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) +/* + * ftrace_instruction_pointer_set() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs + * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports + * live kernel patching. + */ +#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index ce50a02be75f..cf7285361e0c 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -6,7 +6,7 @@ config HAVE_LIVEPATCH config LIVEPATCH bool "Kernel Live Patching" - depends on DYNAMIC_FTRACE_WITH_REGS + depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS depends on MODULES depends on SYSFS depends on KALLSYMS_ALL diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index ffde488e70a7..f1709dbb5b52 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -42,7 +42,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, struct ftrace_ops *fops, struct ftrace_regs *fregs) { - struct pt_regs *regs = ftrace_get_regs(fregs); struct klp_ops *ops; struct klp_func *func; int bit; @@ -124,7 +123,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, if (func->nop) goto unlock; - klp_arch_set_pc(regs, (unsigned long)func->new_func); + klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: preempt_enable_notrace(); @@ -206,8 +205,10 @@ static int klp_patch_func(struct klp_func *func) return -ENOMEM; ops->fops.func = klp_ftrace_handler; - ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | - FTRACE_OPS_FL_DYNAMIC | + ops->fops.flags = FTRACE_OPS_FL_DYNAMIC | +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + FTRACE_OPS_FL_SAVE_REGS | +#endif FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_PERMANENT; -- Gitee From 94de2ac1232f8cabd1eebfc75eb9da5a06d0ce72 Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Tue, 12 Jan 2021 09:58:13 +0800 Subject: [PATCH 19/22] arm64: rename S_FRAME_SIZE to PT_REGS_SIZE ANBZ: #7841 commit 71e70184f1d1314ad56e834d1befc07daa2af8e6 upstream. S_FRAME_SIZE is the size of the pt_regs structure, no longer the size of the kernel stack frame, the name is misleading. In keeping with arm32, rename S_FRAME_SIZE to PT_REGS_SIZE. Signed-off-by: Jianlin Lv Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210112015813.2340969-1-Jianlin.Lv@arm.com Signed-off-by: Catalin Marinas [dtcccc: fix conflicts due to d93b25a66548 ("arm64: entry: Free up another register on kpti's tramp_exit path") on stable branch.] Signed-off-by: Tianchen Ding --- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/entry-ftrace.S | 12 ++++++------ arch/arm64/kernel/entry.S | 18 +++++++++--------- arch/arm64/kernel/probes/kprobes_trampoline.S | 6 +++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 6a6782da6e54..56ce123d1357 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -72,7 +72,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); - DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_COMPAT DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 67f68c9ef94c..8cf970d219f5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -35,7 +35,7 @@ */ .macro ftrace_regs_entry, allregs=0 /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(S_FRAME_SIZE + 16) + sub sp, sp, #(PT_REGS_SIZE + 16) /* Save function arguments (and x9 for simplicity) */ stp x0, x1, [sp, #S_X0] @@ -61,15 +61,15 @@ .endif /* Save the callsite's SP and LR */ - add x10, sp, #(S_FRAME_SIZE + 16) + add x10, sp, #(PT_REGS_SIZE + 16) stp x9, x10, [sp, #S_LR] /* Save the PC after the ftrace callsite */ str x30, [sp, #S_PC] /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #S_FRAME_SIZE] - add x29, sp, #S_FRAME_SIZE + stp x29, x9, [sp, #PT_REGS_SIZE] + add x29, sp, #PT_REGS_SIZE /* Create our frame record within pt_regs. */ stp x29, x30, [sp, #S_STACKFRAME] @@ -126,7 +126,7 @@ ftrace_common_return: ldr x9, [sp, #S_PC] /* Restore the callsite's SP */ - add sp, sp, #S_FRAME_SIZE + 16 + add sp, sp, #PT_REGS_SIZE + 16 ret x9 SYM_CODE_END(ftrace_common) @@ -136,7 +136,7 @@ SYM_CODE_START(ftrace_graph_caller) ldr x0, [sp, #S_PC] sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP) bl prepare_ftrace_return b ftrace_common_return SYM_CODE_END(ftrace_graph_caller) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 37651d55659e..b8b5f887f73e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -78,7 +78,7 @@ .Lskip_tramp_vectors_cleanup\@: .endif - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK /* * Test whether the SP has overflowed, without corrupting a GPR. @@ -99,7 +99,7 @@ * userspace, and can clobber EL0 registers to free up GPRs. */ - /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */ + /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */ msr tpidr_el0, x0 /* Recover the original x0 value and stash it in tpidrro_el0 */ @@ -223,7 +223,7 @@ alternative_else_nop_endif scs_load tsk, x20 .else - add x21, sp, #S_FRAME_SIZE + add x21, sp, #PT_REGS_SIZE get_current_task tsk .endif /* \el == 0 */ mrs x22, elr_el1 @@ -350,7 +350,7 @@ alternative_else_nop_endif .if \el == 0 alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp + add sp, sp, #PT_REGS_SIZE // restore sp eret alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -364,7 +364,7 @@ alternative_else_nop_endif #endif .else ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp + add sp, sp, #PT_REGS_SIZE // restore sp /* Ensure any device/NC reads complete */ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 @@ -583,12 +583,12 @@ __bad_stack: /* * Store the original GPRs to the new stack. The orginal SP (minus - * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. + * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry. */ - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE kernel_entry 1 mrs x0, tpidr_el0 - add x0, x0, #S_FRAME_SIZE + add x0, x0, #PT_REGS_SIZE str x0, [sp, #S_SP] /* Stash the regs for handle_bad_stack */ @@ -884,7 +884,7 @@ alternative_else_nop_endif .if \regsize == 64 mrs x29, far_el1 .endif - add sp, sp, #S_FRAME_SIZE // restore sp + add sp, sp, #PT_REGS_SIZE // restore sp eret sb .endm diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 890ca72c5a51..288a84e253cc 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -25,7 +25,7 @@ stp x24, x25, [sp, #S_X24] stp x26, x27, [sp, #S_X26] stp x28, x29, [sp, #S_X28] - add x0, sp, #S_FRAME_SIZE + add x0, sp, #PT_REGS_SIZE stp lr, x0, [sp, #S_LR] /* * Construct a useful saved PSTATE @@ -62,7 +62,7 @@ .endm SYM_CODE_START(kretprobe_trampoline) - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE save_all_base_regs @@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline) restore_all_base_regs - add sp, sp, #S_FRAME_SIZE + add sp, sp, #PT_REGS_SIZE ret SYM_CODE_END(kretprobe_trampoline) -- Gitee From 0953d6b3734acbf625822ce561688e3b8335d0c1 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 6 May 2021 15:34:59 +0800 Subject: [PATCH 20/22] Makefile: extend 32B aligned debug option to 64B aligned ANBZ: #7841 commit cf536e185869d4815d506e777bcca6edd9966a6e upstream. Commit 09c60546f04f ("./Makefile: add debug option to enable function aligned on 32 bytes") was introduced to help debugging strange kernel performance changes caused by code alignment change. Recently we found 2 similar cases [1][2] caused by code-alignment changes, which can only be identified by forcing 64 bytes aligned for all functions. Originally, 32 bytes was used mainly for not wasting too much text space, but this option is only for debug anyway where text space is not a big concern. So extend the alignment to 64 bytes to cover more similar cases. [1].https://lore.kernel.org/lkml/20210427090013.GG32408@xsang-OptiPlex-9020/ [2].https://lore.kernel.org/lkml/20210420030837.GB31773@xsang-OptiPlex-9020/ Signed-off-by: Feng Tang Signed-off-by: Masahiro Yamada Signed-off-by: Tianchen Ding --- Makefile | 4 ++-- lib/Kconfig.debug | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 16daa177a964..4abf83cea70f 100644 --- a/Makefile +++ b/Makefile @@ -913,8 +913,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif -ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B -KBUILD_CFLAGS += -falign-functions=32 +ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B +KBUILD_CFLAGS += -falign-functions=64 endif # arch Makefile may override CC so keep this after arch Makefile is included diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6684c5a2e71f..c270de524117 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -363,8 +363,8 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. -config DEBUG_FORCE_FUNCTION_ALIGN_32B - bool "Force all function address 32B aligned" if EXPERT +config DEBUG_FORCE_FUNCTION_ALIGN_64B + bool "Force all function address 64B aligned" if EXPERT help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance -- Gitee From a9d9b680678610d9d4471a8db05db0ec868927e5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 6 Aug 2021 21:50:27 +0200 Subject: [PATCH 21/22] tracing: define needed config DYNAMIC_FTRACE_WITH_ARGS ANBZ: #7841 commit 12f9951d3f311acb1d4ffe8e839bc2c07983546f upstream. Commit 2860cd8a2353 ("livepatch: Use the default ftrace_ops instead of REGS when ARGS is available") intends to enable config LIVEPATCH when ftrace with ARGS is available. However, the chain of configs to enable LIVEPATCH is incomplete, as HAVE_DYNAMIC_FTRACE_WITH_ARGS is available, but the definition of DYNAMIC_FTRACE_WITH_ARGS, combining DYNAMIC_FTRACE and HAVE_DYNAMIC_FTRACE_WITH_ARGS, needed to enable LIVEPATCH, is missing in the commit. Fortunately, ./scripts/checkkconfigsymbols.py detects this and warns: DYNAMIC_FTRACE_WITH_ARGS Referencing files: kernel/livepatch/Kconfig So, define the config DYNAMIC_FTRACE_WITH_ARGS analogously to the already existing similar configs, DYNAMIC_FTRACE_WITH_REGS and DYNAMIC_FTRACE_WITH_DIRECT_CALLS, in ./kernel/trace/Kconfig to connect the chain of configs. Link: https://lore.kernel.org/kernel-janitors/CAKXUXMwT2zS9fgyQHKUUiqo8ynZBdx2UEUu1WnV_q0OCmknqhw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20210806195027.16808-1-lukas.bulwahn@gmail.com Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Peter Zijlstra Cc: Miroslav Benes Cc: stable@vger.kernel.org Fixes: 2860cd8a2353 ("livepatch: Use the default ftrace_ops instead of REGS when ARGS is available") Signed-off-by: Lukas Bulwahn Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Tianchen Ding --- kernel/trace/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 76467534db06..8b278a9765c4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -227,6 +227,11 @@ config DYNAMIC_FTRACE_WITH_DIRECT_CALLS depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +config DYNAMIC_FTRACE_WITH_ARGS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS + config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER -- Gitee From 000b6dff2c077db292302e15172fcd24be5a8d19 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Mon, 8 Jan 2024 17:04:39 +0800 Subject: [PATCH 22/22] anolis: configs: flush DYNAMIC_FTRACE_WITH_ARGS and FTRACE_RECORD_RECURSION ANBZ: #7841 CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS and CONFIG_DYNAMIC_FTRACE_WITH_ARGS is enabled on x86 by default. CONFIG_FTRACE_RECORD_RECURSION is a new feature we do not need now, so disable it. It maybe useful in some debug scenes, so enable it for debug configs. Signed-off-by: Tianchen Ding --- arch/arm64/configs/anolis-debug_defconfig | 3 +++ arch/arm64/configs/anolis_defconfig | 1 + arch/x86/configs/anolis-debug_defconfig | 5 +++++ arch/x86/configs/anolis_defconfig | 3 +++ 4 files changed, 12 insertions(+) diff --git a/arch/arm64/configs/anolis-debug_defconfig b/arch/arm64/configs/anolis-debug_defconfig index 50b8dcce2909..f2ca3698bac0 100644 --- a/arch/arm64/configs/anolis-debug_defconfig +++ b/arch/arm64/configs/anolis-debug_defconfig @@ -6531,6 +6531,9 @@ CONFIG_HIST_TRIGGERS=y # CONFIG_TRACEPOINT_BENCHMARK is not set CONFIG_RING_BUFFER_BENCHMARK=m # CONFIG_TRACE_EVAL_MAP_FILE is not set +CONFIG_FTRACE_RECORD_RECURSION=y +CONFIG_FTRACE_RECORD_RECURSION_SIZE=128 +CONFIG_RING_BUFFER_RECORD_RECURSION=y # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_RING_BUFFER_STARTUP_TEST is not set # CONFIG_PREEMPTIRQ_DELAY_TEST is not set diff --git a/arch/arm64/configs/anolis_defconfig b/arch/arm64/configs/anolis_defconfig index 568fa70ba698..c0a328ab1145 100644 --- a/arch/arm64/configs/anolis_defconfig +++ b/arch/arm64/configs/anolis_defconfig @@ -6510,6 +6510,7 @@ CONFIG_HIST_TRIGGERS=y # CONFIG_TRACEPOINT_BENCHMARK is not set CONFIG_RING_BUFFER_BENCHMARK=m # CONFIG_TRACE_EVAL_MAP_FILE is not set +# CONFIG_FTRACE_RECORD_RECURSION is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_RING_BUFFER_STARTUP_TEST is not set # CONFIG_PREEMPTIRQ_DELAY_TEST is not set diff --git a/arch/x86/configs/anolis-debug_defconfig b/arch/x86/configs/anolis-debug_defconfig index 1ac0c6de9260..7f2f7fbc375b 100644 --- a/arch/x86/configs/anolis-debug_defconfig +++ b/arch/x86/configs/anolis-debug_defconfig @@ -7065,6 +7065,7 @@ CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y +CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_FENTRY=y @@ -7088,6 +7089,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y CONFIG_DYNAMIC_FTRACE=y CONFIG_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y +CONFIG_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y # CONFIG_IRQSOFF_TRACER is not set @@ -7117,6 +7119,9 @@ CONFIG_HIST_TRIGGERS=y # CONFIG_TRACEPOINT_BENCHMARK is not set CONFIG_RING_BUFFER_BENCHMARK=m # CONFIG_TRACE_EVAL_MAP_FILE is not set +CONFIG_FTRACE_RECORD_RECURSION=y +CONFIG_FTRACE_RECORD_RECURSION_SIZE=128 +CONFIG_RING_BUFFER_RECORD_RECURSION=y # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_RING_BUFFER_STARTUP_TEST is not set # CONFIG_MMIOTRACE_TEST is not set diff --git a/arch/x86/configs/anolis_defconfig b/arch/x86/configs/anolis_defconfig index bf3905b509b7..cebac7ba353c 100644 --- a/arch/x86/configs/anolis_defconfig +++ b/arch/x86/configs/anolis_defconfig @@ -7019,6 +7019,7 @@ CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y +CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_FENTRY=y @@ -7041,6 +7042,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y CONFIG_DYNAMIC_FTRACE=y CONFIG_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y +CONFIG_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y # CONFIG_IRQSOFF_TRACER is not set @@ -7070,6 +7072,7 @@ CONFIG_HIST_TRIGGERS=y # CONFIG_TRACEPOINT_BENCHMARK is not set CONFIG_RING_BUFFER_BENCHMARK=m # CONFIG_TRACE_EVAL_MAP_FILE is not set +# CONFIG_FTRACE_RECORD_RECURSION is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_RING_BUFFER_STARTUP_TEST is not set # CONFIG_PREEMPTIRQ_DELAY_TEST is not set -- Gitee