diff --git a/arch/arm64/Kconfig.turbo b/arch/arm64/Kconfig.turbo index c4a8e4e889aa4ae81c7c9ed5a739400d30ca8c9c..778ea1025c2cfda893582f601d17a2bebf7e7625 100644 --- a/arch/arm64/Kconfig.turbo +++ b/arch/arm64/Kconfig.turbo @@ -71,4 +71,17 @@ config ACTLR_XCALL_XINT Use the 0x680 as the offset to the exception vector base address for the Armv8.8 NMI taken from EL0. +config DYNAMIC_XCALL + bool "Support dynamically replace and load system call" + depends on FAST_SYSCALL + depends on UPROBES + default n + help + Xcall 2.0 add "/proc/xcall/comm" interface to + attach xcall programs onto one executable, + and support different custom syscall implementation + by dynamic instruction replaced with 'svc ffff' + and a kernel module which provides customized + implementation. + endmenu # "Turbo features selection" diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index e110f8d8e8446c7f2c47dd0aee27efc462f7bccf..edaa367d2b0d97154e843175fd6070d03c9bf5e1 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -394,6 +394,7 @@ CONFIG_FAST_IRQ=y CONFIG_DEBUG_FEATURE_BYPASS=y CONFIG_SECURITY_FEATURE_BYPASS=y CONFIG_ACTLR_XCALL_XINT=y +CONFIG_DYNAMIC_XCALL=y # end of Turbo features selection # diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d69f0e6d53f82b487b8a21c9a9c8acc8f35b5fac..1d87f724719d7bdfc83633efd774d815d1f70b00 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -75,11 +75,13 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr); void do_el0_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); +#ifdef CONFIG_FAST_SYSCALL +void do_el0_xcall(struct pt_regs *regs); +#endif void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); #ifdef CONFIG_ACTLR_XCALL_XINT asmlinkage void el0t_64_xint_handler(struct pt_regs *regs); -asmlinkage void el0t_64_xcall_handler(struct pt_regs *regs); #endif #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39595fa034913801c27603e4e993b691ef09f77d..a6fb325424e7a990313dc6419a198155e5205b5c 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -24,9 +24,6 @@ #include #include #include -#ifdef CONFIG_ACTLR_XCALL_XINT -#include -#endif extern bool rodata_full; @@ -267,10 +264,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev != next) __switch_mm(next); -#ifdef CONFIG_ACTLR_XCALL_XINT - cpu_switch_xcall_entry(tsk); -#endif - /* * Update the saved TTBR0_EL1 of the scheduled-in task as the previous * value may have not been initialised yet (activate_mm caller) or the diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h index 5765a96eed53407c9e86e920f7275e9d7e7c4bc5..0f70f03cc3a2c180847ef54448afddea0dc27923 100644 --- a/arch/arm64/include/asm/xcall.h +++ b/arch/arm64/include/asm/xcall.h @@ -2,95 +2,107 @@ #ifndef __ASM_XCALL_H #define __ASM_XCALL_H -#include #include -#include +#include #include -#include - -#include -#include +#include +#include + +#include + +#define SVC_0000 0xd4000001 +#define SVC_FFFF 0xd41fffe1 + +/* + * Only can switch by cmdline 'xcall=debug', + * By default xcall init with XCALL_MODE_TASK. 
+ */ +#define XCALL_MODE_TASK 0 +#define XCALL_MODE_SYSTEM 1 +extern int sw_xcall_mode; + +struct xcall_comm { + char *name; + char *binary; + struct path binary_path; + char *module; + struct list_head list; +}; -DECLARE_STATIC_KEY_FALSE(xcall_enable); +struct xcall { + /* used for xcall_attach */ + struct list_head list; + refcount_t ref; + /* file attached xcall */ + struct inode *binary; + struct xcall_prog *program; + char *name; +}; -struct xcall_info { - /* Must be first! */ - DECLARE_BITMAP(xcall_enable, __NR_syscalls); +struct xcall_area { + /* + * 0...NR_syscalls - 1: function pointers to hijack default syscall + * NR_syscalls...NR_syscalls * 2 - 1: function pointers in kernel module + */ + unsigned long sys_call_table[NR_syscalls * 2]; + refcount_t ref; + struct xcall *xcall; }; -#define TASK_XINFO(p) ((struct xcall_info *)p->xinfo) +extern const syscall_fn_t *default_sys_call_table(void); +#ifdef CONFIG_DYNAMIC_XCALL +extern int xcall_attach(struct xcall_comm *info); +extern int xcall_detach(struct xcall_comm *info); +extern int xcall_pre_sstep_check(struct pt_regs *regs); +extern int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr, + uprobe_opcode_t opcode); -int xcall_init_task(struct task_struct *p, struct task_struct *orig); -void xcall_task_free(struct task_struct *p); +#define mm_xcall_area(mm) ((struct xcall_area *)((mm)->xcall)) -#ifdef CONFIG_ACTLR_XCALL_XINT -struct hw_xcall_info { - /* Must be first! */ - void *xcall_entry[__NR_syscalls + 1]; - atomic_t xcall_scno_count; - /* keep xcall_entry and xcall scno count consistent */ - spinlock_t lock; -}; +static inline long hijack_syscall(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + syscall_fn_t syscall_fn; -#define TASK_HW_XINFO(p) ((struct hw_xcall_info *)p->xinfo) -#define XCALL_ENTRY_SIZE (sizeof(unsigned long) * (__NR_syscalls + 1)) + if (likely(!area)) + return -EINVAL; -DECLARE_PER_CPU(void *, __cpu_xcall_entry); -extern void xcall_entry(void); -extern void no_xcall_entry(void); + if (unlikely(scno >= __NR_syscalls)) + return -EINVAL; -static inline bool is_xcall_entry(struct hw_xcall_info *xinfo, unsigned int sc_no) -{ - return xinfo->xcall_entry[sc_no] == xcall_entry; + syscall_fn = (syscall_fn_t)area->sys_call_table[scno]; + return syscall_fn(regs); } -static inline int set_hw_xcall_entry(struct hw_xcall_info *xinfo, - unsigned int sc_no, bool enable) +static inline const syscall_fn_t *real_syscall_table(void) { - spin_lock(&xinfo->lock); - if (enable && !is_xcall_entry(xinfo, sc_no)) { - xinfo->xcall_entry[sc_no] = xcall_entry; - atomic_inc(&xinfo->xcall_scno_count); - } - - if (!enable && is_xcall_entry(xinfo, sc_no)) { - xinfo->xcall_entry[sc_no] = no_xcall_entry; - atomic_dec(&xinfo->xcall_scno_count); - } - spin_unlock(&xinfo->lock); - - return 0; -} + struct xcall_area *area = mm_xcall_area(current->mm); + + if (likely(!area)) + return default_sys_call_table(); -static inline void cpu_set_arch_xcall(bool enable) + return (syscall_fn_t *)(&(area->sys_call_table[__NR_syscalls])); +} +#else +#define mm_xcall_area(mm) (NULL) +#define hijack_syscall(regs) (NULL) +static inline const syscall_fn_t *real_syscall_table(void) { - u64 el = read_sysreg(CurrentEL); - u64 val; - - if (el == CurrentEL_EL2) { - val = read_sysreg(actlr_el2); - val = enable ? (val | ACTLR_ELx_XCALL) : (val & ~ACTLR_ELx_XCALL); - write_sysreg(val, actlr_el2); - } else { - val = read_sysreg(actlr_el1); - val = enable ? 
(val | ACTLR_ELx_XCALL) : (val & ~ACTLR_ELx_XCALL); - write_sysreg(val, actlr_el1); - } + return sys_call_table; } +#endif /* CONFIG_DYNAMIC_XCALL */ -static inline void cpu_switch_xcall_entry(struct task_struct *tsk) -{ - struct hw_xcall_info *xinfo = tsk->xinfo; +DECLARE_STATIC_KEY_FALSE(xcall_enable); - if (!system_uses_xcall_xint() || !tsk->xinfo) - return; +struct xcall_info { + /* Must be first! */ + u8 xcall_enable[__NR_syscalls + 1]; +}; - if (unlikely(atomic_read(&xinfo->xcall_scno_count) > 0)) { - __this_cpu_write(__cpu_xcall_entry, xinfo->xcall_entry); - cpu_set_arch_xcall(true); - } else - cpu_set_arch_xcall(false); -} -#endif /* CONFIG_ACTLR_XCALL_XINT */ +#define TASK_XINFO(p) ((struct xcall_info *)p->xinfo) -#endif /*__ASM_XCALL_H*/ +int xcall_init_task(struct task_struct *p, struct task_struct *orig); +void xcall_task_free(struct task_struct *p); +void xcall_info_switch(struct task_struct *p); +#endif /* __ASM_XCALL_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7aa13adffcda990e216af10a4ca37d1e03b2e3e0..f81acf037a5ce78ae226ef2e3d784cf872d9ded3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2444,6 +2444,44 @@ static void mpam_extra_caps(void) #include DEFINE_STATIC_KEY_FALSE(xcall_enable); +static int __init xcall_setup(char *str) +{ + static_branch_enable(&xcall_enable); + + if (str && !strcmp(str, "=debug")) { + sw_xcall_mode = XCALL_MODE_SYSTEM; + pr_warn("Enable xcall across the entire system, for debugging only!\n"); + } + + return 1; +} +__setup("xcall", xcall_setup); + +static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return static_key_enabled(&xcall_enable); +} +#endif + +#ifdef CONFIG_FAST_IRQ +bool is_xint_support; +static int __init xint_setup(char *str) +{ + if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS)) + return 1; + + is_xint_support = true; + return 1; +} +__setup("xint", xint_setup); + +static bool has_xint_support(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_xint_support; +} +#endif + +#ifdef CONFIG_ACTLR_XCALL_XINT #define AIDR_ELx_XCALL_SHIFT 32 #define AIDR_ELx_XCALL (UL(1) << AIDR_ELx_XCALL_SHIFT) @@ -2478,40 +2516,6 @@ static bool is_arch_xcall_xint_support(void) return false; } -static int __init xcall_setup(char *str) -{ - if (!is_arch_xcall_xint_support()) - static_branch_enable(&xcall_enable); - - return 1; -} -__setup("xcall", xcall_setup); - -static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return static_key_enabled(&xcall_enable); -} -#endif - -#ifdef CONFIG_FAST_IRQ -bool is_xint_support; -static int __init xint_setup(char *str) -{ - if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS)) - return 1; - - is_xint_support = true; - return 1; -} -__setup("xint", xint_setup); - -static bool has_xint_support(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return is_xint_support; -} -#endif - -#ifdef CONFIG_ACTLR_XCALL_XINT static bool has_arch_xcall_xint_support(const struct arm64_cpu_capabilities *entry, int scope) { return is_arch_xcall_xint_support(); @@ -2555,14 +2559,14 @@ static void cpu_enable_arch_xcall_xint(const struct arm64_cpu_capabilities *__un el = read_sysreg(CurrentEL); if (el == CurrentEL_EL2) { actlr_el2 = read_sysreg(actlr_el2); - actlr_el2 |= ACTLR_ELx_XINT; + actlr_el2 |= (ACTLR_ELx_XINT | ACTLR_ELx_XCALL); write_sysreg(actlr_el2, actlr_el2); isb(); actlr_el2 = read_sysreg(actlr_el2); pr_info("actlr_el2: %llx, cpu:%d\n", 
actlr_el2, cpu); } else { actlr_el1 = read_sysreg(actlr_el1); - actlr_el1 |= ACTLR_ELx_XINT; + actlr_el1 |= (ACTLR_ELx_XINT | ACTLR_ELx_XCALL); write_sysreg(actlr_el1, actlr_el1); isb(); actlr_el1 = read_sysreg(actlr_el1); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 1e8171c1efe76a1873849c1c34bb73c577b9cdef..c72993bb456351e1a52fce90bad36369d22997f7 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -207,7 +207,7 @@ static __always_inline void fast_enter_from_user_mode(struct pt_regs *regs) mte_disable_tco_entry(current); #endif } -#endif +#endif /* CONFIG_FAST_SYSCALL || CONFIG_FAST_IRQ */ /* * Handle IRQ/context state management when entering an NMI from user/kernel @@ -818,8 +818,8 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) } #ifdef CONFIG_FAST_SYSCALL -/* Copy from el0_sync */ -static void noinstr el0_xcall(struct pt_regs *regs) +/* dynamically load syscall handler */ +asmlinkage void noinstr el0_xcall_syscall(struct pt_regs *regs) { fast_enter_from_user_mode(regs); #ifndef CONFIG_SECURITY_FEATURE_BYPASS @@ -827,15 +827,25 @@ static void noinstr el0_xcall(struct pt_regs *regs) #endif fp_user_discard(); local_daif_restore(DAIF_PROCCTX); - do_el0_svc(regs); + do_el0_xcall(regs); fast_exit_to_user_mode(regs); } -asmlinkage void noinstr el0t_64_fast_syscall_handler(struct pt_regs *regs) +/* low-overhead syscall handler */ +asmlinkage void noinstr el0_fast_syscall(struct pt_regs *regs) { - el0_xcall(regs); -} + fast_enter_from_user_mode(regs); +#ifndef CONFIG_SECURITY_FEATURE_BYPASS + cortex_a76_erratum_1463225_svc_handler(); #endif + fp_user_discard(); + local_daif_restore(DAIF_PROCCTX); + do_el0_svc(regs); + fast_exit_to_user_mode(regs); +} + +asmlinkage void el0_slow_syscall(struct pt_regs *regs) __alias(el0_svc); +#endif /* CONFIG_FAST_SYSCALL */ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) { @@ -1052,10 +1062,6 @@ UNHANDLED(el0t, 32, error) #endif /* CONFIG_AARCH32_EL0 */ #ifdef CONFIG_ACTLR_XCALL_XINT -asmlinkage void noinstr el0t_64_xcall_handler(struct pt_regs *regs) -{ - el0_xcall(regs); -} asmlinkage void noinstr el0t_64_xint_handler(struct pt_regs *regs) { el0_interrupt(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5648a3119f90e214c104bab062cfe0aec5dd6266..cceb4526745f15fa5a813e2e66f723ae5887c8cb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -552,6 +552,10 @@ tsk .req x28 // current thread_info .text +#ifdef CONFIG_FAST_SYSCALL +#include "xcall/entry.S" +#endif + /* * Exception vectors. 
*/ @@ -569,7 +573,11 @@ SYM_CODE_START(vectors) kernel_ventry 1, h, 64, fiq // FIQ EL1h kernel_ventry 1, h, 64, error // Error EL1h +#ifdef CONFIG_FAST_SYSCALL + sync_ventry // Synchronous 64-bit EL0 +#else kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0 +#endif kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0 kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0 kernel_ventry 0, t, 64, error // Error 64-bit EL0 @@ -581,8 +589,6 @@ SYM_CODE_START(vectors) SYM_CODE_END(vectors) #ifdef CONFIG_ACTLR_XCALL_XINT -#include "xcall/entry.S" - .align 11 SYM_CODE_START(vectors_xcall_xint) kernel_ventry 1, t, 64, sync // Synchronous EL1t @@ -595,7 +601,11 @@ SYM_CODE_START(vectors_xcall_xint) kernel_ventry 1, h, 64, fiq // FIQ EL1h kernel_ventry 1, h, 64, error // Error EL1h +#ifdef CONFIG_FAST_SYSCALL + sync_ventry // Synchronous 64-bit EL0 +#else kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0 +#endif kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0 kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0 kernel_ventry 0, t, 64, error // Error 64-bit EL0 @@ -605,7 +615,7 @@ SYM_CODE_START(vectors_xcall_xint) kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0 kernel_ventry 0, t, 32, error // Error 32-bit EL0 SYM_CODE_END(vectors_xcall_xint) -#endif +#endif /* CONFIG_ACTLR_XCALL_XINT */ #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(__bad_stack) @@ -637,65 +647,6 @@ SYM_CODE_START_LOCAL(__bad_stack) SYM_CODE_END(__bad_stack) #endif /* CONFIG_VMAP_STACK */ -#ifdef CONFIG_FAST_SYSCALL - .macro check_esr_el1_ec_svc64 - /* Only support SVC64 for now */ - mrs x20, esr_el1 - lsr w20, w20, #ESR_ELx_EC_SHIFT - cmp x20, #ESR_ELx_EC_SVC64 - .endm - - .macro check_syscall_nr - cmp x8, __NR_syscalls - .endm - - .macro check_xcall_enable - /* x21 = task_struct->xinfo->xcall_enable */ - ldr_this_cpu x20, __entry_task, x21 - ldr x21, [x20, #TSK_XCALL] - /* x20 = sc_no / 8 */ - lsr x20, x8, 3 - ldr x21, [x21, x20] - /* x8 = sc_no % 8 */ - and x8, x8, 7 - mov x20, 1 - lsl x20, x20, x8 - and x21, x21, x20 - cmp x21, 0 - .endm - - .macro check_xcall_pre_kernel_entry - stp x20, x21, [sp, #0] - /* is ESR_ELx_EC_SVC64 */ - check_esr_el1_ec_svc64 - bne .Lskip_xcall\@ - /* x8 >= __NR_syscalls */ - check_syscall_nr - bhs .Lskip_xcall\@ - str x8, [sp, #16] - /* is xcall enabled */ - check_xcall_enable - ldr x8, [sp, #16] - beq .Lskip_xcall\@ - ldp x20, x21, [sp, #0] - /* do xcall */ -#ifdef CONFIG_SECURITY_FEATURE_BYPASS - kernel_entry 0, 64, xcall -#else - kernel_entry 0, 64 -#endif - mov x0, sp - bl el0t_64_fast_syscall_handler -#ifdef CONFIG_SECURITY_FEATURE_BYPASS - kernel_exit 0, xcall -#else - b ret_to_user -#endif -.Lskip_xcall\@: - ldp x20, x21, [sp, #0] - .endm -#endif - #ifdef CONFIG_FAST_IRQ .macro check_xint_pre_kernel_entry stp x0, x1, [sp, #0] @@ -748,16 +699,6 @@ SYM_CODE_END(__bad_stack) .macro entry_handler el:req, ht:req, regsize:req, label:req SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) -#ifdef CONFIG_FAST_SYSCALL - .if \el == 0 && \regsize == 64 && \label == sync - /* Only support el0 aarch64 sync exception */ - alternative_if_not ARM64_HAS_XCALL - b .Lret_to_kernel_entry\@ - alternative_else_nop_endif - check_xcall_pre_kernel_entry - .Lret_to_kernel_entry\@: - .endif -#endif #ifdef CONFIG_FAST_IRQ .if \regsize == 64 && \label == irq && \el == 0 && \ht == t alternative_if_not ARM64_HAS_XINT @@ -797,7 +738,6 @@ SYM_CODE_END(el\el\ht\()_\regsize\()_\label) entry_handler 0, t, 64, error #ifdef CONFIG_ACTLR_XCALL_XINT - entry_handler 0, t, 64, xcall entry_handler 0, t, 64, xint #endif entry_handler 0, t, 32, sync diff 
--git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index a2f137a595fc1c06b71a0965bbba441e8101d180..677a9589f9ca8d6d4cf8b9776830c80ef03f9c4c 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "decode-insn.h" @@ -171,6 +172,11 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs, if (uprobe_pre_sstep_notifier(regs)) return DBG_HOOK_HANDLED; +#ifdef CONFIG_DYNAMIC_XCALL + if (xcall_pre_sstep_check(regs)) + return DBG_HOOK_HANDLED; +#endif + return DBG_HOOK_ERROR; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index fe3f89445fcbd43879fcc1ba3ea5382029b4f343..e9e5ce956f1526eb48765a624a8d8c452f0740b1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -55,6 +55,7 @@ #include #include #include +#include #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include @@ -472,6 +473,10 @@ DEFINE_PER_CPU(struct task_struct *, __entry_task); static void entry_task_switch(struct task_struct *next) { __this_cpu_write(__entry_task, next); +#ifdef CONFIG_FAST_SYSCALL + if (static_branch_unlikely(&xcall_enable)) + xcall_info_switch(next); +#endif } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 07cc0dfef1fe54057ef8415a7735f028323f2f22..33b63e01d1813a3decdc3c7f8123fcfa4661a7aa 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -14,6 +14,7 @@ #include #include #include +#include long a32_arm_syscall(struct pt_regs *regs, int scno); long sys_ni_syscall(void); @@ -162,6 +163,15 @@ static inline void delouse_pt_regs(struct pt_regs *regs) } #endif +#ifdef CONFIG_FAST_SYSCALL +void do_el0_xcall(struct pt_regs *regs) +{ + const syscall_fn_t *t = real_syscall_table(); + + el0_svc_common(regs, regs->regs[8], __NR_syscalls, t); +} +#endif + void do_el0_svc(struct pt_regs *regs) { const syscall_fn_t *t = sys_call_table; @@ -173,6 +183,10 @@ void do_el0_svc(struct pt_regs *regs) } #endif +#ifdef CONFIG_DYNAMIC_XCALL + if (!hijack_syscall(regs)) + return; +#endif el0_svc_common(regs, regs->regs[8], __NR_syscalls, t); } diff --git a/arch/arm64/kernel/xcall/Makefile b/arch/arm64/kernel/xcall/Makefile index 0168bd1907939373ff26689f3e22d9b95a5dfe70..4a9c8eedcba982bfacd24ab3f4a16eec78deb6ed 100644 --- a/arch/arm64/kernel/xcall/Makefile +++ b/arch/arm64/kernel/xcall/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += xcall.o +obj-y += xcall.o +obj-$(CONFIG_DYNAMIC_XCALL) += core.o proc.o diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c new file mode 100644 index 0000000000000000000000000000000000000000..9ba4c8de7112db7989d3f20e3a0f42462917a4f7 --- /dev/null +++ b/arch/arm64/kernel/xcall/core.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Huawei Limited. + */ + +#define pr_fmt(fmt) "xcall: " fmt + +#include +#include +#include +#include + +#include + +static DEFINE_SPINLOCK(xcall_list_lock); +static LIST_HEAD(xcalls_list); +static DEFINE_SPINLOCK(prog_list_lock); +static LIST_HEAD(progs_list); + +/* + * Travel the list of all registered xcall_prog during module installation + * to find the xcall_prog. 
+ */ +static struct xcall_prog *get_xcall_prog(const char *module) +{ + struct xcall_prog *p; + + list_for_each_entry(p, &progs_list, list) { + if (!strcmp(module, p->name)) + return p; + } + return NULL; +} + +static struct xcall_prog *get_xcall_prog_locked(const char *module) +{ + struct xcall_prog *ret; + + spin_lock(&prog_list_lock); + ret = get_xcall_prog(module); + spin_unlock(&prog_list_lock); + + return ret; +} + +#define inv_xcall_syscall ((unsigned long)__arm64_sys_ni_syscall) + +static long patch_syscall(struct pt_regs *regs); + +static long filter_ksyscall(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + + cmpxchg(&(area->sys_call_table[scno]), filter_ksyscall, patch_syscall); + regs->pc -= AARCH64_INSN_SIZE; + return 0; +} + +static long replay_syscall(struct pt_regs *regs) +{ + regs->pc -= AARCH64_INSN_SIZE; + return 0; +} + +static long patch_syscall(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + syscall_fn_t syscall_fn; + unsigned long old; + int ret; + + old = cmpxchg(&(area->sys_call_table[scno]), patch_syscall, replay_syscall); + if (old != (unsigned long)patch_syscall) { + syscall_fn = (syscall_fn_t)area->sys_call_table[scno]; + return syscall_fn(regs); + } + + regs->pc -= AARCH64_INSN_SIZE; + + mmap_write_lock(current->mm); + ret = set_xcall_insn(current->mm, regs->pc, SVC_FFFF); + mmap_write_unlock(current->mm); + + if (!ret) { + xchg(&(area->sys_call_table[scno]), filter_ksyscall); + return 0; + } + + regs->pc += AARCH64_INSN_SIZE; + xchg(&(area->sys_call_table[scno]), patch_syscall); + pr_info("patch xcall insn failed for scno %u at %s.\n", + scno, ret > 0 ? 
"UPROBE_BRK" : "SVC_FFFF"); + + return ret; +} + +int xcall_pre_sstep_check(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + + return area && (scno < NR_syscalls) && + (area->sys_call_table[scno] != inv_xcall_syscall); +} + +static struct xcall *get_xcall(struct xcall *xcall) +{ + refcount_inc(&xcall->ref); + return xcall; +} + +static void put_xcall(struct xcall *xcall) +{ + if (!refcount_dec_and_test(&xcall->ref)) + return; + + pr_info("free xcall resource.\n"); + kfree(xcall->name); + if (xcall->program) + module_put(xcall->program->owner); + + kfree(xcall); +} + +static struct xcall *find_xcall(const char *name, struct inode *binary) +{ + struct xcall *xcall; + + list_for_each_entry(xcall, &xcalls_list, list) { + if ((name && !strcmp(name, xcall->name)) || + (binary && xcall->binary == binary)) + return get_xcall(xcall); + } + return NULL; +} + +static struct xcall *find_xcall_by_name_locked(const char *name) +{ + struct xcall *ret = NULL; + + spin_lock(&xcall_list_lock); + ret = find_xcall(name, NULL); + spin_unlock(&xcall_list_lock); + return ret; +} + +static struct xcall *insert_xcall_locked(struct xcall *xcall) +{ + struct xcall *ret = NULL; + + spin_lock(&xcall_list_lock); + ret = find_xcall(xcall->name, xcall->binary); + if (!ret) + list_add(&xcall->list, &xcalls_list); + else + put_xcall(ret); + spin_unlock(&xcall_list_lock); + return ret; +} + +static void delete_xcall(struct xcall *xcall) +{ + spin_lock(&xcall_list_lock); + list_del(&xcall->list); + spin_unlock(&xcall_list_lock); + + put_xcall(xcall); +} + +/* Init xcall with a given inode */ +static int init_xcall(struct xcall *xcall, struct xcall_comm *comm) +{ + struct xcall_prog *program = get_xcall_prog_locked(comm->module); + + if (!program || !try_module_get(program->owner)) + return -EINVAL; + + xcall->binary = d_real_inode(comm->binary_path.dentry); + xcall->program = program; + refcount_set(&xcall->ref, 1); + INIT_LIST_HEAD(&xcall->list); + + return 0; +} + +static int fill_xcall_syscall(struct xcall_area *area, struct xcall *xcall) +{ + unsigned int scno_offset, scno_count = 0; + struct xcall_prog_object *obj; + + obj = xcall->program->objs; + while (scno_count < xcall->program->nr_scno && obj->func) { + scno_offset = NR_syscalls + obj->scno; + if (area->sys_call_table[scno_offset] != inv_xcall_syscall) { + pr_err("Process can not mount more than one xcall.\n"); + return -EINVAL; + } + + area->sys_call_table[scno_offset] = obj->func; + area->sys_call_table[obj->scno] = (unsigned long)patch_syscall; + obj += 1; + scno_count++; + } + + return 0; +} + +static struct xcall_area *create_xcall_area(struct mm_struct *mm) +{ + struct xcall_area *area; + int i; + + area = kzalloc(sizeof(*area), GFP_KERNEL); + if (!area) + return NULL; + + refcount_set(&area->ref, 1); + + for (i = 0; i < NR_syscalls; i++) { + area->sys_call_table[i] = inv_xcall_syscall; + area->sys_call_table[i + NR_syscalls] = inv_xcall_syscall; + } + + smp_store_release(&mm->xcall, area); + return area; +} + +/* + * Initialize the xcall data of mm_struct data. 
+ * And register xcall into one address space, which includes create + * the mm_struct associated xcall_area data + */ +int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm) +{ + struct xcall_area *area; + struct xcall *xcall; + int ret = -EINVAL; + + if (list_empty(&xcalls_list)) + return 0; + + spin_lock(&xcall_list_lock); + xcall = find_xcall(NULL, file_inode(vma->vm_file)); + spin_unlock(&xcall_list_lock); + if (!xcall) + return ret; + + if (!xcall->program) + goto put_xcall; + + area = mm_xcall_area(mm); + if (!area && !create_xcall_area(mm)) { + ret = -ENOMEM; + goto put_xcall; + } + + area = (struct xcall_area *)READ_ONCE(mm->xcall); + // Each process is allowed to be associated with only one xcall. + if (!cmpxchg(&area->xcall, NULL, xcall) && !fill_xcall_syscall(area, xcall)) + return 0; + +put_xcall: + put_xcall(xcall); + return ret; +} + +void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p) +{ + struct xcall_area *area = mm_xcall_area(mm); + + if (area) + refcount_inc(&area->ref); +} + +void clear_xcall_area(struct mm_struct *mm) +{ + struct xcall_area *area = mm_xcall_area(mm); + + if (!area) + return; + + if (!refcount_dec_and_test(&area->ref)) + return; + + if (area->xcall) + put_xcall(area->xcall); + + kfree(area); + mm->xcall = NULL; +} + +int xcall_attach(struct xcall_comm *comm) +{ + struct xcall *xcall; + int ret; + + xcall = kzalloc(sizeof(struct xcall), GFP_KERNEL); + if (!xcall) + return -ENOMEM; + + ret = init_xcall(xcall, comm); + if (ret) { + kfree(xcall); + return ret; + } + + xcall->name = kstrdup(comm->name, GFP_KERNEL); + if (!xcall->name) { + delete_xcall(xcall); + return -ENOMEM; + } + + if (insert_xcall_locked(xcall)) { + delete_xcall(xcall); + return -EINVAL; + } + + return 0; +} + +int xcall_detach(struct xcall_comm *comm) +{ + struct xcall *xcall; + + xcall = find_xcall_by_name_locked(comm->name); + if (!xcall) + return -EINVAL; + + put_xcall(xcall); + delete_xcall(xcall); + return 0; +} + +static int check_prog(struct xcall_prog *prog) +{ + struct xcall_prog_object *obj = prog->objs; + + prog->nr_scno = 0; + while (obj && obj->func) { + if (obj->scno >= __NR_syscalls) + return -EINVAL; + + prog->nr_scno++; + obj++; + } + + if (!prog->nr_scno || prog->nr_scno > MAX_NR_SCNO) + return -EINVAL; + + pr_info("Successly registered syscall number: %d\n", prog->nr_scno); + return 0; +} + +int xcall_prog_register(struct xcall_prog *prog) +{ + if (check_prog(prog)) + return -EINVAL; + + spin_lock(&prog_list_lock); + if (get_xcall_prog(prog->name)) { + spin_unlock(&prog_list_lock); + return -EBUSY; + } + list_add(&prog->list, &progs_list); + spin_unlock(&prog_list_lock); + return 0; +} +EXPORT_SYMBOL(xcall_prog_register); + +void xcall_prog_unregister(struct xcall_prog *prog) +{ + spin_lock(&prog_list_lock); + list_del(&prog->list); + spin_unlock(&prog_list_lock); +} +EXPORT_SYMBOL(xcall_prog_unregister); + +const syscall_fn_t *default_sys_call_table(void) +{ + return sys_call_table; +} +EXPORT_SYMBOL(default_sys_call_table); diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S index 401be46f4fc2dbb887e415e022c857f7a1d3e415..ccbc76e40e749a4fd6cddde9b73b667347850977 100644 --- a/arch/arm64/kernel/xcall/entry.S +++ b/arch/arm64/kernel/xcall/entry.S @@ -84,7 +84,7 @@ alternative_else_nop_endif #endif .endm /* .macro hw_xcall_save_base_regs */ - .macro hw_xcal_restore_base_regs + .macro hw_xcall_restore_base_regs #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if_not ARM64_HAS_GIC_PRIO_MASKING b .Lskip_pmr_restore\@ @@ 
-149,35 +149,85 @@ alternative_else_nop_endif add sp, sp, #PT_REGS_SIZE // restore sp eret sb - .endm /* .macro hw_xcal_restore_base_regs */ + .endm /* .macro hw_xcall_restore_base_regs */ -SYM_CODE_START(no_xcall_entry) - ldp x20, x21, [sp, #0] +SYM_CODE_START_LOCAL(el0t_64_svc_entry) +alternative_if_not ARM64_HAS_HW_XCALL_XINT + /* Hijack SVC to dynamically load syscalls via '/proc/xcall/comm' */ + ldr x20, [sp, #S_SYSCALLNO] // ESR.bits[15,0] + cmp x20, 0xfff + b.ge el0t_64_xcall_entry +alternative_else_nop_endif + + /* Hijack SVC to low overhead syscalls via '/prox/[pid]/xcall' */ + cmp x8, __NR_syscalls + b.ge .slow_syscall + ldr_this_cpu x21, __xcall_info, x20 + ldrb w20, [x21, x8] + cmp x20, 0 + bne el0t_fast_syscall + +.slow_syscall: + ldp x20, x21, [sp, #16 * 10] kernel_entry 0, 64 mov x0, sp - bl el0t_64_sync_handler + bl el0_slow_syscall b ret_to_user -SYM_CODE_END(no_xcall_entry) +SYM_INNER_LABEL(el0t_64_xcall_entry, SYM_L_GLOBAL) + lsr x20, x20, #12 + adr x21, .xcall_func_table + ldr w20, [x21, x20, lsl #2] + add x20, x20, x21 + br x20 + /* ISS==0F~FF: Entry to optimized and customized syscalls + */ +.xcall_func_table: + .rept 16 + .word el0t_xcall_syscall - .xcall_func_table + .endr +SYM_CODE_END(el0t_64_svc_entry) -SYM_CODE_START(xcall_entry) - ldp x20, x21, [sp, #0] +SYM_CODE_START_LOCAL(el0t_xcall_syscall) + ldp x20, x21, [sp, #16 * 10] hw_xcall_save_base_regs mov x0, sp - bl el0t_64_xcall_handler - hw_xcal_restore_base_regs -SYM_CODE_END(xcall_entry) - -SYM_CODE_START_LOCAL(el0t_64_hw_xcall) - stp x20, x21, [sp, #0] - ldr_this_cpu x21, __cpu_xcall_entry, x20 - mov x20, __NR_syscalls - /* x8 >= __NR_syscalls */ - cmp x8, __NR_syscalls - csel x20, x8, x20, lt - ldr x21, [x21, x20, lsl #3] - br x21 -SYM_CODE_END(el0t_64_hw_xcall) + bl el0_xcall_syscall + hw_xcall_restore_base_regs +SYM_CODE_END(el0t_xcall_syscall) + +SYM_CODE_START_LOCAL(el0t_fast_syscall) + ldp x20, x21, [sp, #16 * 10] + hw_xcall_save_base_regs + mov x0, sp + bl el0_fast_syscall + hw_xcall_restore_base_regs +SYM_CODE_END(el0t_fast_syscall) + +SYM_CODE_START_LOCAL(el0t_64_sync_ventry) + ldp x20, x21, [sp, #16 * 10] + add sp, sp, #PT_REGS_SIZE + kernel_ventry 0, t, 64, sync +SYM_CODE_END(el0t_64_sync_ventry) + +SYM_CODE_START_LOCAL(el0t_64_sync_table) + // 0 - (ESR_ELx_EC_SVC64 - 1) + .rept ESR_ELx_EC_SVC64 + .word el0t_64_sync_table - el0t_64_sync_ventry + .endr + // ESR_ELx_EC_SVC64 + .word el0t_64_sync_table - el0t_64_svc_entry + // (ESR_ELx_EC_SVC64 + 1) - ESR_ELx_EC_MAX + .rept ESR_ELx_EC_MAX - ESR_ELx_EC_SVC64 + .word el0t_64_sync_table - el0t_64_sync_ventry + .endr +SYM_CODE_END(el0t_64_sync_table) +/*********************************************** + * * + * Xcall exception entry code for 920G CPU * + * * + ***********************************************/ +#ifdef CONFIG_ACTLR_XCALL_XINT .macro xcall_ventry .align 7 .Lventry_start\@: @@ -190,6 +240,90 @@ SYM_CODE_END(el0t_64_hw_xcall) msr tpidrro_el0, xzr .Lskip_tramp_vectors_cleanup\@: sub sp, sp, #PT_REGS_SIZE - b el0t_64_hw_xcall + stp x20, x21, [sp, #16 * 10] + /* Decode ESR.ICC bits[15, 0] for use later */ + mrs x21, esr_el1 + uxth w20, w21 + b el0t_64_xcall_entry +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? 
+ .endm +#endif /* CONFIG_ACTLR_XCALL_XINT */ + +/**************************************************************** + * * + * Sync exception entry code for early CPUs before 920G * + * * + ****************************************************************/ + .macro sync_ventry + .align 7 +.Lventry_start\@: + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr +.Lskip_tramp_vectors_cleanup\@: + sub sp, sp, #PT_REGS_SIZE + +alternative_if_not ARM64_HAS_XCALL +#ifdef CONFIG_VMAP_STACK + /* + * Test whether the SP has overflowed, without corrupting a GPR. + * Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT) + * should always be zero. + */ + add sp, sp, x0 // sp' = sp + x0 + sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp + tbnz x0, #THREAD_SHIFT, 0f + sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 + sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp + b el0t_64_sync + +0: + /* + * Either we've just detected an overflow, or we've taken an exception + * while on the overflow stack. Either way, we won't return to + * userspace, and can clobber EL0 registers to free up GPRs. + */ + + /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */ + msr tpidr_el0, x0 + + /* Recover the original x0 value and stash it in tpidrro_el0 */ + sub x0, sp, x0 + msr tpidrro_el0, x0 + + /* Switch to the overflow stack */ + adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0 + + /* + * Check whether we were already on the overflow stack. This may happen + * after panic() re-enables interrupts. + */ + mrs x0, tpidr_el0 // sp of interrupted context + sub x0, sp, x0 // delta with top of overflow stack + tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range? + b.ne __bad_stack // no? -> bad stack pointer + + /* We were already on the overflow stack. Restore sp/x0 and carry on. */ + sub sp, sp, x0 + mrs x0, tpidrro_el0 +#endif + b el0t_64_sync +alternative_else + /* Save ESR and ICC.bits[15,0] for use later */ + stp x20, x21, [sp, #16 * 10] + mrs x20, esr_el1 + uxth w21, w20 + stp x20, x21, [sp, #(S_SYSCALLNO - 8)] + /* Using jump table for different exception causes */ + lsr w21, w20, #ESR_ELx_EC_SHIFT + adr x20, el0t_64_sync_table + ldr w21, [x20, x21, lsl #2] + sub x20, x20, x21 + br x20 +alternative_endif .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm diff --git a/arch/arm64/kernel/xcall/proc.c b/arch/arm64/kernel/xcall/proc.c new file mode 100644 index 0000000000000000000000000000000000000000..a9f30763a1f6e54983c49184a40fed95a470397e --- /dev/null +++ b/arch/arm64/kernel/xcall/proc.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Huawei Limited. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +static LIST_HEAD(comm_list); +static DECLARE_RWSEM(comm_rwsem); + +static void free_xcall_comm(struct xcall_comm *info) +{ + if (!info) + return; + kfree(info->name); + kfree(info->binary); + kfree(info->module); + path_put(&info->binary_path); + kfree(info); +} + +static struct xcall_comm *find_xcall_comm(struct xcall_comm *comm) +{ + struct xcall_comm *temp; + + list_for_each_entry(temp, &comm_list, list) { + if (!strcmp(comm->name, temp->name)) + return temp; + } + + return NULL; +} + +static void delete_xcall_comm_locked(struct xcall_comm *info) +{ + struct xcall_comm *ret; + + down_write(&comm_rwsem); + ret = find_xcall_comm(info); + if (ret) + list_del(&ret->list); + up_write(&comm_rwsem); + free_xcall_comm(ret); +} + +static void insert_xcall_comm_locked(struct xcall_comm *info) +{ + down_write(&comm_rwsem); + if (!find_xcall_comm(info)) + list_add(&info->list, &comm_list); + up_write(&comm_rwsem); +} + +static int is_absolute_path(const char *path) +{ + return path[0] == '/'; +} + +static int parse_xcall_command(int argc, char **argv, + struct xcall_comm *info) +{ + struct dentry *dentry; + + if (strlen(argv[0]) < 3) + return -ECANCELED; + + if (argv[0][0] != '+' && argv[0][0] != '-') + return -ECANCELED; + + if (argv[0][1] != ':') + return -ECANCELED; + + if (argv[0][0] == '+' && argc != 3) + return -ECANCELED; + + if (argv[0][0] == '-' && argc != 1) + return -ECANCELED; + + info->name = kstrdup(&argv[0][2], GFP_KERNEL); + if (!info->name) + return -ENOMEM; + + if (argv[0][0] == '-') + return '-'; + + info->binary = kstrdup(argv[1], GFP_KERNEL); + if (!info->binary || !is_absolute_path(info->binary)) + goto free_name; + + if (kern_path(info->binary, LOOKUP_FOLLOW, &info->binary_path)) + goto free_binary; + + dentry = info->binary_path.dentry; + if (!dentry || !S_ISREG(d_inode(dentry)->i_mode) || + !(d_inode(dentry)->i_mode & 0111)) + goto put_path; + + info->module = kstrdup(argv[2], GFP_KERNEL); + if (!info->module) + goto put_path; + + return argv[0][0]; + +put_path: + path_put(&info->binary_path); +free_binary: + kfree(info->binary); +free_name: + kfree(info->name); + return 'x'; +} + +/* + * /proc/xcall/comm + * Argument syntax: + * +:COMM ELF_FILE [KERNEL_MODULE] : Attach a xcall + * -:COMM : Detach a xcall + * + * COMM: : Unique string for attached xcall. + * ELF_FILE : Path to an executable or library. + * KERNEL_MODULE : Module name listed in /proc/modules provide xcall program. 
+ */ +int proc_xcall_command(int argc, char **argv) +{ + struct xcall_comm *info; + int ret, op; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + INIT_LIST_HEAD(&info->list); + + op = parse_xcall_command(argc, argv, info); + switch (op) { + case '+': + ret = xcall_attach(info); + if (!ret) + insert_xcall_comm_locked(info); + else + free_xcall_comm(info); + break; + case '-': + ret = xcall_detach(info); + if (!ret) + delete_xcall_comm_locked(info); + free_xcall_comm(info); + break; + default: + kfree(info); + return -EINVAL; + } + + return ret; +} + +static int xcall_comm_show(struct seq_file *m, void *v) +{ + struct xcall_comm *info; + + down_read(&comm_rwsem); + list_for_each_entry(info, &comm_list, list) { + seq_printf(m, "+:%s %s %s\n", + info->name, info->binary, + info->module); + } + up_read(&comm_rwsem); + return 0; +} + +static int xcall_comm_open(struct inode *inode, struct file *file) +{ + return single_open(file, xcall_comm_show, NULL); +} + +static ssize_t xcall_comm_write(struct file *file, + const char __user *user_buf, + size_t nbytes, loff_t *ppos) +{ + int argc = 0, ret = 0; + char *raw_comm; + char **argv; + + raw_comm = memdup_user_nul(user_buf, nbytes - 1); + if (IS_ERR(raw_comm)) + return PTR_ERR(raw_comm); + + argv = argv_split(GFP_KERNEL, raw_comm, &argc); + if (!argv) { + kfree(raw_comm); + return -ENOMEM; + } + + ret = proc_xcall_command(argc, argv); + + argv_free(argv); + + kfree(raw_comm); + + return ret ? ret : nbytes; +} + +static const struct proc_ops xcall_comm_ops = { + .proc_open = xcall_comm_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = xcall_comm_write, +}; + +static int __init xcall_proc_init(void) +{ + proc_mkdir("xcall", NULL); + proc_create("xcall/comm", 0644, NULL, &xcall_comm_ops); + return 0; +} +module_init(xcall_proc_init); diff --git a/arch/arm64/kernel/xcall/xcall.c b/arch/arm64/kernel/xcall/xcall.c index d8eaec7e463786375fc4faf41b15aeb00a0dc7a1..35bc959a0a5144ab00b5495ec49d4d3f027a8aac 100644 --- a/arch/arm64/kernel/xcall/xcall.c +++ b/arch/arm64/kernel/xcall/xcall.c @@ -6,64 +6,35 @@ */ #include -#include #include #include #include +// Only can switch by cmdline 'xcall=debug' +int sw_xcall_mode = XCALL_MODE_TASK; + static inline int sw_xcall_init_task(struct task_struct *p, struct task_struct *orig) { p->xinfo = kzalloc(sizeof(struct xcall_info), GFP_KERNEL); if (!p->xinfo) return -ENOMEM; - if (orig->xinfo) { - bitmap_copy(TASK_XINFO(p)->xcall_enable, TASK_XINFO(orig)->xcall_enable, - __NR_syscalls); - } - - return 0; -} - -#ifdef CONFIG_ACTLR_XCALL_XINT -static const void *default_syscall_table[__NR_syscalls + 1] = { - [0 ... 
__NR_syscalls] = no_xcall_entry, -}; - -asmlinkage DEFINE_PER_CPU(void *, __cpu_xcall_entry) = default_syscall_table; -static inline int hw_xcall_init_task(struct task_struct *p, struct task_struct *orig) -{ - struct hw_xcall_info *p_xinfo, *orig_xinfo; - - p->xinfo = kzalloc(sizeof(struct hw_xcall_info), GFP_KERNEL); - if (!p->xinfo) - return -ENOMEM; - - p_xinfo = TASK_HW_XINFO(p); - spin_lock_init(&p_xinfo->lock); + if (!orig->xinfo) + return 0; - if (!orig->xinfo) { - memcpy(p->xinfo, default_syscall_table, XCALL_ENTRY_SIZE); - atomic_set(&p_xinfo->xcall_scno_count, 0); - } else { - orig_xinfo = TASK_HW_XINFO(orig); - spin_lock(&orig_xinfo->lock); - memcpy(p->xinfo, orig->xinfo, XCALL_ENTRY_SIZE); - atomic_set(&p_xinfo->xcall_scno_count, - atomic_read(&orig_xinfo->xcall_scno_count)); - spin_unlock(&orig_xinfo->lock); - } + /* In xcall debug mode, all syscalls are enabled by default! */ + if (sw_xcall_mode == XCALL_MODE_SYSTEM) + memset(TASK_XINFO(p)->xcall_enable, 1, (__NR_syscalls + 1) * sizeof(u8)); + else + memcpy(TASK_XINFO(p)->xcall_enable, + TASK_XINFO(orig)->xcall_enable, + (__NR_syscalls + 1) * sizeof(u8)); return 0; } -#endif int xcall_init_task(struct task_struct *p, struct task_struct *orig) { -#ifdef CONFIG_ACTLR_XCALL_XINT - if (system_uses_xcall_xint()) - return hw_xcall_init_task(p, orig); -#endif if (static_branch_unlikely(&xcall_enable)) return sw_xcall_init_task(p, orig); @@ -72,6 +43,17 @@ int xcall_init_task(struct task_struct *p, struct task_struct *orig) void xcall_task_free(struct task_struct *p) { - if (system_uses_xcall_xint() || static_branch_unlikely(&xcall_enable)) + if (static_branch_unlikely(&xcall_enable)) kfree(p->xinfo); } + +static u8 default_xcall_info[__NR_syscalls + 1] = { + [0 ... __NR_syscalls] = 0, +}; +DEFINE_PER_CPU(u8*, __xcall_info) = default_xcall_info; + +void xcall_info_switch(struct task_struct *task) +{ + if (TASK_XINFO(task)->xcall_enable) + __this_cpu_write(__xcall_info, TASK_XINFO(task)->xcall_enable); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 416693a3200aef94fbcd4aa39e1ba14efacc67e2..73d5102e7a10fe0fede68e6c93d905c5b7899abb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index f9aef39cac2e93dbb488b62f44236c49eecbf338..702216e0ddd2f5b3912cb2a741bd04e0e0344e14 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -78,4 +78,6 @@ source "drivers/staging/qlge/Kconfig" source "drivers/staging/vme_user/Kconfig" +source "drivers/staging/xcall/Kconfig" + endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index ffa70dda481d36c526a11275a2b7896957a4edc3..3df57d6ab9b2311eae5925e75a62a5c49391502d 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_PI433) += pi433/ obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/ obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/ obj-$(CONFIG_QLGE) += qlge/ +obj-$(CONFIG_DYNAMIC_XCALL) += xcall/ diff --git a/drivers/staging/xcall/Kconfig b/drivers/staging/xcall/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..bf7421fa8a14243132d5b7c322a6d22871fba663 --- /dev/null +++ b/drivers/staging/xcall/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +menu "Xcall" + +if ARM64 + +config DYNAMIC_XCALL_TESTCASE + tristate "xcall2.0 test case" + depends on DYNAMIC_XCALL + help + A simple example of using the xcall2.0 kernel 
module. + This module can be combined with the syscall sub-item of + Unixbench to evaluate the baseline noise of xcall2.0's + "Dynamic Instruction Replacement" mechanism. Users can + also use this module as a reference to implement custom + system calls. + +endif # if ARM64 + +endmenu diff --git a/drivers/staging/xcall/Makefile b/drivers/staging/xcall/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d8c6137e2945ba54f943b6599149dd827ca65e1a --- /dev/null +++ b/drivers/staging/xcall/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DYNAMIC_XCALL_TESTCASE) += dynamic_xcall_test.o prefetch.o diff --git a/drivers/staging/xcall/dynamic_xcall_test.c b/drivers/staging/xcall/dynamic_xcall_test.c new file mode 100644 index 0000000000000000000000000000000000000000..159c2a15854c0253d2de168350810037838cdf53 --- /dev/null +++ b/drivers/staging/xcall/dynamic_xcall_test.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * A simple dummy xcall for syscall testing + * + * The data struct and functions marked as MANDATORY have to + * be includes in all of kernel xcall modules. + * + * Copyright (C) 2025 Huawei Limited. + */ + +#define pr_fmt(fmt) "dummy_xcall: " fmt + +#include +#include +#include + +#include + +static long __do_sys_close(struct pt_regs *regs) +{ + return default_sys_call_table()[__NR_close](regs); +} + +static long __do_sys_getpid(struct pt_regs *regs) +{ + return default_sys_call_table()[__NR_getpid](regs); +} + +static long __do_sys_getuid(struct pt_regs *regs) +{ + return default_sys_call_table()[__NR_getuid](regs); +} + +static long __do_sys_unmask(struct pt_regs *regs) +{ + return default_sys_call_table()[__NR_umask](regs); +} + +static long __do_sys_dup(struct pt_regs *regs) +{ + return default_sys_call_table()[__NR_dup](regs); +} + +/* MANDATORY */ +static struct xcall_prog dummy_xcall_prog = { + .name = "dummy_xcall", + .owner = THIS_MODULE, + .objs = { + { + .scno = (unsigned long)__NR_getpid, + .func = (unsigned long)__do_sys_getpid, + }, + { + .scno = (unsigned long)__NR_getuid, + .func = (unsigned long)__do_sys_getuid, + }, + { + .scno = (unsigned long)__NR_close, + .func = (unsigned long)__do_sys_close, + }, + { + .scno = (unsigned long)__NR_umask, + .func = (unsigned long)__do_sys_unmask, + }, + { + .scno = (unsigned long)__NR_dup, + .func = (unsigned long)__do_sys_dup, + }, + {} + } +}; + +/* MANDATORY */ +static int __init dummy_xcall_init(void) +{ + INIT_LIST_HEAD(&dummy_xcall_prog.list); + return xcall_prog_register(&dummy_xcall_prog); +} + +/* MANDATORY */ +static void __exit dummy_xcall_exit(void) +{ + xcall_prog_unregister(&dummy_xcall_prog); +} + +module_init(dummy_xcall_init); +module_exit(dummy_xcall_exit); +MODULE_AUTHOR("Liao Chang "); +MODULE_DESCRIPTION("Dummy Xcall"); +MODULE_LICENSE("GPL"); diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c new file mode 100644 index 0000000000000000000000000000000000000000..f911f3635cb7f9a31d3273681c6e8eceb3b19f3a --- /dev/null +++ b/drivers/staging/xcall/prefetch.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * A simple dummy xcall for syscall testing + * + * The data struct and functions marked as MANDATORY have to + * be includes in all of kernel xcall modules. + * + * Copyright (C) 2025 Huawei Limited. 
+ */ + +#define pr_fmt(fmt) "dummy_xcall: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define MAX_FD 100 + +static unsigned long xcall_cache_hit; +static unsigned long xcall_cache_miss; + +struct proc_dir_entry *xcall_proc_dir; + +enum cache_state { + XCALL_CACHE_NONE = 0, + XCALL_CACHE_PREFETCH, + XCALL_CACHE_READY, + XCALL_CACHE_CANCEL +}; + +struct prefetch_item { + int fd; + int cpu; + int pos; + int len; + atomic_t state; + struct file *file; + struct work_struct work; + char cache[PAGE_SIZE]; +}; + +static struct epoll_event events[MAX_FD] = {0}; + +static struct prefetch_item prefetch_items[MAX_FD] = {0}; +static struct workqueue_struct *rc_work; + +static inline bool transition_state(struct prefetch_item *pfi, + enum cache_state old, enum cache_state new) +{ + return atomic_cmpxchg(&pfi->state, old, new) == old; +} + +static void prefetch_work_fn(struct work_struct *work) +{ + struct prefetch_item *pfi = container_of(work, struct prefetch_item, work); + + if (!transition_state(pfi, XCALL_CACHE_NONE, XCALL_CACHE_PREFETCH)) + return; + + pfi->pos = 0; + pfi->len = kernel_read(pfi->file, pfi->cache, PAGE_SIZE, &pfi->file->f_pos); + + transition_state(pfi, XCALL_CACHE_PREFETCH, XCALL_CACHE_READY); +} + +static long __do_sys_epoll_pwait(struct pt_regs *regs) +{ + struct prefetch_item *pfi; + int i, fd, err; + long ret; + + ret = default_sys_call_table()[__NR_epoll_pwait](regs); + if (!ret) + return 0; + + err = copy_from_user(events, (void __user *)regs->regs[1], + ret * sizeof(struct epoll_event)); + if (err) + return -EFAULT; + + for (i = 0; i < ret; i++) { + fd = events[i].data; + if (events[i].events & EPOLLIN) { + pfi = &prefetch_items[fd]; + if (!pfi->file) + pfi->file = fget(fd); + + queue_work_on(250 + (fd % 4), rc_work, &pfi->work); + } + } + + return ret; +} + +static long __do_sys_read(struct pt_regs *regs) +{ + int fd = regs->regs[0]; + struct prefetch_item *pfi = &prefetch_items[fd]; + void *user_buf = (void *)regs->regs[1]; + int count = regs->regs[2]; + int copy_len; + long ret; + + if (!pfi->file) + goto not_epoll_fd; + + while (!transition_state(pfi, XCALL_CACHE_READY, XCALL_CACHE_CANCEL)) { + if (transition_state(pfi, XCALL_CACHE_NONE, XCALL_CACHE_CANCEL)) + goto slow_read; + } + + xcall_cache_hit++; + copy_len = pfi->len; + + if (copy_len == 0) { + transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE); + return 0; + } + + copy_len = (copy_len >= count) ? 
count : copy_len; + copy_len -= copy_to_user(user_buf, (void *)(pfi->cache + pfi->pos), copy_len); + pfi->len -= copy_len; + pfi->pos += copy_len; + + if (pfi->len == 0) + transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE); + else + transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_READY); + return copy_len; + +slow_read: + xcall_cache_miss++; + pfi->len = 0; + pfi->pos = 0; + cancel_work_sync(&pfi->work); + transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE); +not_epoll_fd: + ret = default_sys_call_table()[__NR_read](regs); + return ret; +} + +static long __do_sys_close(struct pt_regs *regs) +{ + struct prefetch_item *pfi; + int fd = regs->regs[0]; + long ret; + + pfi = &prefetch_items[fd]; + if (pfi && pfi->file) { + fput(pfi->file); + pfi->file = NULL; + } + + ret = default_sys_call_table()[__NR_close](regs); + return ret; +} + +/* MANDATORY */ +static struct xcall_prog xcall_prefetch_prog = { + .name = "xcall_prefetch", + .owner = THIS_MODULE, + .objs = { + { + .scno = (unsigned long)__NR_epoll_pwait, + .func = (unsigned long)__do_sys_epoll_pwait, + }, + { + .scno = (unsigned long)__NR_read, + .func = (unsigned long)__do_sys_read, + }, + { + .scno = (unsigned long)__NR_close, + .func = (unsigned long)__do_sys_close, + }, + {} + } +}; + +static ssize_t xcall_prefetch_reset(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + xcall_cache_hit = 0; + xcall_cache_miss = 0; + + return count; +} + +static int xcall_prefetch_show(struct seq_file *m, void *v) +{ + u64 percent; + + percent = DIV_ROUND_CLOSEST(xcall_cache_hit * 100ULL, xcall_cache_hit + xcall_cache_miss); + seq_printf(m, "epoll cache_{hit,miss}: %lu,%lu, hit ratio: %llu%%\n", + xcall_cache_hit, xcall_cache_miss, percent); + return 0; +} + +static int xcall_prefetch_open(struct inode *inode, struct file *file) +{ + return single_open(file, xcall_prefetch_show, NULL); +} + +static const struct proc_ops xcall_prefetch_fops = { + .proc_open = xcall_prefetch_open, + .proc_read = seq_read, + .proc_write = xcall_prefetch_reset, + .proc_lseek = seq_lseek, + .proc_release = single_release +}; + +static int __init init_xcall_prefetch_procfs(void) +{ + struct proc_dir_entry *prefetch_dir; + + xcall_proc_dir = proc_mkdir("xcall_stat", NULL); + if (!xcall_proc_dir) + return -ENOMEM; + prefetch_dir = proc_create("prefetch", 0640, xcall_proc_dir, &xcall_prefetch_fops); + if (!prefetch_dir) + goto rm_xcall_proc_dir; + + return 0; + +rm_xcall_proc_dir: + proc_remove(xcall_proc_dir); + return -ENOMEM; +} + +/* MANDATORY */ +static int __init dummy_xcall_init(void) +{ + int i; + + rc_work = alloc_workqueue("eventpoll_rc", 0, 0); + if (!rc_work) + pr_warn("alloc eventpoll_rc workqueue failed.\n"); + + for (i = 0; i < MAX_FD; i++) + INIT_WORK(&prefetch_items[i].work, prefetch_work_fn); + + init_xcall_prefetch_procfs(); + + INIT_LIST_HEAD(&xcall_prefetch_prog.list); + return xcall_prog_register(&xcall_prefetch_prog); +} + +/* MANDATORY */ +static void __exit dummy_xcall_exit(void) +{ + proc_remove(xcall_proc_dir); + xcall_prog_unregister(&xcall_prefetch_prog); +} + +module_init(dummy_xcall_init); +module_exit(dummy_xcall_exit); +MODULE_AUTHOR("Liao Chang "); +MODULE_DESCRIPTION("Dummy Xcall"); +MODULE_LICENSE("GPL"); diff --git a/fs/proc/proc_xcall.c b/fs/proc/proc_xcall.c index 5a417bc7cb0ab7b832f8373d44c5f78584a989c5..8f73752358031277c159012065c144fb97a8db66 100644 --- a/fs/proc/proc_xcall.c +++ b/fs/proc/proc_xcall.c @@ -4,90 +4,51 @@ * * Copyright (C) 2025 Huawei Ltd. 
*/ -#include #include #include #include #include "internal.h" -#ifdef CONFIG_ACTLR_XCALL_XINT -static void proc_hw_xcall_show(struct task_struct *p, struct seq_file *m) -{ - struct hw_xcall_info *hw_xinfo = TASK_HW_XINFO(p); - unsigned int i, start = 0, end = 0; - bool in_range = false; - - if (!hw_xinfo) - return; - - for (i = 0; i < __NR_syscalls; i++) { - bool scno_xcall_enable = is_xcall_entry(hw_xinfo, i); - - if (scno_xcall_enable && !in_range) { - in_range = true; - start = i; - } - - if ((!scno_xcall_enable || i == __NR_syscalls - 1) && in_range) { - in_range = false; - end = scno_xcall_enable ? i : i - 1; - if (i == start + 1) - seq_printf(m, "%u,", start); - else - seq_printf(m, "%u-%u,", start, end); - } - } - seq_puts(m, "\n"); -} - -static int proc_set_hw_xcall(struct task_struct *p, unsigned int sc_no, - bool is_clear) -{ - struct hw_xcall_info *hw_xinfo = TASK_HW_XINFO(p); - - if (!is_clear) - return set_hw_xcall_entry(hw_xinfo, sc_no, true); - - if (is_clear) - return set_hw_xcall_entry(hw_xinfo, sc_no, false); - - return -EINVAL; -} -#endif - static int xcall_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct task_struct *p; - unsigned int rs, re; + int start = -1, first = 1; struct xcall_info *xinfo; + struct task_struct *p; + int scno = 0; - if (!system_uses_xcall_xint() && !static_key_enabled(&xcall_enable)) + if (!static_key_enabled(&xcall_enable)) return -EACCES; p = get_proc_task(inode); if (!p) return -ESRCH; -#ifdef CONFIG_ACTLR_XCALL_XINT - if (system_uses_xcall_xint()) { - proc_hw_xcall_show(p, m); - goto out; - } -#endif - xinfo = TASK_XINFO(p); if (!xinfo) goto out; - for (rs = 0, bitmap_next_set_region(xinfo->xcall_enable, &rs, &re, __NR_syscalls); - rs < re; rs = re + 1, - bitmap_next_set_region(xinfo->xcall_enable, &rs, &re, __NR_syscalls)) { - if (rs == (re - 1)) - seq_printf(m, "%d,", rs); - else - seq_printf(m, "%d-%d,", rs, re - 1); + for (scno = 0; scno <= __NR_syscalls; scno++) { + if (scno == __NR_syscalls || !xinfo->xcall_enable[scno]) { + if (start == -1) + continue; + + if (!first) + seq_puts(m, ","); + + if (start == scno - 1) + seq_printf(m, "%d", start); + else + seq_printf(m, "%d-%d", start, scno - 1); + + first = 0; + start = -1; + } else { + if (start == -1) + start = scno; + } } + seq_puts(m, "\n"); out: put_task_struct(p); @@ -100,68 +61,39 @@ static int xcall_open(struct inode *inode, struct file *filp) return single_open(filp, xcall_show, inode); } -static int xcall_enable_one(struct xcall_info *xinfo, unsigned int sc_no) -{ - test_and_set_bit(sc_no, xinfo->xcall_enable); - return 0; -} - -static int xcall_disable_one(struct xcall_info *xinfo, unsigned int sc_no) -{ - test_and_clear_bit(sc_no, xinfo->xcall_enable); - return 0; -} - -static ssize_t xcall_write(struct file *file, const char __user *buf, +static ssize_t xcall_write(struct file *file, const char __user *ubuf, size_t count, loff_t *offset) { - struct inode *inode = file_inode(file); - struct task_struct *p; - char buffer[5]; - const size_t maxlen = sizeof(buffer) - 1; unsigned int sc_no = __NR_syscalls; + struct task_struct *p; + char buf[5]; int ret = 0; - int is_clear = 0; - struct xcall_info *xinfo; - if (!system_uses_xcall_xint() && !static_key_enabled(&xcall_enable)) + if (!static_key_enabled(&xcall_enable)) return -EACCES; - memset(buffer, 0, sizeof(buffer)); - if (!count || copy_from_user(buffer, buf, count > maxlen ? 
maxlen : count)) - return -EFAULT; - - p = get_proc_task(inode); - if (!p || !p->xinfo) + p = get_proc_task(file_inode(file)); + if (!p || !TASK_XINFO(p)) return -ESRCH; - if (buffer[0] == '!') - is_clear = 1; - - if (kstrtouint(buffer + is_clear, 10, &sc_no)) { - ret = -EINVAL; + memset(buf, '\0', 5); + if (!count || (count > 5) || copy_from_user(buf, ubuf, count)) { + ret = -EFAULT; goto out; } - if (sc_no >= __NR_syscalls) { + if (kstrtouint((buf + (int)(buf[0] == '!')), 10, &sc_no)) { ret = -EINVAL; goto out; } -#ifdef CONFIG_ACTLR_XCALL_XINT - if (system_uses_xcall_xint()) { - ret = proc_set_hw_xcall(p, sc_no, is_clear); + if (sc_no >= __NR_syscalls) { + ret = -EINVAL; goto out; } -#endif - xinfo = TASK_XINFO(p); - if (!is_clear && !test_bit(sc_no, xinfo->xcall_enable)) - ret = xcall_enable_one(xinfo, sc_no); - else if (is_clear && test_bit(sc_no, xinfo->xcall_enable)) - ret = xcall_disable_one(xinfo, sc_no); - else - ret = -EINVAL; + (TASK_XINFO(p))->xcall_enable[sc_no] = (int)(buf[0] != '!'); + ret = 0; out: put_task_struct(p); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5d6ee378d7d4d020335c4a221e7f1ab549df2ec0..082839935cc6e17de15e234f6a676c54150b3122 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1025,7 +1025,11 @@ struct mm_struct { #else KABI_RESERVE(2) #endif +#ifdef CONFIG_DYNAMIC_XCALL + KABI_USE(3, void *xcall) +#else KABI_RESERVE(3) +#endif KABI_RESERVE(4) KABI_RESERVE(5) /* diff --git a/include/linux/xcall.h b/include/linux/xcall.h new file mode 100644 index 0000000000000000000000000000000000000000..510aebe4e7c0614daf828da0aa41e92511b9669d --- /dev/null +++ b/include/linux/xcall.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Huawei. + */ + +#ifndef _LINUX_XCALL_H +#define _LINUX_XCALL_H + +#include + +struct vm_area_struct; +struct mm_struct; +struct inode; + +struct xcall_prog_object { + unsigned long scno; + unsigned long func; +}; + +#define PROG_NAME_LEN 64 +#define MAX_NR_SCNO 32 + +struct xcall_prog { + char name[PROG_NAME_LEN]; + struct module *owner; + struct list_head list; + struct xcall_prog_object objs[MAX_NR_SCNO]; + unsigned int nr_scno; +}; + +#ifdef CONFIG_DYNAMIC_XCALL +extern int xcall_prog_register(struct xcall_prog *prog); +extern void xcall_prog_unregister(struct xcall_prog *prog); +extern void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p); +extern void clear_xcall_area(struct mm_struct *mm); +extern int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm); +#else /* !CONFIG_DYNAMIC_XCALL */ +static inline int xcall_prog_register(struct xcall_prog *prog) +{ + return -EINVAL; +} +static inline void xcall_prog_unregister(struct xcall_prog *prog) {} +static inline void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p) {} +static inline void clear_xcall_area(struct mm_struct *mm) {} +static inline int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_XCALL */ + +#endif /* _LINUX_XCALL_H */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 609e48784f7794cbb3e7e6eeff2afb409f24ee39..e382f7e4d5d9bce5e6317905356d26881076cf0b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -595,6 +595,29 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v *(uprobe_opcode_t *)&auprobe->insn); } +#ifdef CONFIG_DYNAMIC_XCALL +/* + * Force to patch any instruction without checking the old instruction + * is UPROBE_BRK. 
+ */ +int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) +{ + struct uprobe uprobe = { .ref_ctr_offset = 0 }; + int ret; + + /* Use the UPROBE_SWBP_INSN to occupy the vaddr avoid uprobe writes it */ + ret = uprobe_write_opcode(&uprobe.arch, mm, vaddr, UPROBE_SWBP_INSN); + if (ret) + return 1; + + ret = uprobe_write_opcode(&uprobe.arch, mm, vaddr, opcode); + if (ret) + return -1; + + return 0; +} +#endif + static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); diff --git a/kernel/fork.c b/kernel/fork.c index e9ce45e1f9713e3f55ad624e0748f56d9a5c4961..1ceb5583c5d74974ac91592b474d648e06698320 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,7 @@ #endif #include #include +#include #include #include @@ -1373,6 +1374,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, #if defined(CONFIG_DAMON_MEM_SAMPLING) mm->damon_fifo = NULL; #endif + mm_init_xcall_area(mm, p); mm_init_uprobes_state(mm); hugetlb_count_init(mm); @@ -1426,6 +1428,7 @@ static inline void __mmput(struct mm_struct *mm) { VM_BUG_ON(atomic_read(&mm->mm_users)); + clear_xcall_area(mm); uprobe_clear_state(mm); exit_aio(mm); ksm_exit(mm); diff --git a/mm/mmap.c b/mm/mmap.c index ce70c8740f8ad690cd32cc63d66ba9a38d39a71a..bd99a80d4a9326f0c5e9c7452427b05720919fe7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -592,9 +593,12 @@ static inline void vma_complete(struct vma_prepare *vp, if (!vp->skip_vma_uprobe) { uprobe_mmap(vp->vma); + xcall_mmap(vp->vma, mm); - if (vp->adj_next) + if (vp->adj_next) { uprobe_mmap(vp->adj_next); + xcall_mmap(vp->adj_next, mm); + } } } @@ -624,8 +628,10 @@ static inline void vma_complete(struct vma_prepare *vp, goto again; } } - if (vp->insert && vp->file) + if (vp->insert && vp->file) { uprobe_mmap(vp->insert); + xcall_mmap(vp->insert, mm); + } validate_mm(mm); } @@ -2998,8 +3004,10 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file, mm->locked_vm += (len >> PAGE_SHIFT); } - if (file) + if (file) { uprobe_mmap(vma); + xcall_mmap(vma, mm); + } /* * New (or expanded) vma always get soft dirty status.