diff --git a/arch/Kconfig b/arch/Kconfig index 98e8ee5a8a74284bed144d42da264e1d28601eb8..98116fbfcff667a7ab8cb4a7d112fef45ff52e85 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1530,4 +1530,68 @@ config FUNCTION_ALIGNMENT default 4 if FUNCTION_ALIGNMENT_4B default 0 +config ARCH_SUPPORTS_FAST_SYSCALL + bool + +config FAST_SYSCALL + bool "Fast Syscall support" + depends on ARCH_SUPPORTS_FAST_SYSCALL + default n + help + This enables support for the fast syscall feature. + The svc exception handling process, which includes auxiliary + functions for debug/trace and core functions like + KPTI, has been identified as overly "lengthy". + This inefficiency is particularly noticeable in short syscalls + such as lseek() and getpid(), where the syscall function itself + comprises a small percentage of the total instructions executed. + To address this, we introduce the concept of a fast syscall: an svc + exception handling path that includes only the necessary features, + such as security, context saving, and recovery. + +config ARCH_SUPPORTS_FAST_IRQ + bool + +config FAST_IRQ + bool "Fast irq support" + depends on ARCH_SUPPORTS_FAST_IRQ + default n + help + The irq handling process, which includes auxiliary + functions for debug/trace and core functions like + KPTI, interrupt time recording, interrupt processing as + a random number source, interrupt affinity + modification and interrupt processing races, as well as + spurious and unhandled interrupt debugging, has been + identified as overly "lengthy". + To address this, we introduce the concept of a fast irq: + a fast interrupt handling path that includes only + necessary features such as security, context saving + and recovery, adding a lightweight interrupt processing + framework for latency-sensitive interrupts. + +config DEBUG_FEATURE_BYPASS + bool "Bypass debug features in fast syscall/irq" + depends on FAST_SYSCALL || FAST_IRQ + default y + help + This option bypasses debug features in the fast syscall + and fast irq paths. The svc exception handling process, + which includes auxiliary functions for debug/trace and + core functions like KPTI, is overly "lengthy". + The fast paths include only the necessary features. + Disable this config to keep debug features in the fast paths. + +config SECURITY_FEATURE_BYPASS + bool "Bypass security features in fast syscall/irq" + depends on FAST_SYSCALL || FAST_IRQ + default y + help + This option bypasses security features in the fast syscall + and fast irq paths. The svc exception handling process, + which includes auxiliary functions for debug/trace and + core functions like KPTI, is overly "lengthy". + The fast paths include only the necessary features. + Disable this config to keep security features in the fast paths. + endmenu diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f917ba077fb4197a7e6df72c1aead79da0d4c959..1741f894880f4d8822c2daed93b891fe801088ab 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -262,6 +262,8 @@ config ARM64 select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK + select ARCH_SUPPORTS_FAST_SYSCALL if !ARM64_MTE && !KASAN_HW_TAGS + select ARCH_SUPPORTS_FAST_IRQ if ARM_GIC_V3 && !ARM64_MTE && !KASAN_HW_TAGS help ARM 64-bit (AArch64) Linux support.
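For reference, a minimal .config fragment exercising both fast paths might look like the following — a sketch, assuming an arm64 build that satisfies the ARCH_SUPPORTS_* gates selected above; note that, per the cpufeature.c hunk below, the ARM64_HAS_XCALL/ARM64_HAS_XINT capabilities are additionally gated on the `xcall`/`xint` kernel command-line parameters at boot:

    CONFIG_FAST_SYSCALL=y
    CONFIG_FAST_IRQ=y
    # both default to y; set to n to keep the debug/security
    # features on the fast paths
    CONFIG_DEBUG_FEATURE_BYPASS=y
    CONFIG_SECURITY_FEATURE_BYPASS=y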
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index e997ad275afb434c2b0d7f143a63065232282eaa..f20918eb36bc34e1751e71f6e50ed7bff302204e 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -28,6 +28,9 @@ int main(void) { +#ifdef CONFIG_FAST_SYSCALL + DEFINE(TSK_XCALL, offsetof(struct task_struct, xcall_enable)); +#endif DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); BLANK(); DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a7b4ccd7983e20b557a5e3512d9e7b8ad9c74c0a..8f6bb275459212bbbefe9062e191dcabc6c560ce 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2375,6 +2375,41 @@ static void mpam_extra_caps(void) __enable_mpam_hcr(); } +#ifdef CONFIG_FAST_SYSCALL +static bool is_xcall_support; +static int __init xcall_setup(char *str) +{ + is_xcall_support = true; + return 1; +} +__setup("xcall", xcall_setup); + +bool fast_syscall_enabled(void) +{ + return is_xcall_support; +} + +static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_xcall_support; +} +#endif + +#ifdef CONFIG_FAST_IRQ +bool is_xint_support; +static int __init xint_setup(char *str) +{ + is_xint_support = true; + return 1; +} +__setup("xint", xint_setup); + +static bool has_xint_support(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_xint_support; +} +#endif + static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -2891,6 +2926,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, TWED, IMP) }, +#endif +#ifdef CONFIG_FAST_SYSCALL + { + .desc = "Xcall Support", + .capability = ARM64_HAS_XCALL, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_xcall_support, + }, +#endif +#ifdef CONFIG_FAST_IRQ + { + .desc = "Xint Support", + .capability = ARM64_HAS_XINT, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_xint_support, + }, #endif {}, }; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 08274e4317b242df18282dcd6717e3b8adc20d51..9026ab4bb251d49811158c434ee8a4c5662ab21d 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -512,6 +512,119 @@ static __always_inline void __el1_pnmi(struct pt_regs *regs, arm64_exit_nmi(regs); } +#ifdef CONFIG_FAST_IRQ +static __always_inline void __el1_xint(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + enter_from_kernel_mode(regs); +#endif + + xint_enter_rcu(); + do_interrupt_handler(regs, handler); + xint_exit_rcu(); + + arm64_preempt_schedule_irq(); + +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + exit_to_kernel_mode(regs); +#endif +} + +static void noinstr el1_xint(struct pt_regs *regs, u64 nmi_flag, + void (*handler)(struct pt_regs *), + void (*nmi_handler)(struct pt_regs *)) +{ + /* Is there a NMI to handle? 
*/ +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) { + __el1_nmi(regs, nmi_handler); + return; + } +#endif + + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); + + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + __el1_pnmi(regs, handler); + else + __el1_xint(regs, handler); +} + +asmlinkage void noinstr el1h_64_xint_handler(struct pt_regs *regs) +{ + el1_xint(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); +} + +static __always_inline void xint_exit_to_user_mode_prepare(struct pt_regs *regs) +{ + unsigned long flags; + + local_daif_mask(); + + flags = read_thread_flags(); + if (unlikely(flags & _TIF_WORK_MASK)) + do_notify_resume(regs, flags); + +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + lockdep_sys_exit(); +#endif +} + +static __always_inline void xint_exit_to_user_mode(struct pt_regs *regs) +{ + xint_exit_to_user_mode_prepare(regs); +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + mte_check_tfsr_exit(); + __exit_to_user_mode(); +#endif +} + +static void noinstr el0_xint(struct pt_regs *regs, u64 nmi_flag, + void (*handler)(struct pt_regs *), + void (*nmi_handler)(struct pt_regs *)) +{ +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + enter_from_user_mode(regs); + + /* Is there a NMI to handle? */ + if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) { + /* + * Any system with FEAT_NMI should have FEAT_CSV2 and + * not be affected by Spectre v2 so we don't mitigate + * here. + */ + + arm64_enter_nmi(regs); + do_interrupt_handler(regs, nmi_handler); + arm64_exit_nmi(regs); + + exit_to_user_mode(regs); + return; + } +#endif + + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); + +#ifndef CONFIG_SECURITY_FEATURE_BYPASS + if (regs->pc & BIT(55)) + arm64_apply_bp_hardening(); +#endif + + xint_enter_rcu(); + do_interrupt_handler(regs, handler); + xint_exit_rcu(); + + xint_exit_to_user_mode(regs); +} + + +asmlinkage void noinstr el0t_64_xint_handler(struct pt_regs *regs) +{ + el0_xint(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); +} +#endif /* CONFIG_FAST_IRQ */ + static __always_inline void __el1_irq(struct pt_regs *regs, void (*handler)(struct pt_regs *)) { @@ -714,6 +827,55 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +#ifdef CONFIG_FAST_SYSCALL +/* + * Copy from exit_to_user_mode_prepare + */ +static __always_inline void exit_to_user_mode_prepare_xcall(struct pt_regs *regs) +{ + unsigned long flags; + + local_daif_mask(); + + flags = read_thread_flags(); + if (unlikely(flags & _TIF_WORK_MASK)) + do_notify_resume(regs, flags); + +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + lockdep_sys_exit(); +#endif +} + +static __always_inline void xcall_exit_to_user_mode(struct pt_regs *regs) +{ + exit_to_user_mode_prepare_xcall(regs); +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + mte_check_tfsr_exit(); + __exit_to_user_mode(); +#endif +} + +/* Copy from el0_sync */ +static void noinstr el0_xcall(struct pt_regs *regs) +{ +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + enter_from_user_mode(regs); +#endif +#ifndef CONFIG_SECURITY_FEATURE_BYPASS + cortex_a76_erratum_1463225_svc_handler(); +#endif + fp_user_discard(); + local_daif_restore(DAIF_PROCCTX); + do_el0_svc(regs); + xcall_exit_to_user_mode(regs); +} + +asmlinkage void noinstr el0t_64_xcall_handler(struct pt_regs *regs) +{ + el0_xcall(regs); +} +#endif + asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 
7fcbee0f6c0e4e316da280eb1706c6be5213dd51..752272286e99e2fdc2573970779c8fa501af2a27 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -195,8 +195,8 @@ alternative_cb_end #endif .endm - .macro kernel_entry, el, regsize = 64 - .if \el == 0 + .macro kernel_entry, el, regsize = 64, fast_mode = std + .if \el == 0 && \fast_mode == std alternative_insn nop, SET_PSTATE_DIT(1), ARM64_HAS_DIT .endif .if \regsize == 32 @@ -228,12 +228,16 @@ alternative_cb_end * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions * when scheduling. */ + .if \fast_mode == std ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 + .endif /* Check for asynchronous tag check faults in user space */ + .if \fast_mode == std ldr x0, [tsk, THREAD_SCTLR_USER] check_mte_async_tcf x22, x23, x0 + .endif #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH @@ -257,14 +261,19 @@ alternative_if ARM64_HAS_ADDRESS_AUTH alternative_else_nop_endif #endif + .if \fast_mode == std apply_ssbd 1, x22, x23 + .endif + .if \fast_mode == std mte_set_kernel_gcr x22, x23 + .endif /* * Any non-self-synchronizing system register updates required for * kernel entry should be placed before this point. */ + .if \fast_mode == std alternative_if ARM64_MTE isb b 1f @@ -273,6 +282,7 @@ alternative_if ARM64_HAS_ADDRESS_AUTH isb alternative_else_nop_endif 1: + .endif scs_load_current .else @@ -296,9 +306,11 @@ alternative_else_nop_endif add x29, sp, #S_STACKFRAME #ifdef CONFIG_ARM64_SW_TTBR0_PAN +.if \fast_mode == std alternative_if_not ARM64_HAS_PAN bl __swpan_entry_el\el alternative_else_nop_endif +.endif #endif stp x22, x23, [sp, #S_PC] @@ -332,7 +344,7 @@ alternative_else_nop_endif */ .endm - .macro kernel_exit, el + .macro kernel_exit, el, fast_mode = std .if \el != 0 disable_daif .endif @@ -356,14 +368,18 @@ alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR #ifdef CONFIG_ARM64_SW_TTBR0_PAN +.if \fast_mode == std alternative_if_not ARM64_HAS_PAN bl __swpan_exit_el\el alternative_else_nop_endif +.endif #endif .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 + + .if \fast_mode == std tst x22, #PSR_MODE32_BIT // native task? 
b.eq 3f @@ -378,11 +394,15 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: + .endif + scs_save tsk /* Ignore asynchronous tag check faults in the uaccess routines */ + .if \fast_mode == std ldr x0, [tsk, THREAD_SCTLR_USER] clear_mte_async_tcf x0 + .endif #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH @@ -404,10 +424,14 @@ alternative_if ARM64_HAS_ADDRESS_AUTH alternative_else_nop_endif #endif + .if \fast_mode == std mte_set_user_gcr tsk, x0, x1 + .endif + .if \fast_mode == std apply_ssbd 0, x0, x1 .endif + .endif msr elr_el1, x21 // set up the return data msr spsr_el1, x22 @@ -429,6 +453,7 @@ alternative_else_nop_endif .if \el == 0 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + .if \fast_mode == std alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0 msr far_el1, x29 @@ -441,22 +466,28 @@ alternative_else_nop_endif br x29 .L_skip_tramp_exit_\@: + .endif #endif + ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp /* This must be after the last explicit memory access */ + .if \fast_mode == std alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD tlbi vale1, xzr dsb nsh alternative_else_nop_endif + .endif eret .else ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp /* Ensure any device/NC reads complete */ + .if \fast_mode == std alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 + .endif eret .endif @@ -569,9 +600,128 @@ SYM_CODE_START_LOCAL(__bad_stack) SYM_CODE_END(__bad_stack) #endif /* CONFIG_VMAP_STACK */ +#ifdef CONFIG_FAST_SYSCALL + .macro check_esr_el1_ec_svc64 + /* Only support SVC64 for now */ + mrs x20, esr_el1 + lsr w20, w20, #ESR_ELx_EC_SHIFT + cmp x20, #ESR_ELx_EC_SVC64 + .endm + + .macro check_syscall_nr + cmp x8, __NR_syscalls + .endm + + .macro check_xcall_enable + /* x21 = task_struct->xcall_enable */ + ldr_this_cpu x20, __entry_task, x21 + ldr x21, [x20, #TSK_XCALL] + /* x20 = sc_no / 8 */ + lsr x20, x8, 3 + ldr x21, [x21, x20] + /* x8 = sc_no % 8 */ + and x8, x8, 7 + mov x20, 1 + lsl x20, x20, x8 + and x21, x21, x20 + cmp x21, 0 + .endm + + .macro check_xcall_pre_kernel_entry + stp x20, x21, [sp, #0] + /* is ESR_ELx_EC_SVC64 */ + check_esr_el1_ec_svc64 + bne .Lskip_xcall\@ + /* x8 >= __NR_syscalls */ + check_syscall_nr + bhs .Lskip_xcall\@ + str x8, [sp, #16] + /* is xcall enabled */ + check_xcall_enable + ldr x8, [sp, #16] + beq .Lskip_xcall\@ + ldp x20, x21, [sp, #0] + /* do xcall */ +#ifdef CONFIG_SECURITY_FEATURE_BYPASS + kernel_entry 0, 64, xcall +#else + kernel_entry 0, 64 +#endif + mov x0, sp + bl el0t_64_xcall_handler + kernel_exit 0, xcall +.Lskip_xcall\@: + ldp x20, x21, [sp, #0] + .endm +#endif + +#ifdef CONFIG_FAST_IRQ +.macro check_xint_pre_kernel_entry el:req ht:req + stp x0, x1, [sp, #0] + stp x2, x3, [sp, #16] + + ldr x0, =irqnr_xint_map + /* get hpp irqnr */ + mrs_s x1, SYS_ICC_HPPIR1_EL1 + + /* xint hwirq can not exceed 1020 */ + cmp x1, 1020 + b.ge .Lskip_xint\@ + + /* x2 = irqnr % 8 */ + and x2, x1, #7 + /* x3 = irqnr / 8 */ + lsr x3, x1, #3 + /* x1 is the byte of irqnr in irqnr_xint_map */ + ldr x1, [x0, x3] + + /* Get the check mask */ + mov x3, #1 + /* x3 = 1 << (irqnr % 8) */ + lsl x3, x3, x2 + + /* x1 = x1 & x3 */ + ands x1, x1, x3 + b.eq .Lskip_xint\@ + + ldp x0, x1, [sp, #0] + ldp x2, x3, [sp, #16] +#ifdef CONFIG_SECURITY_FEATURE_BYPASS + kernel_entry \el, 64, xint +#else + kernel_entry \el, 64 +#endif + mov x0, sp + bl el\el\ht\()_64_xint_handler + kernel_exit \el xint + +.Lskip_xint\@: + ldp x0, x1, [sp, #0] + ldp x2, x3, [sp, #16] +.endm +#endif .macro 
entry_handler el:req, ht:req, regsize:req, label:req SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) +#ifdef CONFIG_FAST_SYSCALL + .if \el == 0 && \regsize == 64 && \label == sync + /* Only support el0 aarch64 sync exception */ + alternative_if_not ARM64_HAS_XCALL + b .Lret_to_kernel_entry\@ + alternative_else_nop_endif + check_xcall_pre_kernel_entry + .Lret_to_kernel_entry\@: + .endif +#endif +#ifdef CONFIG_FAST_IRQ + .if \regsize == 64 && \label == irq && (( \el == 0 && \ht == t) || (\el == 1 && \ht == h)) + alternative_if_not ARM64_HAS_XINT + b .Lskip_check_xint\@ + alternative_else_nop_endif + check_xint_pre_kernel_entry \el \ht +.Lskip_check_xint\@: + .endif +#endif kernel_entry \el, \regsize mov x0, sp bl el\el\ht\()_\regsize\()_\label\()_handler diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 2d3df8c7315852063dc0587d43bc3ffb92f7b387..2bcc0aec6b6d5ad6949a86c15f50c8da81c7b94f 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,8 @@ HAS_TLB_RANGE HAS_TWED HAS_VIRT_HOST_EXTN HAS_WFXT +HAS_XCALL +HAS_XINT HW_DBM KVM_HVHE KVM_PROTECTED_MODE diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d287e7199cfe222467fe0217690ec2cee7b050c4..7b450c1dc61651c830b2a63e5d16bf4a9dd2cb08 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -35,6 +35,10 @@ #include "irq-gic-common.h" +#ifdef CONFIG_FAST_IRQ +#include "../../../kernel/irq/internals.h" +#endif + #define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) @@ -989,6 +993,125 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs __gic_handle_irq_from_irqson(regs); } +#ifdef CONFIG_FAST_IRQ +DECLARE_BITMAP(irqnr_xint_map, 1024); + +static bool can_set_xint(unsigned int hwirq) +{ + if (__get_intid_range(hwirq) == SGI_RANGE || + __get_intid_range(hwirq) == SPI_RANGE) + return true; + + return false; +} + +static bool xint_transform(int irqno, enum xint_op op) +{ + struct irq_data *data = irq_get_irq_data(irqno); + int hwirq; + + while (data->parent_data) + data = data->parent_data; + + hwirq = data->hwirq; + + if (!can_set_xint(hwirq)) + return false; + + switch (op) { + case IRQ_TO_XINT: + set_bit(hwirq, irqnr_xint_map); + xint_add_debugfs_entry(irqno); + return true; + case XINT_TO_IRQ: + clear_bit(hwirq, irqnr_xint_map); + xint_remove_debugfs_entry(irqno); + return false; + case XINT_SET_CHECK: + return test_bit(hwirq, irqnr_xint_map); + case XINT_RANGE_CHECK: + return true; + default: + return false; + } +} + +static ssize_t xint_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + int irq = (int)(long)pde_data(file_inode(file)); + bool xint_state = false; + unsigned long val; + char *buf = NULL; + + if (!xint_transform(irq, XINT_RANGE_CHECK)) + return -EPERM; + + buf = memdup_user_nul(buffer, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + if (kstrtoul(buf, 0, &val) || (val != 0 && val != 1)) { + kfree(buf); + return -EINVAL; + } + + xint_state = xint_transform(irq, XINT_SET_CHECK); + if (xint_state == val) { + kfree(buf); + return -EBUSY; + } + + local_irq_disable(); + disable_irq(irq); + + xint_transform(irq, xint_state ? 
XINT_TO_IRQ : IRQ_TO_XINT); + + enable_irq(irq); + local_irq_enable(); + + kfree(buf); + + return count; +} + +static int xint_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", xint_transform((long)m->private, XINT_SET_CHECK)); + return 0; +} + +static int xint_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xint_proc_show, pde_data(inode)); +} + +static const struct proc_ops xint_proc_ops = { + .proc_open = xint_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = xint_proc_write, +}; + +void register_irqchip_proc(struct irq_desc *desc, void *irqp) +{ + if (!is_xint_support) + return; + + /* create /proc/irq/<irq>/xint */ + proc_create_data("xint", 0644, desc->dir, &xint_proc_ops, irqp); +} + +void unregister_irqchip_proc(struct irq_desc *desc) +{ + if (!is_xint_support) + return; + + remove_proc_entry("xint", desc->dir); +} +#endif /* CONFIG_FAST_IRQ */ + static u32 gic_get_pribits(void) { u32 pribits; diff --git a/fs/proc/base.c b/fs/proc/base.c index 63fa766e5febe4dec5178543b4938b1653c31748..276588a25225da1800aa84009ee1bdfc2df39fb4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3394,6 +3394,121 @@ static const struct file_operations proc_pid_sg_level_operations = { }; #endif +#ifdef CONFIG_FAST_SYSCALL +bool fast_syscall_enabled(void); + +static int xcall_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + unsigned int rs, re; + + if (!fast_syscall_enabled()) + return -EACCES; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (!p->xcall_enable) + goto out; + + seq_printf(m, "Enabled Total[%d/%d]:", bitmap_weight(p->xcall_enable, __NR_syscalls), + __NR_syscalls); + + for (rs = 0, bitmap_next_set_region(p->xcall_enable, &rs, &re, __NR_syscalls); + rs < re; rs = re + 1, + bitmap_next_set_region(p->xcall_enable, &rs, &re, __NR_syscalls)) { + rs == (re - 1) ? seq_printf(m, "%d,", rs) : + seq_printf(m, "%d-%d,", rs, re - 1); + } + seq_puts(m, "\n"); +out: + put_task_struct(p); + + return 0; +} + +static int xcall_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, xcall_show, inode); +} + +static int xcall_enable_one(struct task_struct *p, unsigned int sc_no) +{ + bitmap_set(p->xcall_enable, sc_no, 1); + return 0; +} + +static int xcall_disable_one(struct task_struct *p, unsigned int sc_no) +{ + bitmap_clear(p->xcall_enable, sc_no, 1); + return 0; +} + +static ssize_t xcall_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[TASK_COMM_LEN]; + const size_t maxlen = sizeof(buffer) - 1; + unsigned int sc_no = __NR_syscalls; + int ret = 0; + int is_clear = 0; + + if (!fast_syscall_enabled()) + return -EACCES; + + memset(buffer, 0, sizeof(buffer)); + if (!count || copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) + return -EFAULT; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + /* bail out through "out" so the task reference is dropped */ + if (!p->xcall_enable) { + ret = -ESRCH; + goto out; + } + + if (buffer[0] == '!') + is_clear = 1; + + if (kstrtouint(buffer + is_clear, 10, &sc_no)) { + ret = -EINVAL; + goto out; + } + + if (sc_no >= __NR_syscalls) { + ret = -EINVAL; + goto out; + } + + if (!is_clear && !test_bit(sc_no, p->xcall_enable)) + ret = xcall_enable_one(p, sc_no); + else if (is_clear && test_bit(sc_no, p->xcall_enable)) + ret = xcall_disable_one(p, sc_no); + else + ret = -EINVAL; + +out: + put_task_struct(p); + + return ret ?
ret : count; +} + +static const struct file_operations proc_pid_xcall_operations = { + .open = xcall_open, + .read = seq_read, + .write = xcall_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + /* * Thread groups */ @@ -3420,6 +3529,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_QOS_SCHED_SMART_GRID REG("smart_grid_level", 0644, proc_pid_sg_level_operations), #endif +#ifdef CONFIG_FAST_SYSCALL + REG("xcall", 0644, proc_pid_xcall_operations), +#endif #ifdef CONFIG_SCHED_AUTOGROUP REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d57cab4d4c06fdcb78c3c6591171a68a6eef01f3..bfac616e6142fe49b1adfeec061a5b8b742c3d11 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -87,6 +87,11 @@ void irq_exit(void); */ void irq_exit_rcu(void); +#ifdef CONFIG_FAST_IRQ +void xint_enter_rcu(void); +void xint_exit_rcu(void); +#endif + #ifndef arch_nmi_enter #define arch_nmi_enter() do { } while (0) #define arch_nmi_exit() do { } while (0) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index ccf464326be717d81eef38cf2a57da5ff7a7844f..548b8a5c46cfcd383a3b1304742d28506b267671 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -721,6 +721,19 @@ static inline enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq) } } +#ifdef CONFIG_FAST_IRQ +extern bool is_xint_support; + +enum xint_op { + XINT_TO_IRQ, + IRQ_TO_XINT, + XINT_SET_CHECK, + XINT_RANGE_CHECK, +}; + +void register_irqchip_proc(struct irq_desc *desc, void *irqp); +void unregister_irqchip_proc(struct irq_desc *desc); +#endif #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index b8abb84382db29e730fc1a7979e2b8519c4c3920..0e67299a28cf235f186bfbe3470cdfa2fbf26d3c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1605,7 +1605,11 @@ struct task_struct { */ randomized_struct_fields_end +#if defined(CONFIG_FAST_SYSCALL) + KABI_USE(1, unsigned long *xcall_enable) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) diff --git a/kernel/fork.c b/kernel/fork.c index 34cddf4a8b26321e4d19a4b83a058ac9f6714e9e..f30b24c68442bc8125b7747aaec6825c3ba11ff7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -636,6 +636,10 @@ void free_task(struct task_struct *tsk) #ifdef CONFIG_QOS_SCHED_SMART_GRID if (smart_grid_enabled()) sched_grid_qos_free(tsk); +#endif +#ifdef CONFIG_FAST_SYSCALL + if (tsk->xcall_enable) + bitmap_free(tsk->xcall_enable); #endif free_task_struct(tsk); } @@ -1255,6 +1259,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->mm_cid_active = 0; tsk->migrate_from_cpu = -1; #endif + +#ifdef CONFIG_FAST_SYSCALL + tsk->xcall_enable = NULL; +#endif return tsk; free_stack: @@ -2417,6 +2425,15 @@ __latent_entropy struct task_struct *copy_process( rt_mutex_init_task(p); +#ifdef CONFIG_FAST_SYSCALL + p->xcall_enable = bitmap_zalloc(__NR_syscalls, GFP_KERNEL); + if (!p->xcall_enable) + goto bad_fork_free; + + if (current->xcall_enable) + bitmap_copy(p->xcall_enable, current->xcall_enable, __NR_syscalls); +#endif + #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY if (dynamic_affinity_enabled()) { retval = sched_prefer_cpus_fork(p, current->prefer_cpus); diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index aae0402507ed746a54650d7b9ddc356a4efcc0fb..dc94c360b54bb0023bed8f8d64daaa046a4e8e66 100644 --- a/kernel/irq/debugfs.c +++ 
b/kernel/irq/debugfs.c @@ -242,6 +242,34 @@ void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc) &dfs_irq_ops); } +#ifdef CONFIG_FAST_IRQ +static struct dentry *xint_dir; + +void xint_add_debugfs_entry(unsigned int irq) +{ + char name[10]; + char buf[100]; + + if (!xint_dir) + return; + + sprintf(name, "%d", irq); + sprintf(buf, "../irqs/%d", irq); + debugfs_create_symlink(name, xint_dir, buf); +} + +void xint_remove_debugfs_entry(unsigned int irq) +{ + char name[10]; + + if (!xint_dir) + return; + + sprintf(name, "%d", irq); + debugfs_lookup_and_remove(name, xint_dir); +} +#endif + static int __init irq_debugfs_init(void) { struct dentry *root_dir; @@ -253,6 +281,11 @@ static int __init irq_debugfs_init(void) irq_dir = debugfs_create_dir("irqs", root_dir); +#ifdef CONFIG_FAST_IRQ + if (is_xint_support) + xint_dir = debugfs_create_dir("xints", root_dir); +#endif + irq_lock_sparse(); for_each_active_irq(irq) irq_add_debugfs_entry(irq, irq_to_desc(irq)); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index bcc7f21db9eeb34986957aad5b6a018854b71545..26effac7fc82da8f2d03d12b39e63329c1a9f7c0 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -500,6 +500,14 @@ static inline void irq_remove_debugfs_entry(struct irq_desc *desc) debugfs_remove(desc->debugfs_file); kfree(desc->dev_name); } + +#ifdef CONFIG_FAST_IRQ +extern bool is_xint_support; + +void xint_add_debugfs_entry(unsigned int irq); +void xint_remove_debugfs_entry(unsigned int irq); +#endif + void irq_debugfs_copy_devname(int irq, struct device *dev); # ifdef CONFIG_IRQ_DOMAIN void irq_domain_debugfs_init(struct dentry *root); @@ -515,6 +523,16 @@ static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) static inline void irq_remove_debugfs_entry(struct irq_desc *d) { } + +#ifdef CONFIG_FAST_IRQ +static inline void xint_add_debugfs_entry(unsigned int irq) +{ +} +static inline void xint_remove_debugfs_entry(unsigned int irq) +{ +} +#endif + static inline void irq_debugfs_copy_devname(int irq, struct device *dev) { } diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 623b8136e9af3b86040102392e63c1ff4a5a04aa..0fdfde11ab81cd8cc4fcdb1c0709169d8efceef0 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -331,6 +331,9 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) action->dir = proc_mkdir(name, desc->dir); } +void __weak register_irqchip_proc(struct irq_desc *desc, void *irqp) { } +void __weak unregister_irqchip_proc(struct irq_desc *desc) { } + #undef MAX_NAMELEN #define MAX_NAMELEN 10 @@ -385,6 +388,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) #endif proc_create_single_data("spurious", 0444, desc->dir, irq_spurious_proc_show, (void *)(long)irq); + register_irqchip_proc(desc, irqp); out_unlock: mutex_unlock(&register_lock); @@ -408,6 +412,8 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) #endif remove_proc_entry("spurious", desc->dir); + unregister_irqchip_proc(desc); + sprintf(name, "%u", irq); remove_proc_entry(name, root_irq_dir); } diff --git a/kernel/softirq.c b/kernel/softirq.c index bd9716d7bb63835687ee3798c6857ddda9c8cffd..6dc0ea5baf0666c2c02795d14081726a791731c8 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -663,6 +663,53 @@ void irq_exit(void) lockdep_hardirq_exit(); } +#ifdef CONFIG_FAST_IRQ +void xint_enter_rcu(void) +{ + preempt_count_add(HARDIRQ_OFFSET); +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + lockdep_hardirq_enter(); + + if (tick_nohz_full_cpu(smp_processor_id()) ||
(is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))) + tick_irq_enter(); + + account_hardirq_enter(current); +#endif +} + +static inline void __xint_exit_rcu(void) +{ +#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED + local_irq_disable(); +#else +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + lockdep_assert_irqs_disabled(); +#endif +#endif + +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + account_hardirq_exit(current); +#endif + preempt_count_sub(HARDIRQ_OFFSET); + if (!in_interrupt() && local_softirq_pending()) + invoke_softirq(); + +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + tick_irq_exit(); +#endif +} + +void xint_exit_rcu(void) +{ + __xint_exit_rcu(); + /* must be last! */ +#ifndef CONFIG_DEBUG_FEATURE_BYPASS + lockdep_hardirq_exit(); +#endif +} +#endif /* CONFIG_FAST_IRQ */ + /* * This function must run with irqs disabled! */
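Taken together, the proc interfaces above expose the runtime switches: writing a decimal syscall number to /proc/<pid>/xcall enables the fast syscall path for that syscall in the target task (a '!'-prefixed number clears it again), and writing 1 or 0 to /proc/irq/<N>/xint moves an interrupt onto or off the fast irq path. A minimal userspace sketch follows; it assumes a kernel built and booted as described above (xcall/xint command-line parameters set), and the syscall and IRQ numbers are illustrative assumptions, not part of the patch:

    /* xcall/xint usage sketch -- the chosen numbers are assumptions. */
    #include <stdio.h>
    #include <unistd.h>

    /* Write a short string to a procfs file; returns 0 on success. */
    static int write_str(const char *path, const char *val)
    {
            FILE *f = fopen(path, "w");

            if (!f)
                    return -1;
            if (fputs(val, f) == EOF) {
                    fclose(f);
                    return -1;
            }
            return fclose(f) == 0 ? 0 : -1;
    }

    int main(void)
    {
            char path[64];

            /* Enable the fast path for one syscall of this task; 172 is
             * __NR_getpid in the asm-generic table used by arm64 (check
             * your unistd.h). Writing "!172" would clear the bit again. */
            snprintf(path, sizeof(path), "/proc/%d/xcall", getpid());
            if (write_str(path, "172"))
                    perror("xcall");

            /* Mark a hypothetical latency-sensitive IRQ 45 as an xint;
             * writing "0" would turn it back into a normal irq. */
            if (write_str("/proc/irq/45/xint", "1"))
                    perror("xint");

            return 0;
    }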